Carlos-Francisco Méndez-Cruz

Merge remote-tracking branch 'origin/master'

#!/bin/python2.7
# Closing tags. Every one of them maps to the same label, 'O'.
out_labels = dict.fromkeys(
    (
        '</Air>',
        '</Gtype>',
        '</Gversion>',
        '</Med>',
        '</Orgn>',
        '</Phase>',
        '</Sample>',
        '</Serie>',
        '</Strain>',
        '</Substrain>',
        '</Supp>',
        '</Technique>',
        '</Temp>',
        '</Name>',
        '</OD>',
        '</Anti>',
        '</Agit>',
        '</Vess>',
    ),
    'O'
)
# Opening tags. Each '<Name>' key maps to the bare tag name 'Name'.
in_labels = {
    '<' + tag_name + '>': tag_name
    for tag_name in (
        'Air',
        'Gtype',
        'Gversion',
        'Med',
        'Orgn',
        'Phase',
        'Sample',
        'Serie',
        'Strain',
        'Substrain',
        'Supp',
        'Technique',
        'Temp',
        'Name',
        'OD',
        'Anti',
        'Agit',
        'Vess',
    )
}
import re
# Input note carried over from the original comment: the "Contenido"
# (content) column of
# "/home/egaytan/Dropbox/PGC/data-sets/file_output/exit_file.txt".
inpath = '/home/egaytan/Dropbox/PGC/data-sets_1/content_colum_data_set.tsv.conll'
outpath = '/home/egaytan/Dropbox/PGC/data-sets_1/sentences_labeled_v1.tsv'

# Running label appended to every token written. It starts as 'O' and is
# replaced each time a tag token from in_labels / out_labels is read.
flag = 'O'

# The output file is opened first, then the input, as in the original nesting.
with open(outpath, 'w') as out, open(inpath, 'r') as input_file:
    for line in input_file:
        # Split each row once. The line terminator is removed first so it
        # cannot leak into the last emitted field (which would break the
        # one-sentence-per-line output) or prevent a tag token in the last
        # column from matching the lookup tables.
        fields = line.rstrip('\r\n').split('\t')
        if len(fields) < 2:
            # Rows without a second column carry no token; skip them.
            continue

        w = fields[1]
        if w in out_labels:
            # Closing tag: switch the running label; the tag is not written.
            # Checked first so that out_labels wins if a key were in both
            # tables, matching the original assignment order.
            flag = out_labels[w]
        elif w in in_labels:
            # Opening tag: switch the running label; the tag is not written.
            flag = in_labels[w]
        elif w == "PGCGROWTHCONDITIONS":
            # Marker token: end the current output line.
            out.write('\n')
        else:
            # Ordinary token: write columns 1-3 plus the running label,
            # '|'-separated, followed by a space.
            out.write('|'.join(fields[1:4]) + '|' + flag + ' ')
\ No newline at end of file
This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.