Estefani Gaytan Nunez

Reads CoreNLP-parsed files and writes out their words with labels.

#!/bin/python2.7
# Closing tags; every one of them maps to the label 'O'.
out_labels = dict.fromkeys(
    (
        '</Air>', '</Gtype>', '</Gversion>', '</Med>', '</Orgn>', '</Phase>',
        '</Sample>', '</Serie>', '</Strain>', '</Substrain>', '</Supp>',
        '</Technique>', '</Temp>', '</Name>', '</OD>', '</Anti>', '</Agit>',
        '</Vess>'
    ),
    'O'
)
# Opening tags; each maps to the label name written between its angle brackets.
in_labels = {
    '<' + tag_name + '>': tag_name
    for tag_name in (
        'Air', 'Gtype', 'Gversion', 'Med', 'Orgn', 'Phase', 'Sample',
        'Serie', 'Strain', 'Substrain', 'Supp', 'Technique', 'Temp',
        'Name', 'OD', 'Anti', 'Agit', 'Vess'
    )
}
import re
# "Contenido" column of "/home/egaytan/Dropbox/PGC/data-sets/file_output/exit_file.txt"
inpath = '/home/egaytan/Dropbox/PGC/data-sets_1/content_colum_data_set.tsv.conll'
outpath = '/home/egaytan/Dropbox/PGC/data-sets_1/sentences_labeled_v1.tsv'

# Walk the tab-separated input row by row and write every ordinary token as
# "col1|col2|col3|LABEL " (columns are presumably word, lemma and POS -- confirm
# against the CoreNLP output settings).  LABEL is the running `flag`: an
# opening tag from `in_labels` switches it to that tag's label, a closing tag
# from `out_labels` switches it back to 'O'.  Tag rows themselves are never
# written.  A "PGCGROWTHCONDITIONS" token ends the current output line.
flag = 'O'
# The output file is opened first, as before, so it is created/truncated even
# when the input file turns out to be missing.
with open(outpath, 'w') as out, open(inpath, 'r') as input_file:
    for line in input_file:
        # Strip the line terminator before splitting; otherwise it stays glued
        # to the last column and leaks into the output (rows with four columns
        # or fewer) or prevents tag/marker matching (two-column rows).
        fields = line.rstrip('\r\n').split('\t')
        if len(fields) < 2:
            # No second column, hence no token on this row.
            continue
        w = fields[1]
        if w in out_labels:
            # Closing tag: subsequent tokens are labelled 'O' again.
            flag = out_labels[w]
        elif w in in_labels:
            # Opening tag: subsequent tokens take this tag's label.
            flag = in_labels[w]
        elif w == "PGCGROWTHCONDITIONS":
            # Marker token: terminate the current output line.
            out.write('\n')
        else:
            # Columns 1-3 joined by '|', then the running label; tokens on the
            # same output line are separated by a single space.
            out.write('|'.join(fields[1:4]) + '|' + flag + ' ')
\ No newline at end of file