Estefani Gaytan Nunez

CoreNLP parsed files and words labeled

#!/bin/python2.7
"""Attach a condition label to every token of a tab-separated token file.

The input is read line by line.  Rows whose second column is an opening tag
such as ``<Med>`` switch the current label to the tag name; rows whose second
column is a closing tag such as ``</Med>`` switch it back to ``'O'``.  Tag
rows themselves are not written.  Every other row is written as
``col2|col3|col4|label`` followed by a single space, and a row whose second
column is ``PGCGROWTHCONDITIONS`` is written as a line break instead.

NOTE(review): the file header says the input is CoreNLP output, so columns
2-4 are presumably word, lemma and POS tag -- confirm against the data.
"""
import re

# Tag names recognised in the second column of the input.
_TAG_NAMES = (
    'Air', 'Gtype', 'Gversion', 'Med', 'Orgn', 'Phase', 'Sample', 'Serie',
    'Strain', 'Substrain', 'Supp', 'Technique', 'Temp', 'Name', 'OD', 'Anti',
    'Agit', 'Vess',
)

# Closing tag -> label in effect after it (always the outside label 'O').
out_labels = {'</%s>' % tag: 'O' for tag in _TAG_NAMES}

# Opening tag -> label in effect for the tokens that follow it.
in_labels = {'<%s>' % tag: tag for tag in _TAG_NAMES}

# Content column of "/home/egaytan/Dropbox/PGC/data-sets/file_output/exit_file.txt"
inpath = '/home/egaytan/Dropbox/PGC/data-sets_1/content_colum_data_set.tsv.conll'
outpath = '/home/egaytan/Dropbox/PGC/data-sets_1/sentences_labeled_v1.tsv'


def label_sentences(inpath, outpath):
    """Write the labeled version of the token file *inpath* to *outpath*.

    Args:
        inpath: path of the tab-separated input file; rows with fewer than
            two columns (e.g. blank lines) are ignored.
        outpath: path of the output file; it is created or truncated.

    The label starts as ``'O'`` and is carried across rows until the next
    opening or closing tag changes it; it is not reset by
    ``PGCGROWTHCONDITIONS`` rows.
    """
    flag = 'O'
    # The output is opened first so it is created (or truncated) even when the
    # input cannot be opened, as in the original script.
    with open(outpath, 'w') as out, open(inpath, 'r') as input_file:
        for line in input_file:
            # Drop the line terminator before splitting; otherwise it stays
            # glued to the last column and leaks into the output for rows
            # with fewer than five columns.
            fields = line.rstrip('\r\n').split('\t')
            if len(fields) < 2:
                continue
            w = fields[1]
            if w in in_labels:
                flag = in_labels[w]
            elif w in out_labels:
                flag = out_labels[w]
            elif w == "PGCGROWTHCONDITIONS":
                out.write('\n')
            else:
                out.write('|'.join(fields[1:4]) + '|' + flag + ' ')


if __name__ == '__main__':
    label_sentences(inpath, outpath)