Showing
1 changed file
with
59 additions
and
0 deletions
#!/bin/python2.7
"""Attach the enclosing annotation tag to every token of a CoNLL-style file.

The input is read line by line as tab-separated columns.  Column 1 (the
second column) is treated as the token.  Tokens of the form ``<Tag>`` /
``</Tag>`` are not written; they switch the current label to ``Tag`` or back
to ``'O'``.  Every other token is written as columns 1-3 joined with ``|``
followed by ``|<label>`` and a space.  The marker token
``PGCGROWTHCONDITIONS`` is written as a newline instead.
"""
import re

# Names of the annotation tags recognised in the token column.
_TAG_NAMES = (
    'Air', 'Gtype', 'Gversion', 'Med', 'Orgn', 'Phase', 'Sample', 'Serie',
    'Strain', 'Substrain', 'Supp', 'Technique', 'Temp', 'Name', 'OD', 'Anti',
    'Agit', 'Vess',
)

# Closing tag -> label in effect after it (always the "outside" label 'O').
out_labels = {'</%s>' % name: 'O' for name in _TAG_NAMES}
# Opening tag -> label in effect after it (the tag name itself).
in_labels = {'<%s>' % name: name for name in _TAG_NAMES}

# Token that is replaced by a line break in the output.
_BREAK_TOKEN = "PGCGROWTHCONDITIONS"

# Content column of "/home/egaytan/Dropbox/PGC/data-sets/file_output/exit_file.txt"
inpath = '/home/egaytan/Dropbox/PGC/data-sets_1/content_colum_data_set.tsv.conll'
outpath = '/home/egaytan/Dropbox/PGC/data-sets_1/sentences_labeled_v1.tsv'


def label_lines(lines):
    """Yield the output chunks for an iterable of tab-separated lines.

    ``lines`` is any iterable of strings (an open file works).  Lines with
    fewer than two tab-separated columns are skipped.  The current label
    starts as ``'O'`` and persists across lines until another tag token
    changes it.

    Each yielded string is either ``'\\n'`` (for the break token) or
    ``'<col1>|<col2>|<col3>|<label> '``; rows with fewer than four columns
    simply contribute fewer ``|``-joined fields.
    """
    flag = 'O'
    for line in lines:
        # Drop the line terminator first; otherwise it stays glued to the
        # last column and ends up inside the output record (or prevents a
        # tag in the last column from matching the label tables).
        fields = line.rstrip('\r\n').split('\t')
        if len(fields) < 2:
            continue
        token = fields[1]
        if token in in_labels:
            flag = in_labels[token]
        elif token in out_labels:
            flag = out_labels[token]
        elif token == _BREAK_TOKEN:
            yield '\n'
        else:
            yield '|'.join(fields[1:4]) + '|' + flag + ' '


def main():
    """Label ``inpath`` and write the result to ``outpath``."""
    # The output file is opened (and truncated) before the input is opened,
    # matching the order the script has always used.
    with open(outpath, 'w') as out:
        with open(inpath, 'r') as input_file:
            out.writelines(label_lines(input_file))


if __name__ == '__main__':
    main()
... | \ No newline at end of file | ... | \ No newline at end of file |
-
Please register or login to post a comment