Carlos-Francisco Méndez-Cruz

Merge remote-tracking branch 'origin/master'

#!/bin/python2.7
"""Flatten a tagged, tab-separated token file into labelled sentences.

Each input row is split on tabs; the second column is the token.  Opening
tags such as ``<Med>`` switch the current label to the tag name, closing
tags such as ``</Med>`` switch it back to ``'O'``, and neither is written
out.  The token ``PGCGROWTHCONDITIONS`` is written as a line break.  Every
other token is written as columns 2-4 joined by ``|``, followed by ``|``,
the current label and a single space.
"""
import re  # not used by this script; kept from the original file

# Tag names recognised in the token column.
LABEL_NAMES = (
    'Air', 'Gtype', 'Gversion', 'Med', 'Orgn', 'Phase', 'Sample', 'Serie',
    'Strain', 'Substrain', 'Supp', 'Technique', 'Temp', 'Name', 'OD',
    'Anti', 'Agit', 'Vess',
)

# Closing tag -> label applied to the tokens that follow it (always 'O').
out_labels = {'</%s>' % name: 'O' for name in LABEL_NAMES}
# Opening tag -> label applied to the tokens that follow it.
in_labels = {'<%s>' % name: name for name in LABEL_NAMES}

# Token that is replaced by a line break in the output.
SENTENCE_BREAK = "PGCGROWTHCONDITIONS"

# Original author's note: "Contenido" (content) column of
# "/home/egaytan/Dropbox/PGC/data-sets/file_output/exit_file.txt"
inpath = '/home/egaytan/Dropbox/PGC/data-sets_1/content_colum_data_set.tsv.conll'
outpath = '/home/egaytan/Dropbox/PGC/data-sets_1/sentences_labeled_v1.tsv'


def label_tokens(lines, flag='O'):
    """Yield the output fragments for an iterable of input rows.

    Args:
        lines: iterable of tab-separated rows, with or without a trailing
            line terminator.
        flag: label in effect before the first tag is seen.

    Yields:
        ``'\\n'`` for every ``SENTENCE_BREAK`` token, and
        ``'col2|col3|col4|<label> '`` for every ordinary token.  Rows with
        fewer than two columns, and the tag rows themselves, yield nothing.

    The label is not reset at a sentence break; it only changes when a tag
    row is read.
    """
    for line in lines:
        # Drop the line terminator first so it can neither leak into the
        # last column of a short row nor hide a tag in a two-column row.
        columns = line.rstrip('\r\n').split('\t')
        if len(columns) < 2:
            continue
        word = columns[1]
        if word in in_labels:
            flag = in_labels[word]
        elif word in out_labels:
            flag = out_labels[word]
        elif word == SENTENCE_BREAK:
            yield '\n'
        else:
            yield '|'.join(columns[1:4]) + '|' + flag + ' '


def write_labeled_sentences(source_path, target_path):
    """Read rows from ``source_path`` and write the labelled text to ``target_path``.

    The input is opened first so that an unreadable input does not leave a
    truncated output file behind.
    """
    with open(source_path, 'r') as input_file:
        with open(target_path, 'w') as out:
            for fragment in label_tokens(input_file):
                out.write(fragment)


if __name__ == '__main__':
    write_labeled_sentences(inpath, outpath)
...\ No newline at end of file ...\ No newline at end of file
This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.