Showing
8 changed files
with
59 additions
and
0 deletions
data-set-analysis/.gitkeep
0 → 100644
File mode changed
training-evaluation-data-sets/.gitkeep
0 → 100644
File mode changed
training-evaluation-data-sets/bin/.gitkeep
0 → 100644
File mode changed
#!/bin/python2.7
"""Flatten a tab-separated, one-token-per-line file into labeled sentences.

The input is read line by line and split on tabs.  The second column of
each row drives the conversion:

* an opening tag such as ``<Orgn>`` switches the current label to the tag
  name (``Orgn``);
* a closing tag such as ``</Orgn>`` switches the current label back to
  ``O``;
* the sentinel ``PGCGROWTHCONDITIONS`` ends the current output line;
* any other value is emitted as columns 2-4 joined with ``|``, followed by
  ``|<current label>`` and a single space.

Rows with fewer than two columns (for example blank lines) are ignored.
The current label is NOT reset when the sentinel is seen, so a tag left
open carries over into the following sentence.
NOTE(review): columns 2-4 are presumably word / lemma / POS -- confirm
against the tool that produces the input file.
"""
import re

# Single source of truth for the annotation tag names; both lookup tables
# below are derived from it so they cannot drift apart.
_TAG_NAMES = (
    'Air',
    'Gtype',
    'Gversion',
    'Med',
    'Orgn',
    'Phase',
    'Sample',
    'Serie',
    'Strain',
    'Substrain',
    'Supp',
    'Technique',
    'Temp',
    'Name',
    'OD',
    'Anti',
    'Agit',
    'Vess',
)

# Closing tag -> label used for tokens after the tag ('O', i.e. no label).
out_labels = dict(('</' + tag + '>', 'O') for tag in _TAG_NAMES)
# Opening tag -> label used for the tokens that follow it.
in_labels = dict(('<' + tag + '>', tag) for tag in _TAG_NAMES)

# Input: the "Contenido" (content) column of
# "/home/egaytan/Dropbox/PGC/data-sets/file_output/exit_file.txt"
inpath = '/home/egaytan/Dropbox/PGC/data-sets_1/content_colum_data_set.tsv.conll'
outpath = '/home/egaytan/Dropbox/PGC/data-sets_1/sentences_labeled_v1.tsv'

# Value of the second column that marks the end of a sentence.
_SENTENCE_END = "PGCGROWTHCONDITIONS"


def _labeled_chunks(lines, flag='O'):
    """Yield the text fragments to write for each input line.

    ``lines`` is any iterable of raw text lines; ``flag`` is the label in
    effect before the first line.  Tag rows produce no output, sentinel
    rows yield a newline, and every other row yields
    ``col2|col3|col4|label`` followed by a space.
    """
    for line in lines:
        # Drop the line terminator first: otherwise it stays glued to the
        # last column on short rows, corrupting the output and preventing
        # a two-column sentinel or tag row from being recognised.
        fields = line.rstrip('\r\n').split('\t')
        if len(fields) < 2:
            continue
        w = fields[1]
        if w in in_labels:
            flag = in_labels[w]
        elif w in out_labels:
            flag = out_labels[w]
        elif w == _SENTENCE_END:
            yield '\n'
        else:
            yield '|'.join(fields[1:4]) + '|' + flag + ' '


def main(in_path=inpath, out_path=outpath):
    """Convert ``in_path`` into the labeled-sentence file ``out_path``.

    Both paths default to the module-level ``inpath`` / ``outpath``.
    """
    # Open the input first so that a missing or unreadable input raises
    # before an existing output file is truncated.
    with open(in_path, 'r') as input_file:
        with open(out_path, 'w') as out:
            for chunk in _labeled_chunks(input_file):
                out.write(chunk)


if __name__ == '__main__':
    main()
... | \ No newline at end of file | ... | \ No newline at end of file |
File mode changed
This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.
This diff could not be displayed because it is too large.
-
Please register or login to post a comment