Estefani Gaytan Nunez

update

Showing 33 changed files with 138 additions and 837 deletions
1 -#!/bin/python3
2 -import os
3 -from itertools import chain
4 -from optparse import OptionParser
5 -from time import time
6 -from collections import Counter
7 -import re
8 -
9 -import nltk
10 -import sklearn
11 -import scipy.stats
12 -import sys
13 -
14 -from sklearn.externals import joblib
15 -from sklearn.metrics import make_scorer
16 -from sklearn.cross_validation import cross_val_score
17 -from sklearn.grid_search import RandomizedSearchCV
18 -
19 -import sklearn_crfsuite
20 -from sklearn_crfsuite import scorers
21 -from sklearn_crfsuite import metrics
22 -
23 -from nltk.corpus import stopwords
24 -import random
25 -
26 -
27 -# Objective
# Labeled tokens separated by '|'; sentences from CoreNLP tagging are split 70/30 into training and test files
29 -#
30 -# Input parameters
31 -# --inputPath=PATH Path of inputfile
32 -# --outputPath=PATH Path to place output files
33 -# --trainingFile=testFile Output training data set
34 -# --testFile=testFile Output test data set
35 -#
36 -# Output
37 -# training and test data set
38 -#
39 -# Examples
40 -# python label-split_training_test_v1.py
41 -# --inputPath /home/egaytan/GROWTH-CONDITIONS-GEO-EXTRACTION/CoreNLP/output/
42 -# --inputFile sentences.tsv_pakal_.conll
43 -# --trainingFile training-data-set-70.txt
44 -# --testFile test-data-set-30.txt
45 -# --outputPath /home/egaytan/GROWTH-CONDITIONS-GEO-EXTRACTION/CRF/data-sets
46 -#
47 -#
48 -# python label-split_training_test_v1.py --inputPath /home/egaytan/GROWTH-CONDITIONS-GEO-EXTRACTION/CoreNLP/output/ --inputFile sentences.tsv_pakal_.conll --trainingFile training-data-set-70.txt --testFile test-data-set-30.txt --outputPath /home/egaytan/GROWTH-CONDITIONS-GEO-EXTRACTION/CRF/data-sets
49 -
50 -
51 -##########################################
52 -# MAIN PROGRAM #
53 -##########################################
54 -
if __name__ == "__main__":
    # ------------------------------------------------------------------
    # Command-line options
    # ------------------------------------------------------------------
    parser = OptionParser()
    parser.add_option("--inputPath", dest="inputPath",
                      help="Path of output from CoreNLP", metavar="PATH")
    parser.add_option("--outputPath", dest="outputPath",
                      help="Output path to place output files",
                      metavar="PATH")
    parser.add_option("--inputFile", dest="inputFile",
                      help="File with CoreNLP-tagging sentences", metavar="FILE")
    parser.add_option("--trainingFile", dest="trainingFile",
                      help="File with training data set", metavar="FILE")
    parser.add_option("--testFile", dest="testFile",
                      help="File with test data set", metavar="FILE")

    (options, args) = parser.parse_args()
    if args:
        # parser.error() prints the message and exits with status 2 by
        # itself; the unreachable sys.exit(1) that used to follow it has
        # been removed, and the message is now meaningful English.
        parser.error("Unexpected positional arguments: " + str(args))

    print('-------------------------------- PARAMETERS --------------------------------')
    print("Path of CoreNLP output: " + str(options.inputPath))
    print("File with CoreNLP-tagging sentences: " + str(options.inputFile))
    print("Path of training data set: " + str(options.outputPath))
    print("File with training data set: " + str(options.trainingFile))
    print("Path of test data set: " + str(options.outputPath))
    print("File with test data set: " + str(options.testFile))
    print('-------------------------------- PROCESSING --------------------------------')

    # Opening tags: switch the current label ("flag") to the entity name.
    in_labels = {
        '<Gtype>': 'Gtype',
        '<Gversion>': 'Gversion',
        '<Med>': 'Med',
        '<Phase>': 'Phase',
        '<Sample>': 'Sample',
        '<Serie>': 'Serie',
        '<Substrain>': 'Substrain',
        '<Supp>': 'Supp',
        '<Technique>': 'Technique',
        '<Temp>': 'Temp',
        '<OD>': 'OD',
        '<Anti>': 'Anti',
        '<Agit>': 'Agit',
        '<Vess>': 'Vess'
    }
    # Closing (or ignored) tags: switch the label back to 'O' (outside).
    out_labels = {
        '</Air>': 'O',
        '</Gtype>': 'O',
        '</Gversion>': 'O',
        '</Med>': 'O',
        '</Phase>': 'O',
        '</Sample>': 'O',
        '</Serie>': 'O',
        '</Strain>': 'O',
        '<Strain>': 'O',
        '</Substrain>': 'O',
        '</Supp>': 'O',
        '</Technique>': 'O',
        '</Temp>': 'O',
        '</OD>': 'O',
        '</Anti>': 'O',
        '</Agit>': 'O',
        '<Name>': 'O',
        '</Name>': 'O',
        '<Orgn>': 'O',
        '</Orgn>': 'O',
        '</Vess>': 'O'}

    flag = 'O'      # current label; 'O' means "outside any entity"
    n = 0           # sentence counter
    lista = []      # collected labelled sentences
    sentence = ''   # sentence currently being built
    with open(os.path.join(options.inputPath, options.inputFile), "r") as input_file:
        for line in input_file:
            columns = line.split('\t')
            if len(columns) <= 1:
                continue
            w = columns[1]
            if w in in_labels:
                flag = in_labels[w]
            elif w in out_labels:
                flag = out_labels[w]
            elif w == "PGCGROWTHCONDITIONS":
                # Sentence delimiter: store the finished sentence.
                lista.append(sentence)
                sentence = ''
                n += 1
            else:
                # Append a word|lemma|POS|label token to the sentence.
                sentence = sentence + ' ' + ('|'.join(columns[1:4]) + '|' + flag + ' ')

    print("Number of sentences: " + str(n))

    # 70/30 random split.  NOTE: the original used range(len(lista)-1),
    # which silently excluded the last sentence from BOTH sets.
    trainingIndex = random.sample(range(len(lista)), int(len(lista) * .70))
    testIndex = [i for i in range(len(lista)) if i not in trainingIndex]
    print(len(trainingIndex))
    print(len(testIndex))

    with open(os.path.join(options.outputPath, options.trainingFile), "w") as oFile:
        oFile.write('\n'.join(lista[i] for i in trainingIndex))

    with open(os.path.join(options.outputPath, options.testFile), "w") as oFile:
        oFile.write('\n'.join(lista[i] for i in testIndex))

    print("==================================END===================================")
1 -#!/bin/python3
2 -from optparse import OptionParser
3 -import re
4 -import os
5 -import random
6 -
7 -
8 -# Objective
# Labeled tokens separated by '|'; sentences from CoreNLP tagging are split 70/30 into training and test files
10 -# make data sets using only sentences with at least one true-tag
11 -#
12 -# Input parameters
13 -# --inputPath=PATH Path of inputfile
14 -# --outputPath=PATH Path to place output files
15 -# --trainingFile=testFile Output training data set
16 -# --testFile=testFile Output test data set
17 -#
18 -# Output
19 -# training and test data set
20 -#
21 -# Examples
22 -# python label-split_training_test_v2.py
23 -# --inputPath /home/egaytan/GROWTH-CONDITIONS-GEO-EXTRACTION/CoreNLP/output/
24 -# --inputFile sentences.tsv_pakal_.conll
25 -# --trainingFile training-data-set-70.txt
26 -# --testFile test-data-set-30.txt
27 -# --outputPath /home/egaytan/GROWTH-CONDITIONS-GEO-EXTRACTION/CRF/data-sets
28 -#
29 -#
30 -# python label-split_training_test_v2.py --inputPath /home/egaytan/GROWTH-CONDITIONS-GEO-EXTRACTION/CoreNLP/output/ --inputFile raw-metadata-senteneces.txt.conll --trainingFile training-data-set-70_v2.txt --testFile test-data-set-30_v2.txt --outputPath /home/egaytan/GROWTH-CONDITIONS-GEO-EXTRACTION/CRF/data-sets
31 -
32 -
33 -##########################################
34 -# MAIN PROGRAM #
35 -##########################################
36 -
if __name__ == "__main__":
    # ------------------------------------------------------------------
    # Command-line options
    # ------------------------------------------------------------------
    parser = OptionParser()
    parser.add_option("--inputPath", dest="inputPath",
                      help="Path of output from CoreNLP", metavar="PATH")
    parser.add_option("--outputPath", dest="outputPath",
                      help="Output path to place output files",
                      metavar="PATH")
    parser.add_option("--inputFile", dest="inputFile",
                      help="File with CoreNLP-tagging sentences", metavar="FILE")
    parser.add_option("--trainingFile", dest="trainingFile",
                      help="File with training data set", metavar="FILE")
    parser.add_option("--testFile", dest="testFile",
                      help="File with test data set", metavar="FILE")

    (options, args) = parser.parse_args()
    if args:
        # parser.error() prints the message and exits with status 2.
        # The original followed it with sys.exit(1) although this module
        # never imports sys (a latent NameError); that line is removed.
        parser.error("Unexpected positional arguments: " + str(args))

    print('-------------------------------- PARAMETERS --------------------------------')
    print("Path of CoreNLP output: " + str(options.inputPath))
    print("File with CoreNLP-tagging sentences: " + str(options.inputFile))
    print("Path of training data set: " + str(options.outputPath))
    print("File with training data set: " + str(options.trainingFile))
    print("Path of test data set: " + str(options.outputPath))
    print("File with test data set: " + str(options.testFile))
    print('-------------------------------- PROCESSING --------------------------------')

    # Opening tags: switch the current label ("flag") to the entity name.
    in_labels = {
        '<Gtype>': 'Gtype',
        '<Gversion>': 'Gversion',
        '<Med>': 'Med',
        '<Phase>': 'Phase',
        '<Supp>': 'Supp',
        '<Technique>': 'Technique',
        '<Temp>': 'Temp',
        '<OD>': 'OD',
        '<Anti>': 'Anti'
    }
    # Closing (or ignored) tags: switch the label back to 'O' (outside).
    out_labels = {
        '<Air>': 'O',
        '</Air>': 'O',
        '</Gtype>': 'O',
        '</Gversion>': 'O',
        '</Med>': 'O',
        '</Phase>': 'O',
        '<Sample>': 'O',
        '</Sample>': 'O',
        '<Serie>': 'O',
        '</Serie>': 'O',
        '<Strain>': 'O',
        '</Strain>': 'O',
        '<Substrain>': 'O',
        '</Substrain>': 'O',
        '</Supp>': 'O',
        '</Technique>': 'O',
        '</Temp>': 'O',
        '</OD>': 'O',
        '<Agit>': 'O',
        '</Agit>': 'O',
        '<Name>': 'O',
        '</Name>': 'O',
        '<Orgn>': 'O',
        '</Orgn>': 'O',
        '</Anti>': 'O',
        '<Vess>': 'O',
        '</Vess>': 'O'}

    flag = 'O'      # current label; 'O' means "outside any entity"
    lista = []      # collected labelled sentences (with >= 1 true tag)
    sentence = ''   # sentence currently being built
    with open(os.path.join(options.inputPath, options.inputFile), "r") as input_file:
        for line in input_file:
            columns = line.split('\t')
            if len(columns) <= 1:
                continue
            w = columns[1]
            if w in in_labels:
                flag = in_labels[w]
            elif w in out_labels:
                flag = out_labels[w]
            elif w == "PGCGROWTHCONDITIONS":
                # Sentence delimiter: keep the sentence only if it
                # carries at least one true (non-'O') tag.
                words = sentence.split(' ')
                tags = [tok for tok in words if tok.split('|')[-1] in in_labels.values()]
                if len(tags) > 0:
                    lista.append(sentence)
                sentence = ''
            else:
                # Append a word|lemma|POS|label token to the sentence.
                sentence = sentence + ' ' + ('|'.join(columns[1:4]) + '|' + flag + ' ')

    print("Number of sentences: " + str(len(lista)))

    # 70/30 random split.  NOTE: the original used range(len(lista)-1),
    # which silently excluded the last sentence from BOTH sets.
    trainingIndex = random.sample(range(len(lista)), int(len(lista) * .70))
    testIndex = [i for i in range(len(lista)) if i not in trainingIndex]

    with open(os.path.join(options.outputPath, options.trainingFile), "w") as oFile:
        oFile.write('\n'.join(lista[i] for i in trainingIndex))

    with open(os.path.join(options.outputPath, options.testFile), "w") as oFile:
        oFile.write('\n'.join(lista[i] for i in testIndex))

    print("==================================END===================================")
1 -#!/bin/python3
2 from optparse import OptionParser 1 from optparse import OptionParser
3 import re 2 import re
4 import os 3 import os
...@@ -7,7 +6,6 @@ import random ...@@ -7,7 +6,6 @@ import random
7 6
8 # Objective 7 # Objective
9 # Labaled separated by '|' and split 70/30 sentences on training and tets files from CoreNLP-tagging 8 # Labaled separated by '|' and split 70/30 sentences on training and tets files from CoreNLP-tagging
10 -# make data sets using only sentences with at least one true-tag
11 # 9 #
12 # Input parameters 10 # Input parameters
13 # --inputPath=PATH Path of inputfile 11 # --inputPath=PATH Path of inputfile
...@@ -19,15 +17,15 @@ import random ...@@ -19,15 +17,15 @@ import random
19 # training and test data set 17 # training and test data set
20 # 18 #
21 # Examples 19 # Examples
22 -# python label-split_training_test_v2.py 20 +# python label-split_training_test_v1.py
23 -# --inputPath /home/egaytan/GROWTH-CONDITIONS-GEO-EXTRACTION/CoreNLP/output/ 21 +# --inputPath /home/egaytan/automatic-extraction-growth-conditions/CoreNLP/output/
24 -# --inputFile sentences.tsv_pakal_.conll 22 +# --inputFile raw-metadata-senteneces_v2.txt.conll
25 -# --trainingFile training-data-set-70.txt 23 +# --trainingFile training-data-set-70_v4.txt
26 -# --testFile test-data-set-30.txt 24 +# --testFile test-data-set-30_v4.txt
27 -# --outputPath /home/egaytan/GROWTH-CONDITIONS-GEO-EXTRACTION/CRF/data-sets 25 +# --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets
28 # 26 #
29 # 27 #
30 -# python label-split_training_test_v2.py --inputPath /home/egaytan/GROWTH-CONDITIONS-GEO-EXTRACTION/CoreNLP/output/ --inputFile raw-metadata-senteneces.txt.conll --trainingFile training-data-set-70_v2.txt --testFile test-data-set-30_v2.txt --outputPath /home/egaytan/GROWTH-CONDITIONS-GEO-EXTRACTION/CRF/data-sets 28 +# python label-split_training_test_v1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CoreNLP/output/ --inputFile raw-metadata-senteneces_v2.txt.conll --trainingFile training-data-set-70._v4txt --testFile test-data-set-30_v4.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets
31 29
32 30
33 ########################################## 31 ##########################################
...@@ -67,78 +65,79 @@ if __name__ == "__main__": ...@@ -67,78 +65,79 @@ if __name__ == "__main__":
67 '<Gtype>': 'Gtype', 65 '<Gtype>': 'Gtype',
68 '<Gversion>': 'Gversion', 66 '<Gversion>': 'Gversion',
69 '<Med>': 'Med', 67 '<Med>': 'Med',
70 - '<Phase>': 'Phase', 68 + '<Phase>': 'Phase',
69 + '<Substrain>': 'Substrain',
71 '<Supp>': 'Supp', 70 '<Supp>': 'Supp',
71 + '<Strain>': 'Strain',
72 '<Technique>': 'Technique', 72 '<Technique>': 'Technique',
73 '<Temp>': 'Temp', 73 '<Temp>': 'Temp',
74 '<OD>': 'OD', 74 '<OD>': 'OD',
75 '<Anti>': 'Anti', 75 '<Anti>': 'Anti',
76 '<Agit>': 'Agit', 76 '<Agit>': 'Agit',
77 - '<Vess>': 'Vess' 77 + '<Air>': 'Air',
78 + '<Vess>': 'Vess',
79 + '<pH>': 'pH'
78 } 80 }
79 ## End of tagging 81 ## End of tagging
80 out_labels = { 82 out_labels = {
81 - '<Air>': 'O',
82 - '</Air>': 'O',
83 '</Gtype>': 'O', 83 '</Gtype>': 'O',
84 '</Gversion>': 'O', 84 '</Gversion>': 'O',
85 '</Med>': 'O', 85 '</Med>': 'O',
86 '</Phase>': 'O', 86 '</Phase>': 'O',
87 - '<Sample>': 'O',
88 - '</Sample>': 'O',
89 - '<Serie>': 'O',
90 - '</Serie>': 'O',
91 - '<Strain>': 'O',
92 - '</Strain>': 'O',
93 - '<Substrain>': 'O',
94 '</Substrain>': 'O', 87 '</Substrain>': 'O',
95 '</Supp>': 'O', 88 '</Supp>': 'O',
89 + '</Strain>': 'O',
96 '</Technique>': 'O', 90 '</Technique>': 'O',
97 '</Temp>': 'O', 91 '</Temp>': 'O',
98 '</OD>': 'O', 92 '</OD>': 'O',
99 '</Anti>': 'O', 93 '</Anti>': 'O',
100 '</Agit>': 'O', 94 '</Agit>': 'O',
101 - '<Name>': 'O', 95 + '</Air>': 'O',
102 - '</Name>': 'O', 96 + '</Vess>': 'O',
97 + '</pH>': 'O'}
98 + old_labels = {
103 '<Orgn>': 'O', 99 '<Orgn>': 'O',
104 - '</Orgn>': 'O', 100 + '</Orgn>': 'O'
105 - '</Vess>': 'O'} 101 + }
106 102
107 # Other label 103 # Other label
108 - flag = 'O' 104 + flag = 'O'
109 - # sentences counter
110 - n=0
111 lista = [] 105 lista = []
112 #First sentence 106 #First sentence
113 sentence = '' 107 sentence = ''
108 + n = 0
114 with open(os.path.join(options.inputPath, options.inputFile), "r") as input_file: 109 with open(os.path.join(options.inputPath, options.inputFile), "r") as input_file:
115 for line in input_file: 110 for line in input_file:
116 if len(line.split('\t')) > 1: 111 if len(line.split('\t')) > 1:
117 - w = line.split('\t')[1] 112 + w = line.split('\t')[1]
118 if w in in_labels or w in out_labels: 113 if w in in_labels or w in out_labels:
119 - #Tagging 114 + #Tagging
120 - if w in in_labels.keys(): flag = in_labels[w] 115 + if w in in_labels.keys(): flag = in_labels[w]
121 - if w in out_labels: flag = out_labels[w] 116 + if w in out_labels: flag = out_labels[w]
122 - else: 117 + else:
123 if w == "PGCGROWTHCONDITIONS": 118 if w == "PGCGROWTHCONDITIONS":
124 - words = sentence.split(' ') 119 + n=n+1
125 - tags = [tag for tag in words if word.split('|')[-1] in in_labels.values() ] 120 + words = sentence.split(' ')
126 - #At least one true-tag on sentence 121 + #End of sentence
127 - if len(tags)> 0: 122 + tags = [tag for tag in words if tag.split('|')[-1] in in_labels.values() ]
128 - lista.append(sentence) 123 + #At least one true-tag on sentence
129 - #New setence 124 + if len(tags)> 0:
130 - sentence = '' 125 + lista.append(sentence)
131 - n=n+1 126 + #New setence
132 - else: 127 + sentence = ''
133 - #Building and save tagging sentence 128 + elif w not in old_labels.keys():
129 + #Building and save tagging sentence
134 sentence = sentence + ' ' + ('|'.join(line.split('\t')[1:4])+'|'+flag+' ') 130 sentence = sentence + ' ' + ('|'.join(line.split('\t')[1:4])+'|'+flag+' ')
135 131
136 - print("Number of sentences: " + str(n) + str(len(lista)+1)) 132 + print("Number of sentences with at least one tag: " + str(len(lista)))
133 + print("Number of sentences from CoreNLP: " + str(n))
137 134
138 135
139 # Split 70 30 training and test sentences 136 # Split 70 30 training and test sentences
140 - trainingIndex = random.sample(range(len(lista)-1), int(len(lista)*.70)) 137 + trainingIndex = random.sample(range(len(lista)), int(len(lista)*.70))
141 - testIndex = [n for n in range(len(lista)-1) if n not in trainingIndex] 138 + testIndex = [n for n in range(len(lista)) if n not in trainingIndex]
139 + print("Number of sentences for training: " + str(len(trainingIndex)))
140 + print("Number of sentences for test: " + str(len(testIndex)))
142 141
143 with open(os.path.join(options.outputPath, options.trainingFile), "w") as oFile: 142 with open(os.path.join(options.outputPath, options.trainingFile), "w") as oFile:
144 Data = [lista[i] for i in trainingIndex] 143 Data = [lista[i] for i in trainingIndex]
......
1 -#!/bin/python3
2 -import os
3 -from itertools import chain
4 -from optparse import OptionParser
5 -from time import time
6 -from collections import Counter
7 -import re
8 -
9 -import nltk
10 -import sklearn
11 -import scipy.stats
12 -import sys
13 -
14 -from sklearn.externals import joblib
15 -from sklearn.metrics import make_scorer
16 -from sklearn.cross_validation import cross_val_score
17 -from sklearn.grid_search import RandomizedSearchCV
18 -
19 -import sklearn_crfsuite
20 -from sklearn_crfsuite import scorers
21 -from sklearn_crfsuite import metrics
22 -
23 -from nltk.corpus import stopwords
24 -
25 -import random
26 -
27 -
28 -# Objective
29 -# Labaled separated by '|' and split 70/30 sentences on training and tets files from CoreNLP-tagging
30 -#
31 -# Input parameters
32 -# --inputPath=PATH Path of inputfile
33 -# --outputPath=PATH Path to place output files
34 -# --trainingFile=testFile Output training data set
35 -# --testFile=testFile Output test data set
36 -#
37 -# Output
38 -# training and test data set
39 -#
40 -# Examples
41 -# python label-split_training_test_v1.py
42 -# --inputPath /home/egaytan/GROWTH-CONDITIONS-GEO-EXTRACTION/CoreNLP/output/
43 -# --inputFile sentences.tsv_pakal_.conll
44 -# --trainingFile training-data-set-70.txt
45 -# --testFile test-data-set-30.txt
46 -# --outputPath /home/egaytan/GROWTH-CONDITIONS-GEO-EXTRACTION/CRF/data-sets
47 -#
48 -#
49 -# python label-split_training_test_v1.py --inputPath /home/egaytan/GROWTH-CONDITIONS-GEO-EXTRACTION/CoreNLP/output/ --inputFile sentences.tsv_pakal_.conll --trainingFile training-data-set-70.txt --testFile test-data-set-30.txt --outputPath /home/egaytan/GROWTH-CONDITIONS-GEO-EXTRACTION/CRF/data-sets
50 -
51 -
52 -##########################################
53 -# MAIN PROGRAM #
54 -##########################################
55 -
if __name__ == "__main__":
    # ------------------------------------------------------------------
    # Command-line options
    # ------------------------------------------------------------------
    parser = OptionParser()
    parser.add_option("--inputPath", dest="inputPath",
                      help="Path of output from CoreNLP", metavar="PATH")
    parser.add_option("--outputPath", dest="outputPath",
                      help="Output path to place output files",
                      metavar="PATH")
    parser.add_option("--inputFile", dest="inputFile",
                      help="File with CoreNLP-tagging sentences", metavar="FILE")
    parser.add_option("--trainingFile", dest="trainingFile",
                      help="File with training data set", metavar="FILE")
    parser.add_option("--testFile", dest="testFile",
                      help="File with test data set", metavar="FILE")

    (options, args) = parser.parse_args()
    if args:
        # parser.error() prints the message and exits with status 2 by
        # itself; the unreachable sys.exit(1) that used to follow it has
        # been removed, and the message is now meaningful English.
        parser.error("Unexpected positional arguments: " + str(args))

    print('-------------------------------- PARAMETERS --------------------------------')
    print("Path of CoreNLP output: " + str(options.inputPath))
    print("File with CoreNLP-tagging sentences: " + str(options.inputFile))
    print("Path of training data set: " + str(options.outputPath))
    print("File with training data set: " + str(options.trainingFile))
    print("Path of test data set: " + str(options.outputPath))
    print("File with test data set: " + str(options.testFile))
    print('-------------------------------- PROCESSING --------------------------------')

    # Opening tags: switch the current label ("flag") to the entity name.
    in_labels = {
        '<Gtype>': 'Gtype',
        '<Gversion>': 'Gversion',
        '<Med>': 'Med',
        '<Phase>': 'Phase',
        '<Sample>': 'Sample',
        '<Serie>': 'Serie',
        '<Substrain>': 'Substrain',
        '<Supp>': 'Supp',
        '<Technique>': 'Technique',
        '<Temp>': 'Temp',
        '<OD>': 'OD',
        '<Anti>': 'Anti',
        '<Agit>': 'Agit',
        '<Vess>': 'Vess'
    }
    # Closing (or ignored) tags: switch the label back to 'O' (outside).
    out_labels = {
        '</Air>': 'O',
        '</Gtype>': 'O',
        '</Gversion>': 'O',
        '</Med>': 'O',
        '</Phase>': 'O',
        '</Sample>': 'O',
        '</Serie>': 'O',
        '</Strain>': 'O',
        '<Strain>': 'O',
        '</Substrain>': 'O',
        '</Supp>': 'O',
        '</Technique>': 'O',
        '</Temp>': 'O',
        '</OD>': 'O',
        '</Anti>': 'O',
        '</Agit>': 'O',
        '<Name>': 'O',
        '</Name>': 'O',
        '<Orgn>': 'O',
        '</Orgn>': 'O',
        '</Vess>': 'O'}

    flag = 'O'      # current label; 'O' means "outside any entity"
    n = 0           # sentence counter
    lista = []      # collected labelled sentences
    sentence = ''   # sentence currently being built
    with open(os.path.join(options.inputPath, options.inputFile), "r") as input_file:
        for line in input_file:
            columns = line.split('\t')
            if len(columns) <= 1:
                continue
            w = columns[1]
            if w in in_labels:
                flag = in_labels[w]
            elif w in out_labels:
                flag = out_labels[w]
            elif w == "PGCGROWTHCONDITIONS":
                # Sentence delimiter: store the finished sentence.
                lista.append(sentence)
                sentence = ''
                n += 1
            else:
                # Append a word|lemma|POS|label token to the sentence.
                sentence = sentence + ' ' + ('|'.join(columns[1:4]) + '|' + flag + ' ')

    print("Number of sentences: " + str(n))
    print('\n'.join(lista))
    # NOTE(review): in this debugging version the 70/30 split and the
    # training/test file writing are intentionally disabled; the script
    # only prints the labelled sentences for inspection.
This diff is collapsed. Click to expand it.
...@@ -32,7 +32,7 @@ from nltk.corpus import stopwords ...@@ -32,7 +32,7 @@ from nltk.corpus import stopwords
32 # --trainingFile File with training data set 32 # --trainingFile File with training data set
33 # --testFile File with test data set 33 # --testFile File with test data set
34 # --outputPath=PATH Output path to place output files 34 # --outputPath=PATH Output path to place output files
35 -# --reportFile Report Fileneme 35 +# --version Version Report
36 36
37 # Output 37 # Output
38 # 1) Best model 38 # 1) Best model
...@@ -43,31 +43,54 @@ from nltk.corpus import stopwords ...@@ -43,31 +43,54 @@ from nltk.corpus import stopwords
43 # --trainingFile training-data-set-70.txt 43 # --trainingFile training-data-set-70.txt
44 # --testFile test-data-set-30.txt 44 # --testFile test-data-set-30.txt
45 # --outputPath /home/egaytan/GROWTH-CONDITIONS-GEO-EXTRACTION/CRF/ 45 # --outputPath /home/egaytan/GROWTH-CONDITIONS-GEO-EXTRACTION/CRF/
46 -# --reportFile report_1 46 +# --version _v2
47 -# python3.4 training-validation_v5.py --inputPatTH /home/egaytan/GROWTH-CONDITIONS-GEO-EXTRACTION/CRF/data-sets --trainingFile training-data-set-70.txt --testFile test-data-set-30.txt --outputPath /home/egaytan/GROWTH-CONDITIONS-GEO-EXTRACTION/CRF/ 47 +# python3 training_validation_v7.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70_v4.txt --testFile test-data-set-30_v4.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --version _v1
48 48
49 ################################# 49 #################################
50 # FUNCTIONS # 50 # FUNCTIONS #
51 ################################# 51 #################################
52 52
def isGreek(word):
    """Return True iff *word* is exactly one Greek letter (either case)."""
    greek_letters = set('ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ'
                        'αβγδεζηθικλμνξοπρςστυφχψω')
    return word in greek_letters
61 +
def hNumber(word):
    """Return True iff *word* contains at least one decimal digit."""
    return any(ch.isdigit() for ch in word)
67 +
def symb(word):
    """Return True iff *word* contains at least one character that is
    neither a digit nor a letter (i.e. a punctuation/symbol character).

    Bug fixed: the original incremented its counter once for
    ``l.isalpha()`` and again for ``isGreek(l)`` on the same character —
    but Greek letters are already alphabetic in Unicode, so they were
    counted twice and the count could exceed ``len(word)``, masking real
    symbols in words such as ``'α!'``.  ``str.isalpha()`` alone covers
    Greek, so no separate Greek check is needed.
    """
    return any(not (ch.isdigit() or ch.isalpha()) for ch in word)
79 +
def hUpper(word):
    """Return True iff *word* contains at least one uppercase letter."""
    return any(ch.isupper() for ch in word)
64 85
def hLower(word):
    """Return True iff *word* contains at least one lowercase letter."""
    return any(ch.islower() for ch in word)
69 91
def hGreek(word):
    """Return True iff *word* contains at least one Greek letter."""
    # The single-character Greek test is inlined here: membership in the
    # same letter set that isGreek() uses, so behaviour is unchanged.
    greek_letters = set('ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ'
                        'αβγδεζηθικλμνξοπρςστυφχψω')
    return any(ch in greek_letters for ch in word)
...@@ -80,54 +103,69 @@ def word2features(sent, i, S1, S2): ...@@ -80,54 +103,69 @@ def word2features(sent, i, S1, S2):
80 postag = listElem[2] 103 postag = listElem[2]
81 ner = listElem[3] 104 ner = listElem[3]
82 105
106 + #====================== G1 ======================#
107 +
83 features = { 108 features = {
84 #General 109 #General
85 'lemma': lemma, 110 'lemma': lemma,
86 'postag': postag 111 'postag': postag
87 } 112 }
88 113
89 - if S1:
90 - #S1
91 - features['word']: word
92 - features['hUpper']: hUpper(word)
93 - features['hLower']: hUpper(word)
94 - features['hGreek']: hGreek(word)
95 - #features['hAlfNum']: hAlfNum(word)
96 -
97 - if S2:
98 - #S2
99 - features['isUpper']: word.isupper()
100 - features['isLower']: word.isLower()
101 - features['isGreek']: isGreek(word)
102 - features['isNumber']: word.isdigit()
103 -
104 if i > 0: 114 if i > 0:
105 - listElem = sent[i - 1].split('|') 115 + listElem = sent[i - 1].split('|')
106 - word1 = listElem[0]
107 lemma1 = listElem[1] 116 lemma1 = listElem[1]
108 postag1 = listElem[2] 117 postag1 = listElem[2]
109 - features.update({ 118 +
110 - #Word anterioir 119 + features.update({
111 - '-1:word': word1,
112 #LemaG posterior 120 #LemaG posterior
113 '-1:lemma': lemma1, 121 '-1:lemma': lemma1,
114 #PostG posterior 122 #PostG posterior
115 '-1:postag': postag1, 123 '-1:postag': postag1,
116 }) 124 })
117 125
118 - if i < len(sent) - 1: 126 + if i < len(sent) - 1:
119 - listElem = sent[i + 1].split('|') 127 + listElem = sent[i + 1].split('|')
120 - word1 = listElem[0]
121 lemma1 = listElem[1] 128 lemma1 = listElem[1]
122 postag1 = listElem[2] 129 postag1 = listElem[2]
123 - features.update({ 130 +
124 - #Word anterioir 131 + features.update({
125 - '+1:word': word1,
126 #LemaG posterior 132 #LemaG posterior
127 '+1:lemma': lemma1, 133 '+1:lemma': lemma1,
128 #PostG posterior 134 #PostG posterior
129 '+1:postag': postag1, 135 '+1:postag': postag1,
130 }) 136 })
137 +
138 + #====================== S1 ======================#
139 + if S1:
140 + listElem = sent[i - 1].split('|')
141 + lemma1 = listElem[1]
142 + postag1 = listElem[2]
143 +
144 + features['hUpper']: hUpper(word)
145 + features['hLower']: hUpper(word)
146 + features['hGreek']: hGreek(word)
147 + features['symb']: symb(word)
148 + #firstChar
149 + features['lemma1[:1]']: lemma1[:1]
150 + #secondChar
151 + features['postag[:1]']: lemma1[:1]
152 + features['postag[:2]']: lemma1[:2]
153 + features['lemma[:2]']: lemma1[:2]
154 +
155 + #====================== S2 ======================#
156 + if S2:
157 + #S2
158 + features['isUpper']: word.isupper()
159 + features['isLower']: word.isLower()
160 + features['isGreek']: isGreek(word)
161 + features['isNumber']: word.isdigit()
162 +
163 +
164 + '''
165 + #====================== S3 ======================#
166 + if S3:
167 + features['word']: word
168 + '''
131 return features 169 return features
132 170
133 171
...@@ -153,7 +191,7 @@ def print_state_features(state_features, f): ...@@ -153,7 +191,7 @@ def print_state_features(state_features, f):
153 f.write("{:0.6f} {:8} {}\n".format(weight, label, attr.encode("utf-8"))) 191 f.write("{:0.6f} {:8} {}\n".format(weight, label, attr.encode("utf-8")))
154 192
155 193
156 -__author__ = 'CMendezC' 194 +__author__ = 'egaytan'
157 195
158 ########################################## 196 ##########################################
159 # MAIN PROGRAM # 197 # MAIN PROGRAM #
...@@ -177,7 +215,7 @@ if __name__ == "__main__": ...@@ -177,7 +215,7 @@ if __name__ == "__main__":
177 parser.add_option("--excludeSymbols", default=False, 215 parser.add_option("--excludeSymbols", default=False,
178 action="store_true", dest="excludeSymbols", 216 action="store_true", dest="excludeSymbols",
179 help="Exclude punctuation marks") 217 help="Exclude punctuation marks")
180 - parser.add_option("--reportFile", dest="reportFile", 218 + parser.add_option("--version", dest="version",
181 help="Report file", metavar="FILE") 219 help="Report file", metavar="FILE")
182 parser.add_option("--S1", default=False, 220 parser.add_option("--S1", default=False,
183 action="store_true", dest="S1", 221 action="store_true", dest="S1",
...@@ -198,7 +236,7 @@ if __name__ == "__main__": ...@@ -198,7 +236,7 @@ if __name__ == "__main__":
198 print("File with test data set: " + str(options.testFile)) 236 print("File with test data set: " + str(options.testFile))
199 print("Exclude stop words: " + str(options.excludeStopWords)) 237 print("Exclude stop words: " + str(options.excludeStopWords))
200 print("Levels: " + str(options.S1) + " " + str(options.S2)) 238 print("Levels: " + str(options.S1) + " " + str(options.S2))
201 - print("Report file: " + str(options.reportFile)) 239 + print("Report file: " + str(options.version))
202 240
203 241
204 symbols = ['.', ',', ':', ';', '?', '!', '\'', '"', '<', '>', '(', ')', '-', '_', '/', '\\', '¿', '¡', '+', '{', 242 symbols = ['.', ',', ':', ';', '?', '!', '\'', '"', '<', '>', '(', ')', '-', '_', '/', '\\', '¿', '¡', '+', '{',
...@@ -254,19 +292,14 @@ if __name__ == "__main__": ...@@ -254,19 +292,14 @@ if __name__ == "__main__":
254 292
255 print("Reading corpus done in: %fs" % (time() - t0)) 293 print("Reading corpus done in: %fs" % (time() - t0))
256 294
257 - if options.S1: S1 = 0 295 + print(sent2features(sentencesTrainingData[0], options.S1, options.S2)[0])
258 - else: S1 = 1 296 + print(sent2features(sentencesTestData[0], options.S1, options.S2)[0])
259 - if options.S2: S2 = 0
260 - else: S2 = 1
261 -
262 - print(sent2features(sentencesTrainingData[0], S1, S2)[0])
263 - print(sent2features(sentencesTestData[0], S1, S2)[0])
264 t0 = time() 297 t0 = time()
265 298
266 - X_train = [sent2features(s, S1, S2) for s in sentencesTrainingData] 299 + X_train = [sent2features(s, options.S1, options.S2) for s in sentencesTrainingData]
267 y_train = [sent2labels(s) for s in sentencesTrainingData] 300 y_train = [sent2labels(s) for s in sentencesTrainingData]
268 301
269 - X_test = [sent2features(s, S1, S2) for s in sentencesTestData] 302 + X_test = [sent2features(s, options.S1, options.S2) for s in sentencesTestData]
270 # print X_test 303 # print X_test
271 y_test = [sent2labels(s) for s in sentencesTestData] 304 y_test = [sent2labels(s) for s in sentencesTestData]
272 305
...@@ -292,7 +325,7 @@ if __name__ == "__main__": ...@@ -292,7 +325,7 @@ if __name__ == "__main__":
292 325
293 # Original: labels = list(crf.classes_) 326 # Original: labels = list(crf.classes_)
294 # Original: labels.remove('O') 327 # Original: labels.remove('O')
295 - labels = list(['Gtype', 'Gversion', 'Med', 'Phase', 'Supp', 'Technique', 'Temp', 'OD', 'Anti']) 328 + labels = list(['Gtype', 'Gversion', 'Med', 'Phase', 'Strain', 'Substrain', 'Supp', 'Technique', 'Temp', 'OD', 'Anti', 'Agit', 'Air', 'Vess', 'pH'])
296 329
297 # use the same metric for evaluation 330 # use the same metric for evaluation
298 f1_scorer = make_scorer(metrics.flat_f1_score, 331 f1_scorer = make_scorer(metrics.flat_f1_score,
...@@ -312,8 +345,10 @@ if __name__ == "__main__": ...@@ -312,8 +345,10 @@ if __name__ == "__main__":
312 # crf.fit(X_train, y_train) 345 # crf.fit(X_train, y_train)
313 346
314 # Best hiperparameters 347 # Best hiperparameters
315 - # crf = rs.best_estimator_ 348 + # crf = rs.best_estimator_
316 - nameReport = options.trainingFile.replace('.txt', str(options.reportFile) + '.txt') 349 +
350 +
351 + nameReport = str(options.S1) + '_S2_' + str(options.S2) + str(options.version) + '.txt'
317 with open(os.path.join(options.outputPath, "reports", "report_" + nameReport), mode="w") as oFile: 352 with open(os.path.join(options.outputPath, "reports", "report_" + nameReport), mode="w") as oFile:
318 oFile.write("********** TRAINING AND TESTING REPORT **********\n") 353 oFile.write("********** TRAINING AND TESTING REPORT **********\n")
319 oFile.write("Training file: " + options.trainingFile + '\n') 354 oFile.write("Training file: " + options.trainingFile + '\n')
...@@ -331,27 +366,13 @@ if __name__ == "__main__": ...@@ -331,27 +366,13 @@ if __name__ == "__main__":
331 # Saving model 366 # Saving model
332 print(" Saving training model...") 367 print(" Saving training model...")
333 t1 = time() 368 t1 = time()
334 - nameModel = options.trainingFile.replace('.txt', '.fStopWords_' + str(options.excludeStopWords) + '.fSymbols_' + str( 369 + nameModel = 'model_S1_' + str(options.S1) + '_S2_' + str(options.S2) + str(options.version) + '.mod'
335 - options.excludeSymbols) + '.mod')
336 joblib.dump(crf, os.path.join(options.outputPath, "models", nameModel)) 370 joblib.dump(crf, os.path.join(options.outputPath, "models", nameModel))
337 print(" Saving training model done in: %fs" % (time() - t1)) 371 print(" Saving training model done in: %fs" % (time() - t1))
338 372
339 # Evaluation against test data 373 # Evaluation against test data
340 y_pred = crf.predict(X_test) 374 y_pred = crf.predict(X_test)
341 print("*********************************") 375 print("*********************************")
342 - name = options.trainingFile.replace('.txt', '.fStopWords_' + str(options.excludeStopWords) + '.fSymbols_' + str(
343 - options.excludeSymbols) + '.txt')
344 - with open(os.path.join(options.outputPath, "reports", "y_pred_" + name), "w") as oFile:
345 - for y in y_pred:
346 - oFile.write(str(y) + '\n')
347 -
348 - print("*********************************")
349 - name = options.trainingFile.replace('.txt', '.fStopWords_' + str(options.excludeStopWords) + '.fSymbols_' + str(
350 - options.excludeSymbols) + '.txt')
351 - with open(os.path.join(options.outputPath, "reports", "y_test_" + name), "w") as oFile:
352 - for y in y_test:
353 - oFile.write(str(y) + '\n')
354 -
355 print("Prediction done in: %fs" % (time() - t0)) 376 print("Prediction done in: %fs" % (time() - t0))
356 377
357 # labels = list(crf.classes_) 378 # labels = list(crf.classes_)
...@@ -387,4 +408,3 @@ if __name__ == "__main__": ...@@ -387,4 +408,3 @@ if __name__ == "__main__":
387 print_state_features(Counter(crf.state_features_).most_common()[-200:], oFile) 408 print_state_features(Counter(crf.state_features_).most_common()[-200:], oFile)
388 oFile.write('\n') 409 oFile.write('\n')
389 410
390 -
......
...@@ -3,8 +3,6 @@ Gtype ...@@ -3,8 +3,6 @@ Gtype
3 Gversion 3 Gversion
4 Med 4 Med
5 Phase 5 Phase
6 -Sample
7 -Serie
8 Strain 6 Strain
9 Supp 7 Supp
10 Technique 8 Technique
...@@ -13,4 +11,5 @@ OD ...@@ -13,4 +11,5 @@ OD
13 Anti 11 Anti
14 Agit 12 Agit
15 Vess 13 Vess
16 - 14 +Substrain
15 +pH
......
This diff is collapsed. Click to expand it.
This diff is collapsed. Click to expand it.
This diff is collapsed. Click to expand it.
This diff is collapsed. Click to expand it.
This diff is collapsed. Click to expand it.
This diff is collapsed. Click to expand it.
This diff is collapsed. Click to expand it.
This diff is collapsed. Click to expand it.
No preview for this file type
No preview for this file type
No preview for this file type
This diff is collapsed. Click to expand it.
This diff is collapsed. Click to expand it.
This diff is collapsed. Click to expand it.
This diff is collapsed. Click to expand it.
1 -['O', 'O', 'O', 'O', 'O']
2 -['O', 'O', 'O', 'O', 'O', 'O', 'O']
3 -['O', 'O', 'O']
4 -['O', 'O', 'O', 'O', 'O', 'O', 'O']
5 -['O', 'O', 'O', 'Gtype']
6 -['O', 'O', 'O', 'O']
7 -['Supp', 'Supp', 'Supp', 'Supp', 'Supp', 'Supp', 'Supp', 'Supp', 'Supp', 'Supp', 'Supp', 'Supp', 'Supp', 'Supp', 'Supp', 'Supp', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'Supp', 'Supp', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 
'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
8 -['O', 'O', 'O', 'O', 'O']
9 -['O', 'O', 'O', 'O', 'O']
10 -['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
11 -['O', 'O', 'O', 'Med', 'Med', 'Med', 'Med', 'O', 'Supp', 'Supp', 'Supp']
12 -['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'Supp', 'Supp', 'Supp', 'Supp', 'Supp', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
13 -['O', 'O', 'O', 'O', 'O', 'O', 'O']
14 -['O', 'O', 'O', 'O']
15 -['O', 'O', 'O', 'Med', 'Med', 'Med', 'O', 'Supp', 'Supp', 'Supp']
16 -['O', 'O', 'O', 'O', 'O']
17 -['O', 'O', 'Gtype']
18 -['O', 'O', 'O']
19 -['O', 'O', 'O', 'O', 'O', 'O', 'O']
20 -['O', 'O', 'O', 'O', 'O']
21 -['O', 'O', 'O', 'O', 'O', 'O', 'Gversion', 'Gversion', 'Gversion', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
22 -['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
23 -['O', 'O', 'O', 'O', 'O']
24 -['O', 'O', 'O']
25 -['O', 'O', 'O', 'O', 'O']
26 -['O', 'O', 'Anti', 'Anti', 'Anti']
27 -['O', 'O', 'O']
28 -['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
29 -['O', 'O', 'O', 'O', 'O']
30 -['O', 'O', 'O']
31 -['O', 'O', 'O']
32 -['O', 'O', 'O']
33 -['O', 'O', 'O', 'O', 'O', 'O', 'O']
34 -['O', 'O', 'O', 'O']
35 -['O', 'O', 'O', 'Supp']
36 -['O', 'O', 'O', 'Gtype', 'O', 'O', 'O', 'O', 'O']
37 -['O', 'O', 'O', 'O', 'O', 'O', 'O']
38 -['O', 'O', 'O', 'O']
39 -['O', 'O', 'O']
40 -['O', 'O', 'O', 'O']
41 -['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
42 -['O', 'O', 'O']
43 -['O', 'O', 'O', 'O']
44 -['O', 'O', 'O', 'O', 'O']
45 -['O', 'O', 'Gtype']
46 -['O', 'O', 'Gtype', 'Gtype', 'Gtype', 'Gtype', 'Gtype']
47 -['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'Gtype', 'Gtype']
48 -['O', 'O', 'O', 'O', 'O', 'O']
49 -['O', 'O', 'O', 'O', 'O']
50 -['O', 'O', 'Gtype']
51 -['O', 'O', 'O']
52 -['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'Phase', 'Phase', 'O', 'OD', 'OD', 'OD', 'OD', 'O', 'O', 'Med', 'Med', 'Med', 'Med', 'O', 'Supp', 'Supp', 'Supp', 'O']
53 -['O', 'O', 'O']
54 -['O', 'O', 'O', 'O', 'O', 'O', 'O']
55 -['O', 'O', 'O']
56 -['O', 'O', 'O']
57 -['O', 'O', 'O', 'Gtype']
58 -['O', 'O', 'O', 'O', 'Supp', 'Supp', 'Supp', 'Supp', 'Supp', 'Supp', 'Supp', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'Phase', 'Phase', 'O', 'OD', 'OD', 'OD', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
59 -['O', 'O', 'O']
60 -['O', 'O', 'O']
61 -['O', 'O', 'Gtype']
62 -['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'Phase', 'Phase', 'O', 'OD', 'OD', 'OD', 'OD', 'O', 'O']
63 -['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'Phase', 'Phase', 'O', 'O', 'O', 'Temp', 'Temp', 'O', 'Med', 'Med', 'Med', 'O', 'O', 'Supp', 'Supp', 'Supp', 'O', 'O', 'O', 'O', 'O', 'O', 'Supp', 'Supp', 'Supp', 'Supp', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'Supp', 'O']
64 -['O', 'O', 'O', 'O', 'O', 'O', 'O']
65 -['O', 'O', 'O']
66 -['O', 'O', 'Gtype']
67 -['O', 'O', 'O', 'Anti', 'Anti', 'Anti', 'O']
68 -['O', 'O', 'O', 'O']
69 -['O', 'O', 'O', 'O']
70 -['O', 'O', 'O', 'O', 'O', 'O', 'O']
71 -['O', 'O', 'O', 'Anti', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
72 -['O', 'O', 'O']
73 -['O', 'O', 'O']
74 -['O', 'O', 'Gtype', 'Gtype', 'Gtype', 'Gtype', 'Gtype', 'Gtype', 'Gtype', 'Gtype', 'Gtype', 'Gtype', 'Gtype', 'Gtype', 'Gtype', 'Gtype', 'Gtype', 'Gtype', 'Gtype', 'Gtype', 'Gtype', 'Gtype']
75 -['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
76 -['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
77 -['O', 'O', 'Gtype']
78 -['O', 'O', 'O', 'O', 'O', 'Anti', 'Anti']
79 -['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'Phase', 'Phase', 'O', 'O', 'O', 'Temp', 'Temp', 'O', 'Med', 'Med', 'Med', 'O', 'O', 'Supp', 'Supp', 'Supp', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
80 -['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'Phase', 'Phase', 'O', 'OD']
81 -['O', 'O', 'O', 'O', 'O', 'O']
82 -['O', 'O', 'O', 'O', 'O']
83 -['O', 'O', 'O', 'Anti']
84 -['O', 'O', 'O', 'O', 'O', 'O', 'O']
85 -['O', 'O', 'Gtype']
86 -['O', 'O', 'O', 'O', 'O', 'O', 'O', 'Substrain']
87 -['O', 'O', 'Gtype']
88 -['O', 'O', 'Gtype']
89 -['O', 'O', 'O', 'O', 'O', 'O', 'O']
90 -['O', 'O', 'O']
91 -['O', 'O', 'O']
92 -['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'Gtype', 'Gtype', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
93 -['O', 'O', 'O', 'O', 'O', 'O']
94 -['O', 'O', 'Med']
95 -['O', 'O', 'O', 'O', 'O', 'O']
96 -['O', 'O', 'O', 'O', 'O', 'O', 'O']
97 -['O', 'O', 'Gtype']
98 -['Gversion', 'Gversion']
99 -['O', 'O', 'O', 'O', 'O']
100 -['O', 'O', 'O']
101 -['O', 'O', 'O']
102 -['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
103 -['O', 'O', 'O', 'O', 'O', 'O', 'O']
104 -['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
105 -['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'Gversion', 'Gversion', 'Gversion', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
106 -['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'Phase', 'Phase', 'O', 'O', 'O', 'Temp', 'Temp', 'O', 'Med', 'Med', 'Med', 'O', 'O', 'Supp', 'Supp', 'Supp', 'O', 'O', 'O', 'O', 'O', 'O', 'Supp', 'Supp', 'Supp', 'Supp', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'Supp', 'O']
107 -['O', 'O', 'O', 'O', 'O', 'O', 'O']
108 -['O', 'O', 'O', 'O', 'O', 'O', 'O']
109 -['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
110 -['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'Phase', 'Phase', 'O', 'O', 'O', 'Temp', 'Temp', 'O', 'Med', 'Med', 'Med', 'O', 'O', 'Supp', 'Supp', 'Supp', 'O', 'O', 'O', 'O', 'O', 'O', 'Supp', 'Supp', 'Supp', 'Supp', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'Supp', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
111 -['O', 'O', 'Gtype']
112 -['O', 'O', 'O', 'O']
113 -['O', 'O', 'O', 'O', 'O']
114 -['O', 'O', 'Anti', 'Anti', 'Anti', 'Anti']
115 -['O', 'O', 'O', 'O', 'O', 'O', 'O']
116 -['O', 'O', 'O', 'O']
117 -['O', 'O', 'O', 'O', 'O', 'O', 'O']
118 -['O', 'O', 'O']
119 -['O', 'O', 'O', 'O']
120 -['O', 'O', 'O', 'O', 'O', 'O', 'O']
121 -['Med', 'Med', 'Med', 'Med', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
1 -['O', 'O', 'O', 'O', 'O']
2 -['O', 'O', 'O', 'O', 'O', 'O', 'O']
3 -['O', 'O', 'O']
4 -['O', 'O', 'O', 'O', 'O', 'O', 'O']
5 -['O', 'O', 'O', 'Technique']
6 -['O', 'O', 'O', 'O']
7 -['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'Phase', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 
'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
8 -['O', 'O', 'O', 'O', 'O']
9 -['O', 'O', 'O', 'O', 'O']
10 -['O', 'O', 'O', 'O', 'O', 'O', 'O', 'Substrain']
11 -['O', 'O', 'O', 'Med', 'Med', 'Med', 'Med', 'O', 'Supp', 'Supp', 'Supp']
12 -['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'Med', 'Med', 'O', 'Supp', 'Supp', 'Supp', 'O', 'O', 'O', 'Supp', 'Supp', 'Supp', 'Supp', 'Supp', 'Supp', 'Supp', 'O', 'O', 'O', 'O', 'O', 'Temp', 'Temp', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'OD', 'OD', 'OD', 'O', 'O']
13 -['O', 'O', 'O', 'O', 'O', 'O', 'O']
14 -['O', 'O', 'O', 'O']
15 -['O', 'O', 'O', 'Med', 'Med', 'Med', 'O', 'Supp', 'Supp', 'Supp']
16 -['O', 'O', 'O', 'O', 'O']
17 -['O', 'O', 'Gtype']
18 -['O', 'O', 'O']
19 -['O', 'O', 'O', 'O', 'O', 'O', 'O']
20 -['O', 'O', 'O', 'O', 'O']
21 -['O', 'O', 'O', 'O', 'O', 'O', 'Gversion', 'Gversion', 'Gversion', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
22 -['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
23 -['O', 'O', 'Gtype', 'Gtype', 'Gtype']
24 -['O', 'O', 'O']
25 -['O', 'O', 'O', 'O', 'O']
26 -['O', 'O', 'Anti', 'Anti', 'Anti']
27 -['O', 'O', 'O']
28 -['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
29 -['O', 'O', 'O', 'O', 'O']
30 -['O', 'O', 'O']
31 -['O', 'O', 'O']
32 -['O', 'O', 'O']
33 -['O', 'O', 'O', 'O', 'O', 'O', 'O']
34 -['O', 'O', 'O', 'O']
35 -['O', 'O', 'O', 'Supp']
36 -['O', 'O', 'O', 'Anti', 'O', 'O', 'O', 'O', 'O']
37 -['O', 'O', 'O', 'O', 'O', 'O', 'O']
38 -['O', 'O', 'O', 'O']
39 -['O', 'O', 'O']
40 -['O', 'O', 'O', 'O']
41 -['O', 'O', 'O', 'O', 'Gtype', 'Gtype', 'Gtype', 'Gtype']
42 -['O', 'O', 'O']
43 -['O', 'O', 'O', 'O']
44 -['O', 'O', 'O', 'O', 'O']
45 -['O', 'O', 'Gtype']
46 -['O', 'O', 'Gtype', 'Gtype', 'Gtype', 'Gtype', 'Gtype']
47 -['Substrain', 'Gtype', 'Gtype', 'Gtype', 'Gtype', 'Gtype', 'Gtype', 'Gtype', 'Gtype', 'Gtype', 'Gtype', 'Gtype', 'Gtype', 'Gtype', 'Gtype', 'Gtype']
48 -['O', 'O', 'O', 'O', 'O', 'O']
49 -['O', 'O', 'Gtype', 'O', 'O']
50 -['O', 'O', 'Gtype']
51 -['O', 'O', 'O']
52 -['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'Phase', 'Phase', 'O', 'OD', 'OD', 'OD', 'OD', 'O', 'O', 'Med', 'Med', 'Med', 'Med', 'O', 'Supp', 'Supp', 'Supp', 'O']
53 -['O', 'O', 'O']
54 -['O', 'O', 'O', 'O', 'O', 'O', 'O']
55 -['O', 'O', 'O']
56 -['O', 'O', 'O']
57 -['O', 'O', 'O', 'Anti']
58 -['O', 'O', 'O', 'O', 'Supp', 'Supp', 'Supp', 'Supp', 'Supp', 'Supp', 'Supp', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'Phase', 'Phase', 'O', 'OD', 'OD', 'OD', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
59 -['O', 'O', 'O']
60 -['O', 'O', 'O']
61 -['O', 'O', 'Gtype']
62 -['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'Phase', 'Phase', 'O', 'OD', 'OD', 'OD', 'OD', 'O', 'O']
63 -['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'Phase', 'Phase', 'O', 'O', 'O', 'Temp', 'Temp', 'O', 'Med', 'Med', 'Med', 'O', 'O', 'Supp', 'Supp', 'Supp', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'Supp', 'Supp', 'Supp', 'Supp', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
64 -['O', 'O', 'O', 'O', 'O', 'O', 'O']
65 -['O', 'O', 'O']
66 -['O', 'O', 'Gtype']
67 -['O', 'O', 'O', 'Anti', 'Anti', 'Anti', 'O']
68 -['O', 'O', 'O', 'O']
69 -['O', 'O', 'O', 'O']
70 -['O', 'O', 'O', 'O', 'O', 'O', 'O']
71 -['O', 'O', 'O', 'Anti', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
72 -['O', 'O', 'O']
73 -['O', 'O', 'O']
74 -['O', 'O', 'Gtype', 'Gtype', 'Gtype', 'Gtype', 'Gtype', 'Gtype', 'Gtype', 'Gtype', 'Gtype', 'Gtype', 'Gtype', 'Gtype', 'Gtype', 'Gtype', 'Gtype', 'Gtype', 'Gtype', 'Gtype', 'Gtype', 'Gtype']
75 -['Substrain', 'Gtype', 'Gtype', 'Gtype', 'Gtype', 'Gtype', 'Gtype', 'Gtype', 'Gtype', 'Gtype', 'Gtype']
76 -['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
77 -['O', 'O', 'Gtype']
78 -['O', 'O', 'O', 'Anti', 'Anti', 'Anti', 'Anti']
79 -['OD', 'OD', 'OD', 'OD', 'OD', 'OD', 'OD', 'OD', 'OD', 'OD', 'OD', 'OD', 'OD', 'OD', 'OD', 'OD', 'OD', 'OD', 'OD', 'O', 'Temp', 'Temp', 'O', 'Med', 'Med', 'Med', 'O', 'O', 'Supp', 'Supp', 'Supp', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
80 -['OD', 'OD', 'OD', 'OD', 'OD', 'OD', 'OD', 'OD', 'OD', 'OD', 'OD', 'OD', 'OD', 'OD', 'OD', 'OD', 'OD', 'OD', 'OD', 'OD']
81 -['O', 'O', 'O', 'O', 'O', 'O']
82 -['O', 'O', 'O', 'O', 'O']
83 -['O', 'O', 'O', 'Anti']
84 -['O', 'O', 'O', 'O', 'O', 'O', 'O']
85 -['O', 'O', 'Gtype']
86 -['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
87 -['O', 'O', 'Gtype']
88 -['O', 'O', 'Supp']
89 -['O', 'O', 'O', 'O', 'O', 'O', 'O']
90 -['O', 'O', 'O']
91 -['O', 'O', 'O']
92 -['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'Gtype', 'Gtype', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'OD', 'OD', 'OD', 'OD', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
93 -['O', 'O', 'O', 'O', 'O', 'O']
94 -['O', 'O', 'Med']
95 -['O', 'O', 'O', 'O', 'O', 'O']
96 -['O', 'O', 'O', 'O', 'O', 'O', 'O']
97 -['O', 'O', 'Supp']
98 -['O', 'O']
99 -['O', 'O', 'O', 'O', 'O']
100 -['O', 'O', 'O']
101 -['O', 'O', 'O']
102 -['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
103 -['O', 'O', 'O', 'O', 'O', 'O', 'O']
104 -['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
105 -['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'Gversion', 'Gversion', 'Gversion', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
106 -['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'Phase', 'Phase', 'O', 'O', 'O', 'Temp', 'Temp', 'O', 'Med', 'Med', 'Med', 'O', 'O', 'Supp', 'Supp', 'Supp', 'O', 'O', 'O', 'O', 'O', 'O', 'Supp', 'Supp', 'Supp', 'Supp', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
107 -['O', 'O', 'O', 'O', 'O', 'O', 'O']
108 -['O', 'O', 'O', 'O', 'O', 'O', 'O']
109 -['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']
110 -['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'Phase', 'Phase', 'O', 'O', 'O', 'Temp', 'Temp', 'O', 'Med', 'Med', 'Med', 'O', 'O', 'Supp', 'Supp', 'Supp', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'Supp', 'Supp', 'Supp', 'Supp', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'Supp', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'Supp', 'Supp', 'Supp', 'Supp', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'Supp', 'Supp', 'O', 'O', 'O', 'O', 'O', 'O', 'Supp', 'Supp', 'O']
111 -['O', 'O', 'Gtype']
112 -['O', 'O', 'O', 'O']
113 -['O', 'O', 'O', 'O', 'O']
114 -['O', 'O', 'Anti', 'Anti', 'Anti', 'Anti']
115 -['O', 'O', 'O', 'O', 'O', 'O', 'O']
116 -['O', 'O', 'O', 'O']
117 -['O', 'O', 'O', 'O', 'O', 'O', 'O']
118 -['O', 'O', 'O']
119 -['O', 'O', 'O', 'O']
120 -['O', 'O', 'O', 'O', 'O', 'O', 'O']
121 -['Med', 'Med', 'Med', 'Med', 'O', 'O', 'O', 'O', 'O', 'O', 'Temp', 'Temp', 'Temp', 'O', 'O', 'Agit', 'Agit', 'Agit', 'Agit', 'Agit', 'Agit', 'Agit']
1 -cd /home/kevinml/automatic-extraction-growth-conditions/data-sets/tagged-xml-data 1 +
2 +
3 +# Orgiginal files
4 +#cd /home/egaytan/automatic-extraction-growth-conditions/data-sets/report-manually-tagged-gcs/
5 +
6 +# Re-tagged
7 +cd /home/egaytan/automatic-extraction-growth-conditions/data-sets/tagged-xml-data/
2 echo 8 echo
3 echo 9 echo
4 echo 10 echo
...@@ -18,9 +24,9 @@ echo ...@@ -18,9 +24,9 @@ echo
18 echo 24 echo
19 echo "Filter all paragraphs with tags..." 25 echo "Filter all paragraphs with tags..."
20 echo "Add sentence-end-tag PGCGROWTHCONDITIONS..." 26 echo "Add sentence-end-tag PGCGROWTHCONDITIONS..."
21 -grep -E "<[^<]*>" * | grep -E '!'| cut -f2 -d'='|sort|uniq|awk '{ print $_" PGCGROWTHCONDITIONS"; }' > /home/egaytan/GROWTH-CONDITIONS-GEO-EXTRACTION/CoreNLP/input/raw-metadata-senteneces.txt 27 +grep -E "<[^<]*>" * | grep -E '!'| cut -f2 -d'='|sort|uniq|awk '{ print $_" PGCGROWTHCONDITIONS"; }' > /home/egaytan/automatic-extraction-growth-conditions/CoreNLP/input/raw-metadata-senteneces_v2.txt
22 echo 28 echo
23 -echo "Number of total tag sentences: "$(wc /home/egaytan/GROWTH-CONDITIONS-GEO-EXTRACTION/CoreNLP/input/raw-metadata-senteneces.txt -l); 29 +echo "Number of total tag sentences: "$(wc /home/egaytan/automatic-extraction-growth-conditions/CoreNLP/input/raw-metadata-senteneces_v2.txt -l);
24 echo 30 echo
25 echo 31 echo
26 -echo "Saving file: /home/egaytan/GROWTH-CONDITIONS-GEO-EXTRACTION/CoreNLP/input/raw-metadata-senteneces.txt"; 32 +echo "Saving file: /home/egaytan/automatic-extraction-growth-conditions/CoreNLP/input/raw-metadata-senteneces_v2.txt";
......
...@@ -4,8 +4,8 @@ echo "==============================Run CoreNLP================================= ...@@ -4,8 +4,8 @@ echo "==============================Run CoreNLP=================================
4 echo 4 echo
5 echo 5 echo
6 6
7 -input="/home/egaytan/GROWTH-CONDITIONS-GEO-EXTRACTION/CoreNLP/input/raw-metadata-senteneces.txt"; 7 +input="/home/egaytan/automatic-extraction-growth-conditions/CoreNLP/input/raw-metadata-senteneces_v2.txt";
8 -output="/home/egaytan/GROWTH-CONDITIONS-GEO-EXTRACTION/CoreNLP/output/"; 8 +output="/home/egaytan/automatic-extraction-growth-conditions/CoreNLP/output/";
9 echo "input file: "$input; 9 echo "input file: "$input;
10 echo 10 echo
11 echo "output directory: "$output; 11 echo "output directory: "$output;
......
This diff is collapsed. Click to expand it.
This diff could not be displayed because it is too large.