Showing 10 changed files with 61 additions and 86 deletions
data.json
0 → 100644
1 | +{"key1": "keyinfo", "key2": "keyinfo2"} | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
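data.json appears to be a small smoke test for the JSON dump this commit adds; json.dump writes no trailing newline, which would explain the "\ No newline at end of file" marker. A minimal sketch, assuming that origin:

import json

# json.dump emits no trailing '\n', matching the missing-newline marker above
with open('data.json', 'w') as fp:
    json.dump({"key1": "keyinfo", "key2": "keyinfo2"}, fp)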
... | @@ -25,6 +25,8 @@ from nltk.corpus import stopwords | ... | @@ -25,6 +25,8 @@ from nltk.corpus import stopwords |
25 | 25 | ||
26 | import training_validation_v14 as training | 26 | import training_validation_v14 as training |
27 | 27 | ||
28 | +import json | ||
29 | + | ||
28 | #------------------------------------------------------------------------------- | 30 | #------------------------------------------------------------------------------- |
29 | # Objective | 31 | # Objective |
30 | # Tagging transformed file with CRF model with sklearn-crfsuite. | 32 | # Tagging transformed file with CRF model with sklearn-crfsuite. |
... | @@ -61,10 +63,7 @@ import training_validation_v14 as training | ... | @@ -61,10 +63,7 @@ import training_validation_v14 as training |
61 | # --variant 13 | 63 | # --variant 13 |
62 | 64 | ||
63 | #Examples | 65 | #Examples |
64 | -#python3 tagging.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/predict-annot/input/ --outputPath /home/egaytan/automatic-extraction-growth-conditions/predict-annot/output/ --outputFileI annot-input_bg_outputI.txt --outputFileII annot-input_bg_outputII.txt --modelPath /home/egaytan/automatic-extraction-growth-conditions/CRF/models --modelName model_Run3_v10_S1_False_S2_True_S3_False_S4_False_Run3_v10 --infoPath /home/egaytan/automatic-extraction-growth-conditions/predict-annot/mapping --infoFile bg_sentences_midx.txt --variant 13 --S4 --S1 > ../../reports/output_tagging_report.txt | 66 | +#predict-annot/bin/tagging/tagging.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/predict-annot/input/ --outputPath /home/egaytan/automatic-extraction-growth-conditions/predict-annot/output/ --outputFileI annot-input_bg_outputI_v5.txt --outputFileII annot-input_bg_outputII_v5 --outputFileIII annot-input_bg_outputIII_v5 --modelPath /home/egaytan/automatic-extraction-growth-conditions/CRF/models --modelName model_Run3_v10_S1_False_S2_True_S3_False_S4_False_Run3_v10 --infoPath /home/egaytan/automatic-extraction-growth-conditions/predict-annot/mapping --infoFile bg_sentences_midx_v4.txt --variant 10 --S2 > predict-annot/reports/annot-input_bg_report_v4.txt > predict-annot/reports/output_tagging_report_v5.txt |
65 | -#python3 predict-annot/bin/tagging/tagging.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/predict-annot/input/ --outputPath /home/egaytan/automatic-extraction-growth-conditions/predict-annot/output/ --outputFileI annot-input_bg_outputI_v4.txt --outputFileII annot-input_bg_outputII_v4 --modelPath /home/egaytan/automatic-extraction-growth-conditions/CRF/models --modelName model_Run3_v10_S1_False_S2_True_S3_False_S4_False_Run3_v10 --infoPath /home/egaytan/automatic-extraction-growth-conditions/predict-annot/mapping --infoFile bg_sentences_midx_v4.txt --variant 13 --S4 --S1 > predict-annot/reports/output_tagging_report_v4.txt | ||
66 | -#python3 predict-annot/bin/tagging/tagging.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/predict-annot/input/ --outputPath /home/egaytan/automatic-extraction-growth-conditions/predict-annot/output/ --outputFileI annot-input_bg_outputI_v4.txt --outputFileII annot-input_bg_outputII_v4 --outputFileII annot-input_bg_outputIII_v4 --modelPath /home/egaytan/automatic-extraction-growth-conditions/CRF/models --modelName model_Run3_v10_S1_False_S2_True_S3_False_S4_False_Run3_v10 --infoPath /home/egaytan/automatic-extraction-growth-conditions/predict-annot/mapping --infoFile bg_sentences_midx_v4.txt --variant 13 --S4 --S1 > predict-annot/reports/annot-input_bg_report_v4.txt | ||
67 | - | ||
68 | __author__ = 'egaytan' | 67 | __author__ = 'egaytan' |
69 | 68 | ||
70 | ########################################## | 69 | ########################################## |
... | @@ -84,12 +83,10 @@ if __name__ == "__main__": | ... | @@ -84,12 +83,10 @@ if __name__ == "__main__": |
84 | parser.add_option("--infoPath", dest="infoPath", help="Path of GSE-GSM index file", metavar="PATH") | 83 | parser.add_option("--infoPath", dest="infoPath", help="Path of GSE-GSM index file", metavar="PATH") |
85 | parser.add_option("--infoFile", dest="idx", help="GSE-GSM index file", metavar="FILE") | 84 | parser.add_option("--infoFile", dest="idx", help="GSE-GSM index file", metavar="FILE") |
86 | parser.add_option("--variant", dest="variant", help="Run variant", metavar="FILE") | 85 | parser.add_option("--variant", dest="variant", help="Run variant", metavar="FILE") |
87 | - parser.add_option("--S1", dest="S1", help="General features", action="store_true", default=False) | 86 | + parser.add_option("--S1", dest="S1", help="Inner word features", action="store_true", default=False) |
88 | - parser.add_option("--S2", dest="S2", help="Inner/Complete word features", action="store_true", default=False) | 87 | + parser.add_option("--S2", dest="S2", help="Complete word features", action="store_true", default=False) |
89 | parser.add_option("--S3", dest="S3", help="Extended context features", action="store_true", default=False) | 88 | parser.add_option("--S3", dest="S3", help="Extended context features", action="store_true", default=False) |
90 | parser.add_option("--S4", dest="S4", help="Semantic features", action="store_true", default=False) | 89 | parser.add_option("--S4", dest="S4", help="Semantic features", action="store_true", default=False) |
91 | - parser.add_option("--filterStopWords", dest="filterStopWords", help="Filtering stop words", action="store_true", default=False) | ||
92 | - parser.add_option("--filterSymbols", dest="filterSymbols", help="Filtering punctuation marks", action="store_true", default=False) | ||
93 | 90 | ||
94 | (options, args) = parser.parse_args() | 91 | (options, args) = parser.parse_args() |
95 | if len(args) > 0: | 92 | if len(args) > 0: |
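The S1-S4 switches above are plain optparse boolean flags. A minimal standalone sketch of the same pattern, with the help strings as revised in this commit:

from optparse import OptionParser

parser = OptionParser()
# Each flag defaults to False and flips to True when passed on the command line
parser.add_option("--S1", dest="S1", help="Inner word features", action="store_true", default=False)
parser.add_option("--S2", dest="S2", help="Complete word features", action="store_true", default=False)
parser.add_option("--S3", dest="S3", help="Extended context features", action="store_true", default=False)
parser.add_option("--S4", dest="S4", help="Semantic features", action="store_true", default=False)

(options, args) = parser.parse_args(["--S2"])
print(options.S2)  # True, matching this run's report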
... | @@ -109,39 +106,29 @@ if __name__ == "__main__": | ... | @@ -109,39 +106,29 @@ if __name__ == "__main__": |
109 | print("--infoPath Path of GSE-GSM index file : " + str(options.infoPath )) | 106 | print("--infoPath Path of GSE-GSM index file : " + str(options.infoPath )) |
110 | print("--infoFile GSE-GSM index file : " + str(options.idx )) | 107 | print("--infoFile GSE-GSM index file : " + str(options.idx )) |
111 | print("--variant Run variant : " + str(options.variant )) | 108 | print("--variant Run variant : " + str(options.variant )) |
112 | - print("--S1 General features : " + str(options.S1 )) | 109 | + print("--S1 Inner word features set : " + str(options.S1 )) |
113 | - print("--S2 Inner/Complete word features : " + str(options.S2 )) | 110 | + print("--S2 Complete word features : " + str(options.S2 )) |
114 | print("--S3 Extended context features : " + str(options.S3 )) | 111 | print("--S3 Extended context features : " + str(options.S3 )) |
115 | print("--S4 Semantic features : " + str(options.S4 )) | 112 | print("--S4 Semantic features : " + str(options.S4 )) |
116 | - print("--filteringStopWords Filtering stop words : " + str(options.filterStopWords )) | ||
117 | - print("--filterSymbols Filtering punctuation marks : " + str(options.filterSymbols )) | ||
118 | - | ||
119 | 113 | ||
120 | - symbols = ['.', ',', ':', ';', '?', '!', '\'', '"', '<', '>', '(', ')', '-', '_', '/', '\\', '¿', '¡', '+', '{', | ||
121 | - '}', '[', ']', '*', '%', '$', '#', '&', '°', '`', '...'] | ||
122 | - #print("Filtering symbols " + str(symbols) + ': ' + str(options.filterSymbols)) | ||
123 | ########################################### PROCESSING ########################################## | 114 | ########################################### PROCESSING ########################################## |
124 | print('-------------------------------- PROCESSING --------------------------------') | 115 | print('-------------------------------- PROCESSING --------------------------------') |
125 | 116 | ||
126 | - stopwords = [word for word in stopwords.words('english')] | ||
127 | # Read index mapping GSE file information | 117 | # Read index mapping GSE file information |
128 | idx = open(os.path.join(options.infoPath, options.idx), "r").readlines() | 118 | idx = open(os.path.join(options.infoPath, options.idx), "r").readlines() |
129 | 119 | ||
130 | - | ||
131 | ########################################### Read CRF model ########################################## | 120 | ########################################### Read CRF model ########################################## |
132 | t0 = time() | 121 | t0 = time() |
133 | print('Reading CRF model...') | 122 | print('Reading CRF model...') |
134 | crf = joblib.load(os.path.join(options.modelPath, options.modelName + '.mod')) | 123 | crf = joblib.load(os.path.join(options.modelPath, options.modelName + '.mod')) |
135 | print("Reading CRF model done in: %fs" % (time() - t0)) | 124 | print("Reading CRF model done in: %fs" % (time() - t0)) |
136 | 125 | ||
137 | - | ||
138 | ########################################### Reading sentences ########################################## | 126 | ########################################### Reading sentences ########################################## |
139 | print('Processing corpus...') | 127 | print('Processing corpus...') |
140 | t0 = time() | 128 | t0 = time() |
141 | labels = list(['Gtype', 'Gversion', 'Med', 'Phase', 'Strain', 'Substrain', 'Supp', 'Technique', 'Temp', 'OD', 'Anti', 'Agit', 'Air', 'Vess', 'pH']) | 129 | labels = list(['Gtype', 'Gversion', 'Med', 'Phase', 'Strain', 'Substrain', 'Supp', 'Technique', 'Temp', 'OD', 'Anti', 'Agit', 'Air', 'Vess', 'pH']) |
142 | 130 | ||
143 | - | 131 | + ########################################### Preprocessing ########################################### |
144 | - ##################################################################################### | ||
145 | # Walk directory to read files | 132 | # Walk directory to read files |
146 | for path, dirs, files in os.walk(options.inputPath): | 133 | for path, dirs, files in os.walk(options.inputPath): |
147 | # For each file in dir | 134 | # For each file in dir |
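The loading and featurization steps above follow the usual sklearn-crfsuite round trip: deserialize the fitted model with joblib, build per-token feature dicts, then predict. A minimal sketch under this commit's settings (S2 only, variant 10); the sample word|lemma|POS token format is an assumption inferred from the split('|') calls below:

import os
from time import time

import joblib
import training_validation_v14 as training  # repository module, assumed importable

modelPath = '/home/egaytan/automatic-extraction-growth-conditions/CRF/models'
modelName = 'model_Run3_v10_S1_False_S2_True_S3_False_S4_False_Run3_v10'

t0 = time()
crf = joblib.load(os.path.join(modelPath, modelName + '.mod'))
print("Reading CRF model done in: %fs" % (time() - t0))

# One whitespace-tokenized sentence; optparse passes --variant through as a string
sentence = 'acetate|acetate|NN 2|2|CD g/L|g/L|NN'.split()
X_input = [training.sent2features(sentence, False, True, False, False, '10')]
print(crf.predict(X_input)[0])  # one predicted tag per token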
... | @@ -149,74 +136,60 @@ if __name__ == "__main__": | ... | @@ -149,74 +136,60 @@ if __name__ == "__main__": |
149 | print("Preprocessing file..." + str(file)) | 136 | print("Preprocessing file..." + str(file)) |
150 | sentencesInputData = [] | 137 | sentencesInputData = [] |
151 | sentencesOutputDataI = [] | 138 | sentencesOutputDataI = [] |
152 | - sentencesOutputDataII = [] | 139 | + # Preprocessing input sentences |
153 | with open(os.path.join(options.inputPath, file), "r") as iFile: | 140 | with open(os.path.join(options.inputPath, file), "r") as iFile: |
154 | - lines = iFile.readlines() | 141 | + sentencesInputData = [ line.strip('\n').split() for line in iFile] |
155 | - for line in lines: | 142 | + # Save input sentences |
156 | - listLine = [] | ||
157 | - for token in line.strip('\n').split(): | ||
158 | - if options.filterStopWords: | ||
159 | - listToken = token.split('|') | ||
160 | - lemma = listToken[1] | ||
161 | - if lemma in stopwords: | ||
162 | - continue | ||
163 | - if options.filterSymbols: | ||
164 | - listToken = token.split('|') | ||
165 | - lemma = listToken[1] | ||
166 | - if lemma in symbols: | ||
167 | - if lemma == ',': | ||
168 | - print("Comma , identified") | ||
169 | - continue | ||
170 | - listLine.append(token) | ||
171 | - sentencesInputData.append(listLine) | ||
172 | X_input = [training.sent2features(s, options.S1, options.S2, options.S3, options.S4, options.variant) for s in sentencesInputData] | 143 | X_input = [training.sent2features(s, options.S1, options.S2, options.S3, options.S4, options.variant) for s in sentencesInputData] |
173 | print("Sentences input data: " + str(len(sentencesInputData))) | 144 | print("Sentences input data: " + str(len(sentencesInputData))) |
174 | 145 | ||
175 | - | ||
176 | ########################################### Predicting tags ########################################### | 146 | ########################################### Predicting tags ########################################### |
177 | t1 = time() | 147 | t1 = time() |
178 | print("Predicting tags with model...") | 148 | print("Predicting tags with model...") |
179 | y_pred = crf.predict(X_input) | 149 | y_pred = crf.predict(X_input) |
180 | - print("Prediction done in: %fs" % (time() - t1)) | ||
181 | 150 | ||
151 | + print("Prediction done in: %fs" % (time() - t1)) | ||
182 | 152 | ||
183 | ########################################### Tagging with CRF model ########################################### | 153 | ########################################### Tagging with CRF model ########################################### |
184 | print("Tagging file...") | 154 | print("Tagging file...") |
185 | lidx = 0 | 155 | lidx = 0 |
186 | - for line, tagLine in zip(lines, y_pred): | 156 | + for line, tagLine in zip(iFile.readlines(), y_pred): |
157 | + # unique tags | ||
187 | Ltags = set(labels).intersection(set(tagLine)) | 158 | Ltags = set(labels).intersection(set(tagLine)) |
188 | - outputLine = '' | 159 | + # Skip untagged sentence |
160 | + if Ltags == {'O'}: continue | ||
189 | line = line.strip('\n') | 161 | line = line.strip('\n') |
190 | - | 162 | + # starting empty sentence |
163 | + outputLine = '' | ||
164 | + # tag behind | ||
191 | tb = 'O' | 165 | tb = 'O' |
166 | + # per sentence word count | ||
192 | i = 0 | 167 | i = 0 |
193 | - ########################## one word sentences ########################## | 168 | + # Exception for one word sentences |
194 | - if len(tagLine)==1: | 169 | + if len(tagLine) == 1: |
195 | if tagLine[0] in labels: | 170 | if tagLine[0] in labels: |
171 | + # add start tagging signature | ||
196 | start = '<' + tagLine[0] + '> ' | 172 | start = '<' + tagLine[0] + '> ' |
197 | - end = '</' + tagLine[0] + '/>' | 173 | + # add end tagging signature |
174 | + end = '</' + tagLine[0] + '>' | ||
198 | word = line.split('|')[0] + ' ' | 175 | word = line.split('|')[0] + ' ' |
176 | + # save output tagged sentence | ||
199 | outputLine = start + word + end | 177 | outputLine = start + word + end |
200 | else: | 178 | else: |
201 | outputLine = line.split(' ')[0] | 179 | outputLine = line.split(' ')[0] |
202 | - ########################## Saving Sentence Ouput I ########################## | 180 | + # Saving Sentence Ouput I |
203 | sentencesOutputDataI.append(idx[lidx].replace('\n','\t') + outputLine + '\t' + ', '.join(Ltags)) | 181 | sentencesOutputDataI.append(idx[lidx].replace('\n','\t') + outputLine + '\t' + ', '.join(Ltags)) |
204 | - ########################## Saving Sentence Ouput II ########################## | 182 | + # Increase sentence counter |
205 | - sentencesOutputDataII.append(idx[lidx].replace('\n', '\t') + word.split('|')[0] + '\t' + tag) | ||
206 | lidx += 1 | 183 | lidx += 1 |
184 | + # Continue with the next sentence | ||
207 | continue | 185 | continue |
208 | - | 186 | + # Tagging sentences |
209 | - sentence = '' | ||
210 | - sb = False | ||
211 | for word,tag in zip(line.split(' '), tagLine): | 187 | for word,tag in zip(line.split(' '), tagLine): |
212 | # start tagging | 188 | # start tagging |
213 | if tag in labels and tb != tag: | 189 | if tag in labels and tb != tag: |
214 | - # start tagging | ||
215 | outputLine += '<' + tag + '> ' | 190 | outputLine += '<' + tag + '> ' |
216 | - sb = True | ||
217 | - sentence = word.split('|')[0] + ' ' | ||
218 | - tb = tag | ||
219 | outputLine += word.split('|')[0] + ' ' | 191 | outputLine += word.split('|')[0] + ' ' |
192 | + tb = tag | ||
220 | i += 1 | 193 | i += 1 |
221 | continue | 194 | continue |
222 | # end tagging | 195 | # end tagging |
... | @@ -224,32 +197,24 @@ if __name__ == "__main__": | ... | @@ -224,32 +197,24 @@ if __name__ == "__main__": |
224 | if i+1==len(tagLine): | 197 | if i+1==len(tagLine): |
225 | # end sentence | 198 | # end sentence |
226 | outputLine += word.split('|')[0] + ' ' | 199 | outputLine += word.split('|')[0] + ' ' |
227 | - outputLine += '</' + tag + '/> ' | 200 | + outputLine += '</' + tag + '> ' |
228 | - ########################## Saving Sentence Ouput II ########################## | ||
229 | - sentencesOutputDataII.append(idx[lidx].replace('\n', '\t') + sentence + word.split('|')[0] + '\t' +tag) | ||
230 | - sb = False | ||
231 | tb = 'O' | 201 | tb = 'O' |
232 | i += 1 | 202 | i += 1 |
233 | continue | 203 | continue |
234 | elif tag!=tagLine[i+1]: | 204 | elif tag!=tagLine[i+1]: |
235 | # start new tag | 205 | # start new tag |
236 | outputLine += word.split('|')[0] + ' ' | 206 | outputLine += word.split('|')[0] + ' ' |
237 | - outputLine += '</' + tag + '/> ' | 207 | + outputLine += '</' + tag + '> ' |
238 | - ########################## Saving Sentence Ouput II ########################## | ||
239 | - sentencesOutputDataII.append(idx[lidx].replace('\n', '\t') + sentence + word.split('|')[0] + '\t' +tag) | ||
240 | - sb = False | ||
241 | tb = 'O' | 208 | tb = 'O' |
242 | i += 1 | 209 | i += 1 |
243 | continue | 210 | continue |
244 | # word tagged | 211 | # word tagged |
245 | outputLine += word.split('|')[0] + ' ' | 212 | outputLine += word.split('|')[0] + ' ' |
246 | i += 1 | 213 | i += 1 |
247 | - if sb: | 214 | + # Saving Sentence Ouput I |
248 | - sentence+= word.split('|')[0] + ' ' | ||
249 | - ########################## Saving Sentence Ouput I ########################## | ||
250 | sentencesOutputDataI.append(idx[lidx].replace('\n', '\t') + outputLine+ '\t' +', '.join(Ltags)) | 215 | sentencesOutputDataI.append(idx[lidx].replace('\n', '\t') + outputLine+ '\t' +', '.join(Ltags)) |
251 | lidx += 1 | 216 | lidx += 1 |
252 | - | 217 | + print("\n".join(sentencesOutputDataI[1:3])) |
253 | ########################################### Save Output I ########################################## | 218 | ########################################### Save Output I ########################################## |
254 | print("Saving Ouput I...") | 219 | print("Saving Ouput I...") |
255 | with open(os.path.join(options.outputPath, options.outFileI + '_' + options.modelName + '.tsv'), "w") as oFileI: | 220 | with open(os.path.join(options.outputPath, options.outFileI + '_' + options.modelName + '.tsv'), "w") as oFileI: |
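The start/end signature logic above turns each run of identically tagged tokens into inline <Tag> ... </Tag> markup. A compact re-implementation of the same idea, offered as a hedged sketch rather than the committed loop:

def tag_sentence(words, tags, labels):
    # Wrap maximal runs of identically tagged words in <Tag> ... </Tag>
    out, prev = [], 'O'
    for word, tag in zip(words, tags):
        if prev in labels and tag != prev:
            out.append('</' + prev + '>')  # close the run that just ended
        if tag in labels and tag != prev:
            out.append('<' + tag + '>')    # open a new run
        out.append(word.split('|')[0])     # keep only the surface word
        prev = tag
    if prev in labels:
        out.append('</' + prev + '>')      # close a run ending the sentence
    return ' '.join(out)

# Hypothetical input mirroring the corpus token format
words = 'wild|wild|JJ type|type|NN in|in|IN LB|LB|NN'.split()
print(tag_sentence(words, ['Gtype', 'Gtype', 'O', 'Med'], ['Gtype', 'Med']))
# <Gtype> wild type </Gtype> in <Med> LB </Med>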
... | @@ -259,19 +224,29 @@ if __name__ == "__main__": | ... | @@ -259,19 +224,29 @@ if __name__ == "__main__": |
259 | oline = line.replace('LDR','(') | 224 | oline = line.replace('LDR','(') |
260 | oline = oline.replace('RDR',')') | 225 | oline = oline.replace('RDR',')') |
261 | oFileI.write(oline + '\n') | 226 | oFileI.write(oline + '\n') |
227 | + | ||
262 | ########################################### Save Output II ########################################## | 228 | ########################################### Save Output II ########################################## |
263 | print("Saving Ouput II...") | 229 | print("Saving Ouput II...") |
264 | with open(os.path.join(options.outputPath, options.outFileII + '_' + options.modelName + '.tsv'), "w") as oFileII: | 230 | with open(os.path.join(options.outputPath, options.outFileII + '_' + options.modelName + '.tsv'), "w") as oFileII: |
265 | - for line in sentencesOutputDataII: | 231 | + for line in sentencesOutputDataI: |
266 | - #print(line) | ||
267 | oline = line.replace('LDR','(') | 232 | oline = line.replace('LDR','(') |
268 | oline = oline.replace('RDR',')') | 233 | oline = oline.replace('RDR',')') |
269 | - oFileII.write(oline + '\n') | 234 | + for ttex, tag in re.findall(r'<[^>]+>([^<]+)</([^>]+)>', oline): |
235 | + lline = oline.split('\t')[0:-2] + [ttex, tag] | ||
236 | + nline = '\t'.join(lline) | ||
237 | + oFileII.write(nline + '\n') | ||
238 | + | ||
270 | ########################################### Save Output III ########################################## | 239 | ########################################### Save Output III ########################################## |
271 | print("Saving Ouput III...") | 240 | print("Saving Ouput III...") |
272 | with open(os.path.join(options.outputPath, options.outFileIII + '_' + options.modelName + '.tsv'), "w") as oFileIII: | 241 | with open(os.path.join(options.outputPath, options.outFileIII + '_' + options.modelName + '.tsv'), "w") as oFileIII: |
273 | - for line, tagLine in zip(lines, y_pred): | 242 | + for line, tagLine in zip(iFile.readlines(), y_pred): |
274 | oline = [ w.split('|')[0].replace('LDR','(').replace('LDR','(')+'|'+tag for w,tag in zip(line.split(' '), tagLine)] | 243 | oline = [ w.split('|')[0].replace('LDR','(').replace('LDR','(')+'|'+tag for w,tag in zip(line.split(' '), tagLine)] |
275 | 244 | ||
276 | oFileIII.write(' '.join(oline) + '\n') | 245 | oFileIII.write(' '.join(oline) + '\n') |
246 | + | ||
247 | + ########################################### Save Probs ########################################## | ||
248 | + y_probs = crf.predict_marginals(X_input) | ||
249 | + # from https://stackoverflow.com/questions/7100125/storing-python-dictionaries | ||
250 | + with open(os.path.join(options.outputPath, 'crf_probs.json'), 'w') as fp: | ||
251 | + json.dump(y_probs, fp) | ||
277 | print("Processing corpus done in: %fs" % (time() - t0)) | 252 | print("Processing corpus done in: %fs" % (time() - t0))
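Output II above derives one (mention, tag) row per tagged span by running a regex over the Output I lines. A minimal sketch with a hypothetical Output I row; the .strip() is added here only to tidy the mention column:

import re

# Hypothetical Output I row: index columns, tagged sentence, tag list
oline = 'GSE1234\tGSM5678\t<Gtype> wild type </Gtype> grown in <Med> LB </Med>\tGtype, Med'

# Same pattern as the commit: capture each tagged span and its label
for ttex, tag in re.findall(r'<[^>]+>([^<]+)</([^>]+)>', oline):
    lline = oline.split('\t')[0:-2] + [ttex.strip(), tag]
    print('\t'.join(lline))
# GSE1234  GSM5678  wild type  Gtype
# GSE1234  GSM5678  LB         Med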
This diff could not be displayed because it is too large.
File mode changed
This diff could not be displayed because it is too large.
File mode changed
This diff could not be displayed because it is too large.
File mode changed
predict-annot/output/crf_probs.json
0 → 100644
This file is too large to display.
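crf_probs.json serializes the output of predict_marginals, which returns one {label: probability} dict per token, grouped by sentence. A sketch of reading the dump back (path as committed):

import json
import os

outputPath = '/home/egaytan/automatic-extraction-growth-conditions/predict-annot/output'
with open(os.path.join(outputPath, 'crf_probs.json')) as fp:
    y_probs = json.load(fp)  # list of sentences, each a list of per-token dicts

# Most probable label for each token of the first sentence
for token_probs in y_probs[0]:
    best = max(token_probs, key=token_probs.get)
    print(best, token_probs[best])

The report's jump from roughly 56s to 247s of total processing is consistent with this extra marginals pass plus the JSON dump, though the log does not time that step separately.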
1 | -------------------------------- PARAMETERS -------------------------------- | 1 | -------------------------------- PARAMETERS -------------------------------- |
2 | --inputPath Path of training data set : /home/egaytan/automatic-extraction-growth-conditions/predict-annot/input/ | 2 | --inputPath Path of training data set : /home/egaytan/automatic-extraction-growth-conditions/predict-annot/input/ |
3 | --outputPath Output path to place output files: /home/egaytan/automatic-extraction-growth-conditions/predict-annot/output/ | 3 | --outputPath Output path to place output files: /home/egaytan/automatic-extraction-growth-conditions/predict-annot/output/ |
4 | ---outputFileI Output tagged file I : annot-input_bg_outputI_v4 | 4 | +--outputFileI Output tagged file I : annot-input_bg_outputI_v5.txt |
5 | ---outputFileII Output tagged file II : annot-input_bg_outputII_v4 | 5 | +--outputFileII Output tagged file II : annot-input_bg_outputII_v5 |
6 | ---outputFileII Output tagged file III : annot-input_bg_outputIII_v4 | 6 | +--outputFileII Output tagged file III : annot-input_bg_outputIII_v5 |
7 | --modelPath Path to read CRF model : /home/egaytan/automatic-extraction-growth-conditions/CRF/models | 7 | --modelPath Path to read CRF model : /home/egaytan/automatic-extraction-growth-conditions/CRF/models |
8 | --modelName Model name : model_Run3_v10_S1_False_S2_True_S3_False_S4_False_Run3_v10 | 8 | --modelName Model name : model_Run3_v10_S1_False_S2_True_S3_False_S4_False_Run3_v10 |
9 | --infoPath Path of GSE-GSM index file : /home/egaytan/automatic-extraction-growth-conditions/predict-annot/mapping | 9 | --infoPath Path of GSE-GSM index file : /home/egaytan/automatic-extraction-growth-conditions/predict-annot/mapping |
10 | --infoFile GSE-GSM index file : bg_sentences_midx_v4.txt | 10 | --infoFile GSE-GSM index file : bg_sentences_midx_v4.txt |
11 | ---variant Run variant : 13 | 11 | +--variant Run variant : 10 |
12 | ---S1 General features : True | 12 | +--S1 Inner word features set : False |
13 | ---S2 Inner/Complete word features : False | 13 | +--S2 Complete word features : True |
14 | --S3 Extended context features : False | 14 | --S3 Extended context features : False |
15 | ---S4 Semantic features : True | 15 | +--S4 Semantic features : False |
16 | ---filteringStopWords Filtering stop words : False | ||
17 | ---filterSymbols Filtering punctuation marks : False | ||
18 | -------------------------------- PROCESSING -------------------------------- | 16 | -------------------------------- PROCESSING -------------------------------- |
19 | Reading CRF model... | 17 | Reading CRF model... |
20 | -Reading CRF model done in: 0.009463s | 18 | +Reading CRF model done in: 0.009485s |
21 | Processing corpus... | 19 | Processing corpus... |
22 | Preprocessing file...annot-input_bg_v4.txt | 20 | Preprocessing file...annot-input_bg_v4.txt |
23 | Sentences input data: 90688 | 21 | Sentences input data: 90688 |
24 | Predicting tags with model... | 22 | Predicting tags with model... |
25 | -Prediction done in: 26.367272s | 23 | +Prediction done in: 27.326342s |
26 | Tagging file... | 24 | Tagging file... |
25 | + | ||
27 | Saving Ouput I... | 26 | Saving Ouput I... |
28 | Saving Ouput II... | 27 | Saving Ouput II... |
29 | Saving Ouput III... | 28 | Saving Ouput III... |
30 | -Processing corpus done in: 56.584394s | 29 | +Processing corpus done in: 247.353067s