Estefani Gaytan Nunez

up

1 +python3 training_validation_v9.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70_v4.txt --testFile test-data-set-30_v4.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --Gridname Run1 --version _v1
2 +python3 training_validation_v9.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70_v4.txt --testFile test-data-set-30_v4.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --Gridname Run2 --version _v1 --S1
3 +python3 training_validation_v9.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70_v4.txt --testFile test-data-set-30_v4.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --Gridname Run3 --version _v1 --S2
4 +python3 training_validation_v9.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70_v4.txt --testFile test-data-set-30_v4.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --Gridname Run4 --version _v1 --S1 --S2
5 +python3 training_validation_v9.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70_v4.txt --testFile test-data-set-30_v4.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --Gridname Run5 --version _v1 --S3
6 +python3 training_validation_v9.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70_v4.txt --testFile test-data-set-30_v4.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --Gridname Run6 --version _v1 --S1 --S3
7 +python3 training_validation_v9.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70_v4.txt --testFile test-data-set-30_v4.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --Gridname Run7 --version _v1 --S2 --S3
8 +python3 training_validation_v9.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70_v4.txt --testFile test-data-set-30_v4.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --Gridname Run8 --version _v1 --S1 --S2 --S3
...@@ -123,6 +123,7 @@ def word2features(sent, i, S1, S2): ...@@ -123,6 +123,7 @@ def word2features(sent, i, S1, S2):
123 '-1:postag': postag1, 123 '-1:postag': postag1,
124 }) 124 })
125 125
126 +
126 if i < len(sent) - 1: 127 if i < len(sent) - 1:
127 listElem = sent[i + 1].split('|') 128 listElem = sent[i + 1].split('|')
128 lemma1 = listElem[1] 129 lemma1 = listElem[1]
...@@ -135,6 +136,35 @@ def word2features(sent, i, S1, S2): ...@@ -135,6 +136,35 @@ def word2features(sent, i, S1, S2):
135 '+1:postag': postag1, 136 '+1:postag': postag1,
136 }) 137 })
137 138
139 + '''
140 + #================== S6 ======================#
141 +
142 + if i > 1:
143 + listElem = sent[i - 2].split('|')
144 + lemma1 = listElem[1]
145 + postag1 = listElem[2]
146 +
147 + features.update({
148 + #LemaG posterior
149 + '-2:lemma': lemma1,
150 + #PostG posterior
151 + '-2:postag': postag1,
152 + })
153 +
154 + if len(sent) - 2:
155 + listElem = sent[i + 2].split('|')
156 + lemma1 = listElem[1]
157 + postag1 = listElem[2]
158 +
159 + features.update({
160 + #LemaG posterior
161 + '+2:lemma': lemma1,
162 + #PostG posterior
163 + '+2:postag': postag1,
164 + })
165 +
166 + '''
167 +
138 #====================== S1 ======================# 168 #====================== S1 ======================#
139 if S1: 169 if S1:
140 print("S1") 170 print("S1")
......
1 +# -*- coding: UTF-8 -*-
2 +
3 +import os
4 +from itertools import chain
5 +from optparse import OptionParser
6 +from time import time
7 +from collections import Counter
8 +import re
9 +
10 +import nltk
11 +import sklearn
12 +import scipy.stats
13 +import sys
14 +
15 +from sklearn.externals import joblib
16 +from sklearn.metrics import make_scorer
17 +from sklearn.cross_validation import cross_val_score
18 +from sklearn.grid_search import RandomizedSearchCV
19 +
20 +import sklearn_crfsuite
21 +from sklearn_crfsuite import scorers
22 +from sklearn_crfsuite import metrics
23 +
24 +from nltk.corpus import stopwords
25 +
26 +
27 +# Objective
28 +# Training and evaluation of CRFs with sklearn-crfsuite.
29 +#
30 +# Input parameters
31 +# --inputPath=PATH Path of training and test data set
32 +# --trainingFile File with training data set
33 +# --testFile File with test data set
34 +# --outputPath=PATH Output path to place output files
35 +# --Gridname Run label, used as the prefix of the report file name
36 +# --version Version Report
37 +
38 +# Output
39 +# 1) Best model
40 +# 2) Report
41 +
42 +# Examples
43 +# python training_validation_v9.py
44 +# --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets
45 +# --trainingFile training-data-set-70.txt
46 +# --testFile test-data-set-30.txt
47 +# --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/
48 +# --version _v1
49 +# python3 training_validation_v9.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70.txt --testFile test-data-set-30.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --Gridname Run1 --version _v1 --S1 --S2 --S3
50 +
51 +##################################################################
52 +# FEATURES #
53 +##################################################################
54 +
55 +#================== COMPLETE WORD FEATURES ======================#
56 +
def isGreek(word):
    """Return True when ``word`` is exactly one letter of the Greek alphabet.

    Upper- and lower-case letters are accepted, including the final-sigma
    form. Any other value (empty string, Latin letters, longer strings)
    yields False.
    """
    upper_case = ('Α', 'Β', 'Γ', 'Δ', 'Ε', 'Ζ', 'Η', 'Θ', 'Ι', 'Κ', 'Λ', 'Μ',
                  'Ν', 'Ξ', 'Ο', 'Π', 'Ρ', 'Σ', 'Τ', 'Υ', 'Φ', 'Χ', 'Ψ', 'Ω')
    lower_case = ('α', 'β', 'γ', 'δ', 'ε', 'ζ', 'η', 'θ', 'ι', 'κ', 'λ', 'μ',
                  'ν', 'ξ', 'ο', 'π', 'ρ', 'ς', 'σ', 'τ', 'υ', 'φ', 'χ', 'ψ', 'ω')
    # Whole-value comparison against each letter, not a substring search.
    return word in upper_case or word in lower_case
65 +
66 +#================ INNER OF THE WORD FEATURES ====================#
67 +
def hNumber(word):
    """Return True when ``word`` contains at least one digit character."""
    return any(char.isdigit() for char in word)
74 +
def symb(word):
    """Return True when ``word`` contains at least one symbol character.

    A symbol is any character that is neither a digit nor a letter. Greek
    letters are letters for ``str.isalpha`` and are therefore not symbols.

    The previous implementation counted Greek letters twice (once as
    alphabetic, once as Greek), so a Greek letter could cancel out a real
    symbol and e.g. ``"α-"`` was reported as symbol-free.

    Returns False for the empty string.
    """
    return any(not (char.isdigit() or char.isalpha()) for char in word)
86 +
def hUpper(word):
    """Return True when ``word`` contains at least one upper-case character."""
    return any(char.isupper() for char in word)
92 +
def hLower(word):
    """Return True when ``word`` contains at least one lower-case character."""
    return any(char.islower() for char in word)
98 +
def hGreek(word):
    """Return True when at least one character of ``word`` is a Greek letter.

    Each character is checked individually with ``isGreek``.
    """
    return any(isGreek(char) for char in word)
104 +
105 +#================================================================#
106 +
def word2features(sent, i, S1, S2, S3):
    """Build the feature dictionary for the token at position ``i`` of ``sent``.

    Every element of ``sent`` is a pipe-delimited string; the first three
    fields are read here as word, lemma and POS tag.

    Parameters:
        sent: list of pipe-delimited token strings.
        i:    index of the token to describe.
        S1:   when true, add inner-word features (case/Greek/symbol flags and
              lemma / POS-tag prefixes).
        S2:   when true, add complete-word features (surface form, whole-word
              flags, previous and next surface forms).
        S3:   when true, add extended context (lemma and POS tag two
              positions before and after).

    Returns:
        dict mapping feature name to value.

    Fixes over the previous version: the S1/S2/S3 lines were written as
    ``features[key]: value`` (an annotation, which stores nothing), so these
    feature levels were silently ignored. They are now real assignments, and
    the defects that the annotations were hiding are corrected: undefined
    ``post`` / ``post02``, ``hLower`` computed with ``hUpper``,
    ``word.isLower()``, and the next word taken from the current word's first
    character.
    """
    def fields(position):
        # Split one token into its pipe-delimited annotation fields.
        return sent[position].split('|')

    current = fields(i)
    word = current[0]
    lemma = current[1]
    postag = current[2]

    has_previous = i > 0
    has_next = i < len(sent) - 1
    previous = fields(i - 1) if has_previous else None
    following = fields(i + 1) if has_next else None

    #=========================== G =============================#
    ## General features: always present.
    features = {
        'lemma': lemma,
        'postag': postag,
    }
    if has_previous:
        features.update({
            '-1:lemma': previous[1],
            '-1:postag': previous[2],
        })
    if has_next:
        features.update({
            '+1:lemma': following[1],
            '+1:postag': following[2],
        })

    #=========================== S1 =============================#
    ## Inner word features.
    if S1:
        features.update({
            'hUpper': hUpper(word),
            'hLower': hLower(word),
            'hGreek': hGreek(word),
            'symb': symb(word),
            # first character of lemma and POS tag
            'lemma[:1]': lemma[:1],
            'postag[:1]': postag[:1],
            # first two characters of lemma and POS tag
            'lemma[:2]': lemma[:2],
            'postag[:2]': postag[:2],
        })

    #=========================== S2 =============================#
    ## Complete word features.
    if S2:
        features.update({
            'word': word,
            'isUpper': word.isupper(),
            'isLower': word.islower(),
            'isGreek': isGreek(word),
            'isNumber': word.isdigit(),
        })
        if has_previous:
            features['-1:word'] = previous[0]
        if has_next:
            features['+1:word'] = following[0]

    #=========================== S3 =============================#
    ## Extended context features.
    if S3:
        if i > 1:
            two_before = fields(i - 2)
            features['-2:lemma'] = two_before[1]
            features['-2:post'] = two_before[2]
        if i < len(sent) - 2:
            two_after = fields(i + 2)
            features['+2:lemma'] = two_after[1]
            features['+2:post'] = two_after[2]

    return features
227 +
def sent2features(sent, S1, S2, S3):
    """Return one feature dictionary per token of ``sent``, in token order.

    ``S1``, ``S2`` and ``S3`` are forwarded unchanged to ``word2features``.
    """
    return [
        word2features(sent, position, S1, S2, S3)
        for position, _ in enumerate(sent)
    ]
231 +
def sent2labels(sent):
    """Return the label of every token in ``sent``.

    The label is the fourth pipe-delimited field (index 3) of each token.
    """
    labels = []
    for token in sent:
        annotations = token.split('|')
        labels.append(annotations[3])
    return labels
235 +
def sent2tokens(sent):
    """Return the surface form (first field) of every token in ``sent``.

    Tokens in this script are pipe-delimited strings, which the previous
    tuple-unpacking implementation could not handle (it raised ValueError).
    Pipe-delimited strings are now split, and sequence-style tokens such as
    ``(token, postag, label)`` are still accepted for backward compatibility.
    """
    return [
        elem.split('|')[0] if isinstance(elem, str) else elem[0]
        for elem in sent
    ]
238 +
def print_transitions(trans_features, f):
    """Write one line per transition to the writable object ``f``.

    ``trans_features`` is an iterable of ``((label_from, label_to), weight)``
    pairs. Labels are padded to fixed widths and the weight is written with
    six decimals.
    """
    row = "{:6} -> {:7} {:0.6f}\n"
    for transition, weight in trans_features:
        source_label, target_label = transition
        f.write(row.format(source_label, target_label, weight))
242 +
def print_state_features(state_features, f):
    """Write one line per state feature to the writable object ``f``.

    ``state_features`` is an iterable of ``((attr, label), weight)`` pairs.
    Each line holds the weight (six decimals), the padded label and the
    UTF-8 encoded attribute name.
    """
    row = "{:0.6f} {:8} {}\n"
    for feature, weight in state_features:
        attr, label = feature
        # NOTE(review): under Python 3 the encoded value is a bytes object, so
        # it is rendered as b'...' with escaped non-ASCII bytes. Confirm
        # whether the plain text form is wanted before changing this.
        encoded_attr = attr.encode("utf-8")
        f.write(row.format(weight, label, encoded_attr))
246 +
247 +
248 +__author__ = 'egaytan'
249 +
250 +##################################################################
251 +# MAIN PROGRAM #
252 +##################################################################
253 +
def _read_sentences(path, excluded_lemmas):
    """Read one sentence per line from ``path`` as a list of token strings.

    Tokens are whitespace-separated. When ``excluded_lemmas`` is non-empty,
    a token is dropped if its second pipe-delimited field (the lemma) is in
    that collection. Blank lines produce empty sentences.
    """
    sentences = []
    with open(path, "r", encoding="utf-8") as iFile:
        for line in iFile:
            keptTokens = []
            for token in line.strip('\n').split():
                if excluded_lemmas and token.split('|')[1] in excluded_lemmas:
                    continue
                keptTokens.append(token)
            sentences.append(keptTokens)
    return sentences


if __name__ == "__main__":
    ## Defining parameters
    parser = OptionParser()
    parser.add_option("--inputPath", dest="inputPath", help="Path of training data set", metavar="PATH")
    parser.add_option("--outputPath", dest="outputPath", help="Output path to place output files", metavar="PATH")
    parser.add_option("--trainingFile", dest="trainingFile", help="File with training data set", metavar="FILE")
    parser.add_option("--testFile", dest="testFile", help="File with test data set", metavar="FILE")
    parser.add_option("--Gridname", dest="Gridname", help="Report number run", metavar="FILE")
    parser.add_option("--version", dest="version", help="Report file", metavar="FILE")
    parser.add_option("--S1", dest="S1", help="Future Type", action="store_true", default=False)
    parser.add_option("--S2", dest="S2", help="Future Type", action="store_true", default=False)
    parser.add_option("--S3", dest="S3", help="Future Type", action="store_true", default=False)
    parser.add_option("--excludeStopWords", dest="excludeStopWords", help="Exclude stop words", action="store_true", default=False)
    parser.add_option("--excludeSymbols", dest="excludeSymbols", help="Exclude punctuation marks", action="store_true", default=False)

    (options, args) = parser.parse_args()
    if len(args) > 0:
        # parser.error() prints the message and terminates the program itself.
        parser.error("Unexpected positional arguments: " + " ".join(args))

    # Fail early with a clear message instead of a TypeError while building
    # paths or log lines from a missing option.
    for requiredOption in ("inputPath", "outputPath", "trainingFile", "testFile"):
        if getattr(options, requiredOption) is None:
            parser.error("Missing required option --" + requiredOption)

    print('-------------------------------- PARAMETERS --------------------------------')
    print("Path of training data set: " + options.inputPath)
    print("File with training data set: " + str(options.trainingFile))
    print("Path of test data set: " + options.inputPath)
    print("File with test data set: " + str(options.testFile))
    print("Exclude stop words: " + str(options.excludeStopWords))
    print("Levels: " + str(options.S1) + " " + str(options.S2) + " " + str(options.S3))
    print("Report file: " + str(options.version))

    symbols = ['.', ',', ':', ';', '?', '!', '\'', '"', '<', '>', '(', ')', '-', '_', '/', '\\', '¿', '¡', '+', '{',
               '}', '[', ']', '*', '%', '$', '#', '&', '°', '`', '...']
    print("Exclude symbols: " + str(options.excludeSymbols))

    print('-------------------------------- PROCESSING --------------------------------')
    print('Reading corpus...')
    t0 = time()

    # Lemmas to drop while reading. The stop-word corpus is loaded only when
    # requested, and the imported ``stopwords`` module name is left intact.
    excludedLemmas = set()
    if options.excludeStopWords:
        excludedLemmas.update(stopwords.words('english'))
    if options.excludeSymbols:
        excludedLemmas.update(symbols)

    sentencesTrainingData = _read_sentences(
        os.path.join(options.inputPath, options.trainingFile), excludedLemmas)
    print(" Sentences training data: " + str(len(sentencesTrainingData)))

    sentencesTestData = _read_sentences(
        os.path.join(options.inputPath, options.testFile), excludedLemmas)
    print(" Sentences test data: " + str(len(sentencesTestData)))

    print("Reading corpus done in: %fs" % (time() - t0))

    # Show the features of the first token of each data set as a sanity check.
    for sampleSentences in (sentencesTrainingData, sentencesTestData):
        if sampleSentences and sampleSentences[0]:
            print(sent2features(sampleSentences[0], options.S1, options.S2, options.S3)[0])
    t0 = time()

    X_train = [sent2features(s, options.S1, options.S2, options.S3) for s in sentencesTrainingData]
    y_train = [sent2labels(s) for s in sentencesTrainingData]

    X_test = [sent2features(s, options.S1, options.S2, options.S3) for s in sentencesTestData]
    y_test = [sent2labels(s) for s in sentencesTestData]

    # Hyperparameter optimization: c1 and c2 are sampled by the randomized
    # search below instead of being fixed on the estimator.
    crf = sklearn_crfsuite.CRF(
        algorithm='lbfgs',
        max_iterations=100,
        all_possible_transitions=True
    )
    params_space = {
        'c1': scipy.stats.expon(scale=0.5),
        'c2': scipy.stats.expon(scale=0.05),
    }

    # Fixed label set used for scoring; 'O' is not part of it.
    labels = ['Gtype', 'Gversion', 'Med', 'Phase', 'Strain', 'Substrain', 'Supp', 'Technique', 'Temp', 'OD', 'Anti', 'Agit', 'Air', 'Vess', 'pH']

    # use the same metric for evaluation
    f1_scorer = make_scorer(metrics.flat_f1_score,
                            average='weighted', labels=labels)

    # Create the output folders before the (long) search so a missing
    # directory cannot discard the training result.
    reportsPath = os.path.join(options.outputPath, "reports")
    modelsPath = os.path.join(options.outputPath, "models")
    os.makedirs(reportsPath, exist_ok=True)
    os.makedirs(modelsPath, exist_ok=True)

    # search
    rs = RandomizedSearchCV(crf, params_space,
                            cv=10,
                            verbose=3,
                            n_jobs=-1,
                            n_iter=20,
                            scoring=f1_scorer)
    rs.fit(X_train, y_train)

    nameReport = str(options.Gridname) + str(options.version) + '.txt'
    reportFile = os.path.join(reportsPath, "report_" + nameReport)
    with open(reportFile, mode="w", encoding="utf-8") as oFile:
        oFile.write("********** TRAINING AND TESTING REPORT **********\n")
        oFile.write("Training file: " + options.trainingFile + '\n')
        oFile.write('\n')
        oFile.write('best params:' + str(rs.best_params_) + '\n')
        oFile.write('best CV score:' + str(rs.best_score_) + '\n')
        oFile.write('model size: {:0.2f}M\n'.format(rs.best_estimator_.size_ / 1000000))

    print("Training done in: %fs" % (time() - t0))
    t0 = time()

    # Update best crf
    crf = rs.best_estimator_

    # Saving model
    print(" Saving training model...")
    t1 = time()
    # S3 is part of the name so runs that differ only in S3 do not overwrite
    # each other's model file.
    nameModel = ('model_S1_' + str(options.S1) + '_S2_' + str(options.S2)
                 + '_S3_' + str(options.S3) + str(options.version) + '.mod')
    joblib.dump(crf, os.path.join(modelsPath, nameModel))
    print(" Saving training model done in: %fs" % (time() - t1))

    # Evaluation against test data
    y_pred = crf.predict(X_test)
    print("*********************************")
    print("Prediction done in: %fs" % (time() - t0))

    with open(reportFile, mode="a", encoding="utf-8") as oFile:
        oFile.write('\n')
        oFile.write("Flat F1: " + str(metrics.flat_f1_score(y_test, y_pred, average='weighted', labels=labels)))
        oFile.write('\n')
        # Orders labels by their text after the first character, then by the
        # first character.
        sorted_labels = sorted(
            labels,
            key=lambda name: (name[1:], name[0])
        )
        oFile.write(metrics.flat_classification_report(
            y_test, y_pred, labels=sorted_labels, digits=3
        ))
        oFile.write('\n')

        transitionWeights = Counter(crf.transition_features_)
        stateWeights = Counter(crf.state_features_)

        oFile.write("\nTop likely transitions:\n")
        print_transitions(transitionWeights.most_common(50), oFile)
        oFile.write('\n')

        oFile.write("\nTop unlikely transitions:\n")
        print_transitions(transitionWeights.most_common()[-50:], oFile)
        oFile.write('\n')

        oFile.write("\nTop positive:\n")
        print_state_features(stateWeights.most_common(200), oFile)
        oFile.write('\n')

        oFile.write("\nTop negative:\n")
        print_state_features(stateWeights.most_common()[-200:], oFile)
        oFile.write('\n')
452 +
No preview for this file type
1 +********** TRAINING AND TESTING REPORT **********
2 +Training file: training-data-set-70_v4.txt
3 +
4 +best params:{'c1': 0.025778789677348212, 'c2': 0.0018714562014074947}
5 +best CV score:0.8085092726655764
6 +model size: 0.08M
7 +
8 +Flat F1: 0.8137583902797803
9 + precision recall f1-score support
10 +
11 + OD 1.000 0.405 0.577 37
12 + pH 1.000 1.000 1.000 12
13 + Technique 0.880 1.000 0.936 22
14 + Med 0.852 0.912 0.881 57
15 + Temp 0.818 1.000 0.900 18
16 + Vess 0.000 0.000 0.000 0
17 + Agit 0.000 0.000 0.000 0
18 + Phase 1.000 0.947 0.973 19
19 + Air 0.920 0.742 0.821 62
20 + Anti 1.000 0.444 0.615 9
21 + Strain 1.000 1.000 1.000 1
22 + Gtype 0.905 0.811 0.856 106
23 + Substrain 0.000 0.000 0.000 1
24 + Supp 0.883 0.669 0.762 136
25 + Gversion 0.000 0.000 0.000 0
26 +
27 +avg / total 0.904 0.760 0.814 480
28 +
29 +
30 +Top likely transitions:
31 +OD -> OD 6.458139
32 +Agit -> Agit 6.258931
33 +Anti -> Anti 6.237003
34 +Temp -> Temp 6.159227
35 +Med -> Med 5.670034
36 +O -> O 5.195209
37 +Air -> Air 5.178796
38 +Gversion -> Gversion 5.056321
39 +Gtype -> Gtype 4.897516
40 +Phase -> Phase 4.866866
41 +Technique -> Technique 4.086604
42 +Supp -> Supp 3.969422
43 +pH -> pH 2.272089
44 +Substrain -> Gtype 1.512606
45 +O -> Supp 1.281866
46 +Gtype -> Supp 1.254695
47 +Technique -> Air 1.119472
48 +O -> Technique 1.098084
49 +O -> Anti 0.739732
50 +O -> Gtype 0.595581
51 +O -> Gversion 0.361965
52 +O -> Temp 0.359101
53 +Gtype -> pH 0.252154
54 +O -> Strain 0.203795
55 +Air -> O 0.106227
56 +Gtype -> Air 0.052216
57 +O -> pH 0.007039
58 +Gversion -> Supp -0.000001
59 +pH -> Supp -0.000232
60 +Strain -> O -0.001882
61 +Phase -> Supp -0.055890
62 +Phase -> Air -0.056319
63 +OD -> Phase -0.082524
64 +O -> Phase -0.121225
65 +O -> OD -0.123240
66 +Gtype -> Phase -0.163289
67 +Temp -> O -0.187600
68 +Med -> O -0.241653
69 +Gtype -> Gversion -0.242298
70 +pH -> O -0.266321
71 +OD -> Supp -0.290317
72 +Air -> Gtype -0.354319
73 +Phase -> Temp -0.371875
74 +Anti -> O -0.373128
75 +Phase -> pH -0.374040
76 +Phase -> Gtype -0.388060
77 +Technique -> Gtype -0.398116
78 +Gversion -> O -0.453455
79 +Air -> Phase -0.500367
80 +Supp -> Phase -0.539395
81 +
82 +
83 +Top unlikely transitions:
84 +Med -> O -0.241653
85 +Gtype -> Gversion -0.242298
86 +pH -> O -0.266321
87 +OD -> Supp -0.290317
88 +Air -> Gtype -0.354319
89 +Phase -> Temp -0.371875
90 +Anti -> O -0.373128
91 +Phase -> pH -0.374040
92 +Phase -> Gtype -0.388060
93 +Technique -> Gtype -0.398116
94 +Gversion -> O -0.453455
95 +Air -> Phase -0.500367
96 +Supp -> Phase -0.539395
97 +O -> Med -0.558012
98 +Technique -> O -0.563680
99 +Technique -> pH -0.572119
100 +OD -> Air -0.575190
101 +Phase -> O -0.591227
102 +Med -> Anti -0.624886
103 +Anti -> OD -0.642709
104 +O -> Air -0.644641
105 +Phase -> Technique -0.707027
106 +Supp -> pH -0.714103
107 +Gtype -> O -0.767223
108 +Supp -> Gversion -0.770430
109 +Supp -> Anti -0.833703
110 +Supp -> Technique -0.866289
111 +Gtype -> Med -0.879350
112 +Supp -> OD -0.892817
113 +Agit -> O -0.961537
114 +Phase -> Med -0.969283
115 +Temp -> Med -1.069687
116 +Air -> Temp -1.156985
117 +Air -> Supp -1.269357
118 +OD -> O -1.314886
119 +Supp -> Air -1.325800
120 +Technique -> OD -1.524590
121 +OD -> Med -1.583386
122 +Supp -> Gtype -1.630169
123 +Gtype -> OD -1.657535
124 +Supp -> O -1.680041
125 +Supp -> Temp -1.709825
126 +Air -> Med -1.823169
127 +OD -> Gtype -1.852919
128 +Gtype -> Anti -1.909177
129 +Substrain -> O -1.923623
130 +Air -> OD -2.860427
131 +Med -> Supp -2.964145
132 +Supp -> Med -3.290588
133 +Phase -> OD -4.227426
134 +
135 +
136 +Top positive:
137 +11.662855 Supp b'lemma:Iron'
138 +10.511953 O b'lemma:_'
139 +10.230310 Phase b'lemma:stationary'
140 +9.183606 Air b'lemma:anaerobic'
141 +9.126291 O b'lemma:1'
142 +8.974301 Med b'lemma:MOPS'
143 +8.702900 O b'lemma:rpob'
144 +8.676761 Phase b'lemma:mid-log'
145 +8.568886 Air b'lemma:aerobic'
146 +8.230748 Technique b'lemma:chipseq'
147 +8.195390 Technique b'lemma:ChIP-exo'
148 +8.058605 O b'-1:lemma:tag'
149 +7.748813 Supp b'lemma:nitrate'
150 +7.383352 Med b'lemma:LB'
151 +7.258212 O b'lemma:2'
152 +7.185114 Gversion b'lemma:asm584v2'
153 +7.171158 O b'lemma:3'
154 +7.148092 Strain b'lemma:k-12'
155 +7.115910 Supp b'lemma:pq'
156 +7.026371 O b'lemma:for'
157 +7.014323 Gtype b'lemma:arca8myc'
158 +7.008078 Supp b'+1:lemma:\xc2\xb5m'
159 +6.977598 Gtype b'lemma:\xce\xb4cra'
160 +6.880186 Gtype b'lemma:flag-tag'
161 +6.880186 Gtype b'-1:lemma:c-terminal'
162 +6.658138 Gtype b'lemma:delta-arca'
163 +6.634178 O b'lemma:rep3'
164 +6.604885 O b'lemma:rep1'
165 +6.417248 O b'lemma:rep2'
166 +6.415638 O b'lemma:Custom'
167 +6.361925 Gtype b'lemma:fnr8myc'
168 +6.326103 Supp b'lemma:acetate'
169 +6.318547 O b'-1:lemma:ChIP-exo'
170 +6.305986 OD b'+1:lemma:stationary'
171 +6.292137 Air b'lemma:Aerobic'
172 +6.239583 Supp b'-1:lemma:Cra'
173 +6.162599 Air b'-1:lemma:ChIP-Seq'
174 +6.138632 O b'-1:lemma:0.3-0.35'
175 +6.069303 O b'lemma:b'
176 +6.025530 Substrain b'lemma:mg1655'
177 +6.018017 OD b'lemma:od450'
178 +5.968029 Anti b'lemma:none'
179 +5.951239 Technique b'lemma:rna-seq'
180 +5.870411 Gtype b'lemma:delta-fnr'
181 +5.859331 Supp b'lemma:rifampicin'
182 +5.853962 O b'lemma:with'
183 +5.787035 Supp b'lemma:Fe'
184 +5.696867 O b'lemma:Cra'
185 +5.666336 Gtype b'lemma:wt'
186 +5.634727 O b'lemma:\xcf\x8332'
187 +5.556439 Anti b'lemma:seqa'
188 +5.495699 Technique b'lemma:rnaseq'
189 +5.475249 O b'lemma:-'
190 +5.461891 O b'postag::'
191 +5.444655 Gtype b'-1:lemma:\xe2\x88\x86'
192 +5.439532 Supp b'lemma:nh4cl'
193 +5.347691 Med b'+1:lemma:0.4'
194 +5.306864 Gtype b'+1:lemma:type'
195 +5.249286 Supp b'lemma:arginine'
196 +5.228272 Anti b'lemma:anti-myc'
197 +5.189623 Gtype b'lemma:\xce\xb4fur'
198 +5.112523 Med b'lemma:lb'
199 +5.068530 O b'lemma:chip'
200 +5.060776 O b'lemma:CEL'
201 +5.009251 O b'lemma:a'
202 +4.960032 Temp b'-1:lemma:\xcf\x8332'
203 +4.958666 O b'-1:lemma:Aerobic'
204 +4.897653 Supp b'-1:lemma:+'
205 +4.868444 O b'lemma:.'
206 +4.868444 O b'postag:.'
207 +4.830243 Supp b'lemma:glucose'
208 +4.820752 Supp b'lemma:no3'
209 +4.796496 Supp b'lemma:dpd'
210 +4.791818 O b'+1:postag:RB'
211 +4.789121 O b'-1:lemma:anaerobic'
212 +4.777256 Temp b'-1:lemma:37'
213 +4.758659 Technique b'-1:lemma:IP'
214 +4.751186 Gtype b'lemma:wild-type'
215 +4.743291 Supp b'+1:lemma:1'
216 +4.741868 Vess b'lemma:flask'
217 +4.741868 Vess b'-1:lemma:warm'
218 +4.683816 O b'-1:lemma:glucose'
219 +4.643413 Anti b'lemma:anti-rpos'
220 +4.633816 Technique b'lemma:chip-seq'
221 +4.629451 O b'lemma:oxyr'
222 +4.567353 Gtype b'lemma:\xe2\x88\x86'
223 +4.557296 Supp b'lemma:fructose'
224 +4.556286 O b'lemma:affyexp'
225 +4.553249 OD b'lemma:od600'
226 +4.531615 Gtype b'lemma:type'
227 +4.489891 O b'-1:lemma:0.3'
228 +4.480766 O b'lemma:2-3'
229 +4.479384 Gversion b'lemma:nc'
230 +4.467099 Technique b'+1:lemma:chip-exo'
231 +4.434426 Gtype b'lemma:\xce\xb4soxs'
232 +4.430899 O b'-1:lemma:dpd'
233 +4.412502 Strain b'+1:lemma:substr'
234 +4.408579 O b'lemma:or'
235 +4.394512 O b'+1:lemma:o.d.'
236 +4.374031 O b'+1:lemma:sparging'
237 +4.312161 Phase b'+1:lemma:for'
238 +4.261060 O b'lemma:chip-arca'
239 +4.215222 Air b'lemma:anerobically'
240 +4.197581 Technique b'+1:lemma:rna-seq'
241 +4.178924 Supp b'+1:lemma:_'
242 +4.178128 Gversion b'lemma:chip-seq'
243 +4.162694 Technique b'lemma:ChIP-Seq'
244 +4.136869 Supp b'+1:lemma:hour'
245 +4.135692 O b'+1:lemma:od600'
246 +4.120055 O b'postag:IN'
247 +4.088450 Gtype b'lemma:nsrr'
248 +4.087944 Med b'-1:lemma:ml'
249 +4.053773 Gtype b'+1:lemma:with'
250 +4.022074 Gtype b'lemma:\xce\xb4ompr'
251 +4.019457 Gtype b'lemma:WT'
252 +3.994335 Gversion b'-1:lemma:nc'
253 +3.986008 pH b'lemma:ph5'
254 +3.986008 pH b'+1:lemma:.5'
255 +3.963426 O b'lemma:ompr'
256 +3.957893 Med b'+1:lemma:2.0'
257 +3.957602 O b'-1:lemma:lb'
258 +3.947299 Med b'+1:lemma:contain'
259 +3.938942 Gtype b'lemma:pk4854'
260 +3.936645 Supp b'lemma:Leu'
261 +3.914299 Supp b'+1:lemma:2'
262 +3.906482 O b'lemma:Fur'
263 +3.904505 Gversion b'lemma:u00096'
264 +3.904505 Gversion b'+1:lemma:.2'
265 +3.885731 Technique b'-1:lemma:input'
266 +3.877441 O b'-1:lemma:\xc2\xb0c'
267 +3.864432 Gversion b'lemma:.2'
268 +3.864432 Gversion b'-1:lemma:u00096'
269 +3.854046 Substrain b'+1:lemma:phtpg'
270 +3.853262 O b'lemma:s'
271 +3.836382 O b'-1:lemma:type'
272 +3.827763 O b'lemma:soxs'
273 +3.827763 O b'lemma:soxr'
274 +3.789271 Technique b'-1:lemma:chip-exo'
275 +3.787785 Gtype b'lemma:deltaseqa'
276 +3.787785 Gtype b'-1:lemma:old'
277 +3.767682 O b'+1:lemma:43'
278 +3.736088 Air b'lemma:Anaerobic'
279 +3.723469 O b'lemma:argr'
280 +3.707152 Med b'lemma:L'
281 +3.707152 Med b'+1:lemma:broth'
282 +3.693116 O b'lemma:purr'
283 +3.692833 Gtype b'lemma:\xce\xb4oxyr'
284 +3.687510 Gtype b'-1:lemma:ptac'
285 +3.668468 Gtype b'lemma:\xce\xb4soxr'
286 +3.661904 Supp b'lemma:Adenine'
287 +3.646336 O b'+1:lemma:chip-seq'
288 +3.633392 O b'lemma:Lrp'
289 +3.623734 Supp b'lemma:nacl'
290 +3.589169 O b'lemma:chip-fnr'
291 +3.581632 Med b'lemma:glucose'
292 +3.560733 Supp b'lemma:iptg'
293 +3.552695 O b'lemma:delta'
294 +3.509922 O b'postag:VBN'
295 +3.445521 Med b'lemma:m63'
296 +3.434419 Temp b'lemma:37'
297 +3.388210 Temp b'-1:lemma:43'
298 +3.386139 Gtype b'+1:lemma:knock-out'
299 +3.379573 Temp b'-1:lemma:sample'
300 +3.363114 Anti b'+1:lemma:antibody'
301 +3.344587 Gtype b'+1:lemma:flagtag'
302 +3.343995 Gtype b'lemma:dfnr'
303 +3.336085 Med b'+1:lemma:minimal'
304 +3.329171 Gtype b'-1:lemma:rpob'
305 +3.326850 Supp b'+1:lemma:Deficient'
306 +3.310593 Phase b'-1:lemma:until'
307 +3.304072 O b'lemma:supplement'
308 +3.302560 O b'+1:lemma:37'
309 +3.285761 Gtype b'lemma:ptac'
310 +3.282886 OD b'lemma:0.3'
311 +3.280884 Gversion b'lemma:000913'
312 +3.277170 Temp b'lemma:\xc2\xb0c'
313 +3.267178 pH b'lemma:.5'
314 +3.267178 pH b'-1:lemma:ph5'
315 +3.266678 Air b'lemma:anaerobically'
316 +3.258256 Med b'-1:lemma:LB'
317 +3.245906 O b'lemma:at'
318 +3.227428 Temp b'lemma:43'
319 +3.174057 O b'postag:DT'
320 +3.124069 O b'-1:lemma:000913'
321 +3.122174 O b'-1:lemma:min'
322 +3.116791 Air b'postag:RB'
323 +3.107732 O b'lemma:pt7'
324 +3.040595 Gtype b'lemma:\xce\xb4gadw'
325 +3.006824 O b'-1:lemma:stpa'
326 +2.984377 OD b'-1:lemma:~'
327 +2.941221 Supp b'-1:lemma:\xc2\xb5m'
328 +2.928646 Gversion b'lemma:_'
329 +2.927017 Gtype b'lemma:soxs-8myc'
330 +2.927017 Gtype b'lemma:soxr-8myc'
331 +2.916978 O b'+1:lemma:mid-log'
332 +2.899360 OD b'lemma:0.35'
333 +2.886577 Supp b'+1:lemma:iptg'
334 +2.876433 Supp b'lemma:leucine'
335 +2.857437 Gtype b'lemma:purr-8myc'
336 +2.854621 O b'postag:SYM'
337 +
338 +
339 +Top negative:
340 +-0.011735 OD b'+1:lemma:0.4'
341 +-0.012143 O b'+1:lemma:0.4'
342 +-0.012737 OD b'+1:postag:DT'
343 +-0.013567 Gtype b'+1:postag:IN'
344 +-0.014658 Supp b'-1:postag:NN'
345 +-0.015388 O b'+1:lemma:co2'
346 +-0.019498 O b'+1:lemma:25'
347 +-0.024384 O b'-1:lemma:mm'
348 +-0.026473 OD b'+1:lemma:~'
349 +-0.029376 Supp b'-1:lemma:and'
350 +-0.030278 O b'lemma:\xc2\xb0c'
351 +-0.033870 Gtype b'-1:lemma:mg1655'
352 +-0.035964 O b'+1:lemma:rep1'
353 +-0.036870 Anti b'-1:postag:NN'
354 +-0.037674 Med b'-1:lemma:m63'
355 +-0.041457 O b'-1:lemma:for'
356 +-0.042315 O b'-1:lemma:in'
357 +-0.043253 O b'+1:lemma:5'
358 +-0.054536 O b'+1:lemma:antibody'
359 +-0.056675 OD b'+1:postag:CD'
360 +-0.057484 O b'-1:postag:VBN'
361 +-0.057779 O b'postag:RB'
362 +-0.061765 O b'-1:lemma:at'
363 +-0.063699 O b'-1:lemma:of'
364 +-0.073053 Phase b'+1:postag:NN'
365 +-0.081144 O b'-1:lemma:o2'
366 +-0.083185 O b'+1:lemma:-lcb-'
367 +-0.085303 O b'lemma:o2'
368 +-0.090545 Gversion b'-1:postag:NN'
369 +-0.090810 Gtype b'lemma:fnr'
370 +-0.094072 O b'-1:postag:IN'
371 +-0.095560 O b'+1:lemma:300'
372 +-0.095835 O b'+1:lemma:culture'
373 +-0.099835 Gtype b'-1:postag:NN'
374 +-0.104011 Gtype b'-1:postag:DT'
375 +-0.104295 O b'-1:lemma:-lrb-'
376 +-0.104392 O b'+1:postag:CD'
377 +-0.106605 O b'lemma:e.'
378 +-0.114926 O b'lemma:aerobically'
379 +-0.123054 Supp b'-1:lemma:-'
380 +-0.124613 O b'-1:postag:-LRB-'
381 +-0.130629 O b'lemma:lb'
382 +-0.135809 O b'-1:lemma:delta'
383 +-0.137484 O b'+1:lemma:delta'
384 +-0.145435 O b'-1:lemma:30'
385 +-0.147772 Phase b'lemma:pahse'
386 +-0.151829 O b'-1:lemma:e.'
387 +-0.152470 Phase b'+1:lemma:pahse'
388 +-0.153511 O b'+1:lemma:arginine'
389 +-0.156235 Temp b'-1:postag:IN'
390 +-0.165736 O b'lemma:n2'
391 +-0.167320 O b'lemma:m63'
392 +-0.171593 O b'lemma:mg1655'
393 +-0.171854 O b'lemma:mg/ml'
394 +-0.171854 O b'-1:lemma:150'
395 +-0.172097 O b'+1:lemma:grow'
396 +-0.174184 O b'+1:lemma:-rrb-'
397 +-0.176692 O b'lemma:medium'
398 +-0.190407 O b'-1:lemma:n2'
399 +-0.193709 Technique b'-1:lemma::'
400 +-0.197195 Supp b'+1:lemma:rifampicin'
401 +-0.199110 Supp b'+1:lemma:acetate'
402 +-0.211904 Supp b'lemma:and'
403 +-0.212434 O b'-1:lemma:fresh'
404 +-0.215208 Med b'+1:postag:CC'
405 +-0.223718 Supp b'+1:postag:VBN'
406 +-0.225529 Gtype b'+1:lemma:\xe2\x88\x86'
407 +-0.227506 O b'-1:lemma:iptg'
408 +-0.228252 O b'lemma:grow'
409 +-0.244665 O b'+1:postag:-RRB-'
410 +-0.248414 Phase b'-1:lemma:at'
411 +-0.252297 O b'lemma:co2'
412 +-0.262234 O b'+1:lemma:until'
413 +-0.264339 Med b'+1:postag:NNS'
414 +-0.271018 Supp b'lemma:2'
415 +-0.271420 O b'+1:lemma:mm'
416 +-0.274442 Air b'-1:lemma:or'
417 +-0.275533 Air b'+1:postag:NNP'
418 +-0.282985 O b'lemma:k-12'
419 +-0.283867 pH b'postag:NN'
420 +-0.289565 O b'+1:postag:IN'
421 +-0.292519 Gtype b'postag:CD'
422 +-0.299546 Supp b'-1:postag:VBG'
423 +-0.302599 Supp b'-1:postag:NNP'
424 +-0.304446 OD b'+1:postag:NN'
425 +-0.304579 O b'-1:lemma:0.1'
426 +-0.308132 O b'lemma:30'
427 +-0.315401 O b'+1:postag:VBG'
428 +-0.328419 O b'lemma:\xe2\x88\x86'
429 +-0.334410 O b'+1:lemma:phase'
430 +-0.339050 Med b'postag:CD'
431 +-0.339573 O b'lemma:cell'
432 +-0.349122 Gversion b'+1:postag:NN'
433 +-0.350590 O b'+1:lemma:cell'
434 +-0.353274 O b'-1:lemma:from'
435 +-0.356173 O b'-1:lemma:minimal'
436 +-0.365952 Technique b'-1:postag::'
437 +-0.377481 O b'lemma:m9'
438 +-0.387016 O b'-1:lemma:um'
439 +-0.387016 O b'+1:lemma:paraquat'
440 +-0.396762 O b'lemma:minimal'
441 +-0.399415 Med b'+1:postag:IN'
442 +-0.400174 O b'-1:lemma:.'
443 +-0.400174 O b'-1:postag:.'
444 +-0.405899 Gtype b'+1:lemma:fnr'
445 +-0.420549 Supp b'+1:postag:IN'
446 +-0.426457 Temp b'postag:NN'
447 +-0.443672 Med b'+1:postag:NN'
448 +-0.454874 O b'-1:lemma:with'
449 +-0.455975 O b'-1:lemma:od600'
450 +-0.460725 O b'lemma:od600'
451 +-0.476927 O b'lemma:purify'
452 +-0.513975 O b'lemma:glucose'
453 +-0.514369 O b'+1:lemma:shake'
454 +-0.516980 O b'lemma:anaerobic'
455 +-0.543673 O b'+1:lemma:dissolve'
456 +-0.575481 OD b'+1:postag:CC'
457 +-0.575983 O b'+1:lemma:%'
458 +-0.578458 O b'+1:lemma:\xc2\xb0c'
459 +-0.579234 O b'+1:lemma:.'
460 +-0.579234 O b'+1:postag:.'
461 +-0.590831 O b'-1:lemma:until'
462 +-0.593646 O b'+1:lemma:minimal'
463 +-0.598050 O b'-1:lemma:rifampicin'
464 +-0.599013 Anti b'+1:postag:JJ'
465 +-0.600816 O b'-1:lemma:grow'
466 +-0.609501 O b'+1:postag:NNS'
467 +-0.616694 O b'lemma:mid-log'
468 +-0.622505 OD b'lemma:-lrb-'
469 +-0.681173 Temp b'postag:JJ'
470 +-0.682190 O b'lemma:\xce\xb4fur'
471 +-0.683721 Phase b'-1:postag:NN'
472 +-0.699842 O b'-1:lemma:cra'
473 +-0.699863 O b'+1:lemma:or'
474 +-0.706770 OD b'postag:-LRB-'
475 +-0.726480 Anti b'+1:lemma:anti-fur'
476 +-0.739377 O b'+1:lemma:0.3'
477 +-0.759701 O b'lemma:0.2'
478 +-0.767286 O b'-1:lemma:mid-log'
479 +-0.769781 O b'lemma:dissolve'
480 +-0.773573 O b'+1:lemma:_'
481 +-0.796461 OD b'postag:JJ'
482 +-0.804019 O b'+1:lemma:fecl2'
483 +-0.817322 O b'+1:lemma:c'
484 +-0.823112 O b'-1:lemma:0.2'
485 +-0.856156 O b'lemma:37'
486 +-0.874673 O b'-1:lemma:ml'
487 +-0.897793 O b'lemma:dpd'
488 +-0.928954 O b'lemma:0.3'
489 +-0.935642 O b'lemma:media'
490 +-0.953224 Supp b'postag:CC'
491 +-0.962174 O b'lemma:150'
492 +-0.962174 O b'+1:lemma:mg/ml'
493 +-0.991546 Temp b'+1:lemma:to'
494 +-0.991546 Temp b'+1:postag:TO'
495 +-0.996229 O b'-1:lemma:co2'
496 +-1.000642 O b'lemma:of'
497 +-1.005860 O b'-1:postag::'
498 +-1.099013 O b'lemma:20'
499 +-1.117230 O b'-1:lemma:~'
500 +-1.139604 Air b'postag:NN'
501 +-1.144681 OD b'+1:lemma:and'
502 +-1.201437 O b'-1:lemma:dissolve'
503 +-1.201437 O b'+1:lemma:methanol'
504 +-1.236667 O b'-1:lemma:sample'
505 +-1.244866 O b'lemma:wt'
506 +-1.252945 O b'+1:lemma:g/l'
507 +-1.255778 O b'-1:lemma:37'
508 +-1.264177 Med b'-1:postag:IN'
509 +-1.288204 Supp b'-1:lemma:%'
510 +-1.302552 O b'lemma:0.1'
511 +-1.371688 O b'lemma:anaerobically'
512 +-1.410598 O b'lemma:phase'
513 +-1.489551 O b'+1:lemma:+'
514 +-1.506032 O b'+1:lemma:supplement'
515 +-1.532398 O b'lemma:2h'
516 +-1.532398 O b'-1:lemma:additional'
517 +-1.534894 Air b'+1:postag:JJ'
518 +-1.548281 O b'+1:lemma:in'
519 +-1.565843 Anti b'postag:NNP'
520 +-1.638452 O b'postag:VBP'
521 +-1.657980 O b'lemma:rifampicin'
522 +-1.822232 O b'+1:lemma:at'
523 +-1.836234 O b'-1:postag:VBG'
524 +-1.854980 O b'-1:lemma:IP'
525 +-1.925222 O b'-1:lemma:2'
526 +-1.936575 O b'lemma:fecl2'
527 +-1.945977 Air b'-1:postag:JJ'
528 +-2.044512 O b'-1:lemma:nsrr'
529 +-2.267485 Phase b'-1:postag:JJ'
530 +-2.297463 O b'-1:lemma:rpob'
531 +-2.299343 O b'+1:lemma:hour'
532 +-2.321612 O b'lemma:methanol'
533 +-2.467134 O b'-1:lemma:ompr'
534 +-2.690673 Supp b'postag:JJ'
535 +-3.133082 O b'+1:lemma:2'
536 +-3.547425 Phase b'postag:JJ'
537 +-3.603436 O b'+1:lemma:1'
538 +-4.365017 O b'-1:lemma:_'
539 +-5.016691 O b'-1:lemma::'
540 +
1 +********** TRAINING AND TESTING REPORT **********
2 +Training file: training-data-set-70_v4.txt
3 +
4 +best params:{'c1': 0.20645497959678813, 'c2': 0.017572644849766363}
5 +best CV score:0.7977309817293199
6 +model size: 0.06M
7 +
8 +Flat F1: 0.784234402165812
9 + precision recall f1-score support
10 +
11 + OD 1.000 0.405 0.577 37
12 + pH 1.000 1.000 1.000 12
13 + Technique 0.952 0.909 0.930 22
14 + Med 0.800 0.842 0.821 57
15 + Temp 0.818 1.000 0.900 18
16 + Vess 0.000 0.000 0.000 0
17 + Agit 0.000 0.000 0.000 0
18 + Phase 1.000 0.895 0.944 19
19 + Air 0.807 0.742 0.773 62
20 + Anti 1.000 0.444 0.615 9
21 + Strain 1.000 1.000 1.000 1
22 + Gtype 0.866 0.792 0.828 106
23 + Substrain 0.000 0.000 0.000 1
24 + Supp 0.818 0.662 0.732 136
25 + Gversion 0.000 0.000 0.000 0
26 +
27 +avg / total 0.859 0.740 0.784 480
28 +
29 +
30 +Top likely transitions:
31 +Agit -> Agit 6.425591
32 +OD -> OD 5.826527
33 +Temp -> Temp 5.548786
34 +Med -> Med 5.385181
35 +Anti -> Anti 5.274493
36 +Air -> Air 5.273813
37 +Phase -> Phase 4.728944
38 +Gtype -> Gtype 4.437920
39 +Gversion -> Gversion 4.349816
40 +O -> O 4.268725
41 +Supp -> Supp 3.971396
42 +Technique -> Technique 3.834387
43 +pH -> pH 2.157022
44 +O -> Supp 1.775419
45 +Gtype -> Supp 1.744617
46 +Substrain -> Gtype 1.701513
47 +Air -> O 1.650192
48 +O -> Gtype 1.270693
49 +O -> Technique 1.253973
50 +Technique -> Air 0.977999
51 +O -> Anti 0.884095
52 +O -> Temp 0.762865
53 +Med -> O 0.608226
54 +OD -> Phase 0.415201
55 +Temp -> O 0.355964
56 +Gtype -> Air 0.349440
57 +O -> Gversion 0.343072
58 +Gtype -> pH 0.331974
59 +O -> Med 0.236453
60 +Phase -> O 0.133978
61 +Supp -> O 0.132584
62 +O -> Strain 0.015869
63 +O -> Phase 0.000848
64 +O -> OD -0.004725
65 +Technique -> OD -0.009324
66 +Technique -> O -0.010626
67 +Technique -> pH -0.028457
68 +Supp -> Air -0.068848
69 +OD -> Air -0.084459
70 +Supp -> OD -0.119773
71 +OD -> Med -0.182306
72 +Gtype -> Med -0.204270
73 +Technique -> Gtype -0.212005
74 +Gtype -> O -0.327673
75 +Gtype -> Anti -0.356226
76 +O -> Air -0.606839
77 +OD -> O -0.668006
78 +Agit -> O -0.720674
79 +Substrain -> O -0.778994
80 +Supp -> Med -0.779854
81 +
82 +
83 +Top unlikely transitions:
84 +Med -> Med 5.385181
85 +Anti -> Anti 5.274493
86 +Air -> Air 5.273813
87 +Phase -> Phase 4.728944
88 +Gtype -> Gtype 4.437920
89 +Gversion -> Gversion 4.349816
90 +O -> O 4.268725
91 +Supp -> Supp 3.971396
92 +Technique -> Technique 3.834387
93 +pH -> pH 2.157022
94 +O -> Supp 1.775419
95 +Gtype -> Supp 1.744617
96 +Substrain -> Gtype 1.701513
97 +Air -> O 1.650192
98 +O -> Gtype 1.270693
99 +O -> Technique 1.253973
100 +Technique -> Air 0.977999
101 +O -> Anti 0.884095
102 +O -> Temp 0.762865
103 +Med -> O 0.608226
104 +OD -> Phase 0.415201
105 +Temp -> O 0.355964
106 +Gtype -> Air 0.349440
107 +O -> Gversion 0.343072
108 +Gtype -> pH 0.331974
109 +O -> Med 0.236453
110 +Phase -> O 0.133978
111 +Supp -> O 0.132584
112 +O -> Strain 0.015869
113 +O -> Phase 0.000848
114 +O -> OD -0.004725
115 +Technique -> OD -0.009324
116 +Technique -> O -0.010626
117 +Technique -> pH -0.028457
118 +Supp -> Air -0.068848
119 +OD -> Air -0.084459
120 +Supp -> OD -0.119773
121 +OD -> Med -0.182306
122 +Gtype -> Med -0.204270
123 +Technique -> Gtype -0.212005
124 +Gtype -> O -0.327673
125 +Gtype -> Anti -0.356226
126 +O -> Air -0.606839
127 +OD -> O -0.668006
128 +Agit -> O -0.720674
129 +Substrain -> O -0.778994
130 +Supp -> Med -0.779854
131 +Gtype -> OD -1.018491
132 +Med -> Supp -1.466475
133 +Phase -> OD -1.648011
134 +
135 +
136 +Top positive:
137 +7.908476 O b'lemma:_'
138 +7.519798 Supp b'lemma:Iron'
139 +6.893122 Air b'lemma:anaerobic'
140 +6.757207 O b'lemma:1'
141 +6.526738 Air b'lemma:aerobic'
142 +6.246963 Phase b'lemma:stationary'
143 +6.216490 Strain b'lemma:k-12'
144 +6.159501 Technique b'lemma:ChIP-exo'
145 +5.949001 Supp b'lemma:nitrate'
146 +5.557355 O b'lemma:rpob'
147 +5.556016 Phase b'lemma:mid-log'
148 +5.526361 O b'lemma:2'
149 +5.249738 Technique b'lemma:chipseq'
150 +5.235064 Air b'-1:lemma:ChIP-Seq'
151 +5.174037 O b'lemma:3'
152 +5.050373 O b'postag:IN'
153 +4.985897 Supp b'lemma:pq'
154 +4.921604 Substrain b'lemma:mg1655'
155 +4.872348 Gtype b'lemma:\xce\xb4cra'
156 +4.847236 Gversion b'lemma:asm584v2'
157 +4.762705 O b'-1:lemma:tag'
158 +4.726906 OD b'lemma:od600'
159 +4.660975 Gtype b'lemma:type'
160 +4.628321 O b'lemma:Custom'
161 +4.553702 O b'lemma:rep1'
162 +4.515219 Med b'lemma:MOPS'
163 +4.507185 Gversion b'lemma:nc'
164 +4.464398 Air b'lemma:Aerobic'
165 +4.453084 Med b'lemma:LB'
166 +4.447532 Supp b'+1:lemma:\xc2\xb5m'
167 +4.437438 O b'-1:lemma:ChIP-exo'
168 +4.404385 O b'lemma:rep2'
169 +4.373164 Supp b'lemma:nh4cl'
170 +4.365179 Gtype b'lemma:flag-tag'
171 +4.365179 Gtype b'-1:lemma:c-terminal'
172 +4.354053 O b'postag::'
173 +4.297412 O b'lemma:\xcf\x8332'
174 +4.284115 Gtype b'+1:lemma:type'
175 +4.278996 O b'lemma:b'
176 +4.258389 Gtype b'-1:lemma:\xe2\x88\x86'
177 +4.241615 Gtype b'lemma:wt'
178 +4.228158 Med b'lemma:lb'
179 +4.218565 Gtype b'lemma:arca8myc'
180 +4.218302 Technique b'lemma:ChIP-Seq'
181 +4.180599 Supp b'lemma:glucose'
182 +4.160405 O b'lemma:rep3'
183 +4.081991 O b'lemma:Cra'
184 +3.932540 O b'lemma:a'
185 +3.844330 Technique b'lemma:rna-seq'
186 +3.814781 Gtype b'lemma:delta-arca'
187 +3.626994 Gtype b'lemma:fnr8myc'
188 +3.606800 Supp b'lemma:acetate'
189 +3.573033 Supp b'lemma:Fe'
190 +3.567339 Technique b'lemma:chip-seq'
191 +3.563430 Supp b'lemma:rifampicin'
192 +3.511453 OD b'lemma:od450'
193 +3.511078 O b'lemma:.'
194 +3.511078 O b'postag:.'
195 +3.438725 Temp b'-1:lemma:sample'
196 +3.393434 Supp b'lemma:dpd'
197 +3.386786 Supp b'lemma:no3'
198 +3.357689 Anti b'lemma:none'
199 +3.339507 Med b'+1:lemma:0.4'
200 +3.299536 Gtype b'lemma:\xe2\x88\x86'
201 +3.288791 O b'+1:lemma:od600'
202 +3.273758 Technique b'lemma:rnaseq'
203 +3.258770 Gtype b'lemma:delta-fnr'
204 +3.254592 O b'postag:VBN'
205 +3.228840 Supp b'lemma:fructose'
206 +3.219194 Gtype b'lemma:\xce\xb4fur'
207 +3.208266 O b'lemma:CEL'
208 +3.206942 Gtype b'lemma:nsrr'
209 +3.177189 O b'-1:lemma:Aerobic'
210 +3.129133 Technique b'-1:lemma:IP'
211 +3.128048 O b'-1:lemma:0.3-0.35'
212 +3.112441 O b'-1:lemma:type'
213 +3.098709 Gversion b'-1:lemma:nc'
214 +3.051729 Vess b'lemma:flask'
215 +3.051729 Vess b'-1:lemma:warm'
216 +3.008997 Supp b'+1:lemma:1'
217 +2.987213 pH b'lemma:ph5'
218 +2.987213 pH b'+1:lemma:.5'
219 +2.977412 Anti b'lemma:seqa'
220 +2.955347 Gtype b'lemma:wild-type'
221 +2.951444 O b'lemma:-'
222 +2.937212 O b'+1:postag:RB'
223 +2.916028 Supp b'lemma:arginine'
224 +2.840642 O b'-1:lemma:glucose'
225 +2.820501 Gtype b'+1:lemma:ph5'
226 +2.791333 Anti b'+1:lemma:antibody'
227 +2.774879 Gversion b'lemma:chip-seq'
228 +2.771457 Anti b'lemma:anti-myc'
229 +2.765907 O b'lemma:chip'
230 +2.735612 Gtype b'+1:lemma:flagtag'
231 +2.722421 Med b'lemma:m63'
232 +2.721104 O b'lemma:or'
233 +2.699589 Temp b'lemma:\xc2\xb0c'
234 +2.681423 O b'lemma:with'
235 +2.669444 Med b'+1:lemma:minimal'
236 +2.666834 Gversion b'lemma:u00096'
237 +2.666834 Gversion b'+1:lemma:.2'
238 +2.660974 Gtype b'+1:lemma:pq'
239 +2.654887 OD b'+1:lemma:stationary'
240 +2.644825 Gtype b'+1:lemma:with'
241 +2.637258 Supp b'+1:lemma:2'
242 +2.578615 Gtype b'lemma:\xce\xb4ompr'
243 +2.569922 O b'-1:lemma:anaerobic'
244 +2.559477 Temp b'+1:lemma:\xc2\xb0c'
245 +2.558177 Supp b'-1:lemma:Cra'
246 +2.555473 Gversion b'lemma:.2'
247 +2.555473 Gversion b'-1:lemma:u00096'
248 +2.519787 O b'postag:CC'
249 +2.515611 Gtype b'-1:lemma:ptac'
250 +2.474668 Technique b'-1:lemma:chip-exo'
251 +2.465084 Gversion b'lemma:000913'
252 +2.459269 O b'+1:lemma:pq'
253 +2.454369 Strain b'+1:lemma:substr'
254 +2.453079 O b'postag:DT'
255 +2.447446 Temp b'-1:lemma:\xcf\x8332'
256 +2.434523 Technique b'+1:lemma:chip-exo'
257 +2.415981 Supp b'lemma:nacl'
258 +2.400381 O b'+1:postag:NNP'
259 +2.395375 O b'lemma:s'
260 +2.378114 pH b'lemma:.5'
261 +2.378114 pH b'-1:lemma:ph5'
262 +2.360956 Med b'+1:lemma:2.0'
263 +2.360197 Temp b'-1:lemma:43'
264 +2.354343 O b'-1:lemma:lb'
265 +2.333302 Gtype b'-1:lemma:rpob'
266 +2.331044 Supp b'+1:lemma:_'
267 +2.265756 O b'-1:lemma:l1'
268 +2.256561 Supp b'-1:lemma:\xc2\xb5m'
269 +2.253390 Air b'-1:lemma:-'
270 +2.244110 O b'lemma:oxyr'
271 +2.231965 Air b'lemma:anaerobically'
272 +2.211436 Anti b'lemma:anti-rpos'
273 +2.204750 O b'-1:lemma:stpa'
274 +2.194353 OD b'-1:lemma:~'
275 +2.189113 Gtype b'lemma:\xce\xb4soxs'
276 +2.183419 Air b'postag:RB'
277 +2.170778 Supp b'-1:lemma:+'
278 +2.151725 Temp b'lemma:43'
279 +2.127054 Temp b'lemma:37'
280 +2.124907 Supp b'lemma:20'
281 +2.115142 O b'+1:lemma:o.d.'
282 +2.111143 Supp b'lemma:Leu'
283 +2.107663 O b'lemma:Lrp'
284 +2.107472 Temp b'-1:lemma:37'
285 +2.094728 Gtype b'lemma:ptac'
286 +2.073944 O b'+1:lemma:chip-seq'
287 +2.062388 Supp b'lemma:iptg'
288 +2.042062 O b'lemma:affyexp'
289 +2.040790 Med b'lemma:media'
290 +2.031837 O b'lemma:culture'
291 +2.024586 O b'postag:VBG'
292 +2.010374 Vess b'-1:postag:VBN'
293 +2.009201 Gtype b'lemma:pk4854'
294 +2.002974 Med b'lemma:glucose'
295 +1.998019 Gtype b'+1:lemma:_'
296 +1.980119 Supp b'+1:lemma:hour'
297 +1.975173 O b'lemma:chip-arca'
298 +1.959423 Supp b'-1:lemma:with'
299 +1.922869 Technique b'-1:lemma:input'
300 +1.921526 O b'lemma:ompr'
301 +1.910950 Med b'lemma:broth'
302 +1.910950 Med b'-1:lemma:L'
303 +1.910014 Gtype b'lemma:deltaseqa'
304 +1.910014 Gtype b'-1:lemma:old'
305 +1.905010 O b'-1:lemma:0.3'
306 +1.901042 OD b'lemma:0.3'
307 +1.900964 Med b'lemma:L'
308 +1.900964 Med b'+1:lemma:broth'
309 +1.899018 Supp b'+1:lemma:Deficient'
310 +1.893912 Air b'lemma:Anaerobic'
311 +1.893451 Med b'-1:lemma:ml'
312 +1.870173 O b'lemma:Fur'
313 +1.848155 pH b'+1:postag:CD'
314 +1.823256 O b'+1:lemma:coli'
315 +1.818674 Gtype b'-1:lemma:nsrr'
316 +1.808290 Substrain b'+1:lemma:phtpg'
317 +1.805456 Phase b'-1:lemma:mid-log'
318 +1.797810 Med b'-1:lemma:glucose'
319 +1.796653 Supp b'lemma:methanol'
320 +1.792416 O b'-1:lemma:\xc2\xb0c'
321 +1.781537 Supp b'lemma:Adenine'
322 +1.779649 Air b'-1:lemma:grow'
323 +1.777565 Technique b'+1:lemma:rna-seq'
324 +1.776039 OD b'lemma:phase'
325 +1.775438 O b'lemma:condition'
326 +1.763711 Air b'-1:lemma:phase'
327 +1.762988 Gversion b'lemma:_'
328 +1.759448 O b'-1:lemma:dpd'
329 +1.736984 O b'lemma:genotype/variation'
330 +1.716841 O b'lemma:argr'
331 +1.709815 O b'postag:VBD'
332 +1.687639 Air b'-1:lemma:co2'
333 +1.686138 Gversion b'postag:CD'
334 +1.676377 Phase b'-1:lemma:until'
335 +1.672693 Gtype b'lemma:WT'
336 +1.671406 Phase b'+1:lemma:for'
337 +
338 +
339 +Top negative:
340 +0.015104 Vess b'+1:postag:IN'
341 +0.014949 OD b'-1:lemma:to'
342 +0.014949 OD b'-1:postag:TO'
343 +0.013481 O b'+1:lemma:nitrate'
344 +0.012911 O b'-1:lemma:mm'
345 +0.012853 O b'-1:lemma:m63'
346 +0.012845 Phase b'+1:lemma:aerobically'
347 +0.012531 Phase b'+1:postag:RB'
348 +0.010531 OD b'+1:lemma:coli'
349 +0.010197 Air b'postag:-LRB-'
350 +0.007967 O b'+1:lemma:mm'
351 +0.007668 Air b'-1:postag:-LRB-'
352 +0.007530 O b'+1:lemma:wt'
353 +0.007412 O b'+1:lemma:for'
354 +0.006443 Med b'-1:lemma:g/l'
355 +0.005977 Temp b'-1:postag:NN'
356 +0.005028 OD b'-1:lemma:growth'
357 +0.004138 Technique b'-1:postag:NN'
358 +0.003008 O b'lemma:fresh'
359 +0.002709 O b'+1:postag:SYM'
360 +0.002549 Supp b'+1:lemma:deficient'
361 +0.002275 Technique b'+1:postag:-RRB-'
362 +0.002219 O b'+1:lemma:_'
363 +0.001749 O b'lemma:dpd'
364 +0.001500 Temp b'postag:VB'
365 +0.001375 Gtype b'lemma:cra-8myc-tagged'
366 +0.001375 Gtype b'lemma:fur-8myc'
367 +0.000712 O b'-1:postag:VBZ'
368 +0.000676 O b'+1:lemma:minute'
369 +0.000589 Temp b'+1:postag:NNS'
370 +0.000468 Supp b'lemma:um'
371 +0.000451 Gtype b'lemma:Combined'
372 +0.000451 Gtype b'+1:lemma:input'
373 +0.000346 O b'postag:VBZ'
374 +0.000331 Supp b'-1:lemma:250'
375 +0.000251 O b'lemma:-80'
376 +0.000219 Temp b'-1:postag:NNS'
377 +0.000204 O b'+1:lemma:total'
378 +0.000185 Air b'+1:postag::'
379 +0.000154 O b'-1:lemma:to'
380 +0.000154 O b'-1:postag:TO'
381 +0.000148 Gversion b'-1:postag::'
382 +0.000078 O b'+1:lemma:dpd'
383 +0.000049 Gtype b'lemma:inducible'
384 +0.000049 Gtype b'-1:lemma:carrying'
385 +0.000049 Gtype b'+1:lemma:ptrc'
386 +0.000030 OD b'+1:lemma:0.35'
387 +0.000027 Anti b'+1:lemma:tag'
388 +0.000010 OD b'lemma:mg1655'
389 +0.000009 OD b'-1:lemma:k-12'
390 +-0.000003 Gtype b'+1:postag:NNS'
391 +-0.000145 O b'lemma:m63'
392 +-0.000217 O b'-1:lemma:um'
393 +-0.000217 O b'+1:lemma:paraquat'
394 +-0.000321 Temp b'-1:lemma:\xc2\xb0c'
395 +-0.000324 Agit b'postag:NN'
396 +-0.000658 Gtype b'+1:lemma:\xe2\x88\x86'
397 +-0.000729 Air b'-1:lemma:,'
398 +-0.000729 Air b'-1:postag:,'
399 +-0.001373 Gtype b'postag:CD'
400 +-0.001602 Supp b'+1:lemma:-rrb-'
401 +-0.001692 Supp b'+1:postag:-RRB-'
402 +-0.002077 O b'lemma:lb'
403 +-0.002511 O b'-1:lemma:,'
404 +-0.002511 O b'-1:postag:,'
405 +-0.003565 OD b'+1:postag:-LRB-'
406 +-0.004827 O b'-1:lemma:g/l'
407 +-0.007036 Air b'+1:lemma:25'
408 +-0.007105 O b'+1:lemma:rep1'
409 +-0.007327 Supp b'lemma:mm'
410 +-0.008864 O b'+1:lemma:c'
411 +-0.009788 Air b'-1:lemma:aerobically'
412 +-0.011509 O b'-1:postag:VBN'
413 +-0.011813 O b'+1:lemma:shake'
414 +-0.014323 Air b'lemma:25'
415 +-0.017276 Air b'+1:lemma:-lrb-'
416 +-0.017627 O b'+1:lemma:aerobically'
417 +-0.018161 O b'lemma:e.'
418 +-0.020084 Med b'-1:postag:CD'
419 +-0.021369 O b'lemma:glucose'
420 +-0.023787 Gtype b'-1:postag:NN'
421 +-0.034266 OD b'postag:JJ'
422 +-0.035836 Med b'postag:CD'
423 +-0.043749 O b'-1:lemma:o2'
424 +-0.049597 Gtype b'-1:postag:DT'
425 +-0.050879 O b'+1:lemma:300'
426 +-0.051292 Phase b'-1:postag:JJ'
427 +-0.052811 Phase b'-1:postag:NN'
428 +-0.057812 O b'-1:lemma:rifampicin'
429 +-0.064533 O b'-1:lemma:e.'
430 +-0.067797 O b'+1:lemma:until'
431 +-0.076803 O b'lemma:150'
432 +-0.076803 O b'+1:lemma:mg/ml'
433 +-0.080714 O b'+1:lemma:5'
434 +-0.090840 Gversion b'+1:postag:NN'
435 +-0.099773 OD b'+1:postag:CD'
436 +-0.103778 O b'-1:lemma:-lrb-'
437 +-0.104888 Anti b'+1:postag:JJ'
438 +-0.112142 Technique b'-1:lemma::'
439 +-0.119753 Air b'postag:CC'
440 +-0.127293 O b'-1:lemma:0.1'
441 +-0.127394 O b'+1:lemma:.'
442 +-0.127394 O b'+1:postag:.'
443 +-0.129048 Air b'-1:lemma:or'
444 +-0.129819 O b'-1:lemma:from'
445 +-0.138472 O b'-1:postag:IN'
446 +-0.146711 Air b'-1:postag:CC'
447 +-0.147481 O b'+1:lemma:-rrb-'
448 +-0.155126 O b'+1:lemma:antibody'
449 +-0.155172 O b'lemma:co2'
450 +-0.168963 Gtype b'-1:postag:CD'
451 +-0.169956 O b'+1:postag:NNS'
452 +-0.174119 Supp b'postag:CC'
453 +-0.179294 O b'-1:lemma:1'
454 +-0.191426 O b'+1:lemma:arginine'
455 +-0.196031 O b'-1:lemma:cra'
456 +-0.199976 O b'lemma:anaerobic'
457 +-0.204578 O b'lemma:30'
458 +-0.224096 Med b'-1:postag:NN'
459 +-0.230003 Phase b'+1:postag:NN'
460 +-0.236956 O b'+1:lemma:%'
461 +-0.241481 O b'-1:lemma:30'
462 +-0.264234 Air b'postag:CD'
463 +-0.270046 O b'lemma:\xce\xb4fur'
464 +-0.280726 O b'-1:lemma:ml'
465 +-0.288044 O b'-1:lemma:of'
466 +-0.297076 Med b'+1:postag:NN'
467 +-0.299805 Med b'+1:postag:IN'
468 +-0.302610 Supp b'+1:lemma:rifampicin'
469 +-0.306596 O b'-1:lemma:~'
470 +-0.307778 O b'-1:postag:-LRB-'
471 +-0.310654 O b'lemma:phase'
472 +-0.311969 pH b'postag:NN'
473 +-0.315543 O b'+1:lemma:0.3'
474 +-0.318646 O b'postag:VBP'
475 +-0.335513 OD b'+1:postag:NN'
476 +-0.365484 Temp b'postag:JJ'
477 +-0.402954 O b'lemma:dissolve'
478 +-0.424513 O b'lemma:20'
479 +-0.429534 O b'+1:postag:IN'
480 +-0.437134 O b'-1:lemma:grow'
481 +-0.457631 Phase b'-1:lemma:at'
482 +-0.461721 Temp b'+1:lemma:to'
483 +-0.461721 Temp b'+1:postag:TO'
484 +-0.486817 Supp b'-1:lemma:%'
485 +-0.495015 O b'lemma:0.3'
486 +-0.505567 Temp b'postag:NN'
487 +-0.513730 O b'lemma:od600'
488 +-0.526067 O b'+1:postag:-RRB-'
489 +-0.537886 O b'+1:postag:VBG'
490 +-0.541924 O b'-1:lemma:od600'
491 +-0.549617 O b'-1:lemma:rpob'
492 +-0.561323 O b'-1:lemma:0.2'
493 +-0.568252 OD b'lemma:-lrb-'
494 +-0.568961 Technique b'-1:postag::'
495 +-0.587228 O b'-1:lemma:sample'
496 +-0.604535 O b'+1:lemma:fecl2'
497 +-0.605395 O b'-1:lemma:37'
498 +-0.633419 O b'lemma:anaerobically'
499 +-0.640315 Air b'-1:postag:JJ'
500 +-0.646518 O b'lemma:\xe2\x88\x86'
501 +-0.700705 O b'lemma:fecl2'
502 +-0.713765 O b'-1:postag::'
503 +-0.750660 O b'lemma:media'
504 +-0.798765 O b'+1:lemma:supplement'
505 +-0.840866 O b'-1:postag:VBG'
506 +-0.867716 O b'lemma:0.2'
507 +-0.905677 O b'+1:lemma:cell'
508 +-0.915154 O b'lemma:2h'
509 +-0.915154 O b'-1:lemma:additional'
510 +-0.930586 Supp b'+1:lemma:acetate'
511 +-0.930611 O b'lemma:37'
512 +-0.940854 O b'+1:lemma:g/l'
513 +-0.942447 O b'-1:lemma:co2'
514 +-0.944499 O b'lemma:of'
515 +-0.951108 O b'-1:lemma:dissolve'
516 +-0.951108 O b'+1:lemma:methanol'
517 +-0.962496 O b'+1:lemma:hour'
518 +-0.976720 Phase b'postag:JJ'
519 +-0.983487 O b'lemma:0.1'
520 +-1.035888 O b'-1:lemma:ompr'
521 +-1.064088 O b'+1:lemma:at'
522 +-1.115638 OD b'postag:-LRB-'
523 +-1.121251 O b'lemma:wt'
524 +-1.195033 O b'lemma:mid-log'
525 +-1.218852 O b'+1:lemma:+'
526 +-1.237063 O b'-1:lemma:IP'
527 +-1.311143 O b'-1:lemma:nsrr'
528 +-1.325930 Anti b'postag:NNP'
529 +-1.428005 O b'lemma:rifampicin'
530 +-1.465722 Air b'+1:postag:JJ'
531 +-1.474845 Supp b'postag:JJ'
532 +-1.503128 O b'lemma:methanol'
533 +-1.517666 O b'+1:lemma:in'
534 +-1.685015 Air b'postag:NN'
535 +-2.153686 O b'+1:lemma:2'
536 +-2.261742 O b'-1:lemma:2'
537 +-2.412156 O b'+1:lemma:1'
538 +-4.073767 O b'-1:lemma::'
539 +-4.265465 O b'-1:lemma:_'
540 +
1 +********** TRAINING AND TESTING REPORT **********
2 +Training file: training-data-set-70_v4.txt
3 +
4 +best params:{'c1': 0.09081091923387723, 'c2': 0.00038627878700387623}
5 +best CV score:0.7938633983009262
6 +model size: 0.07M
7 +
8 +Flat F1: 0.786870840829875
9 + precision recall f1-score support
10 +
11 + OD 0.789 0.405 0.536 37
12 + pH 1.000 1.000 1.000 12
13 + Technique 0.952 0.909 0.930 22
14 + Med 0.776 0.912 0.839 57
15 + Temp 0.818 1.000 0.900 18
16 + Vess 0.000 0.000 0.000 0
17 + Agit 0.000 0.000 0.000 0
18 + Phase 1.000 0.947 0.973 19
19 + Air 0.754 0.742 0.748 62
20 + Anti 0.571 0.444 0.500 9
21 + Strain 1.000 1.000 1.000 1
22 + Gtype 0.860 0.811 0.835 106
23 + Substrain 0.000 0.000 0.000 1
24 + Supp 0.819 0.699 0.754 136
25 + Gversion 0.000 0.000 0.000 0
26 +
27 +avg / total 0.824 0.765 0.787 480
28 +
29 +
30 +Top likely transitions:
31 +Agit -> Agit 7.136396
32 +OD -> OD 6.491625
33 +Temp -> Temp 6.283086
34 +Anti -> Anti 6.213232
35 +Med -> Med 5.995903
36 +Phase -> Phase 5.618998
37 +Gtype -> Gtype 5.237438
38 +Air -> Air 5.151876
39 +Gversion -> Gversion 4.933604
40 +O -> O 4.847296
41 +Technique -> Technique 4.762002
42 +Supp -> Supp 4.623411
43 +Gtype -> Supp 1.707940
44 +pH -> pH 1.691677
45 +O -> Technique 1.635878
46 +O -> Supp 1.559728
47 +O -> Gtype 1.022656
48 +Substrain -> Gtype 0.995104
49 +O -> Anti 0.583807
50 +O -> Temp 0.467612
51 +O -> Gversion 0.453034
52 +Technique -> Air 0.359052
53 +Air -> O 0.273813
54 +O -> Med 0.235768
55 +Temp -> O 0.115990
56 +Gtype -> Air 0.085028
57 +Med -> O 0.063901
58 +Phase -> Air -0.013623
59 +Strain -> O -0.015319
60 +Air -> Phase -0.018991
61 +O -> OD -0.096097
62 +Supp -> Air -0.152980
63 +Supp -> Gtype -0.172882
64 +O -> Phase -0.230569
65 +Gtype -> Anti -0.419137
66 +Technique -> O -0.465846
67 +Air -> Supp -0.504152
68 +OD -> Air -0.526665
69 +Gtype -> O -0.592689
70 +Supp -> O -0.617787
71 +Technique -> OD -0.675268
72 +Phase -> O -0.805307
73 +OD -> Med -0.848210
74 +O -> Air -0.948079
75 +Supp -> Med -0.972720
76 +Agit -> O -1.032994
77 +OD -> O -1.111875
78 +Gtype -> OD -1.121470
79 +Air -> OD -1.294245
80 +Substrain -> O -1.829982
81 +
82 +
83 +Top unlikely transitions:
84 +Temp -> Temp 6.283086
85 +Anti -> Anti 6.213232
86 +Med -> Med 5.995903
87 +Phase -> Phase 5.618998
88 +Gtype -> Gtype 5.237438
89 +Air -> Air 5.151876
90 +Gversion -> Gversion 4.933604
91 +O -> O 4.847296
92 +Technique -> Technique 4.762002
93 +Supp -> Supp 4.623411
94 +Gtype -> Supp 1.707940
95 +pH -> pH 1.691677
96 +O -> Technique 1.635878
97 +O -> Supp 1.559728
98 +O -> Gtype 1.022656
99 +Substrain -> Gtype 0.995104
100 +O -> Anti 0.583807
101 +O -> Temp 0.467612
102 +O -> Gversion 0.453034
103 +Technique -> Air 0.359052
104 +Air -> O 0.273813
105 +O -> Med 0.235768
106 +Temp -> O 0.115990
107 +Gtype -> Air 0.085028
108 +Med -> O 0.063901
109 +Phase -> Air -0.013623
110 +Strain -> O -0.015319
111 +Air -> Phase -0.018991
112 +O -> OD -0.096097
113 +Supp -> Air -0.152980
114 +Supp -> Gtype -0.172882
115 +O -> Phase -0.230569
116 +Gtype -> Anti -0.419137
117 +Technique -> O -0.465846
118 +Air -> Supp -0.504152
119 +OD -> Air -0.526665
120 +Gtype -> O -0.592689
121 +Supp -> O -0.617787
122 +Technique -> OD -0.675268
123 +Phase -> O -0.805307
124 +OD -> Med -0.848210
125 +O -> Air -0.948079
126 +Supp -> Med -0.972720
127 +Agit -> O -1.032994
128 +OD -> O -1.111875
129 +Gtype -> OD -1.121470
130 +Air -> OD -1.294245
131 +Substrain -> O -1.829982
132 +Phase -> OD -2.186140
133 +Med -> Supp -2.693786
134 +
135 +
136 +Top positive:
137 +12.023128 Supp b'lemma:Iron'
138 +11.905802 Phase b'lemma:stationary'
139 +10.567265 O b'lemma:_'
140 +10.151152 Air b'-1:lemma:ChIP-Seq'
141 +10.090491 Air b'lemma:anaerobic'
142 +9.857898 Supp b'lemma:nitrate'
143 +9.629833 Air b'lemma:aerobic'
144 +9.577578 Technique b'lemma:ChIP-exo'
145 +9.036653 Phase b'lemma:mid-log'
146 +8.721963 Technique b'lemma:chipseq'
147 +8.698132 Med b'lemma:MOPS'
148 +8.668936 O b'lemma:1'
149 +8.103505 O b'-1:lemma:ChIP-exo'
150 +7.864122 O b'lemma:rpob'
151 +7.451473 Strain b'lemma:k-12'
152 +7.338543 O b'lemma:\xcf\x8332'
153 +7.198959 Air b'lemma:Aerobic'
154 +7.109572 Gtype b'lemma:flag-tag'
155 +7.109572 Gtype b'-1:lemma:c-terminal'
156 +7.044978 Supp b'lemma:pq'
157 +6.899921 O b'lemma:2'
158 +6.897717 Gtype b'lemma:\xce\xb4cra'
159 +6.862453 O b'lemma:Custom'
160 +6.848216 O b'-1:lemma:tag'
161 +6.753546 Gversion b'lemma:asm584v2'
162 +6.751054 O b'lemma:b'
163 +6.726115 Technique b'lemma:ChIP-Seq'
164 +6.340232 O b'lemma:a'
165 +6.324324 Med b'lemma:LB'
166 +6.242158 O b'lemma:3'
167 +6.225748 O b'lemma:rep1'
168 +6.183760 Gversion b'-1:lemma:nc'
169 +6.139494 O b'lemma:for'
170 +6.136397 O b'lemma:rep2'
171 +6.112373 O b'-1:lemma:Aerobic'
172 +6.052426 Gtype b'lemma:delta-arca'
173 +6.021691 Supp b'+1:lemma:\xc2\xb5m'
174 +6.014827 O b'lemma:rep3'
175 +5.998278 Gtype b'lemma:arca8myc'
176 +5.864622 Gtype b'lemma:fnr8myc'
177 +5.838163 Supp b'lemma:Fe'
178 +5.756687 Substrain b'lemma:mg1655'
179 +5.628610 O b'lemma:Cra'
180 +5.609519 Gtype b'+1:lemma:type'
181 +5.519720 Gtype b'lemma:type'
182 +5.487434 OD b'+1:lemma:stationary'
183 +5.465596 Gversion b'lemma:nc'
184 +5.326426 Gtype b'lemma:delta-fnr'
185 +5.265842 Anti b'lemma:none'
186 +5.236408 Supp b'lemma:nh4cl'
187 +5.195640 O b'postag::'
188 +5.136998 Med b'lemma:lb'
189 +5.129499 OD b'lemma:od450'
190 +5.106518 Technique b'lemma:rna-seq'
191 +5.084990 O b'-1:lemma:anaerobic'
192 +4.986922 Gtype b'lemma:wt'
193 +4.943309 Supp b'lemma:acetate'
194 +4.879656 Gtype b'lemma:\xce\xb4fur'
195 +4.846650 Technique b'lemma:chip-seq'
196 +4.825495 O b'-1:lemma:0.3-0.35'
197 +4.812352 Gtype b'-1:lemma:\xe2\x88\x86'
198 +4.769607 Supp b'lemma:glucose'
199 +4.759235 O b'-1:lemma:glucose'
200 +4.740317 Supp b'lemma:arginine'
201 +4.738816 Supp b'lemma:rifampicin'
202 +4.701038 Gversion b'lemma:chip-seq'
203 +4.693852 Gtype b'lemma:nsrr'
204 +4.666819 O b'lemma:chip'
205 +4.666777 Air b'lemma:anerobically'
206 +4.665470 O b'lemma:or'
207 +4.635435 Temp b'-1:lemma:sample'
208 +4.605663 O b'lemma:.'
209 +4.605663 O b'postag:.'
210 +4.574656 OD b'lemma:od600'
211 +4.562061 O b'postag:IN'
212 +4.550601 Anti b'lemma:anti-myc'
213 +4.537492 Gtype b'+1:lemma:ph5'
214 +4.523129 O b'-1:lemma:type'
215 +4.511181 Strain b'+1:lemma:substr'
216 +4.493105 O b'+1:lemma:od600'
217 +4.453065 Vess b'lemma:flask'
218 +4.453065 Vess b'-1:lemma:warm'
219 +4.444663 Supp b'lemma:no3'
220 +4.439485 Technique b'+1:lemma:chip-exo'
221 +4.416181 Gtype b'+1:lemma:flagtag'
222 +4.415325 Supp b'-1:lemma:Cra'
223 +4.396509 Supp b'-1:lemma:+'
224 +4.383866 Gtype b'-1:lemma:ptac'
225 +4.365928 O b'+1:lemma:sparging'
226 +4.354534 O b'+1:postag:RB'
227 +4.348783 Med b'+1:lemma:0.4'
228 +4.341814 O b'-1:lemma:0.3'
229 +4.336456 O b'+1:lemma:chip-seq'
230 +4.269719 O b'lemma:with'
231 +4.219193 Anti b'lemma:seqa'
232 +4.194527 Supp b'+1:lemma:hour'
233 +4.154345 Gtype b'lemma:\xe2\x88\x86'
234 +4.105460 O b'lemma:-'
235 +4.045385 Temp b'-1:lemma:\xcf\x8332'
236 +4.041775 Supp b'lemma:dpd'
237 +4.018422 O b'lemma:oxyr'
238 +4.006587 Med b'+1:lemma:minimal'
239 +3.994386 Gtype b'-1:lemma:rpob'
240 +3.943397 pH b'lemma:ph5'
241 +3.943397 pH b'+1:lemma:.5'
242 +3.925919 Supp b'lemma:fructose'
243 +3.913579 Supp b'+1:lemma:1'
244 +3.908385 O b'+1:postag:NNP'
245 +3.803339 O b'+1:lemma:o.d.'
246 +3.758844 Technique b'lemma:rnaseq'
247 +3.730482 Technique b'+1:lemma:rna-seq'
248 +3.692271 Technique b'-1:lemma:IP'
249 +3.684645 O b'lemma:ompr'
250 +3.679500 O b'-1:lemma:lb'
251 +3.642465 Anti b'+1:lemma:antibody'
252 +3.624491 O b'lemma:s'
253 +3.612874 O b'lemma:2-3'
254 +3.606456 Gtype b'+1:lemma:pq'
255 +3.584554 O b'lemma:soxs'
256 +3.584554 O b'lemma:soxr'
257 +3.545385 O b'lemma:CEL'
258 +3.527179 Gversion b'lemma:u00096'
259 +3.527179 Gversion b'+1:lemma:.2'
260 +3.525538 Technique b'-1:lemma:chip-exo'
261 +3.525111 Anti b'lemma:anti-rpos'
262 +3.517186 O b'postag:VBN'
263 +3.489661 Supp b'lemma:Leu'
264 +3.480284 Supp b'lemma:iptg'
265 +3.469423 Temp b'-1:lemma:43'
266 +3.467975 Gtype b'lemma:pk4854'
267 +3.443002 OD b'-1:lemma:about'
268 +3.435842 Gtype b'lemma:\xce\xb4ompr'
269 +3.435548 Med b'+1:lemma:contain'
270 +3.393209 Med b'lemma:m63'
271 +3.390757 Gversion b'lemma:.2'
272 +3.390757 Gversion b'-1:lemma:u00096'
273 +3.365623 Gtype b'lemma:wild-type'
274 +3.365137 Supp b'+1:lemma:_'
275 +3.285213 O b'-1:lemma:\xc2\xb0c'
276 +3.270674 Air b'lemma:anaerobically'
277 +3.263042 Gversion b'lemma:000913'
278 +3.243898 Gtype b'lemma:\xce\xb4soxs'
279 +3.241988 O b'lemma:at'
280 +3.241694 Supp b'lemma:Adenine'
281 +3.235960 Supp b'+1:lemma:2'
282 +3.220154 Substrain b'+1:lemma:phtpg'
283 +3.168232 O b'lemma:chip-arca'
284 +3.124706 Med b'-1:lemma:ml'
285 +3.119275 Gtype b'lemma:deltaseqa'
286 +3.119275 Gtype b'-1:lemma:old'
287 +3.105855 Phase b'-1:lemma:until'
288 +3.104880 Supp b'lemma:nacl'
289 +3.063586 Phase b'+1:lemma:for'
290 +3.062511 O b'lemma:argr'
291 +3.057692 Temp b'-1:lemma:37'
292 +3.030255 O b'lemma:purr'
293 +3.006731 O b'-1:lemma:media'
294 +2.937639 O b'+1:lemma:pq'
295 +2.930516 Supp b'lemma:methanol'
296 +2.918452 Temp b'+1:lemma:\xc2\xb0c'
297 +2.915173 Gtype b'lemma:WT'
298 +2.909714 OD b'-1:lemma:~'
299 +2.900692 Med b'lemma:L'
300 +2.900692 Med b'+1:lemma:broth'
301 +2.899989 Gtype b'lemma:ptac'
302 +2.898928 O b'+1:lemma:rifampicin'
303 +2.878667 pH b'lemma:.5'
304 +2.878667 pH b'-1:lemma:ph5'
305 +2.877293 O b'postag:DT'
306 +2.875342 O b'+1:lemma:mid-log'
307 +2.845729 Gtype b'+1:lemma:with'
308 +2.833330 Temp b'lemma:\xc2\xb0c'
309 +2.821959 Supp b'-1:lemma:\xc2\xb5m'
310 +2.778935 O b'-1:lemma:dpd'
311 +2.716093 Supp b'+1:lemma:iptg'
312 +2.714614 Supp b'+1:lemma:Deficient'
313 +2.696864 Gtype b'-1:lemma:phtpg'
314 +2.687105 Air b'+1:lemma:at'
315 +2.676418 O b'lemma:affyexp'
316 +2.676025 Gtype b'lemma:\xce\xb4soxr'
317 +2.663692 Gtype b'lemma:\xce\xb4oxyr'
318 +2.658854 Med b'+1:lemma:2.0'
319 +2.651224 Gtype b'lemma:dfnr'
320 +2.649576 O b'-1:lemma:l1'
321 +2.620669 Air b'postag:RB'
322 +2.577561 O b'lemma:Lrp'
323 +2.570022 Med b'+1:lemma:g/l'
324 +2.524794 Technique b'-1:lemma:input'
325 +2.523784 O b'-1:lemma:min'
326 +2.517414 O b'+1:postag:VBP'
327 +2.471571 O b'+1:lemma:acetate'
328 +2.464652 O b'postag:VBG'
329 +2.464091 Air b'-1:lemma:phase'
330 +2.457715 OD b'lemma:0.3'
331 +2.425241 Gtype b'lemma:soxs-8myc'
332 +2.425241 Gtype b'lemma:soxr-8myc'
333 +2.406446 O b'+1:lemma:0.2'
334 +2.380330 Temp b'lemma:37'
335 +2.356535 Temp b'lemma:43'
336 +2.340361 Supp b'lemma:leucine'
337 +
338 +
339 +Top negative:
340 +0.000037 OD b'+1:lemma:\xce\xb4soxr'
341 +0.000031 Technique b'+1:lemma:Anaerobic'
342 +0.000028 OD b'-1:lemma:phase'
343 +0.000026 Agit b'-1:postag:VBG'
344 +0.000015 Agit b'+1:postag:IN'
345 +0.000015 O b'lemma:input'
346 +0.000014 OD b'+1:postag:RB'
347 +0.000012 O b'-1:lemma:Fur'
348 +0.000010 Supp b'+1:lemma:feso4'
349 +0.000010 O b'+1:lemma:ChIP-Seq'
350 +0.000005 O b'-1:lemma:250'
351 +0.000004 OD b'lemma::'
352 +0.000004 Agit b'postag:VBG'
353 +0.000003 OD b'postag::'
354 +0.000001 Temp b'-1:postag:NNS'
355 +-0.000008 O b'lemma:mg1655'
356 +-0.000009 O b'-1:lemma:sample'
357 +-0.000026 O b'-1:lemma:e.'
358 +-0.000069 O b'-1:lemma:1'
359 +-0.000210 Gtype b'-1:postag:NNP'
360 +-0.000267 O b'+1:lemma:co2'
361 +-0.000316 Gtype b'+1:lemma:2'
362 +-0.000469 O b'+1:lemma:rep1'
363 +-0.001404 O b'-1:lemma:at'
364 +-0.002564 Med b'+1:postag:CC'
365 +-0.002647 OD b'lemma:~'
366 +-0.002825 O b'lemma:grow'
367 +-0.003590 O b'lemma:anaerobic'
368 +-0.004607 O b'+1:lemma:dissolve'
369 +-0.005282 Supp b'+1:lemma:of'
370 +-0.008060 OD b'+1:postag:-LRB-'
371 +-0.008731 OD b'+1:postag:-RRB-'
372 +-0.009571 O b'-1:lemma:30'
373 +-0.010095 O b'lemma:m63'
374 +-0.010149 Phase b'lemma:pahse'
375 +-0.010429 Gtype b'+1:lemma:-'
376 +-0.012754 Supp b'lemma:mm'
377 +-0.013827 OD b'+1:lemma:~'
378 +-0.014292 O b'+1:lemma:grow'
379 +-0.015852 OD b'+1:postag:CD'
380 +-0.016820 O b'lemma:25'
381 +-0.017788 OD b'+1:lemma:-rrb-'
382 +-0.019574 O b'+1:lemma:~'
383 +-0.025256 O b'+1:lemma:300'
384 +-0.026121 O b'+1:lemma:0.4'
385 +-0.028318 O b'lemma:o2'
386 +-0.029040 Anti b'+1:lemma:anti-fur'
387 +-0.030350 OD b'+1:lemma:0.4'
388 +-0.030931 Med b'+1:postag:NNS'
389 +-0.033897 O b'-1:lemma:mm'
390 +-0.035604 O b'+1:lemma:o2'
391 +-0.035916 O b'-1:lemma:the'
392 +-0.035990 O b'+1:lemma:arginine'
393 +-0.036415 Air b'-1:lemma:anaerobically'
394 +-0.040153 Air b'-1:postag:CC'
395 +-0.041154 O b'+1:lemma:antibody'
396 +-0.041832 Anti b'-1:postag:NN'
397 +-0.045053 OD b'postag:IN'
398 +-0.047572 Gtype b'+1:postag:IN'
399 +-0.053112 Supp b'+1:lemma:dpd'
400 +-0.053329 O b'-1:lemma:minimal'
401 +-0.060576 O b'-1:postag:IN'
402 +-0.065925 O b'lemma:medium'
403 +-0.066353 O b'+1:lemma:25'
404 +-0.076464 Med b'-1:postag:NN'
405 +-0.078443 Supp b'-1:postag:NN'
406 +-0.078865 Strain b'+1:postag:NN'
407 +-0.091232 Air b'postag:CD'
408 +-0.091763 O b'lemma:30'
409 +-0.095656 O b'lemma:lb'
410 +-0.098575 O b'+1:lemma:shake'
411 +-0.100589 Anti b'+1:postag:JJ'
412 +-0.109250 Supp b'+1:postag:IN'
413 +-0.113108 Agit b'postag:NN'
414 +-0.113815 O b'-1:lemma:o2'
415 +-0.117553 O b'+1:lemma:5'
416 +-0.121789 Supp b'lemma:and'
417 +-0.122865 Gtype b'-1:postag:NN'
418 +-0.129126 O b'lemma:glucose'
419 +-0.130690 O b'-1:lemma:of'
420 +-0.132633 O b'+1:lemma:phase'
421 +-0.134059 OD b'+1:postag:CC'
422 +-0.141756 Air b'-1:lemma:95'
423 +-0.143614 O b'-1:lemma:-lrb-'
424 +-0.149012 O b'+1:lemma:\xc2\xb0c'
425 +-0.155150 O b'+1:postag:NNS'
426 +-0.160903 O b'+1:lemma:-rrb-'
427 +-0.161363 O b'lemma:n2'
428 +-0.162536 O b'postag:VBP'
429 +-0.168298 O b'-1:lemma:n2'
430 +-0.171249 O b'-1:postag:VBN'
431 +-0.176512 O b'lemma:aerobically'
432 +-0.186672 O b'lemma:co2'
433 +-0.186766 O b'+1:lemma:_'
434 +-0.196943 Strain b'postag:NN'
435 +-0.201827 O b'+1:lemma:mm'
436 +-0.206537 O b'-1:postag:-LRB-'
437 +-0.214121 Supp b'postag:CC'
438 +-0.215072 O b'lemma:e.'
439 +-0.235225 O b'-1:lemma:0.1'
440 +-0.261548 O b'lemma:\xce\xb4fur'
441 +-0.264532 O b'-1:lemma:rifampicin'
442 +-0.272491 O b'-1:lemma:0.2'
443 +-0.284244 O b'-1:lemma:from'
444 +-0.285360 O b'-1:lemma:until'
445 +-0.286318 Med b'postag:CD'
446 +-0.287177 Air b'-1:lemma:or'
447 +-0.304066 O b'lemma:150'
448 +-0.304066 O b'+1:lemma:mg/ml'
449 +-0.304896 O b'+1:lemma:or'
450 +-0.309102 O b'+1:postag:-RRB-'
451 +-0.331291 Supp b'-1:postag:NNP'
452 +-0.339237 Med b'+1:postag:IN'
453 +-0.352184 O b'postag:RB'
454 +-0.361577 Gtype b'postag:CD'
455 +-0.367320 O b'-1:lemma:ml'
456 +-0.377900 O b'lemma:dissolve'
457 +-0.378236 O b'+1:lemma:.'
458 +-0.378236 O b'+1:postag:.'
459 +-0.378886 O b'+1:lemma:until'
460 +-0.380197 O b'lemma:od600'
461 +-0.380556 O b'-1:lemma:od600'
462 +-0.388687 O b'lemma:phase'
463 +-0.393310 O b'+1:lemma:0.3'
464 +-0.399102 O b'+1:postag:IN'
465 +-0.399168 Temp b'postag:JJ'
466 +-0.401235 O b'-1:lemma:~'
467 +-0.410916 O b'-1:lemma:rpob'
468 +-0.431448 OD b'lemma:-lrb-'
469 +-0.434432 Technique b'-1:lemma::'
470 +-0.444259 Supp b'+1:postag:VBN'
471 +-0.459196 Technique b'-1:postag::'
472 +-0.459291 OD b'+1:lemma:and'
473 +-0.470209 O b'lemma:\xe2\x88\x86'
474 +-0.486291 O b'+1:lemma:%'
475 +-0.518507 O b'-1:lemma:IP'
476 +-0.519707 Phase b'-1:lemma:at'
477 +-0.526678 O b'lemma:dpd'
478 +-0.534234 Air b'-1:postag:JJ'
479 +-0.540044 pH b'postag:NN'
480 +-0.542011 O b'+1:lemma:c'
481 +-0.553114 O b'-1:lemma:grow'
482 +-0.571198 O b'lemma:0.3'
483 +-0.573213 Med b'+1:postag:NN'
484 +-0.592187 Temp b'postag:NN'
485 +-0.596509 O b'-1:lemma:mid-log'
486 +-0.616998 O b'lemma:media'
487 +-0.625245 O b'+1:lemma:cell'
488 +-0.625847 O b'-1:lemma:cra'
489 +-0.668364 Gversion b'+1:postag:NN'
490 +-0.670411 O b'+1:lemma:+'
491 +-0.670829 O b'lemma:anaerobically'
492 +-0.693082 Supp b'+1:lemma:rifampicin'
493 +-0.727079 O b'-1:lemma:dissolve'
494 +-0.727079 O b'+1:lemma:methanol'
495 +-0.738408 O b'+1:lemma:fecl2'
496 +-0.792874 O b'lemma:20'
497 +-0.848889 OD b'+1:postag:NN'
498 +-0.848997 Supp b'+1:lemma:acetate'
499 +-0.861900 Med b'-1:postag:IN'
500 +-0.865385 O b'lemma:0.2'
501 +-0.892622 Temp b'+1:lemma:to'
502 +-0.892622 Temp b'+1:postag:TO'
503 +-0.937071 O b'+1:lemma:supplement'
504 +-0.938360 O b'-1:lemma:co2'
505 +-0.960239 O b'lemma:2h'
506 +-0.960239 O b'-1:lemma:additional'
507 +-0.973815 O b'lemma:of'
508 +-1.034828 O b'+1:postag:VBG'
509 +-1.035428 O b'+1:lemma:g/l'
510 +-1.050289 O b'lemma:mid-log'
511 +-1.059894 O b'-1:postag:VBG'
512 +-1.066538 O b'lemma:0.1'
513 +-1.076597 O b'-1:postag::'
514 +-1.106329 Phase b'+1:postag:NN'
515 +-1.148398 Phase b'-1:postag:NN'
516 +-1.172180 Air b'+1:postag:JJ'
517 +-1.201626 O b'lemma:37'
518 +-1.206293 OD b'postag:-LRB-'
519 +-1.252513 O b'-1:lemma:37'
520 +-1.399946 O b'+1:lemma:at'
521 +-1.439837 O b'lemma:wt'
522 +-1.474885 O b'-1:lemma:nsrr'
523 +-1.521797 O b'lemma:fecl2'
524 +-1.544569 Anti b'postag:NNP'
525 +-1.548585 O b'+1:lemma:hour'
526 +-1.566877 Supp b'-1:lemma:%'
527 +-1.597712 O b'+1:lemma:in'
528 +-1.629113 O b'-1:lemma:ompr'
529 +-1.773656 Air b'postag:NN'
530 +-1.898147 Phase b'-1:postag:JJ'
531 +-2.222432 O b'lemma:rifampicin'
532 +-2.229376 O b'-1:lemma:2'
533 +-2.493849 O b'+1:lemma:1'
534 +-2.529260 O b'lemma:methanol'
535 +-2.707675 O b'+1:lemma:2'
536 +-2.830767 Supp b'postag:JJ'
537 +-2.988306 Phase b'postag:JJ'
538 +-4.548731 O b'-1:lemma::'
539 +-4.577007 O b'-1:lemma:_'
540 +
1 +********** TRAINING AND TESTING REPORT **********
2 +Training file: training-data-set-70_v4.txt
3 +
4 +best params:{'c1': 0.24365115413317517, 'c2': 0.017603116029939036}
5 +best CV score:0.797058241278945
6 +model size: 0.06M
7 +
8 +Flat F1: 0.7778441635784862
9 + precision recall f1-score support
10 +
11 + OD 1.000 0.405 0.577 37
12 + pH 1.000 1.000 1.000 12
13 + Technique 0.952 0.909 0.930 22
14 + Med 0.800 0.842 0.821 57
15 + Temp 0.818 1.000 0.900 18
16 + Vess 0.000 0.000 0.000 0
17 + Agit 0.000 0.000 0.000 0
18 + Phase 1.000 0.895 0.944 19
19 + Air 0.780 0.742 0.760 62
20 + Anti 0.571 0.444 0.500 9
21 + Strain 1.000 1.000 1.000 1
22 + Gtype 0.863 0.774 0.816 106
23 + Substrain 0.000 0.000 0.000 1
24 + Supp 0.818 0.662 0.732 136
25 + Gversion 0.000 0.000 0.000 0
26 +
27 +avg / total 0.847 0.735 0.778 480
28 +
29 +
30 +Top likely transitions:
31 +OD -> OD 6.460512
32 +Agit -> Agit 6.341871
33 +Temp -> Temp 5.624129
34 +Med -> Med 5.402282
35 +Anti -> Anti 5.214940
36 +Air -> Air 4.858980
37 +Phase -> Phase 4.544403
38 +Gtype -> Gtype 4.483666
39 +Supp -> Supp 4.372876
40 +Gversion -> Gversion 4.358204
41 +O -> O 4.356881
42 +Technique -> Technique 3.828561
43 +O -> Supp 2.070080
44 +Gtype -> Supp 1.938994
45 +pH -> pH 1.828748
46 +Air -> O 1.699192
47 +O -> Technique 1.307357
48 +Substrain -> Gtype 1.254891
49 +O -> Gtype 1.237193
50 +O -> Temp 0.952632
51 +Med -> O 0.819857
52 +OD -> Phase 0.738941
53 +Technique -> Air 0.735321
54 +O -> Anti 0.520896
55 +Supp -> O 0.493996
56 +Temp -> O 0.435644
57 +O -> Gversion 0.368943
58 +Phase -> O 0.362864
59 +Gtype -> pH 0.340162
60 +O -> Med 0.294591
61 +Gtype -> Air 0.108303
62 +O -> Strain 0.008465
63 +Gtype -> O -0.094763
64 +Gtype -> Anti -0.194733
65 +OD -> O -0.218649
66 +Technique -> pH -0.317427
67 +Agit -> O -0.543505
68 +Supp -> Med -0.600692
69 +Gtype -> OD -0.628749
70 +O -> Air -0.839621
71 +Substrain -> O -0.911959
72 +Med -> Supp -1.122521
73 +Phase -> OD -1.131519
74 +
75 +
76 +Top unlikely transitions:
77 +OD -> OD 6.460512
78 +Agit -> Agit 6.341871
79 +Temp -> Temp 5.624129
80 +Med -> Med 5.402282
81 +Anti -> Anti 5.214940
82 +Air -> Air 4.858980
83 +Phase -> Phase 4.544403
84 +Gtype -> Gtype 4.483666
85 +Supp -> Supp 4.372876
86 +Gversion -> Gversion 4.358204
87 +O -> O 4.356881
88 +Technique -> Technique 3.828561
89 +O -> Supp 2.070080
90 +Gtype -> Supp 1.938994
91 +pH -> pH 1.828748
92 +Air -> O 1.699192
93 +O -> Technique 1.307357
94 +Substrain -> Gtype 1.254891
95 +O -> Gtype 1.237193
96 +O -> Temp 0.952632
97 +Med -> O 0.819857
98 +OD -> Phase 0.738941
99 +Technique -> Air 0.735321
100 +O -> Anti 0.520896
101 +Supp -> O 0.493996
102 +Temp -> O 0.435644
103 +O -> Gversion 0.368943
104 +Phase -> O 0.362864
105 +Gtype -> pH 0.340162
106 +O -> Med 0.294591
107 +Gtype -> Air 0.108303
108 +O -> Strain 0.008465
109 +Gtype -> O -0.094763
110 +Gtype -> Anti -0.194733
111 +OD -> O -0.218649
112 +Technique -> pH -0.317427
113 +Agit -> O -0.543505
114 +Supp -> Med -0.600692
115 +Gtype -> OD -0.628749
116 +O -> Air -0.839621
117 +Substrain -> O -0.911959
118 +Med -> Supp -1.122521
119 +Phase -> OD -1.131519
120 +
121 +
122 +Top positive:
123 +7.945473 O b'lemma:_'
124 +7.412165 Supp b'lemma:Iron'
125 +6.926609 Air b'lemma:anaerobic'
126 +6.922674 O b'lemma:1'
127 +6.590205 Air b'lemma:aerobic'
128 +6.223986 Strain b'lemma:k-12'
129 +6.187011 Technique b'lemma:ChIP-exo'
130 +6.130654 Phase b'lemma:stationary'
131 +5.864907 Supp b'lemma:nitrate'
132 +5.653173 O b'lemma:2'
133 +5.610923 O b'lemma:rpob'
134 +5.610415 Air b'-1:lemma:ChIP-Seq'
135 +5.397732 Phase b'lemma:mid-log'
136 +5.255269 O b'lemma:3'
137 +5.206599 Technique b'lemma:chipseq'
138 +4.960561 Substrain b'lemma:mg1655'
139 +4.914901 O b'postag:IN'
140 +4.746312 Supp b'lemma:pq'
141 +4.579937 Gtype b'lemma:\xce\xb4cra'
142 +4.520187 Gtype b'lemma:type'
143 +4.511014 OD b'lemma:od600'
144 +4.469574 Gtype b'lemma:flag-tag'
145 +4.469574 Gtype b'-1:lemma:c-terminal'
146 +4.462810 O b'lemma:rep1'
147 +4.392739 O b'postag::'
148 +4.375670 Gversion b'lemma:asm584v2'
149 +4.369737 O b'lemma:Custom'
150 +4.356443 O b'lemma:b'
151 +4.303923 Technique b'lemma:ChIP-Seq'
152 +4.302917 Med b'lemma:MOPS'
153 +4.296205 Supp b'lemma:nh4cl'
154 +4.281883 Gtype b'-1:lemma:\xe2\x88\x86'
155 +4.279879 Gtype b'+1:lemma:type'
156 +4.270631 O b'lemma:rep2'
157 +4.239368 Air b'lemma:Aerobic'
158 +4.132080 Gversion b'lemma:nc'
159 +4.117043 Med b'lemma:lb'
160 +4.108014 Supp b'+1:lemma:\xc2\xb5m'
161 +4.076646 O b'lemma:rep3'
162 +4.043095 Gtype b'lemma:arca8myc'
163 +4.041495 Supp b'lemma:glucose'
164 +4.033775 O b'lemma:a'
165 +4.027421 O b'-1:lemma:tag'
166 +4.027011 Med b'lemma:LB'
167 +4.007935 O b'lemma:Cra'
168 +3.910498 Gtype b'lemma:wt'
169 +3.905564 O b'-1:lemma:ChIP-exo'
170 +3.870678 O b'lemma:\xcf\x8332'
171 +3.727782 Supp b'lemma:acetate'
172 +3.697104 Gtype b'lemma:delta-arca'
173 +3.647724 Technique b'lemma:rna-seq'
174 +3.633897 Supp b'lemma:Fe'
175 +3.575150 Gtype b'lemma:fnr8myc'
176 +3.536159 OD b'lemma:od450'
177 +3.488627 Supp b'lemma:rifampicin'
178 +3.426221 O b'lemma:.'
179 +3.426221 O b'postag:.'
180 +3.421248 Technique b'lemma:chip-seq'
181 +3.389912 Air b'postag:RB'
182 +3.367471 Med b'+1:lemma:0.4'
183 +3.358386 Gtype b'lemma:\xe2\x88\x86'
184 +3.341657 Supp b'lemma:dpd'
185 +3.311375 Supp b'lemma:no3'
186 +3.301666 Gtype b'lemma:nsrr'
187 +3.263607 OD b'+1:lemma:stationary'
188 +3.256783 O b'-1:lemma:Aerobic'
189 +3.217842 Anti b'lemma:none'
190 +3.181572 Gversion b'-1:lemma:nc'
191 +3.100116 Technique b'lemma:rnaseq'
192 +3.072172 Gtype b'lemma:\xce\xb4fur'
193 +3.058843 O b'postag:VBN'
194 +3.028102 Anti b'+1:lemma:antibody'
195 +3.018055 Supp b'lemma:arginine'
196 +3.012119 Supp b'+1:lemma:1'
197 +3.009503 Gtype b'lemma:delta-fnr'
198 +2.984566 O b'lemma:CEL'
199 +2.976222 O b'+1:lemma:od600'
200 +2.972362 Supp b'lemma:fructose'
201 +2.971825 Vess b'lemma:flask'
202 +2.971825 Vess b'-1:lemma:warm'
203 +2.946262 Technique b'-1:lemma:IP'
204 +2.917769 Temp b'-1:lemma:sample'
205 +2.914128 Med b'+1:lemma:minimal'
206 +2.847635 Temp b'lemma:\xc2\xb0c'
207 +2.845956 O b'-1:lemma:type'
208 +2.837114 pH b'lemma:ph5'
209 +2.837114 pH b'+1:lemma:.5'
210 +2.822859 Supp b'+1:lemma:2'
211 +2.819530 O b'postag:CC'
212 +2.802486 Gtype b'+1:lemma:with'
213 +2.796286 Anti b'lemma:seqa'
214 +2.757551 Gtype b'+1:lemma:pq'
215 +2.752041 O b'lemma:chip'
216 +2.720379 O b'lemma:-'
217 +2.697742 Gversion b'lemma:u00096'
218 +2.697742 Gversion b'+1:lemma:.2'
219 +2.686468 Technique b'+1:lemma:chip-exo'
220 +2.652892 O b'-1:lemma:0.3-0.35'
221 +2.634884 O b'-1:lemma:glucose'
222 +2.605565 Gversion b'lemma:chip-seq'
223 +2.589331 Strain b'+1:lemma:substr'
224 +2.586612 O b'+1:postag:RB'
225 +2.568676 O b'postag:DT'
226 +2.560900 Temp b'-1:lemma:\xcf\x8332'
227 +2.543878 O b'lemma:with'
228 +2.514778 Med b'lemma:m63'
229 +2.497949 Anti b'lemma:anti-myc'
230 +2.469343 pH b'lemma:.5'
231 +2.469343 pH b'-1:lemma:ph5'
232 +2.464961 Gversion b'lemma:000913'
233 +2.461605 Gtype b'-1:lemma:rpob'
234 +2.454623 O b'lemma:s'
235 +2.438821 O b'+1:postag:NNP'
236 +2.438655 Gversion b'lemma:.2'
237 +2.438655 Gversion b'-1:lemma:u00096'
238 +2.427395 Technique b'-1:lemma:chip-exo'
239 +2.415443 Anti b'lemma:anti-rpos'
240 +2.411875 Temp b'-1:lemma:43'
241 +2.410922 Gtype b'-1:lemma:ptac'
242 +2.405223 Gtype b'lemma:\xce\xb4ompr'
243 +2.401826 O b'-1:lemma:anaerobic'
244 +2.390761 O b'lemma:or'
245 +2.388354 O b'-1:lemma:lb'
246 +2.381109 Gtype b'+1:lemma:flagtag'
247 +2.347230 Med b'+1:lemma:2.0'
248 +2.324166 Med b'lemma:media'
249 +2.310748 O b'-1:lemma:0.3'
250 +2.278494 Supp b'-1:lemma:Cra'
251 +2.263509 O b'+1:lemma:pq'
252 +2.238545 Supp b'+1:lemma:_'
253 +2.229756 O b'-1:lemma:l1'
254 +2.226389 Air b'-1:lemma:co2'
255 +2.200525 Gtype b'+1:lemma:aerobic'
256 +2.173774 Supp b'lemma:nacl'
257 +2.171575 Substrain b'+1:lemma:phtpg'
258 +2.149631 Supp b'lemma:20'
259 +2.120754 Gtype b'lemma:wild-type'
260 +2.119820 Supp b'-1:lemma:\xc2\xb5m'
261 +2.113883 O b'-1:lemma:\xc2\xb0c'
262 +2.105763 Supp b'-1:lemma:+'
263 +2.101125 O b'lemma:oxyr'
264 +2.092553 Gtype b'lemma:\xce\xb4soxs'
265 +2.090291 Air b'-1:lemma:-'
266 +2.081430 Supp b'lemma:Leu'
267 +2.062775 O b'postag:VBG'
268 +2.055064 O b'-1:lemma:stpa'
269 +2.046935 O b'lemma:condition'
270 +2.038964 Temp b'lemma:43'
271 +2.033948 Supp b'lemma:iptg'
272 +2.023886 pH b'+1:postag:CD'
273 +2.022782 Gtype b'+1:lemma:ph5'
274 +2.013337 Temp b'-1:lemma:37'
275 +1.955826 Med b'lemma:L'
276 +1.955826 Med b'+1:lemma:broth'
277 +1.954859 Temp b'lemma:37'
278 +1.927649 O b'+1:lemma:chip-seq'
279 +1.919892 Supp b'-1:lemma:with'
280 +1.915338 O b'lemma:affyexp'
281 +1.911245 Gtype b'lemma:pk4854'
282 +1.909261 Phase b'-1:lemma:mid-log'
283 +1.895692 Temp b'+1:lemma:\xc2\xb0c'
284 +1.886863 O b'lemma:genotype/variation'
285 +1.872821 OD b'-1:lemma:~'
286 +1.872507 OD b'lemma:0.3'
287 +1.858566 Med b'+1:lemma:supplement'
288 +1.848644 Gtype b'lemma:ptac'
289 +1.844359 O b'lemma:Lrp'
290 +1.841048 Med b'+1:lemma:+'
291 +1.831366 O b'lemma:culture'
292 +1.830571 Gtype b'-1:postag:VBG'
293 +1.825208 O b'lemma:-rrb-'
294 +1.822383 Gtype b'lemma:deltaseqa'
295 +1.822383 Gtype b'-1:lemma:old'
296 +1.820507 Gtype b'-1:lemma:nsrr'
297 +1.819979 Gtype b'+1:lemma:_'
298 +1.784801 Med b'+1:lemma:g/l'
299 +1.781733 Technique b'+1:lemma:rna-seq'
300 +1.778548 Supp b'+1:lemma:Deficient'
301 +1.771039 Gversion b'postag:CD'
302 +1.755561 O b'lemma:chip-arca'
303 +1.740739 Supp b'lemma:methanol'
304 +1.728483 O b'lemma:ompr'
305 +1.717002 Med b'-1:lemma:ml'
306 +1.696842 OD b'lemma:phase'
307 +1.693821 Supp b'lemma:Adenine'
308 +1.677174 Air b'-1:postag::'
309 +1.675361 Phase b'-1:lemma:until'
310 +1.672716 Air b'-1:postag:CD'
311 +1.672711 Med b'lemma:broth'
312 +1.672711 Med b'-1:lemma:L'
313 +1.672565 Technique b'-1:lemma:input'
314 +1.666211 Technique b'-1:lemma:_'
315 +1.663572 Air b'lemma:Anaerobic'
316 +1.657183 Gtype b'-1:lemma:_'
317 +1.650921 O b'lemma:Fur'
318 +1.650554 Gtype b'-1:postag:VBP'
319 +1.647569 Med b'-1:lemma:glucose'
320 +1.646454 Gtype b'lemma:WT'
321 +1.642228 Air b'lemma:anaerobically'
322 +1.638505 Supp b'+1:lemma:hour'
323 +
324 +
325 +Top negative:
326 +0.003606 Phase b'-1:postag:TO'
327 +0.003173 O b'+1:lemma:minute'
328 +0.002853 OD b'postag:CC'
329 +0.002630 Technique b'+1:lemma:-rrb-'
330 +0.002520 Technique b'+1:postag:-RRB-'
331 +0.002467 Med b'lemma:m9'
332 +0.002337 Air b'+1:postag:-LRB-'
333 +0.002264 Technique b'+1:lemma:Anaerobic'
334 +0.002098 Phase b'postag:RB'
335 +0.002086 Gtype b'+1:lemma:rna'
336 +0.002053 O b'lemma:~'
337 +0.001666 OD b'-1:postag:JJ'
338 +0.001084 Gtype b'+1:lemma:chip'
339 +0.001034 Air b'+1:lemma:-lrb-'
340 +0.000728 OD b'-1:lemma:to'
341 +0.000728 OD b'-1:postag:TO'
342 +0.000711 O b'-1:lemma:at'
343 +0.000709 O b'+1:lemma:_'
344 +0.000560 Temp b'+1:postag:VB'
345 +0.000555 Gtype b'lemma:lack'
346 +0.000388 OD b'postag:VBD'
347 +0.000335 Air b'-1:postag:RB'
348 +0.000334 OD b'-1:postag:VBN'
349 +0.000307 Air b'-1:lemma:95'
350 +0.000306 Anti b'-1:lemma:Custom'
351 +0.000275 O b'-1:postag:VBZ'
352 +0.000204 Air b'-1:lemma:-lrb-'
353 +0.000203 Air b'-1:postag:-LRB-'
354 +0.000156 Temp b'postag:VB'
355 +0.000139 Supp b'+1:lemma:dissolve'
356 +0.000138 Gtype b'lemma:ptrc'
357 +0.000138 Gtype b'-1:lemma:inducible'
358 +0.000085 Med b'-1:lemma:complete'
359 +0.000051 OD b'+1:lemma:0.35'
360 +0.000048 Temp b'+1:lemma:-rrb-'
361 +0.000043 Air b'-1:lemma:anaerobically'
362 +0.000039 OD b'+1:postag:VBN'
363 +0.000034 Temp b'+1:postag:-RRB-'
364 +0.000030 Supp b'-1:postag:VBN'
365 +0.000028 OD b'lemma:mg1655'
366 +0.000023 OD b'+1:lemma:mg1655'
367 +0.000022 OD b'lemma:k-12'
368 +0.000019 Temp b'lemma:-lrb-'
369 +0.000017 Temp b'postag:-LRB-'
370 +0.000016 Air b'lemma:-rrb-'
371 +0.000015 Air b'postag:-RRB-'
372 +0.000007 Gtype b'lemma:inducible'
373 +0.000007 Gtype b'-1:lemma:carrying'
374 +0.000007 Gtype b'+1:lemma:ptrc'
375 +0.000003 O b'lemma:4'
376 +0.000001 O b'postag:VBZ'
377 +0.000000 Med b'+1:lemma:,'
378 +0.000000 Med b'+1:postag:,'
379 +-0.000019 O b'-1:lemma:g/l'
380 +-0.000078 O b'-1:lemma:iptg'
381 +-0.000082 O b'+1:lemma:until'
382 +-0.000090 Supp b'+1:postag:CD'
383 +-0.000137 O b'+1:lemma:phase'
384 +-0.000630 O b'+1:lemma:grow'
385 +-0.000719 Air b'lemma:,'
386 +-0.000719 Air b'postag:,'
387 +-0.000881 Supp b'-1:postag:NNP'
388 +-0.001151 O b'+1:lemma:95'
389 +-0.001284 O b'lemma:glucose'
390 +-0.001441 O b'lemma:anaerobic'
391 +-0.002285 Supp b'-1:postag:NN'
392 +-0.003211 Air b'-1:lemma:,'
393 +-0.003211 Air b'-1:postag:,'
394 +-0.003285 O b'+1:lemma:rep1'
395 +-0.003344 O b'-1:lemma:n2'
396 +-0.004884 Supp b'+1:postag:IN'
397 +-0.005691 O b'lemma:grow'
398 +-0.008542 O b'lemma:\xce\xb4fur'
399 +-0.008810 O b'lemma:30'
400 +-0.008856 O b'lemma:\xe2\x88\x86'
401 +-0.016851 Gversion b'+1:postag:NN'
402 +-0.017577 Agit b'postag:NN'
403 +-0.019899 OD b'postag:JJ'
404 +-0.021053 Supp b'lemma:and'
405 +-0.035082 Med b'-1:postag:IN'
406 +-0.037650 O b'+1:lemma:shake'
407 +-0.040816 Technique b'-1:lemma::'
408 +-0.042661 Air b'-1:postag:VBN'
409 +-0.047406 O b'lemma:e.'
410 +-0.050232 O b'+1:lemma:300'
411 +-0.057684 Supp b'postag:CD'
412 +-0.060532 Med b'+1:postag:IN'
413 +-0.071550 O b'+1:lemma:5'
414 +-0.074571 Supp b'+1:lemma:-rrb-'
415 +-0.077631 O b'-1:postag:VBN'
416 +-0.083084 OD b'+1:postag:CD'
417 +-0.083227 Air b'postag:CD'
418 +-0.086748 Supp b'+1:postag:-RRB-'
419 +-0.087729 O b'-1:lemma:e.'
420 +-0.089412 O b'-1:lemma:um'
421 +-0.089412 O b'+1:lemma:paraquat'
422 +-0.095031 O b'-1:lemma:,'
423 +-0.095031 O b'-1:postag:,'
424 +-0.097514 Anti b'+1:postag:JJ'
425 +-0.097686 O b'-1:lemma:the'
426 +-0.115298 OD b'+1:postag:-LRB-'
427 +-0.115560 Phase b'-1:postag:NN'
428 +-0.117367 Phase b'-1:postag:JJ'
429 +-0.118533 Supp b'postag:CC'
430 +-0.118866 Med b'-1:postag:NN'
431 +-0.119788 O b'+1:postag:NNS'
432 +-0.128143 O b'lemma:150'
433 +-0.128143 O b'+1:lemma:mg/ml'
434 +-0.128440 Gtype b'postag:CD'
435 +-0.128454 O b'-1:lemma:rifampicin'
436 +-0.130932 Phase b'-1:lemma:at'
437 +-0.144251 Air b'-1:postag:CC'
438 +-0.152959 O b'+1:lemma:c'
439 +-0.153793 O b'+1:lemma:arginine'
440 +-0.155635 O b'+1:lemma:-rrb-'
441 +-0.155871 Phase b'+1:postag:NN'
442 +-0.169334 O b'-1:lemma:1'
443 +-0.178678 O b'-1:lemma:-lrb-'
444 +-0.185252 O b'-1:lemma:cra'
445 +-0.188794 Supp b'+1:postag:VBN'
446 +-0.189587 Supp b'-1:lemma:%'
447 +-0.197975 O b'+1:lemma:.'
448 +-0.197975 O b'+1:postag:.'
449 +-0.206342 O b'-1:postag:IN'
450 +-0.218582 O b'-1:lemma:of'
451 +-0.228664 O b'-1:lemma:0.2'
452 +-0.243004 O b'lemma:phase'
453 +-0.245652 Temp b'postag:JJ'
454 +-0.248718 O b'-1:lemma:30'
455 +-0.258083 Supp b'+1:lemma:rifampicin'
456 +-0.265675 O b'-1:lemma:from'
457 +-0.270538 Anti b'+1:lemma:anti-fur'
458 +-0.275341 Med b'+1:postag:NN'
459 +-0.286886 O b'lemma:20'
460 +-0.287653 Med b'postag:CD'
461 +-0.299457 pH b'postag:NN'
462 +-0.299809 O b'+1:postag:IN'
463 +-0.304068 O b'-1:postag:-LRB-'
464 +-0.308718 O b'+1:lemma:%'
465 +-0.337061 O b'lemma:media'
466 +-0.343972 O b'+1:postag:-RRB-'
467 +-0.375037 O b'-1:lemma:~'
468 +-0.378855 O b'+1:postag:VBG'
469 +-0.390573 O b'+1:lemma:0.3'
470 +-0.442424 Temp b'+1:lemma:to'
471 +-0.442424 Temp b'+1:postag:TO'
472 +-0.442791 O b'lemma:dissolve'
473 +-0.457125 Air b'-1:postag:JJ'
474 +-0.473796 O b'-1:lemma:ml'
475 +-0.487814 O b'-1:lemma:grow'
476 +-0.504521 O b'-1:lemma:od600'
477 +-0.510322 O b'lemma:anaerobically'
478 +-0.574504 O b'lemma:0.3'
479 +-0.603912 O b'+1:lemma:hour'
480 +-0.607283 O b'lemma:od600'
481 +-0.671305 O b'+1:lemma:+'
482 +-0.673609 O b'lemma:fecl2'
483 +-0.687238 O b'-1:lemma:sample'
484 +-0.693028 OD b'+1:postag:NN'
485 +-0.699278 Technique b'-1:postag::'
486 +-0.701848 O b'+1:lemma:fecl2'
487 +-0.704105 Phase b'postag:JJ'
488 +-0.721360 O b'-1:lemma:37'
489 +-0.751804 O b'lemma:2h'
490 +-0.751804 O b'-1:lemma:additional'
491 +-0.765268 O b'+1:lemma:cell'
492 +-0.797811 O b'-1:postag:VBG'
493 +-0.812938 OD b'lemma:-lrb-'
494 +-0.817055 O b'lemma:37'
495 +-0.849161 O b'lemma:0.1'
496 +-0.857948 Temp b'postag:NN'
497 +-0.858109 Supp b'+1:lemma:acetate'
498 +-0.893857 O b'-1:lemma:rpob'
499 +-0.908853 O b'lemma:0.2'
500 +-0.910535 O b'lemma:of'
501 +-0.917031 O b'+1:lemma:g/l'
502 +-0.927240 O b'-1:lemma:dissolve'
503 +-0.927240 O b'+1:lemma:methanol'
504 +-0.952327 O b'-1:postag::'
505 +-1.012901 O b'lemma:mid-log'
506 +-1.041464 O b'-1:lemma:ompr'
507 +-1.052741 O b'lemma:wt'
508 +-1.080730 Anti b'postag:NNP'
509 +-1.207147 O b'+1:lemma:supplement'
510 +-1.210841 O b'lemma:methanol'
511 +-1.241992 Air b'+1:postag:JJ'
512 +-1.248085 O b'lemma:rifampicin'
513 +-1.259918 O b'+1:lemma:at'
514 +-1.297301 O b'-1:lemma:IP'
515 +-1.445280 O b'-1:lemma:nsrr'
516 +-1.572311 O b'+1:lemma:in'
517 +-1.575804 O b'-1:lemma:co2'
518 +-1.596289 OD b'postag:-LRB-'
519 +-1.598275 Supp b'postag:JJ'
520 +-1.805996 Air b'postag:NN'
521 +-1.906139 O b'+1:lemma:2'
522 +-2.059834 O b'+1:lemma:1'
523 +-2.164534 O b'-1:lemma:2'
524 +-3.758611 O b'-1:lemma::'
525 +-3.808109 O b'-1:lemma:_'
526 +
1 +********** TRAINING AND TESTING REPORT **********
2 +Training file: training-data-set-70_v4.txt
3 +
4 +best params:{'c1': 0.04724343003249949, 'c2': 0.01243677446159028}
5 +best CV score:0.7976445146405444
6 +model size: 0.08M
7 +
8 +Flat F1: 0.7934791324630633
9 + precision recall f1-score support
10 +
11 + OD 1.000 0.405 0.577 37
12 + pH 1.000 1.000 1.000 12
13 + Technique 0.952 0.909 0.930 22
14 + Med 0.891 0.860 0.875 57
15 + Temp 0.818 1.000 0.900 18
16 + Vess 0.000 0.000 0.000 0
17 + Agit 0.000 0.000 0.000 0
18 + Phase 1.000 0.947 0.973 19
19 + Air 0.742 0.742 0.742 62
20 + Anti 1.000 0.444 0.615 9
21 + Strain 1.000 1.000 1.000 1
22 + Gtype 0.862 0.764 0.810 106
23 + Substrain 0.000 0.000 0.000 1
24 + Supp 0.869 0.684 0.765 136
25 + Gversion 0.000 0.000 0.000 0
26 +
27 +avg / total 0.875 0.744 0.793 480
28 +
29 +
30 +Top likely transitions:
31 +Agit -> Agit 6.389375
32 +Anti -> Anti 5.797544
33 +OD -> OD 5.755859
34 +Temp -> Temp 5.474910
35 +Med -> Med 5.305285
36 +Gversion -> Gversion 4.804763
37 +O -> O 4.666202
38 +Air -> Air 4.625279
39 +Gtype -> Gtype 4.391065
40 +Phase -> Phase 4.207878
41 +Supp -> Supp 3.772231
42 +Technique -> Technique 3.487681
43 +pH -> pH 2.578623
44 +O -> Supp 1.341155
45 +Gtype -> Supp 1.313090
46 +Substrain -> Gtype 1.142286
47 +O -> Technique 1.070411
48 +O -> Gtype 1.049693
49 +O -> Anti 0.871683
50 +Technique -> Air 0.727345
51 +Gtype -> pH 0.559755
52 +O -> Temp 0.443406
53 +Air -> O 0.413184
54 +Gtype -> Air 0.316166
55 +O -> Gversion 0.270633
56 +O -> Strain 0.150083
57 +Med -> O 0.029148
58 +O -> pH 0.001669
59 +OD -> Temp -0.004690
60 +pH -> O -0.027664
61 +Anti -> O -0.053386
62 +Air -> Gtype -0.064282
63 +Gversion -> O -0.068988
64 +OD -> Technique -0.076420
65 +Phase -> Air -0.090168
66 +O -> Med -0.097953
67 +Phase -> O -0.101900
68 +O -> Phase -0.121729
69 +Anti -> Gtype -0.149802
70 +Air -> Phase -0.152600
71 +Anti -> Supp -0.166950
72 +O -> OD -0.195576
73 +Gversion -> Air -0.198819
74 +Med -> Air -0.247104
75 +Technique -> Supp -0.254151
76 +Supp -> pH -0.311839
77 +Supp -> Anti -0.312452
78 +Anti -> OD -0.340468
79 +Supp -> Phase -0.344456
80 +Gtype -> Technique -0.348541
81 +
82 +
83 +Top unlikely transitions:
84 +Phase -> Air -0.090168
85 +O -> Med -0.097953
86 +Phase -> O -0.101900
87 +O -> Phase -0.121729
88 +Anti -> Gtype -0.149802
89 +Air -> Phase -0.152600
90 +Anti -> Supp -0.166950
91 +O -> OD -0.195576
92 +Gversion -> Air -0.198819
93 +Med -> Air -0.247104
94 +Technique -> Supp -0.254151
95 +Supp -> pH -0.311839
96 +Supp -> Anti -0.312452
97 +Anti -> OD -0.340468
98 +Supp -> Phase -0.344456
99 +Gtype -> Technique -0.348541
100 +Phase -> Technique -0.371932
101 +Gtype -> Gversion -0.440310
102 +Air -> Temp -0.442388
103 +Gtype -> Phase -0.446343
104 +Supp -> Temp -0.482112
105 +OD -> Supp -0.494819
106 +Supp -> O -0.518055
107 +Technique -> O -0.541070
108 +Gtype -> O -0.554034
109 +Supp -> Technique -0.694840
110 +Supp -> OD -0.711794
111 +Supp -> Gversion -0.733427
112 +OD -> Air -0.787405
113 +OD -> Gtype -0.827938
114 +Supp -> Gtype -0.848914
115 +Gtype -> Anti -0.850239
116 +Temp -> Med -0.927802
117 +Gtype -> Med -0.940529
118 +OD -> Med -0.954731
119 +Air -> Med -1.010714
120 +OD -> O -1.020865
121 +O -> Air -1.023312
122 +Supp -> Air -1.036972
123 +Air -> Supp -1.040140
124 +Technique -> pH -1.071992
125 +Agit -> O -1.092416
126 +Technique -> OD -1.136546
127 +Technique -> Gtype -1.152303
128 +Air -> OD -1.203336
129 +Gtype -> OD -1.470509
130 +Substrain -> O -1.705609
131 +Supp -> Med -1.924115
132 +Med -> Supp -2.263703
133 +Phase -> OD -2.559981
134 +
135 +
136 +Top positive:
137 +8.778667 O b'lemma:_'
138 +7.894046 Phase b'lemma:stationary'
139 +7.688886 Supp b'lemma:Iron'
140 +7.227846 O b'lemma:1'
141 +6.518255 Air b'lemma:anaerobic'
142 +6.510310 Air b'lemma:aerobic'
143 +6.504238 Phase b'lemma:mid-log'
144 +6.494190 Technique b'lemma:ChIP-exo'
145 +6.481618 O b'lemma:rpob'
146 +6.445611 Strain b'lemma:k-12'
147 +6.104157 O b'lemma:2'
148 +6.023116 Technique b'lemma:chipseq'
149 +5.944249 Air b'-1:lemma:ChIP-Seq'
150 +5.823850 Supp b'lemma:pq'
151 +5.723339 O b'lemma:3'
152 +5.687179 O b'-1:lemma:tag'
153 +5.451451 Gversion b'lemma:asm584v2'
154 +5.250448 Substrain b'lemma:mg1655'
155 +5.231044 O b'lemma:rep1'
156 +5.209409 Gtype b'lemma:\xce\xb4cra'
157 +5.200206 Supp b'lemma:nitrate'
158 +5.178620 O b'lemma:rep2'
159 +5.178433 O b'lemma:Cra'
160 +5.173563 Med b'lemma:MOPS'
161 +5.068820 Gtype b'lemma:flag-tag'
162 +5.068820 Gtype b'-1:lemma:c-terminal'
163 +5.040185 Supp b'+1:lemma:\xc2\xb5m'
164 +5.017969 O b'lemma:rep3'
165 +4.971785 Air b'lemma:Aerobic'
166 +4.954714 Supp b'lemma:glucose'
167 +4.889061 Gtype b'lemma:arca8myc'
168 +4.884882 O b'postag:IN'
169 +4.832496 Supp b'lemma:nh4cl'
170 +4.697100 O b'-1:lemma:ChIP-exo'
171 +4.685588 Gtype b'lemma:delta-arca'
172 +4.678766 Med b'lemma:LB'
173 +4.646146 O b'lemma:b'
174 +4.589396 Gtype b'lemma:wt'
175 +4.555367 O b'lemma:\xcf\x8332'
176 +4.535944 Gtype b'-1:lemma:\xe2\x88\x86'
177 +4.520189 Gtype b'lemma:fnr8myc'
178 +4.461759 Med b'lemma:lb'
179 +4.436556 Technique b'lemma:rna-seq'
180 +4.355492 O b'postag::'
181 +4.344693 O b'lemma:a'
182 +4.294247 O b'-1:lemma:Aerobic'
183 +4.275618 Supp b'lemma:Fe'
184 +4.228638 Anti b'lemma:none'
185 +4.228158 O b'-1:lemma:0.3-0.35'
186 +4.206556 Supp b'lemma:acetate'
187 +4.187817 Supp b'lemma:no3'
188 +4.123083 Technique b'lemma:chip-seq'
189 +4.115374 Technique b'lemma:rnaseq'
190 +4.092073 O b'lemma:Custom'
191 +4.090797 Gtype b'lemma:delta-fnr'
192 +4.086877 Anti b'lemma:seqa'
193 +4.043157 Supp b'-1:lemma:Cra'
194 +4.011760 O b'lemma:CEL'
195 +4.000976 Supp b'lemma:rifampicin'
196 +3.995000 OD b'lemma:od450'
197 +3.983638 Gversion b'lemma:nc'
198 +3.942031 O b'+1:postag:RB'
199 +3.937436 O b'lemma:.'
200 +3.937436 O b'postag:.'
201 +3.911694 O b'+1:lemma:od600'
202 +3.747230 Gversion b'lemma:chip-seq'
203 +3.735836 Gtype b'lemma:type'
204 +3.708583 O b'-1:lemma:anaerobic'
205 +3.691098 Gtype b'+1:lemma:type'
206 +3.680809 Supp b'lemma:dpd'
207 +3.646664 Med b'+1:lemma:0.4'
208 +3.593949 O b'-1:lemma:glucose'
209 +3.588513 Gtype b'lemma:\xe2\x88\x86'
210 +3.552980 Technique b'lemma:ChIP-Seq'
211 +3.547388 OD b'lemma:od600'
212 +3.533518 Gtype b'+1:lemma:with'
213 +3.504439 O b'+1:lemma:o.d.'
214 +3.492405 O b'lemma:-'
215 +3.484369 Gtype b'lemma:\xce\xb4fur'
216 +3.480535 Supp b'+1:lemma:1'
217 +3.463027 Supp b'lemma:arginine'
218 +3.457950 O b'-1:lemma:lb'
219 +3.432558 Gtype b'lemma:nsrr'
220 +3.411785 Vess b'lemma:flask'
221 +3.411785 Vess b'-1:lemma:warm'
222 +3.392532 O b'lemma:with'
223 +3.384479 pH b'lemma:ph5'
224 +3.384479 pH b'+1:lemma:.5'
225 +3.360996 Air b'postag:RB'
226 +3.348572 O b'lemma:for'
227 +3.346364 Gtype b'lemma:wild-type'
228 +3.342085 Strain b'+1:lemma:substr'
229 +3.336804 O b'lemma:oxyr'
230 +3.311067 Supp b'lemma:fructose'
231 +3.305851 Anti b'lemma:anti-myc'
232 +3.281892 Supp b'+1:lemma:Deficient'
233 +3.280890 Med b'lemma:m63'
234 +3.275989 Gtype b'lemma:\xce\xb4soxs'
235 +3.263146 O b'lemma:or'
236 +3.240421 O b'postag:VBN'
237 +3.225118 O b'lemma:s'
238 +3.218060 Supp b'+1:lemma:2'
239 +3.170999 O b'lemma:ompr'
240 +3.157791 O b'-1:lemma:type'
241 +3.150274 Gversion b'-1:lemma:nc'
242 +3.144459 O b'lemma:affyexp'
243 +3.098092 Temp b'-1:lemma:37'
244 +3.074698 Anti b'+1:lemma:antibody'
245 +3.074328 OD b'+1:lemma:stationary'
246 +3.057680 Gtype b'lemma:WT'
247 +3.055594 Technique b'-1:lemma:IP'
248 +3.047772 Supp b'-1:lemma:+'
249 +3.030792 Supp b'lemma:nacl'
250 +3.021146 Gversion b'lemma:u00096'
251 +3.021146 Gversion b'+1:lemma:.2'
252 +3.016112 Air b'lemma:Anaerobic'
253 +3.015642 O b'lemma:chip-arca'
254 +2.992114 Gtype b'lemma:\xce\xb4ompr'
255 +2.991588 Substrain b'+1:lemma:phtpg'
256 +2.962136 Gversion b'lemma:.2'
257 +2.962136 Gversion b'-1:lemma:u00096'
258 +2.943798 Supp b'+1:lemma:_'
259 +2.929777 O b'+1:lemma:anti-fur'
260 +2.906800 Temp b'-1:lemma:43'
261 +2.894662 Gversion b'lemma:000913'
262 +2.885668 Gtype b'-1:lemma:ptac'
263 +2.873518 O b'lemma:2-3'
264 +2.848786 Air b'lemma:anaerobically'
265 +2.846499 Med b'+1:lemma:2.0'
266 +2.846396 Supp b'lemma:Leu'
267 +2.832090 O b'lemma:argr'
268 +2.824032 Gtype b'+1:lemma:flagtag'
269 +2.803595 Air b'lemma:anerobically'
270 +2.790970 Temp b'lemma:43'
271 +2.789734 O b'lemma:soxs'
272 +2.789734 O b'lemma:soxr'
273 +2.761406 Anti b'lemma:anti-rpos'
274 +2.759003 Gtype b'lemma:deltaseqa'
275 +2.759003 Gtype b'-1:lemma:old'
276 +2.758592 O b'+1:lemma:chip-seq'
277 +2.753991 O b'lemma:purr'
278 +2.739456 O b'lemma:Lrp'
279 +2.739128 Gtype b'lemma:pk4854'
280 +2.724879 Temp b'-1:lemma:sample'
281 +2.710711 O b'-1:lemma:stpa'
282 +2.707614 O b'-1:lemma:dpd'
283 +2.693567 O b'lemma:chip'
284 +2.680179 Temp b'lemma:\xc2\xb0c'
285 +2.675708 O b'lemma:at'
286 +2.674777 Supp b'lemma:Adenine'
287 +2.654457 Med b'-1:lemma:ml'
288 +2.621739 Supp b'+1:lemma:hour'
289 +2.617235 Technique b'-1:lemma:chip-exo'
290 +2.613832 Med b'+1:lemma:minimal'
291 +2.609168 O b'lemma:Fur'
292 +2.603191 Gtype b'lemma:ptac'
293 +2.602754 Temp b'-1:lemma:\xcf\x8332'
294 +2.600527 Technique b'-1:lemma:input'
295 +2.586051 Gtype b'-1:lemma:rpob'
296 +2.582507 Supp b'lemma:iptg'
297 +2.575439 pH b'lemma:.5'
298 +2.575439 pH b'-1:lemma:ph5'
299 +2.567178 Phase b'+1:lemma:for'
300 +2.561161 Temp b'lemma:37'
301 +2.554310 Phase b'-1:lemma:mid-log'
302 +2.520491 Gtype b'lemma:\xce\xb4oxyr'
303 +2.510164 O b'postag:SYM'
304 +2.501887 Gtype b'lemma:\xce\xb4soxr'
305 +2.469798 O b'-1:lemma:l1'
306 +2.453386 Phase b'-1:lemma:until'
307 +2.441656 O b'+1:lemma:43'
308 +2.410885 O b'+1:lemma:mid-log'
309 +2.403011 Gtype b'+1:lemma:ph5'
310 +2.401711 Med b'lemma:glucose'
311 +2.400503 Med b'lemma:L'
312 +2.400503 Med b'+1:lemma:broth'
313 +2.386245 O b'+1:lemma:pq'
314 +2.371627 O b'postag:VBG'
315 +2.358742 O b'lemma:chip-fnr'
316 +2.353099 Gtype b'lemma:dfnr'
317 +2.338808 Supp b'+1:lemma:respiratory'
318 +2.338302 O b'+1:lemma:sparging'
319 +2.329852 Gtype b'+1:lemma:aerobic'
320 +2.319775 O b'postag:DT'
321 +2.307514 Supp b'lemma:0.2'
322 +2.298843 Med b'+1:lemma:+'
323 +2.298674 Supp b'lemma:methanol'
324 +2.262217 Gversion b'lemma:_'
325 +2.247831 Gtype b'+1:lemma:pq'
326 +2.246181 O b'lemma:genotype/variation'
327 +2.244933 Air b'-1:lemma:-'
328 +2.243583 Gtype b'-1:lemma:nsrr'
329 +2.241509 Technique b'+1:lemma:chip-exo'
330 +2.238931 Supp b'+1:lemma:and'
331 +2.229747 OD b'lemma:0.3'
332 +2.227356 O b'lemma:pt7'
333 +2.218343 O b'-1:lemma:min'
334 +2.209534 OD b'-1:lemma:~'
335 +2.191059 O b'postag:CC'
336 +2.183306 O b'-1:lemma:media'
337 +
338 +
339 +Top negative:
340 +-0.007960 O b'+1:lemma:25'
341 +-0.011560 Gtype b'-1:lemma:,'
342 +-0.011560 Gtype b'-1:postag:,'
343 +-0.012403 Air b'-1:lemma:anaerobically'
344 +-0.013219 O b'lemma:pahse'
345 +-0.013507 Gversion b'-1:postag:NN'
346 +-0.013699 O b'+1:lemma:medium'
347 +-0.014516 Med b'-1:postag:CD'
348 +-0.014698 Gtype b'-1:lemma:mg1655'
349 +-0.016645 OD b'+1:lemma:~'
350 +-0.017116 Supp b'+1:lemma:glucose'
351 +-0.018290 OD b'+1:lemma:0.4'
352 +-0.019265 O b'lemma:25'
353 +-0.020700 Med b'+1:postag:NNS'
354 +-0.020972 O b'+1:lemma:0.4'
355 +-0.022100 OD b'+1:postag:-LRB-'
356 +-0.022143 O b'+1:lemma:strain'
357 +-0.024088 Supp b'-1:postag:VBG'
358 +-0.038792 O b'+1:postag:CD'
359 +-0.039859 O b'-1:postag:IN'
360 +-0.040290 O b'+1:lemma:k-12'
361 +-0.041101 O b'lemma:nacl'
362 +-0.042472 Gtype b'+1:lemma:cra'
363 +-0.052362 O b'+1:lemma:grow'
364 +-0.054063 Anti b'-1:postag:NN'
365 +-0.057006 O b'-1:lemma:20'
366 +-0.057134 O b'-1:lemma:mm'
367 +-0.062775 Phase b'-1:postag:NN'
368 +-0.064327 O b'-1:postag:VBN'
369 +-0.064682 O b'postag:RB'
370 +-0.066584 Supp b'-1:postag:NN'
371 +-0.069958 Gtype b'postag:CD'
372 +-0.071415 O b'+1:lemma:95'
373 +-0.073208 Phase b'lemma:pahse'
374 +-0.078903 Gtype b'-1:postag:NN'
375 +-0.080786 O b'+1:lemma:5'
376 +-0.083451 Temp b'-1:lemma:\xc2\xb0c'
377 +-0.084429 O b'+1:lemma:antibody'
378 +-0.093835 O b'-1:lemma:fresh'
379 +-0.095847 Supp b'+1:postag:IN'
380 +-0.103631 OD b'postag:JJ'
381 +-0.105270 O b'+1:lemma:o2'
382 +-0.106875 O b'+1:lemma:mm'
383 +-0.117705 O b'+1:lemma:dissolve'
384 +-0.119607 Med b'+1:postag:NN'
385 +-0.123002 OD b'+1:postag:CD'
386 +-0.127620 O b'-1:lemma:g/l'
387 +-0.133716 Gtype b'-1:postag:CD'
388 +-0.140619 O b'-1:lemma:-lrb-'
389 +-0.143859 O b'lemma:o2'
390 +-0.144789 O b'-1:lemma:o2'
391 +-0.146704 O b'-1:lemma:30'
392 +-0.147636 Temp b'postag:JJ'
393 +-0.152183 Gtype b'+1:lemma:\xe2\x88\x86'
394 +-0.153487 Med b'+1:postag:CC'
395 +-0.156696 O b'lemma:mg1655'
396 +-0.156753 O b'lemma:e.'
397 +-0.159541 Technique b'-1:lemma::'
398 +-0.162599 O b'-1:lemma:minimal'
399 +-0.167611 O b'-1:lemma:with'
400 +-0.176978 O b'-1:postag:-LRB-'
401 +-0.179005 O b'lemma:m63'
402 +-0.180170 O b'lemma:medium'
403 +-0.183738 O b'+1:lemma:Aerobic'
404 +-0.190885 O b'lemma:n2'
405 +-0.191225 Supp b'-1:postag:NNP'
406 +-0.198786 O b'-1:lemma:n2'
407 +-0.199226 Supp b'-1:lemma:and'
408 +-0.199384 O b'lemma:\xc2\xb0c'
409 +-0.202752 O b'-1:lemma:of'
410 +-0.208526 O b'+1:lemma:-rrb-'
411 +-0.210040 O b'-1:lemma:e.'
412 +-0.219231 O b'+1:lemma:shake'
413 +-0.219460 Gtype b'-1:postag:DT'
414 +-0.237900 O b'-1:lemma:0.1'
415 +-0.241643 O b'lemma:lb'
416 +-0.242224 Supp b'lemma:and'
417 +-0.245820 O b'lemma:co2'
418 +-0.252281 Temp b'-1:postag:IN'
419 +-0.261850 OD b'+1:postag:NN'
420 +-0.266916 Supp b'+1:postag:VBN'
421 +-0.267689 O b'+1:lemma:until'
422 +-0.271480 O b'-1:lemma:\xe2\x88\x86'
423 +-0.277039 O b'+1:lemma:minimal'
424 +-0.277788 O b'lemma:grow'
425 +-0.285871 O b'+1:postag:IN'
426 +-0.296129 Med b'postag:-LRB-'
427 +-0.302011 O b'-1:lemma:um'
428 +-0.302011 O b'+1:lemma:paraquat'
429 +-0.304578 O b'+1:postag:-RRB-'
430 +-0.307866 Med b'lemma:-lrb-'
431 +-0.309056 O b'+1:lemma:.'
432 +-0.309056 O b'+1:postag:.'
433 +-0.312126 Air b'-1:lemma:or'
434 +-0.317647 O b'lemma:minimal'
435 +-0.319138 Gtype b'-1:postag:SYM'
436 +-0.326202 O b'-1:lemma:1'
437 +-0.328780 Phase b'-1:lemma:at'
438 +-0.346851 O b'+1:lemma:300'
439 +-0.351977 Med b'-1:postag:NN'
440 +-0.355707 O b'lemma:30'
441 +-0.365992 Gversion b'+1:postag:NN'
442 +-0.367337 O b'-1:lemma:grow'
443 +-0.387748 O b'lemma:k-12'
444 +-0.389573 O b'+1:lemma:cell'
445 +-0.397777 O b'-1:lemma:from'
446 +-0.400558 O b'-1:lemma:mid-log'
447 +-0.403379 O b'+1:lemma:phase'
448 +-0.433545 Supp b'+1:lemma:rifampicin'
449 +-0.436853 O b'+1:lemma:or'
450 +-0.441744 Med b'postag:CD'
451 +-0.446245 Med b'+1:postag:IN'
452 +-0.451884 Gtype b'lemma:delta'
453 +-0.460983 Anti b'+1:postag:JJ'
454 +-0.469979 O b'+1:lemma:_'
455 +-0.472319 O b'-1:lemma:rifampicin'
456 +-0.497593 O b'-1:lemma:cra'
457 +-0.503197 O b'-1:lemma:od600'
458 +-0.504727 Supp b'postag:CC'
459 +-0.508290 pH b'postag:NN'
460 +-0.516046 O b'+1:lemma:%'
461 +-0.518365 Technique b'-1:postag::'
462 +-0.523304 OD b'+1:lemma:and'
463 +-0.525519 OD b'+1:postag:CC'
464 +-0.558490 O b'+1:postag:NNS'
465 +-0.586906 O b'-1:lemma:co2'
466 +-0.603821 O b'lemma:150'
467 +-0.603821 O b'+1:lemma:mg/ml'
468 +-0.605931 O b'+1:lemma:arginine'
469 +-0.606479 O b'lemma:phase'
470 +-0.611575 O b'lemma:od600'
471 +-0.651162 O b'+1:lemma:rep1'
472 +-0.653345 O b'lemma:dissolve'
473 +-0.665212 Med b'-1:postag:IN'
474 +-0.669800 OD b'lemma:-lrb-'
475 +-0.672263 O b'-1:lemma:0.2'
476 +-0.687604 O b'lemma:\xe2\x88\x86'
477 +-0.707413 Temp b'+1:lemma:to'
478 +-0.707413 Temp b'+1:postag:TO'
479 +-0.709054 Temp b'postag:NN'
480 +-0.717292 O b'lemma:mid-log'
481 +-0.727885 O b'-1:lemma:until'
482 +-0.733043 O b'lemma:media'
483 +-0.733111 O b'+1:lemma:c'
484 +-0.747011 OD b'postag:-LRB-'
485 +-0.750366 O b'+1:lemma:0.3'
486 +-0.761257 Anti b'+1:lemma:anti-fur'
487 +-0.768890 Phase b'-1:postag:JJ'
488 +-0.791918 O b'+1:lemma:\xc2\xb0c'
489 +-0.792189 O b'lemma:anaerobically'
490 +-0.829072 O b'lemma:0.2'
491 +-0.870965 O b'lemma:0.3'
492 +-0.872689 O b'lemma:purify'
493 +-0.898705 O b'-1:postag:VBG'
494 +-0.916416 O b'+1:lemma:fecl2'
495 +-0.929519 O b'-1:lemma:~'
496 +-0.964077 Air b'postag:NN'
497 +-0.999189 O b'lemma:20'
498 +-1.006423 O b'lemma:fecl2'
499 +-1.015503 O b'lemma:dpd'
500 +-1.039211 O b'lemma:anaerobic'
501 +-1.067901 O b'-1:lemma:dissolve'
502 +-1.067901 O b'+1:lemma:methanol'
503 +-1.079142 Supp b'+1:lemma:acetate'
504 +-1.086390 O b'lemma:37'
505 +-1.090519 O b'lemma:\xce\xb4fur'
506 +-1.110265 O b'-1:postag::'
507 +-1.136252 O b'-1:lemma:ml'
508 +-1.138020 O b'lemma:2h'
509 +-1.138020 O b'-1:lemma:additional'
510 +-1.184350 O b'lemma:of'
511 +-1.210896 Supp b'-1:lemma:%'
512 +-1.216753 O b'-1:lemma:37'
513 +-1.220049 O b'+1:lemma:g/l'
514 +-1.245093 O b'-1:lemma:sample'
515 +-1.280180 Air b'-1:postag:JJ'
516 +-1.282705 O b'lemma:0.1'
517 +-1.293206 O b'+1:lemma:supplement'
518 +-1.340548 Air b'+1:postag:JJ'
519 +-1.397536 O b'+1:lemma:at'
520 +-1.470536 O b'+1:lemma:in'
521 +-1.532979 O b'+1:postag:VBG'
522 +-1.548239 O b'-1:lemma:rpob'
523 +-1.647386 O b'-1:lemma:ompr'
524 +-1.665792 O b'postag:VBP'
525 +-1.714606 O b'lemma:wt'
526 +-1.723995 O b'-1:lemma:nsrr'
527 +-1.770674 Supp b'postag:JJ'
528 +-1.852907 O b'lemma:rifampicin'
529 +-1.859721 Anti b'postag:NNP'
530 +-1.869393 O b'+1:lemma:+'
531 +-1.873494 O b'-1:lemma:IP'
532 +-2.065639 O b'lemma:methanol'
533 +-2.192016 O b'-1:lemma:2'
534 +-2.353663 O b'+1:lemma:2'
535 +-2.354781 Phase b'postag:JJ'
536 +-2.458047 O b'+1:lemma:hour'
537 +-3.007939 O b'+1:lemma:1'
538 +-3.831961 O b'-1:lemma:_'
539 +-4.149098 O b'-1:lemma::'
540 +
1 +********** TRAINING AND TESTING REPORT **********
2 +Training file: training-data-set-70_v4.txt
3 +
4 +best params:{'c1': 0.01750591736573677, 'c2': 0.02307723566043045}
5 +best CV score:0.7965019925648547
6 +model size: 0.10M
7 +
8 +Flat F1: 0.7968324300802168
9 + precision recall f1-score support
10 +
11 + OD 1.000 0.405 0.577 37
12 + pH 1.000 1.000 1.000 12
13 + Technique 0.952 0.909 0.930 22
14 + Med 0.897 0.912 0.904 57
15 + Temp 0.818 1.000 0.900 18
16 + Vess 0.000 0.000 0.000 0
17 + Agit 0.000 0.000 0.000 0
18 + Phase 1.000 0.947 0.973 19
19 + Air 0.754 0.742 0.748 62
20 + Anti 1.000 0.667 0.800 9
21 + Strain 1.000 1.000 1.000 1
22 + Gtype 0.862 0.764 0.810 106
23 + Substrain 0.000 0.000 0.000 1
24 + Supp 0.865 0.662 0.750 136
25 + Gversion 0.000 0.000 0.000 0
26 +
27 +avg / total 0.876 0.748 0.797 480
28 +
29 +
30 +Top likely transitions:
31 +OD -> OD 5.786764
32 +Agit -> Agit 5.337876
33 +Anti -> Anti 5.174874
34 +Temp -> Temp 5.088685
35 +Med -> Med 5.002944
36 +Air -> Air 4.877267
37 +O -> O 4.715817
38 +Gversion -> Gversion 4.473906
39 +Phase -> Phase 4.357763
40 +Gtype -> Gtype 4.242273
41 +Supp -> Supp 4.009897
42 +Technique -> Technique 3.021233
43 +pH -> pH 2.548336
44 +Substrain -> Gtype 1.971828
45 +O -> Supp 1.641966
46 +Gtype -> Supp 1.637091
47 +Air -> O 1.145928
48 +O -> Gtype 1.141727
49 +Technique -> Air 1.068808
50 +Gtype -> pH 0.990954
51 +O -> Technique 0.980217
52 +O -> Temp 0.771371
53 +O -> Anti 0.605031
54 +Gtype -> Air 0.434959
55 +Med -> O 0.223888
56 +O -> Strain 0.200811
57 +O -> pH 0.175383
58 +O -> Gversion 0.140227
59 +OD -> Phase 0.052262
60 +O -> Vess 0.045108
61 +Temp -> O 0.034423
62 +Phase -> O 0.018387
63 +Strain -> O -0.000112
64 +Anti -> pH -0.000851
65 +Strain -> OD -0.002202
66 +O -> Substrain -0.003970
67 +Technique -> Phase -0.027678
68 +Med -> pH -0.028490
69 +Anti -> Phase -0.030227
70 +Gtype -> Strain -0.031833
71 +pH -> OD -0.041636
72 +Air -> Anti -0.042531
73 +OD -> pH -0.045135
74 +pH -> Air -0.051415
75 +Anti -> O -0.054882
76 +Med -> OD -0.055592
77 +O -> Med -0.064478
78 +Med -> Temp -0.070415
79 +Phase -> Air -0.071257
80 +Supp -> Agit -0.072692
81 +
82 +
83 +Top unlikely transitions:
84 +O -> Agit -0.317813
85 +Gversion -> Gtype -0.346160
86 +Anti -> Gtype -0.379001
87 +Gversion -> Supp -0.395995
88 +Technique -> O -0.407315
89 +Technique -> Gversion -0.411287
90 +Air -> Gtype -0.414228
91 +Gversion -> Technique -0.423101
92 +Supp -> pH -0.449062
93 +Air -> Temp -0.454318
94 +OD -> Temp -0.462022
95 +OD -> Technique -0.528407
96 +Gtype -> Phase -0.537062
97 +OD -> Anti -0.541206
98 +Gversion -> Air -0.555674
99 +Supp -> Temp -0.581411
100 +Supp -> Phase -0.581776
101 +Technique -> Supp -0.590399
102 +Gtype -> Technique -0.620634
103 +OD -> Supp -0.644073
104 +Med -> Air -0.679744
105 +Temp -> Med -0.687449
106 +OD -> Med -0.717084
107 +Anti -> OD -0.770834
108 +Supp -> Anti -0.804922
109 +Air -> Supp -0.806945
110 +Gtype -> Gversion -0.811241
111 +Supp -> OD -0.862585
112 +Supp -> Gversion -0.867304
113 +Phase -> Technique -0.873833
114 +Agit -> O -0.910378
115 +Air -> Med -0.936107
116 +Gtype -> Med -0.971156
117 +Supp -> Technique -0.979631
118 +Supp -> Gtype -0.998503
119 +OD -> Gtype -1.001151
120 +Supp -> Air -1.002273
121 +Gtype -> Anti -1.098941
122 +OD -> Air -1.189877
123 +O -> Air -1.194520
124 +OD -> O -1.238899
125 +Air -> OD -1.240827
126 +Gtype -> OD -1.288759
127 +Substrain -> O -1.351729
128 +Technique -> pH -1.436208
129 +Technique -> Gtype -1.565301
130 +Supp -> Med -1.571460
131 +Technique -> OD -1.655463
132 +Med -> Supp -1.791934
133 +Phase -> OD -2.395487
134 +
135 +
136 +Top positive:
137 +7.777505 O b'lemma:_'
138 +6.605877 O b'lemma:1'
139 +5.933184 Phase b'lemma:stationary'
140 +5.921751 Air b'lemma:anaerobic'
141 +5.663542 Supp b'lemma:Iron'
142 +5.628841 O b'lemma:2'
143 +5.570604 Strain b'lemma:k-12'
144 +5.357728 Air b'lemma:aerobic'
145 +5.308359 Technique b'lemma:chipseq'
146 +5.243195 Technique b'lemma:ChIP-exo'
147 +5.146328 O b'lemma:3'
148 +5.113544 O b'lemma:rpob'
149 +4.972436 Supp b'lemma:pq'
150 +4.736752 O b'-1:lemma:tag'
151 +4.683084 Gtype b'lemma:\xce\xb4cra'
152 +4.682011 Phase b'lemma:mid-log'
153 +4.676054 Gversion b'lemma:asm584v2'
154 +4.661754 Supp b'lemma:nh4cl'
155 +4.645082 Gtype b'lemma:flag-tag'
156 +4.645082 Gtype b'-1:lemma:c-terminal'
157 +4.640572 O b'lemma:rep1'
158 +4.613649 O b'lemma:Cra'
159 +4.546591 Gtype b'lemma:arca8myc'
160 +4.537870 O b'postag:IN'
161 +4.531129 Supp b'lemma:glucose'
162 +4.528260 Air b'-1:lemma:ChIP-Seq'
163 +4.460701 O b'lemma:rep2'
164 +4.331423 Med b'lemma:MOPS'
165 +4.320826 Gtype b'lemma:wt'
166 +4.314650 Supp b'lemma:nitrate'
167 +4.296372 O b'lemma:rep3'
168 +4.285293 Substrain b'lemma:mg1655'
169 +4.240354 Supp b'lemma:acetate'
170 +4.111263 Med b'lemma:lb'
171 +4.087796 Gtype b'lemma:delta-arca'
172 +4.036491 O b'+1:postag:RB'
173 +4.022073 Supp b'+1:lemma:\xc2\xb5m'
174 +4.010145 O b'lemma:b'
175 +4.010092 Supp b'lemma:no3'
176 +3.996538 Med b'lemma:LB'
177 +3.996393 Gtype b'-1:lemma:\xe2\x88\x86'
178 +3.982442 Technique b'lemma:rna-seq'
179 +3.957894 Air b'lemma:Aerobic'
180 +3.935017 O b'lemma:a'
181 +3.803123 OD b'lemma:od600'
182 +3.801977 Anti b'lemma:none'
183 +3.786775 Gtype b'lemma:fnr8myc'
184 +3.769938 O b'lemma:.'
185 +3.769938 O b'postag:.'
186 +3.769927 Supp b'lemma:Fe'
187 +3.764066 Technique b'lemma:chip-seq'
188 +3.735366 O b'postag::'
189 +3.685306 O b'lemma:CEL'
190 +3.645917 O b'lemma:\xcf\x8332'
191 +3.642265 O b'-1:lemma:Aerobic'
192 +3.634484 Anti b'lemma:seqa'
193 +3.544891 Technique b'lemma:rnaseq'
194 +3.525311 Supp b'+1:lemma:Deficient'
195 +3.521610 OD b'lemma:od450'
196 +3.519361 O b'-1:lemma:ChIP-exo'
197 +3.516252 O b'lemma:Custom'
198 +3.464219 O b'postag:VBN'
199 +3.463491 Supp b'+1:lemma:1'
200 +3.450572 Supp b'lemma:rifampicin'
201 +3.427503 Med b'+1:lemma:0.4'
202 +3.379916 Gtype b'lemma:delta-fnr'
203 +3.378105 Supp b'-1:lemma:Cra'
204 +3.351155 Gtype b'lemma:\xe2\x88\x86'
205 +3.343450 Gversion b'lemma:chip-seq'
206 +3.314924 Supp b'lemma:fructose'
207 +3.282214 Supp b'lemma:dpd'
208 +3.262950 Gtype b'+1:lemma:with'
209 +3.253726 O b'lemma:-'
210 +3.214173 O b'+1:lemma:od600'
211 +3.187644 O b'-1:lemma:0.3-0.35'
212 +3.182055 Gtype b'lemma:type'
213 +3.172972 Supp b'lemma:arginine'
214 +3.120594 Gtype b'lemma:\xce\xb4fur'
215 +3.117794 Gversion b'lemma:nc'
216 +3.109528 O b'-1:lemma:glucose'
217 +3.081979 O b'+1:lemma:anti-fur'
218 +3.077641 Gtype b'+1:lemma:type'
219 +3.064731 Supp b'+1:lemma:2'
220 +3.053268 Vess b'lemma:flask'
221 +3.053268 Vess b'-1:lemma:warm'
222 +3.046425 O b'-1:lemma:anaerobic'
223 +3.038756 pH b'lemma:ph5'
224 +3.038756 pH b'+1:lemma:.5'
225 +3.027884 Med b'lemma:m63'
226 +3.018439 Gtype b'lemma:wild-type'
227 +3.002251 O b'lemma:chip-arca'
228 +3.002149 Anti b'lemma:anti-myc'
229 +3.001018 Gtype b'lemma:nsrr'
230 +3.000941 O b'lemma:affyexp'
231 +3.000850 Air b'postag:RB'
232 +2.987234 O b'-1:lemma:lb'
233 +2.981372 Strain b'+1:lemma:substr'
234 +2.977968 O b'lemma:with'
235 +2.970913 O b'lemma:s'
236 +2.954955 O b'lemma:oxyr'
237 +2.919696 Technique b'lemma:ChIP-Seq'
238 +2.911598 Gtype b'lemma:WT'
239 +2.907151 Temp b'lemma:\xc2\xb0c'
240 +2.882096 Gtype b'lemma:\xce\xb4soxs'
241 +2.828303 O b'-1:lemma:stpa'
242 +2.815555 O b'lemma:argr'
243 +2.805344 Med b'+1:lemma:minimal'
244 +2.776817 Gversion b'lemma:.2'
245 +2.776817 Gversion b'-1:lemma:u00096'
246 +2.765075 O b'lemma:at'
247 +2.753642 Supp b'-1:lemma:+'
248 +2.743827 Med b'+1:lemma:2.0'
249 +2.731390 O b'lemma:ompr'
250 +2.715846 O b'+1:lemma:pq'
251 +2.713993 Gversion b'lemma:u00096'
252 +2.713993 Gversion b'+1:lemma:.2'
253 +2.703704 Air b'lemma:Anaerobic'
254 +2.702255 Gtype b'lemma:\xce\xb4ompr'
255 +2.686740 Supp b'+1:lemma:_'
256 +2.677806 Gtype b'-1:lemma:ptac'
257 +2.662879 Phase b'+1:lemma:for'
258 +2.660845 O b'lemma:purr'
259 +2.633400 Gversion b'-1:lemma:nc'
260 +2.610705 Med b'-1:lemma:ml'
261 +2.610364 Supp b'lemma:nacl'
262 +2.598932 Technique b'-1:lemma:IP'
263 +2.587524 O b'lemma:or'
264 +2.572768 Supp b'lemma:Leu'
265 +2.557048 Gversion b'lemma:000913'
266 +2.535138 Gtype b'+1:lemma:flagtag'
267 +2.529124 O b'postag:CC'
268 +2.528151 Gtype b'+1:lemma:pq'
269 +2.519454 OD b'+1:lemma:stationary'
270 +2.517078 Gtype b'+1:lemma:aerobic'
271 +2.508910 O b'lemma:Lrp'
272 +2.508428 Gtype b'-1:lemma:rpob'
273 +2.502749 Temp b'lemma:43'
274 +2.501837 Air b'lemma:anaerobically'
275 +2.500161 Gtype b'lemma:ptac'
276 +2.489806 O b'+1:lemma:o.d.'
277 +2.489002 O b'-1:lemma:type'
278 +2.486890 Gtype b'lemma:pk4854'
279 +2.485914 O b'lemma:for'
280 +2.480050 O b'postag:SYM'
281 +2.476780 Supp b'+1:lemma:hour'
282 +2.458558 Air b'-1:lemma:-'
283 +2.455765 Anti b'lemma:anti-rpos'
284 +2.449385 Supp b'lemma:Adenine'
285 +2.445239 O b'lemma:Fur'
286 +2.444393 O b'lemma:soxs'
287 +2.444393 O b'lemma:soxr'
288 +2.434249 Temp b'-1:lemma:37'
289 +2.428845 Gtype b'lemma:deltaseqa'
290 +2.428845 Gtype b'-1:lemma:old'
291 +2.414384 Anti b'+1:lemma:antibody'
292 +2.405965 O b'lemma:2-3'
293 +2.403476 Temp b'-1:lemma:43'
294 +2.394168 Air b'lemma:anerobically'
295 +2.391084 Substrain b'+1:lemma:phtpg'
296 +2.390362 O b'+1:lemma:chip-seq'
297 +2.364093 O b'lemma:chip'
298 +2.344688 pH b'+1:postag:CD'
299 +2.333775 Med b'postag:NNP'
300 +2.308157 Supp b'lemma:iptg'
301 +2.301961 O b'+1:lemma:43'
302 +2.296886 Gtype b'lemma:\xce\xb4oxyr'
303 +2.293633 O b'lemma:chip-fnr'
304 +2.288275 Med b'+1:lemma:+'
305 +2.287838 Technique b'-1:lemma:input'
306 +2.284745 Gtype b'+1:lemma:ph5'
307 +2.282924 O b'+1:lemma:sparging'
308 +2.273731 Technique b'+1:lemma:chip-exo'
309 +2.256890 Gtype b'lemma:\xce\xb4soxr'
310 +2.248969 O b'lemma:pt7'
311 +2.247412 Gtype b'lemma:dfnr'
312 +2.234348 Technique b'-1:lemma:chip-exo'
313 +2.225915 O b'+1:lemma:mid-log'
314 +2.218353 Supp b'+1:lemma:deficient'
315 +2.214682 Phase b'lemma:phase'
316 +2.209324 Phase b'-1:lemma:until'
317 +2.209232 Supp b'lemma:methanol'
318 +2.187036 Technique b'postag:NNP'
319 +2.159413 Temp b'-1:lemma:sample'
320 +2.151326 Gtype b'-1:lemma:nsrr'
321 +2.149028 Supp b'lemma:20'
322 +2.143459 pH b'lemma:.5'
323 +2.143459 pH b'-1:lemma:ph5'
324 +2.130382 Supp b'-1:lemma:with'
325 +2.124595 Phase b'-1:lemma:mid-log'
326 +2.122806 Gversion b'+1:lemma:000913'
327 +2.119158 Temp b'+1:lemma:\xc2\xb0c'
328 +2.116827 O b'lemma:genotype/variation'
329 +2.111971 Gtype b'-1:lemma:from'
330 +2.099395 Gversion b'lemma:_'
331 +2.093842 O b'postag:VBG'
332 +2.087024 Gtype b'-1:lemma::'
333 +2.084864 Med b'+1:lemma:-lrb-'
334 +2.083399 Med b'lemma:broth'
335 +2.083399 Med b'-1:lemma:L'
336 +2.080517 Med b'lemma:L'
337 +
338 +
339 +Top negative:
340 +-0.116698 Gtype b'-1:postag:NNP'
341 +-0.117399 Med b'-1:postag:NN'
342 +-0.121920 Phase b'-1:postag:NN'
343 +-0.126158 O b'-1:lemma:delta'
344 +-0.126979 O b'lemma:medium'
345 +-0.136307 O b'lemma:10'
346 +-0.138994 O b'+1:lemma:2.0'
347 +-0.140939 O b'-1:postag:IN'
348 +-0.146352 OD b'+1:lemma:in'
349 +-0.147540 O b'-1:lemma:iptg'
350 +-0.148360 Gtype b'lemma:ompr'
351 +-0.148713 Supp b'+1:lemma:glucose'
352 +-0.150545 Gtype b'-1:lemma:,'
353 +-0.150545 Gtype b'-1:postag:,'
354 +-0.152555 O b'+1:lemma:mm'
355 +-0.154518 O b'+1:lemma:5'
356 +-0.161052 Air b'postag:CD'
357 +-0.161490 O b'-1:lemma:purify'
358 +-0.162601 O b'+1:lemma:strain'
359 +-0.164231 O b'-1:lemma:with'
360 +-0.165378 O b'-1:lemma:minimal'
361 +-0.167055 Air b'-1:lemma:or'
362 +-0.177695 O b'+1:lemma:0.4'
363 +-0.179481 Med b'+1:postag:CC'
364 +-0.192792 Gtype b'+1:postag:NNS'
365 +-0.194783 O b'-1:lemma:-lrb-'
366 +-0.199810 Gtype b'-1:lemma:mg1655'
367 +-0.202202 O b'+1:lemma:antibody'
368 +-0.204305 O b'+1:lemma:mg1655'
369 +-0.205357 O b'+1:lemma:minimal'
370 +-0.215743 O b'+1:lemma:.'
371 +-0.215743 O b'+1:postag:.'
372 +-0.224035 Supp b'+1:postag:IN'
373 +-0.226319 Technique b'-1:lemma::'
374 +-0.228443 Supp b'lemma:and'
375 +-0.228851 O b'lemma:e.'
376 +-0.233559 Gtype b'+1:lemma:cra'
377 +-0.233930 O b'lemma:8'
378 +-0.236052 Gtype b'+1:lemma:a'
379 +-0.236429 O b'lemma:n2'
380 +-0.240273 O b'-1:postag:-LRB-'
381 +-0.243379 O b'-1:lemma:e.'
382 +-0.246913 O b'-1:lemma:20'
383 +-0.249761 O b'-1:postag:VBP'
384 +-0.250445 O b'+1:lemma:from'
385 +-0.253990 O b'lemma:co2'
386 +-0.257147 O b'lemma:lb'
387 +-0.262866 O b'-1:lemma:grow'
388 +-0.263101 O b'-1:lemma:n2'
389 +-0.266675 Med b'postag:CD'
390 +-0.267433 Med b'+1:postag:NNS'
391 +-0.271690 Med b'lemma:-lrb-'
392 +-0.272421 Med b'postag:-LRB-'
393 +-0.274354 Anti b'+1:postag:JJ'
394 +-0.275817 O b'-1:lemma:fresh'
395 +-0.276458 Gversion b'+1:postag:NN'
396 +-0.284818 Gtype b'+1:lemma:b'
397 +-0.291947 O b'+1:lemma:until'
398 +-0.292980 O b'lemma:minimal'
399 +-0.294264 O b'lemma:30'
400 +-0.295523 O b'-1:lemma:od600'
401 +-0.299205 O b'+1:lemma:b'
402 +-0.302254 O b'lemma:aerobically'
403 +-0.310314 O b'lemma:grow'
404 +-0.331454 O b'-1:lemma:0.1'
405 +-0.331977 O b'-1:postag:VBN'
406 +-0.340682 O b'+1:lemma:delta'
407 +-0.346089 O b'+1:lemma:-lcb-'
408 +-0.347377 O b'-1:lemma:um'
409 +-0.347377 O b'+1:lemma:paraquat'
410 +-0.348080 O b'+1:postag:-RRB-'
411 +-0.348644 Gtype b'postag:CD'
412 +-0.350220 O b'-1:lemma:chip-exo'
413 +-0.353135 O b'lemma:mg/ml'
414 +-0.353135 O b'-1:lemma:150'
415 +-0.368298 O b'-1:lemma:mm'
416 +-0.368952 O b'+1:lemma:phase'
417 +-0.383392 Phase b'-1:lemma:at'
418 +-0.383608 Gtype b'lemma:_'
419 +-0.386439 O b'+1:postag:IN'
420 +-0.389536 O b'lemma:glucose'
421 +-0.397045 Supp b'+1:postag:VBN'
422 +-0.406129 O b'postag:RB'
423 +-0.417245 O b'lemma:m63'
424 +-0.417398 OD b'+1:postag:CC'
425 +-0.419368 Temp b'postag:JJ'
426 +-0.422874 O b'+1:lemma:or'
427 +-0.427973 O b'+1:lemma:%'
428 +-0.435206 O b'+1:lemma:shake'
429 +-0.441406 OD b'+1:lemma:mid-log'
430 +-0.442803 Med b'+1:postag:NN'
431 +-0.449476 O b'lemma:nacl'
432 +-0.461678 O b'+1:lemma:_'
433 +-0.466634 O b'lemma:od600'
434 +-0.469668 Technique b'-1:postag::'
435 +-0.470685 O b'-1:lemma:from'
436 +-0.478610 Supp b'postag:CC'
437 +-0.495782 O b'+1:lemma:dissolve'
438 +-0.501976 Supp b'-1:postag:VBG'
439 +-0.521671 O b'-1:lemma:rifampicin'
440 +-0.527652 O b'lemma:\xc2\xb0c'
441 +-0.532074 O b'-1:lemma:g/l'
442 +-0.535235 O b'-1:lemma:cra'
443 +-0.535605 pH b'postag:NN'
444 +-0.540053 O b'+1:lemma:cell'
445 +-0.542400 O b'-1:lemma:mid-log'
446 +-0.554731 O b'+1:lemma:Aerobic'
447 +-0.557223 OD b'+1:postag:NN'
448 +-0.577398 Temp b'-1:lemma:\xc2\xb0c'
449 +-0.586593 O b'+1:lemma:rep1'
450 +-0.589048 O b'-1:lemma:30'
451 +-0.595835 Med b'-1:postag:IN'
452 +-0.598295 O b'lemma:phase'
453 +-0.599331 Supp b'-1:lemma:%'
454 +-0.612222 O b'+1:lemma:\xc2\xb0c'
455 +-0.624875 O b'-1:lemma:affinity'
456 +-0.636676 OD b'lemma:-lrb-'
457 +-0.643469 O b'+1:postag:NNS'
458 +-0.649099 Temp b'postag:NN'
459 +-0.649456 O b'lemma:of'
460 +-0.658905 Gtype b'-1:postag:SYM'
461 +-0.660226 OD b'+1:lemma:and'
462 +-0.660293 O b'+1:lemma:c'
463 +-0.672111 O b'lemma:mg1655'
464 +-0.675080 Gtype b'lemma:delta'
465 +-0.677816 O b'lemma:dissolve'
466 +-0.678428 Supp b'+1:lemma:rifampicin'
467 +-0.684724 O b'-1:lemma:co2'
468 +-0.686280 Gtype b'-1:postag:DT'
469 +-0.694863 O b'-1:lemma:o2'
470 +-0.722306 Gtype b'+1:lemma:\xe2\x88\x86'
471 +-0.732614 O b'lemma:150'
472 +-0.732614 O b'+1:lemma:mg/ml'
473 +-0.761599 OD b'postag:-LRB-'
474 +-0.772588 Temp b'+1:lemma:to'
475 +-0.772588 Temp b'+1:postag:TO'
476 +-0.781297 O b'lemma:anaerobically'
477 +-0.807083 O b'+1:lemma:arginine'
478 +-0.819715 O b'-1:lemma:\xe2\x88\x86'
479 +-0.823821 O b'+1:lemma:0.3'
480 +-0.836002 O b'lemma:dpd'
481 +-0.858798 O b'+1:lemma:fecl2'
482 +-0.878330 Med b'+1:postag:IN'
483 +-0.883801 O b'-1:lemma:until'
484 +-0.890383 O b'lemma:purify'
485 +-0.893680 Anti b'+1:lemma:anti-fur'
486 +-0.905649 O b'lemma:anaerobic'
487 +-0.919723 O b'-1:lemma:0.2'
488 +-0.932910 O b'+1:lemma:300'
489 +-0.948417 O b'-1:lemma:ml'
490 +-0.951682 O b'-1:lemma:1'
491 +-0.955382 Air b'-1:postag:JJ'
492 +-0.960497 O b'lemma:\xe2\x88\x86'
493 +-0.979778 O b'-1:lemma:dissolve'
494 +-0.979778 O b'+1:lemma:methanol'
495 +-0.985663 O b'lemma:fecl2'
496 +-0.995333 O b'lemma:media'
497 +-1.037234 Phase b'-1:postag:JJ'
498 +-1.042219 O b'lemma:20'
499 +-1.048118 O b'lemma:k-12'
500 +-1.067143 Supp b'+1:lemma:acetate'
501 +-1.079445 O b'lemma:2h'
502 +-1.079445 O b'-1:lemma:additional'
503 +-1.084169 O b'+1:lemma:supplement'
504 +-1.104220 O b'lemma:0.1'
505 +-1.125319 O b'lemma:mid-log'
506 +-1.136189 O b'lemma:0.2'
507 +-1.136814 O b'-1:lemma:~'
508 +-1.142555 O b'lemma:0.3'
509 +-1.163967 O b'+1:lemma:at'
510 +-1.165833 O b'+1:lemma:in'
511 +-1.233659 O b'+1:postag:VBG'
512 +-1.234566 OD b'postag:JJ'
513 +-1.251923 O b'-1:postag:VBG'
514 +-1.261321 Phase b'postag:JJ'
515 +-1.282079 O b'+1:lemma:g/l'
516 +-1.318236 O b'lemma:\xce\xb4fur'
517 +-1.394008 O b'-1:lemma:ompr'
518 +-1.417347 O b'-1:lemma:sample'
519 +-1.574030 O b'lemma:37'
520 +-1.604365 Supp b'postag:JJ'
521 +-1.624714 O b'postag:VBP'
522 +-1.649157 O b'-1:postag::'
523 +-1.654774 Air b'postag:NN'
524 +-1.662186 O b'-1:lemma:rpob'
525 +-1.692863 Air b'+1:postag:JJ'
526 +-1.720211 Anti b'postag:NNP'
527 +-1.810759 O b'lemma:wt'
528 +-1.845010 O b'-1:lemma:37'
529 +-1.856437 O b'+1:lemma:+'
530 +-1.877902 O b'-1:lemma:IP'
531 +-1.987331 O b'-1:lemma:2'
532 +-2.039453 O b'lemma:methanol'
533 +-2.059494 O b'-1:lemma:nsrr'
534 +-2.091546 O b'+1:lemma:2'
535 +-2.107679 O b'lemma:rifampicin'
536 +-2.361875 O b'+1:lemma:hour'
537 +-2.588783 O b'+1:lemma:1'
538 +-3.600862 O b'-1:lemma:_'
539 +-3.688282 O b'-1:lemma::'
540 +
1 +********** TRAINING AND TESTING REPORT **********
2 +Training file: training-data-set-70_v4.txt
3 +
4 +best params:{'c1': 0.06622541546379261, 'c2': 0.005803516535443396}
5 +best CV score:0.806003784483002
6 +model size: 0.08M
7 +
8 +Flat F1: 0.8138835387377452
9 + precision recall f1-score support
10 +
11 + OD 1.000 0.405 0.577 37
12 + pH 1.000 1.000 1.000 12
13 + Technique 0.952 0.909 0.930 22
14 + Med 0.897 0.912 0.904 57
15 + Temp 0.818 1.000 0.900 18
16 + Vess 0.000 0.000 0.000 0
17 + Agit 0.000 0.000 0.000 0
18 + Phase 1.000 0.947 0.973 19
19 + Air 0.939 0.742 0.829 62
20 + Anti 0.571 0.444 0.500 9
21 + Strain 1.000 1.000 1.000 1
22 + Gtype 0.876 0.802 0.837 106
23 + Substrain 0.000 0.000 0.000 1
24 + Supp 0.886 0.684 0.772 136
25 + Gversion 0.000 0.000 0.000 0
26 +
27 +avg / total 0.901 0.758 0.814 480
28 +
29 +
30 +Top likely transitions:
31 +Agit -> Agit 6.855330
32 +OD -> OD 6.403256
33 +Temp -> Temp 5.890045
34 +Anti -> Anti 5.766779
35 +Med -> Med 5.604768
36 +Air -> Air 4.967782
37 +O -> O 4.848357
38 +Gversion -> Gversion 4.809684
39 +Gtype -> Gtype 4.697823
40 +Phase -> Phase 4.308635
41 +Supp -> Supp 4.041417
42 +Technique -> Technique 3.913590
43 +pH -> pH 2.570273
44 +O -> Supp 1.615671
45 +Substrain -> Gtype 1.499561
46 +Gtype -> Supp 1.421628
47 +O -> Technique 1.327277
48 +O -> Gtype 1.086040
49 +Air -> O 0.813429
50 +Technique -> Air 0.731728
51 +O -> Temp 0.588704
52 +O -> Anti 0.562550
53 +Gtype -> pH 0.228949
54 +Med -> O 0.224698
55 +O -> Gversion 0.210688
56 +Temp -> O 0.172212
57 +O -> Med 0.102170
58 +Gtype -> Air 0.095373
59 +OD -> Phase 0.013336
60 +Anti -> OD -0.004256
61 +O -> OD -0.004531
62 +Gtype -> Gversion -0.009932
63 +Phase -> O -0.017336
64 +O -> Phase -0.028487
65 +Phase -> Technique -0.066036
66 +Med -> Air -0.087994
67 +Gtype -> Phase -0.099971
68 +Supp -> Gversion -0.157806
69 +Anti -> O -0.169383
70 +Supp -> Temp -0.179380
71 +Phase -> Air -0.231554
72 +Supp -> OD -0.255465
73 +Supp -> Anti -0.267111
74 +Technique -> O -0.296565
75 +OD -> Med -0.297702
76 +Gtype -> Med -0.322957
77 +Technique -> pH -0.337491
78 +Supp -> O -0.351378
79 +OD -> Gtype -0.357504
80 +Gtype -> O -0.378451
81 +
82 +
83 +Top unlikely transitions:
84 +O -> Gtype 1.086040
85 +Air -> O 0.813429
86 +Technique -> Air 0.731728
87 +O -> Temp 0.588704
88 +O -> Anti 0.562550
89 +Gtype -> pH 0.228949
90 +Med -> O 0.224698
91 +O -> Gversion 0.210688
92 +Temp -> O 0.172212
93 +O -> Med 0.102170
94 +Gtype -> Air 0.095373
95 +OD -> Phase 0.013336
96 +Anti -> OD -0.004256
97 +O -> OD -0.004531
98 +Gtype -> Gversion -0.009932
99 +Phase -> O -0.017336
100 +O -> Phase -0.028487
101 +Phase -> Technique -0.066036
102 +Med -> Air -0.087994
103 +Gtype -> Phase -0.099971
104 +Supp -> Gversion -0.157806
105 +Anti -> O -0.169383
106 +Supp -> Temp -0.179380
107 +Phase -> Air -0.231554
108 +Supp -> OD -0.255465
109 +Supp -> Anti -0.267111
110 +Technique -> O -0.296565
111 +OD -> Med -0.297702
112 +Gtype -> Med -0.322957
113 +Technique -> pH -0.337491
114 +Supp -> O -0.351378
115 +OD -> Gtype -0.357504
116 +Gtype -> O -0.378451
117 +Supp -> Gtype -0.415119
118 +Supp -> Technique -0.546983
119 +Air -> Supp -0.580378
120 +Air -> OD -0.591958
121 +Technique -> Gtype -0.721938
122 +OD -> Air -0.869884
123 +Agit -> O -0.874071
124 +Technique -> OD -0.901946
125 +Gtype -> OD -0.968255
126 +Supp -> Air -0.992187
127 +OD -> O -1.076988
128 +Gtype -> Anti -1.093167
129 +O -> Air -1.094876
130 +Supp -> Med -1.350916
131 +Substrain -> O -1.483392
132 +Phase -> OD -2.458856
133 +Med -> Supp -2.624787
134 +
135 +
136 +Top positive:
137 +9.234903 Supp b'lemma:Iron'
138 +8.941229 Phase b'lemma:stationary'
139 +8.612848 O b'lemma:_'
140 +8.414915 Air b'lemma:anaerobic'
141 +8.110054 Air b'lemma:aerobic'
142 +7.668526 O b'lemma:1'
143 +7.617369 Supp b'lemma:nitrate'
144 +7.396629 Technique b'lemma:ChIP-exo'
145 +7.004694 O b'lemma:rpob'
146 +6.951208 Strain b'lemma:k-12'
147 +6.929222 Air b'-1:lemma:ChIP-Seq'
148 +6.919087 Supp b'lemma:pq'
149 +6.729357 Med b'lemma:MOPS'
150 +6.630951 Technique b'lemma:chipseq'
151 +6.338936 Gversion b'lemma:asm584v2'
152 +6.275492 O b'-1:lemma:tag'
153 +6.112382 Gtype b'lemma:\xce\xb4cra'
154 +6.001427 O b'lemma:3'
155 +5.997947 O b'lemma:2'
156 +5.921853 O b'lemma:\xcf\x8332'
157 +5.917190 Gtype b'lemma:flag-tag'
158 +5.917190 Gtype b'-1:lemma:c-terminal'
159 +5.881286 Substrain b'lemma:mg1655'
160 +5.839915 Phase b'lemma:mid-log'
161 +5.810496 Air b'lemma:Aerobic'
162 +5.761122 O b'lemma:Custom'
163 +5.570827 O b'lemma:rep1'
164 +5.482824 Med b'lemma:LB'
165 +5.430575 Supp b'+1:lemma:\xc2\xb5m'
166 +5.428880 O b'lemma:rep2'
167 +5.402169 O b'-1:lemma:ChIP-exo'
168 +5.366372 O b'lemma:Cra'
169 +5.339186 Supp b'lemma:nh4cl'
170 +5.259341 Gtype b'lemma:arca8myc'
171 +5.256666 Gtype b'lemma:type'
172 +5.250724 O b'lemma:rep3'
173 +5.250467 Med b'lemma:lb'
174 +5.157840 Supp b'lemma:rifampicin'
175 +5.124923 O b'lemma:b'
176 +5.111909 Gtype b'lemma:delta-arca'
177 +5.091556 OD b'+1:lemma:stationary'
178 +5.048220 Supp b'lemma:glucose'
179 +5.001772 O b'postag:IN'
180 +4.994648 Gversion b'lemma:nc'
181 +4.925252 Gtype b'lemma:fnr8myc'
182 +4.905784 OD b'lemma:od450'
183 +4.901181 Gtype b'+1:lemma:type'
184 +4.837803 Technique b'lemma:rna-seq'
185 +4.828361 Gtype b'lemma:wt'
186 +4.778406 O b'lemma:a'
187 +4.678498 Supp b'lemma:acetate'
188 +4.667864 Anti b'lemma:none'
189 +4.612775 Technique b'lemma:rnaseq'
190 +4.597784 Supp b'-1:lemma:Cra'
191 +4.596321 Technique b'lemma:ChIP-Seq'
192 +4.580488 O b'-1:lemma:0.3-0.35'
193 +4.559265 Gtype b'-1:lemma:\xe2\x88\x86'
194 +4.543048 Supp b'lemma:no3'
195 +4.494990 Supp b'lemma:Fe'
196 +4.475916 O b'lemma:-'
197 +4.465899 Anti b'lemma:seqa'
198 +4.445546 Gtype b'lemma:delta-fnr'
199 +4.372585 OD b'lemma:od600'
200 +4.346955 Technique b'lemma:chip-seq'
201 +4.301785 O b'lemma:or'
202 +4.260295 Gtype b'lemma:\xce\xb4fur'
203 +4.236090 O b'lemma:for'
204 +4.225287 O b'lemma:.'
205 +4.225287 O b'postag:.'
206 +4.161720 Supp b'lemma:dpd'
207 +4.122022 O b'+1:lemma:od600'
208 +4.073219 Vess b'lemma:flask'
209 +4.073219 Vess b'-1:lemma:warm'
210 +4.068384 O b'-1:lemma:anaerobic'
211 +4.063188 Supp b'lemma:arginine'
212 +4.026011 O b'-1:lemma:Aerobic'
213 +4.023826 O b'postag::'
214 +4.014353 Med b'+1:lemma:0.4'
215 +3.983642 O b'lemma:chip'
216 +3.937745 O b'lemma:CEL'
217 +3.912596 Supp b'+1:lemma:1'
218 +3.911905 Anti b'lemma:anti-myc'
219 +3.908162 O b'-1:lemma:glucose'
220 +3.843811 Supp b'lemma:fructose'
221 +3.821238 Gtype b'lemma:nsrr'
222 +3.805915 Gversion b'-1:lemma:nc'
223 +3.773446 Gversion b'lemma:chip-seq'
224 +3.714736 Gtype b'lemma:\xce\xb4ompr'
225 +3.700490 O b'-1:lemma:type'
226 +3.624739 O b'+1:postag:RB'
227 +3.602969 Strain b'+1:lemma:substr'
228 +3.596045 Med b'+1:lemma:minimal'
229 +3.588702 O b'-1:lemma:lb'
230 +3.537935 O b'+1:lemma:o.d.'
231 +3.533153 Gversion b'lemma:000913'
232 +3.531194 Temp b'lemma:\xc2\xb0c'
233 +3.520259 pH b'lemma:ph5'
234 +3.520259 pH b'+1:lemma:.5'
235 +3.511450 Gtype b'lemma:\xe2\x88\x86'
236 +3.509713 Technique b'-1:lemma:IP'
237 +3.496996 Med b'lemma:m63'
238 +3.478233 Anti b'+1:lemma:antibody'
239 +3.475022 Temp b'-1:lemma:sample'
240 +3.455068 Supp b'-1:lemma:+'
241 +3.449758 Anti b'lemma:anti-rpos'
242 +3.439491 Supp b'+1:lemma:2'
243 +3.415465 O b'lemma:oxyr'
244 +3.385223 Gtype b'lemma:wild-type'
245 +3.352842 O b'postag:VBN'
246 +3.343983 Gtype b'+1:lemma:ph5'
247 +3.343718 Air b'postag:RB'
248 +3.343342 Supp b'lemma:nacl'
249 +3.321016 Gtype b'-1:lemma:ptac'
250 +3.317465 Gtype b'-1:lemma:rpob'
251 +3.298959 O b'lemma:with'
252 +3.236711 O b'lemma:s'
253 +3.233948 Temp b'-1:lemma:\xcf\x8332'
254 +3.232171 Supp b'+1:lemma:hour'
255 +3.226682 O b'-1:lemma:0.3'
256 +3.213829 Gversion b'lemma:u00096'
257 +3.213829 Gversion b'+1:lemma:.2'
258 +3.176657 O b'lemma:ompr'
259 +3.173449 Gversion b'lemma:.2'
260 +3.173449 Gversion b'-1:lemma:u00096'
261 +3.148343 Gtype b'+1:lemma:with'
262 +3.113186 O b'lemma:at'
263 +3.099394 Gtype b'lemma:\xce\xb4soxs'
264 +3.081492 Gtype b'lemma:pk4854'
265 +3.081097 Supp b'+1:lemma:_'
266 +3.076137 Air b'lemma:Anaerobic'
267 +3.073681 O b'lemma:Lrp'
268 +3.064586 Phase b'+1:lemma:for'
269 +3.047746 O b'+1:lemma:chip-seq'
270 +3.047210 Gtype b'lemma:ptac'
271 +3.045585 Phase b'-1:lemma:until'
272 +3.043072 O b'-1:lemma:dpd'
273 +3.032395 Gtype b'lemma:WT'
274 +3.021945 Technique b'+1:lemma:chip-exo'
275 +3.010904 Supp b'lemma:iptg'
276 +3.001563 Anti b'+1:lemma:polyclonal'
277 +2.979219 Supp b'lemma:Leu'
278 +2.976675 O b'lemma:chip-arca'
279 +2.972333 Gtype b'lemma:deltaseqa'
280 +2.972333 Gtype b'-1:lemma:old'
281 +2.971020 O b'lemma:affyexp'
282 +2.960652 Air b'lemma:anerobically'
283 +2.938684 Supp b'+1:lemma:Deficient'
284 +2.913605 Technique b'-1:lemma:chip-exo'
285 +2.890017 O b'lemma:argr'
286 +2.883731 O b'+1:lemma:sparging'
287 +2.880763 Temp b'-1:lemma:43'
288 +2.874818 Med b'-1:lemma:ml'
289 +2.860187 pH b'lemma:.5'
290 +2.860187 pH b'-1:lemma:ph5'
291 +2.852362 Gtype b'+1:lemma:pq'
292 +2.822626 O b'lemma:soxs'
293 +2.822626 O b'lemma:soxr'
294 +2.796727 Temp b'lemma:43'
295 +2.764833 Med b'lemma:L'
296 +2.764833 Med b'+1:lemma:broth'
297 +2.762054 O b'+1:postag:NNP'
298 +2.755377 O b'-1:lemma:stpa'
299 +2.737365 Supp b'-1:lemma:\xc2\xb5m'
300 +2.727294 Temp b'lemma:37'
301 +2.708825 Med b'-1:lemma:LB'
302 +2.705181 O b'-1:lemma:\xc2\xb0c'
303 +2.692331 O b'+1:lemma:pq'
304 +2.691434 Substrain b'+1:lemma:phtpg'
305 +2.690410 O b'lemma:purr'
306 +2.677844 Technique b'+1:lemma:rna-seq'
307 +2.650316 Technique b'-1:lemma:input'
308 +2.638625 Supp b'lemma:methanol'
309 +2.634944 Med b'+1:lemma:contain'
310 +2.625097 Supp b'lemma:Adenine'
311 +2.623356 O b'-1:lemma:l1'
312 +2.607859 O b'postag:DT'
313 +2.598485 Med b'+1:lemma:2.0'
314 +2.589920 Gtype b'-1:lemma:phtpg'
315 +2.581102 Med b'lemma:glucose'
316 +2.575292 Temp b'-1:lemma:37'
317 +2.558206 Gtype b'lemma:\xce\xb4soxr'
318 +2.550759 Gtype b'+1:lemma:flagtag'
319 +2.550220 Gtype b'lemma:\xce\xb4oxyr'
320 +2.549389 Gtype b'+1:lemma:aerobic'
321 +2.517172 Supp b'lemma:leucine'
322 +2.411127 O b'-1:lemma:media'
323 +2.376812 O b'lemma:2-3'
324 +2.368251 Med b'lemma:broth'
325 +2.368251 Med b'-1:lemma:L'
326 +2.353760 Gtype b'-1:lemma:_'
327 +2.331107 O b'lemma:culture'
328 +2.315361 Med b'postag:NNP'
329 +2.299371 O b'lemma:Fur'
330 +2.288936 Supp b'+1:lemma:iptg'
331 +2.280717 Air b'lemma:anaerobically'
332 +2.278786 Air b'-1:lemma:-'
333 +2.266514 O b'lemma:genotype/variation'
334 +2.259722 OD b'lemma:0.3'
335 +2.256007 Technique b'postag:NNP'
336 +2.224072 Air b'+1:lemma:at'
337 +
338 +
339 +Top negative:
340 +-0.002409 Med b'lemma:-lrb-'
341 +-0.002765 O b'+1:lemma:minimal'
342 +-0.002819 Med b'postag:-LRB-'
343 +-0.002883 Supp b'-1:lemma:-'
344 +-0.003857 Agit b'postag:NN'
345 +-0.005100 Air b'-1:lemma:anaerobically'
346 +-0.007289 O b'lemma:lb'
347 +-0.010248 O b'lemma:mg/ml'
348 +-0.010248 O b'-1:lemma:150'
349 +-0.010774 Supp b'+1:postag:CD'
350 +-0.011526 Air b'-1:lemma:95'
351 +-0.011818 Air b'-1:postag:CC'
352 +-0.012619 Supp b'lemma:mm'
353 +-0.013393 O b'-1:lemma:1'
354 +-0.013525 O b'+1:lemma:rep1'
355 +-0.013581 Gtype b'-1:postag:SYM'
356 +-0.013782 OD b'+1:lemma:-lrb-'
357 +-0.015901 OD b'-1:postag:DT'
358 +-0.015963 O b'-1:lemma:70'
359 +-0.016924 OD b'+1:postag:-LRB-'
360 +-0.019258 O b'lemma:m63'
361 +-0.020349 Gversion b'-1:postag:NN'
362 +-0.021440 OD b'+1:lemma:0.4'
363 +-0.022594 Air b'-1:lemma:or'
364 +-0.022941 OD b'-1:lemma:a'
365 +-0.026771 O b'lemma:glucose'
366 +-0.027346 O b'+1:lemma:25'
367 +-0.028054 OD b'lemma:~'
368 +-0.029216 Gtype b'+1:lemma:cra'
369 +-0.030932 Gtype b'-1:postag:NN'
370 +-0.031890 O b'+1:lemma:~'
371 +-0.033995 O b'+1:lemma:95'
372 +-0.035517 O b'-1:lemma:-lrb-'
373 +-0.036049 O b'lemma:pahse'
374 +-0.036135 O b'lemma:purify'
375 +-0.036681 Gtype b'-1:postag:CD'
376 +-0.038170 Med b'-1:lemma:m63'
377 +-0.042355 O b'+1:lemma:antibody'
378 +-0.042769 O b'+1:lemma:0.4'
379 +-0.043812 O b'-1:lemma:25'
380 +-0.050331 Phase b'-1:lemma:at'
381 +-0.051370 O b'+1:lemma:5'
382 +-0.052602 Phase b'+1:postag:NN'
383 +-0.053544 OD b'+1:postag:CD'
384 +-0.055596 O b'+1:lemma:o2'
385 +-0.056365 O b'lemma:25'
386 +-0.065474 O b'-1:lemma:o2'
387 +-0.070061 Gtype b'-1:postag:DT'
388 +-0.070234 O b'-1:lemma:the'
389 +-0.072402 O b'+1:lemma:shake'
390 +-0.079220 Strain b'+1:postag:NN'
391 +-0.079451 O b'+1:lemma:grow'
392 +-0.084137 Gtype b'lemma:_'
393 +-0.085976 O b'+1:lemma:mm'
394 +-0.093102 Med b'+1:postag:CC'
395 +-0.095082 Air b'postag:CD'
396 +-0.097194 O b'lemma:o2'
397 +-0.101822 O b'-1:lemma:30'
398 +-0.102523 O b'-1:postag:VBN'
399 +-0.107653 O b'lemma:cell'
400 +-0.112633 Supp b'-1:postag:NN'
401 +-0.116970 O b'lemma:grow'
402 +-0.120130 O b'lemma:30'
403 +-0.121820 Supp b'+1:postag:IN'
404 +-0.124237 Supp b'-1:postag:NNP'
405 +-0.125482 O b'-1:lemma:mm'
406 +-0.128019 O b'+1:lemma:300'
407 +-0.129344 O b'lemma:\xc2\xb0c'
408 +-0.144557 Med b'-1:postag:NN'
409 +-0.144770 O b'-1:postag:IN'
410 +-0.146162 O b'-1:lemma:of'
411 +-0.149674 Gtype b'+1:lemma:\xe2\x88\x86'
412 +-0.154228 OD b'postag:JJ'
413 +-0.167420 O b'lemma:n2'
414 +-0.171707 O b'lemma:co2'
415 +-0.172528 O b'lemma:medium'
416 +-0.179654 Med b'+1:postag:NNS'
417 +-0.184135 O b'-1:lemma:n2'
418 +-0.189770 O b'+1:lemma:-rrb-'
419 +-0.192678 O b'lemma:aerobically'
420 +-0.195762 O b'-1:lemma:with'
421 +-0.196208 O b'-1:lemma:ml'
422 +-0.204813 Gtype b'-1:lemma:mg1655'
423 +-0.205462 O b'lemma:mg1655'
424 +-0.206806 O b'-1:lemma:e.'
425 +-0.207893 O b'lemma:minimal'
426 +-0.224766 Temp b'postag:JJ'
427 +-0.225885 O b'+1:lemma:.'
428 +-0.225885 O b'+1:postag:.'
429 +-0.227519 O b'lemma:e.'
430 +-0.236166 O b'-1:lemma:rifampicin'
431 +-0.247403 O b'-1:lemma:0.2'
432 +-0.259717 Phase b'-1:postag:NN'
433 +-0.263053 Supp b'lemma:and'
434 +-0.270443 O b'-1:postag:-LRB-'
435 +-0.283380 O b'-1:lemma:um'
436 +-0.283380 O b'+1:lemma:paraquat'
437 +-0.289228 Med b'postag:CD'
438 +-0.290905 O b'-1:lemma:0.1'
439 +-0.295102 OD b'+1:lemma:and'
440 +-0.300641 O b'-1:lemma:until'
441 +-0.306863 O b'+1:lemma:phase'
442 +-0.322631 O b'lemma:phase'
443 +-0.347838 O b'+1:lemma:until'
444 +-0.351130 O b'+1:lemma:_'
445 +-0.365836 O b'-1:lemma:\xe2\x88\x86'
446 +-0.367298 Anti b'+1:postag:JJ'
447 +-0.375233 Gtype b'postag:CD'
448 +-0.377830 Supp b'postag:CC'
449 +-0.381771 O b'postag:RB'
450 +-0.385165 Technique b'-1:lemma::'
451 +-0.387729 O b'+1:postag:IN'
452 +-0.405861 O b'+1:postag:-RRB-'
453 +-0.419130 O b'-1:lemma:from'
454 +-0.425366 O b'-1:lemma:od600'
455 +-0.432532 O b'-1:lemma:cra'
456 +-0.441409 Temp b'postag:NN'
457 +-0.447218 O b'+1:lemma:arginine'
458 +-0.454986 O b'+1:lemma:%'
459 +-0.469664 Med b'+1:postag:IN'
460 +-0.481296 Technique b'-1:postag::'
461 +-0.482628 Gversion b'+1:postag:NN'
462 +-0.484056 O b'-1:lemma:grow'
463 +-0.485046 pH b'postag:NN'
464 +-0.490017 Med b'+1:postag:NN'
465 +-0.496154 O b'lemma:od600'
466 +-0.500947 Supp b'+1:postag:VBN'
467 +-0.517928 O b'+1:lemma:cell'
468 +-0.520851 O b'-1:lemma:sample'
469 +-0.520873 OD b'+1:postag:CC'
470 +-0.521577 O b'+1:lemma:\xc2\xb0c'
471 +-0.525190 Anti b'+1:lemma:anti-fur'
472 +-0.527700 O b'-1:lemma:mid-log'
473 +-0.533028 O b'lemma:dissolve'
474 +-0.550892 O b'lemma:150'
475 +-0.550892 O b'+1:lemma:mg/ml'
476 +-0.551731 Phase b'-1:postag:JJ'
477 +-0.571169 O b'+1:lemma:0.3'
478 +-0.600234 O b'postag:VBP'
479 +-0.623446 O b'lemma:anaerobic'
480 +-0.655760 Gtype b'lemma:delta'
481 +-0.668609 Supp b'+1:lemma:rifampicin'
482 +-0.673715 O b'lemma:\xce\xb4fur'
483 +-0.724036 OD b'+1:postag:NN'
484 +-0.742000 O b'+1:postag:NNS'
485 +-0.743016 O b'+1:lemma:c'
486 +-0.767018 O b'-1:lemma:~'
487 +-0.767765 Air b'-1:postag:JJ'
488 +-0.779655 O b'lemma:media'
489 +-0.790170 O b'lemma:\xe2\x88\x86'
490 +-0.816065 O b'lemma:fecl2'
491 +-0.817183 O b'lemma:dpd'
492 +-0.835270 O b'lemma:0.3'
493 +-0.843288 OD b'lemma:-lrb-'
494 +-0.853171 O b'+1:lemma:fecl2'
495 +-0.889568 O b'lemma:0.2'
496 +-0.905209 OD b'postag:-LRB-'
497 +-0.911568 O b'lemma:anaerobically'
498 +-0.915420 O b'lemma:of'
499 +-0.950583 Temp b'+1:lemma:to'
500 +-0.950583 Temp b'+1:postag:TO'
501 +-0.951305 O b'lemma:20'
502 +-0.951858 O b'-1:lemma:co2'
503 +-1.045949 O b'lemma:0.1'
504 +-1.056174 O b'+1:lemma:supplement'
505 +-1.088532 Supp b'+1:lemma:acetate'
506 +-1.092708 O b'+1:postag:VBG'
507 +-1.103190 O b'-1:postag:VBG'
508 +-1.119328 O b'lemma:2h'
509 +-1.119328 O b'-1:lemma:additional'
510 +-1.131219 Supp b'-1:lemma:%'
511 +-1.142227 O b'-1:lemma:rpob'
512 +-1.149700 O b'lemma:mid-log'
513 +-1.157098 Med b'-1:postag:IN'
514 +-1.158833 O b'-1:lemma:dissolve'
515 +-1.158833 O b'+1:lemma:methanol'
516 +-1.159433 O b'-1:postag::'
517 +-1.197832 Air b'postag:NN'
518 +-1.224684 O b'-1:lemma:37'
519 +-1.250547 O b'+1:lemma:g/l'
520 +-1.287510 O b'+1:lemma:at'
521 +-1.343885 O b'+1:lemma:+'
522 +-1.550316 Air b'+1:postag:JJ'
523 +-1.550420 O b'+1:lemma:in'
524 +-1.655179 O b'-1:lemma:IP'
525 +-1.671646 Anti b'postag:NNP'
526 +-1.676942 O b'lemma:37'
527 +-1.709933 O b'lemma:rifampicin'
528 +-1.740594 O b'-1:lemma:nsrr'
529 +-1.813332 O b'lemma:wt'
530 +-1.837591 Supp b'postag:JJ'
531 +-1.875538 O b'-1:lemma:ompr'
532 +-1.956251 O b'+1:lemma:hour'
533 +-2.104216 O b'-1:lemma:2'
534 +-2.173171 Phase b'postag:JJ'
535 +-2.198016 O b'+1:lemma:2'
536 +-2.302343 O b'lemma:methanol'
537 +-2.686054 O b'+1:lemma:1'
538 +-3.664514 O b'-1:lemma:_'
539 +-4.711592 O b'-1:lemma::'
540 +
1 +********** TRAINING AND TESTING REPORT **********
2 +Training file: training-data-set-70_v4.txt
3 +
4 +best params:{'c1': 0.15174828379035918, 'c2': 0.004631150546332649}
5 +best CV score:0.8039437168420447
6 +model size: 0.06M
7 +
8 +Flat F1: 0.7844164550442063
9 + precision recall f1-score support
10 +
11 + OD 1.000 0.405 0.577 37
12 + pH 1.000 1.000 1.000 12
13 + Technique 0.952 0.909 0.930 22
14 + Med 0.800 0.842 0.821 57
15 + Temp 0.818 1.000 0.900 18
16 + Vess 0.000 0.000 0.000 0
17 + Agit 0.000 0.000 0.000 0
18 + Phase 1.000 0.947 0.973 19
19 + Air 0.807 0.742 0.773 62
20 + Anti 0.571 0.444 0.500 9
21 + Strain 1.000 1.000 1.000 1
22 + Gtype 0.866 0.792 0.828 106
23 + Substrain 0.000 0.000 0.000 1
24 + Supp 0.807 0.676 0.736 136
25 + Gversion 0.000 0.000 0.000 0
26 +
27 +avg / total 0.848 0.746 0.784 480
28 +
29 +
30 +Top likely transitions:
31 +Agit -> Agit 6.791682
32 +OD -> OD 6.218349
33 +Med -> Med 5.630081
34 +Temp -> Temp 5.606252
35 +Anti -> Anti 5.592992
36 +Air -> Air 5.496160
37 +Gtype -> Gtype 5.032489
38 +O -> O 4.908086
39 +Phase -> Phase 4.730701
40 +Gversion -> Gversion 4.476840
41 +Technique -> Technique 4.434337
42 +Supp -> Supp 4.084657
43 +pH -> pH 1.968754
44 +Substrain -> Gtype 1.897449
45 +O -> Supp 1.725753
46 +Air -> O 1.650897
47 +O -> Gtype 1.589697
48 +Gtype -> Supp 1.521503
49 +O -> Technique 1.404884
50 +Technique -> Air 0.905351
51 +O -> Temp 0.873962
52 +O -> Anti 0.719006
53 +Med -> O 0.502394
54 +O -> Gversion 0.498028
55 +Temp -> O 0.179242
56 +OD -> Phase 0.164045
57 +O -> Phase 0.156883
58 +Phase -> O 0.137263
59 +Gtype -> Air 0.075321
60 +Gtype -> pH 0.063986
61 +O -> Med 0.051856
62 +O -> Strain 0.030496
63 +Supp -> O 0.016375
64 +Anti -> O 0.009641
65 +O -> OD -0.006635
66 +OD -> Med -0.051166
67 +Technique -> OD -0.093116
68 +Supp -> OD -0.100162
69 +Supp -> Air -0.273856
70 +Gtype -> O -0.276598
71 +Technique -> Gtype -0.372736
72 +Gtype -> Anti -0.373755
73 +Gtype -> Med -0.384161
74 +OD -> O -0.433058
75 +Agit -> O -0.645946
76 +O -> Air -0.659946
77 +Substrain -> O -0.912411
78 +Gtype -> OD -0.984268
79 +Supp -> Med -1.212185
80 +Med -> Supp -1.967819
81 +
82 +
83 +Top unlikely transitions:
84 +OD -> OD 6.218349
85 +Med -> Med 5.630081
86 +Temp -> Temp 5.606252
87 +Anti -> Anti 5.592992
88 +Air -> Air 5.496160
89 +Gtype -> Gtype 5.032489
90 +O -> O 4.908086
91 +Phase -> Phase 4.730701
92 +Gversion -> Gversion 4.476840
93 +Technique -> Technique 4.434337
94 +Supp -> Supp 4.084657
95 +pH -> pH 1.968754
96 +Substrain -> Gtype 1.897449
97 +O -> Supp 1.725753
98 +Air -> O 1.650897
99 +O -> Gtype 1.589697
100 +Gtype -> Supp 1.521503
101 +O -> Technique 1.404884
102 +Technique -> Air 0.905351
103 +O -> Temp 0.873962
104 +O -> Anti 0.719006
105 +Med -> O 0.502394
106 +O -> Gversion 0.498028
107 +Temp -> O 0.179242
108 +OD -> Phase 0.164045
109 +O -> Phase 0.156883
110 +Phase -> O 0.137263
111 +Gtype -> Air 0.075321
112 +Gtype -> pH 0.063986
113 +O -> Med 0.051856
114 +O -> Strain 0.030496
115 +Supp -> O 0.016375
116 +Anti -> O 0.009641
117 +O -> OD -0.006635
118 +OD -> Med -0.051166
119 +Technique -> OD -0.093116
120 +Supp -> OD -0.100162
121 +Supp -> Air -0.273856
122 +Gtype -> O -0.276598
123 +Technique -> Gtype -0.372736
124 +Gtype -> Anti -0.373755
125 +Gtype -> Med -0.384161
126 +OD -> O -0.433058
127 +Agit -> O -0.645946
128 +O -> Air -0.659946
129 +Substrain -> O -0.912411
130 +Gtype -> OD -0.984268
131 +Supp -> Med -1.212185
132 +Med -> Supp -1.967819
133 +Phase -> OD -2.190300
134 +
135 +
136 +Top positive:
137 +10.442312 Supp b'lemma:Iron'
138 +9.237513 O b'lemma:_'
139 +8.066306 Phase b'lemma:stationary'
140 +8.024906 Air b'lemma:aerobic'
141 +7.908903 O b'lemma:1'
142 +7.889018 Supp b'lemma:nitrate'
143 +7.674820 Air b'lemma:anaerobic'
144 +7.633175 Technique b'lemma:ChIP-exo'
145 +6.899479 Strain b'lemma:k-12'
146 +6.830547 Med b'lemma:MOPS'
147 +6.657016 O b'lemma:rpob'
148 +6.360670 O b'lemma:2'
149 +6.250166 Gtype b'lemma:type'
150 +6.190584 Substrain b'lemma:mg1655'
151 +6.146067 Air b'-1:lemma:ChIP-Seq'
152 +6.024964 Gversion b'lemma:asm584v2'
153 +5.999859 Technique b'lemma:chipseq'
154 +5.989574 Phase b'lemma:mid-log'
155 +5.930186 Supp b'lemma:pq'
156 +5.914739 O b'lemma:3'
157 +5.824268 O b'lemma:Custom'
158 +5.823157 O b'-1:lemma:tag'
159 +5.790768 O b'-1:lemma:ChIP-exo'
160 +5.787399 Gtype b'lemma:\xce\xb4cra'
161 +5.679226 Air b'lemma:Aerobic'
162 +5.626571 OD b'lemma:od600'
163 +5.547367 Technique b'lemma:ChIP-Seq'
164 +5.529761 O b'lemma:rep2'
165 +5.480516 Med b'lemma:LB'
166 +5.426406 O b'lemma:rep1'
167 +5.393841 Gversion b'lemma:nc'
168 +5.303166 Gtype b'lemma:flag-tag'
169 +5.303166 Gtype b'-1:lemma:c-terminal'
170 +5.281475 Supp b'+1:lemma:\xc2\xb5m'
171 +5.272063 O b'lemma:b'
172 +5.200310 O b'lemma:rep3'
173 +5.142193 OD b'lemma:od450'
174 +5.096163 O b'lemma:\xcf\x8332'
175 +4.990214 O b'lemma:Cra'
176 +4.926222 O b'lemma:a'
177 +4.919918 Gtype b'-1:lemma:\xe2\x88\x86'
178 +4.861544 Supp b'lemma:nh4cl'
179 +4.812167 Gtype b'+1:lemma:type'
180 +4.803668 Gtype b'lemma:wt'
181 +4.778995 Gtype b'lemma:delta-arca'
182 +4.686613 Gtype b'lemma:arca8myc'
183 +4.679240 O b'postag::'
184 +4.678684 O b'postag:IN'
185 +4.626493 OD b'+1:lemma:stationary'
186 +4.555800 Med b'lemma:lb'
187 +4.525977 Gversion b'-1:lemma:nc'
188 +4.514336 Supp b'lemma:glucose'
189 +4.505232 Supp b'lemma:Fe'
190 +4.499806 Gtype b'lemma:fnr8myc'
191 +4.365930 Technique b'lemma:rna-seq'
192 +4.353053 Supp b'lemma:rifampicin'
193 +4.322748 Gtype b'lemma:\xce\xb4fur'
194 +4.244815 Anti b'lemma:none'
195 +4.188590 O b'-1:lemma:Aerobic'
196 +4.181353 Temp b'-1:lemma:sample'
197 +4.061718 Supp b'lemma:dpd'
198 +4.014721 Gtype b'lemma:delta-fnr'
199 +3.991603 Technique b'lemma:rnaseq'
200 +3.988938 O b'lemma:-'
201 +3.968688 O b'-1:lemma:0.3-0.35'
202 +3.942988 Technique b'lemma:chip-seq'
203 +3.894918 Supp b'lemma:acetate'
204 +3.894666 O b'lemma:or'
205 +3.891632 O b'lemma:.'
206 +3.891632 O b'postag:.'
207 +3.833478 Supp b'lemma:arginine'
208 +3.829349 Supp b'lemma:no3'
209 +3.814064 Temp b'-1:lemma:\xcf\x8332'
210 +3.734701 Gtype b'+1:lemma:ph5'
211 +3.730485 Gtype b'lemma:\xce\xb4ompr'
212 +3.716590 Anti b'lemma:seqa'
213 +3.692742 Gtype b'lemma:wild-type'
214 +3.691658 Vess b'lemma:flask'
215 +3.691658 Vess b'-1:lemma:warm'
216 +3.680323 O b'+1:lemma:od600'
217 +3.566813 pH b'lemma:ph5'
218 +3.566813 pH b'+1:lemma:.5'
219 +3.560406 Gtype b'lemma:nsrr'
220 +3.535215 O b'-1:lemma:type'
221 +3.524717 O b'+1:postag:RB'
222 +3.508650 Supp b'lemma:fructose'
223 +3.483038 O b'lemma:chip'
224 +3.461944 Supp b'-1:lemma:Cra'
225 +3.457426 Strain b'+1:lemma:substr'
226 +3.436475 Supp b'+1:lemma:1'
227 +3.379628 Anti b'lemma:anti-myc'
228 +3.377053 Anti b'+1:lemma:antibody'
229 +3.370766 Technique b'-1:lemma:IP'
230 +3.365056 Gtype b'lemma:\xe2\x88\x86'
231 +3.358640 O b'-1:lemma:anaerobic'
232 +3.348760 O b'lemma:CEL'
233 +3.347149 Med b'+1:lemma:0.4'
234 +3.346755 Anti b'lemma:anti-rpos'
235 +3.291412 Gtype b'-1:lemma:ptac'
236 +3.278055 Gversion b'lemma:chip-seq'
237 +3.276131 O b'-1:lemma:0.3'
238 +3.234361 O b'-1:lemma:glucose'
239 +3.215739 O b'lemma:with'
240 +3.173824 Technique b'+1:lemma:chip-exo'
241 +3.151844 Med b'lemma:m63'
242 +3.095334 O b'lemma:2-3'
243 +3.095161 Gversion b'lemma:.2'
244 +3.095161 Gversion b'-1:lemma:u00096'
245 +3.091572 O b'postag:VBN'
246 +3.064702 Gversion b'lemma:u00096'
247 +3.064702 Gversion b'+1:lemma:.2'
248 +3.050277 Temp b'-1:lemma:37'
249 +3.030666 Gtype b'+1:lemma:with'
250 +3.027065 Supp b'+1:lemma:hour'
251 +3.014173 O b'+1:lemma:chip-seq'
252 +3.004960 Gtype b'+1:lemma:flagtag'
253 +2.985168 O b'lemma:for'
254 +2.975989 Supp b'-1:lemma:+'
255 +2.944441 O b'lemma:oxyr'
256 +2.938101 Gtype b'-1:lemma:phtpg'
257 +2.932689 Gtype b'-1:lemma:rpob'
258 +2.920019 Gtype b'+1:lemma:pq'
259 +2.881207 Supp b'lemma:nacl'
260 +2.860299 O b'lemma:s'
261 +2.845172 Supp b'+1:lemma:2'
262 +2.832035 pH b'lemma:.5'
263 +2.832035 pH b'-1:lemma:ph5'
264 +2.826929 Supp b'lemma:Leu'
265 +2.812269 O b'+1:postag:NNP'
266 +2.781059 Air b'lemma:anaerobically'
267 +2.778609 O b'-1:lemma:lb'
268 +2.760280 O b'+1:lemma:o.d.'
269 +2.735776 Temp b'lemma:\xc2\xb0c'
270 +2.720218 O b'lemma:at'
271 +2.711579 Technique b'-1:lemma:chip-exo'
272 +2.699401 O b'lemma:ompr'
273 +2.690406 Air b'-1:lemma:-'
274 +2.684684 Gtype b'lemma:\xce\xb4soxs'
275 +2.665215 Supp b'+1:lemma:_'
276 +2.660630 Phase b'-1:lemma:until'
277 +2.607235 Med b'lemma:L'
278 +2.607235 Med b'+1:lemma:broth'
279 +2.606476 Med b'+1:lemma:minimal'
280 +2.603782 O b'+1:lemma:pq'
281 +2.599075 Supp b'-1:lemma:\xc2\xb5m'
282 +2.567370 O b'-1:lemma:\xc2\xb0c'
283 +2.524994 Supp b'lemma:Adenine'
284 +2.522705 Gtype b'lemma:deltaseqa'
285 +2.522705 Gtype b'-1:lemma:old'
286 +2.484836 Temp b'-1:lemma:43'
287 +2.470532 Supp b'lemma:20'
288 +2.469573 O b'lemma:culture'
289 +2.450105 Technique b'-1:lemma:input'
290 +2.449141 O b'postag:DT'
291 +2.444936 Med b'+1:lemma:2.0'
292 +2.421299 Med b'-1:lemma:ml'
293 +2.419343 Temp b'lemma:43'
294 +2.410866 O b'+1:lemma:mid-log'
295 +2.405996 Supp b'lemma:iptg'
296 +2.397598 Air b'postag:RB'
297 +2.397388 Med b'+1:lemma:g/l'
298 +2.360509 Gtype b'-1:lemma:nsrr'
299 +2.349789 O b'lemma:condition'
300 +2.348126 Gtype b'lemma:ptac'
301 +2.347726 Gversion b'lemma:000913'
302 +2.337057 O b'lemma:chip-arca'
303 +2.336687 Gtype b'lemma:pk4854'
304 +2.327964 Med b'lemma:media'
305 +2.272835 O b'-1:lemma:stpa'
306 +2.265750 O b'lemma:Lrp'
307 +2.265406 O b'lemma:soxs'
308 +2.265406 O b'lemma:soxr'
309 +2.262228 O b'postag:VBG'
310 +2.253322 O b'-1:lemma:l1'
311 +2.237857 Air b'lemma:anerobically'
312 +2.184062 O b'-1:lemma:media'
313 +2.159204 Supp b'-1:lemma:with'
314 +2.158856 O b'lemma:genotype/variation'
315 +2.157878 O b'-1:lemma:dpd'
316 +2.152540 Gtype b'lemma:dfnr'
317 +2.148134 O b'lemma:affyexp'
318 +2.137699 O b'postag:CC'
319 +2.111648 Supp b'lemma:methanol'
320 +2.108494 O b'+1:postag:VBP'
321 +2.075540 OD b'lemma:phase'
322 +2.075417 Gversion b'postag:CD'
323 +2.068323 Supp b'lemma:0.2'
324 +2.064252 Gtype b'-1:lemma:vector'
325 +2.056563 OD b'lemma:0.3'
326 +2.050748 OD b'-1:lemma:~'
327 +2.050625 Gtype b'lemma:WT'
328 +2.032571 Temp b'lemma:37'
329 +2.018067 Gtype b'+1:lemma:_'
330 +2.017840 O b'lemma:Fur'
331 +2.011856 O b'lemma:argr'
332 +2.006861 Technique b'+1:lemma:rna-seq'
333 +1.998918 O b'lemma:purr'
334 +1.996704 Phase b'+1:lemma:for'
335 +1.989638 Med b'lemma:glucose'
336 +1.943868 Phase b'-1:lemma:mid-log'
337 +
338 +
339 +Top negative:
340 +0.000312 OD b'-1:lemma:mid-log'
341 +0.000096 OD b'-1:postag:VBN'
342 +0.000087 Gtype b'-1:lemma:small'
343 +0.000087 Med b'-1:lemma:g/l'
344 +0.000084 Agit b'+1:postag:NN'
345 +0.000056 O b'+1:lemma:ChIP-Seq'
346 +0.000053 Supp b'-1:lemma:1g/l'
347 +0.000051 O b'lemma:hour'
348 +0.000051 Gtype b'lemma:cra'
349 +0.000036 OD b'lemma:and'
350 +0.000021 Temp b'lemma:-lrb-'
351 +0.000021 Temp b'postag:-LRB-'
352 +0.000019 O b'-1:lemma:l2'
353 +0.000014 Technique b'+1:lemma:-rrb-'
354 +0.000014 Technique b'+1:postag:-RRB-'
355 +0.000007 Gtype b'lemma:small'
356 +-0.000002 Gversion b'-1:postag:NN'
357 +-0.000046 O b'-1:lemma:g/l'
358 +-0.000070 Supp b'-1:lemma:and'
359 +-0.000073 O b'-1:lemma:1'
360 +-0.000129 Med b'postag:CD'
361 +-0.000455 O b'-1:lemma:20'
362 +-0.000511 OD b'+1:postag:-LRB-'
363 +-0.000708 OD b'+1:postag:NNS'
364 +-0.000735 Gtype b'postag:CD'
365 +-0.000807 Gtype b'+1:postag:NNS'
366 +-0.000867 O b'+1:postag:CD'
367 +-0.000974 Gtype b'-1:postag:CD'
368 +-0.001057 O b'+1:lemma:~'
369 +-0.001061 O b'+1:lemma:fnr'
370 +-0.001528 Air b'+1:lemma:70'
371 +-0.001678 O b'-1:lemma:the'
372 +-0.001682 O b'postag:VBP'
373 +-0.002024 O b'+1:lemma:min'
374 +-0.002325 Phase b'-1:postag:NN'
375 +-0.002352 O b'lemma:n2'
376 +-0.002519 O b'lemma:\xce\xb4fur'
377 +-0.002662 Air b'lemma:,'
378 +-0.002662 Air b'postag:,'
379 +-0.005486 Gtype b'+1:lemma:2'
380 +-0.006088 O b'+1:lemma:o2'
381 +-0.006828 Air b'-1:lemma:and'
382 +-0.008457 Temp b'-1:lemma:\xc2\xb0c'
383 +-0.009847 O b'-1:lemma:,'
384 +-0.009847 O b'-1:postag:,'
385 +-0.010744 O b'+1:lemma:phase'
386 +-0.011644 O b'lemma:cell'
387 +-0.011902 Gtype b'-1:postag:NNP'
388 +-0.013075 OD b'+1:lemma:and'
389 +-0.013273 O b'-1:lemma:\xe2\x88\x86'
390 +-0.013803 O b'lemma:e.'
391 +-0.013807 O b'-1:lemma:sample'
392 +-0.013821 O b'+1:lemma:mm'
393 +-0.014710 O b'+1:lemma:rep1'
394 +-0.015201 Air b'+1:lemma:-lrb-'
395 +-0.015350 OD b'+1:postag:CD'
396 +-0.017836 O b'+1:lemma:grow'
397 +-0.019975 O b'lemma:anaerobic'
398 +-0.024268 Air b'+1:postag:CD'
399 +-0.025672 O b'lemma:medium'
400 +-0.025888 O b'+1:lemma:95'
401 +-0.026118 O b'lemma:k-12'
402 +-0.027112 O b'+1:lemma:25'
403 +-0.029447 Air b'postag:CC'
404 +-0.030441 O b'lemma:m63'
405 +-0.030598 OD b'+1:postag:CC'
406 +-0.032367 O b'lemma:grow'
407 +-0.035584 Air b'+1:postag:-LRB-'
408 +-0.039003 O b'lemma:mg/ml'
409 +-0.039003 O b'-1:lemma:150'
410 +-0.041109 O b'-1:lemma:e.'
411 +-0.043248 Supp b'-1:postag:NN'
412 +-0.049539 Gversion b'+1:postag:NN'
413 +-0.050421 O b'lemma:25'
414 +-0.053998 Temp b'postag:JJ'
415 +-0.060680 O b'-1:lemma:at'
416 +-0.060890 Phase b'-1:postag:JJ'
417 +-0.061474 O b'lemma:lb'
418 +-0.064297 Med b'-1:postag:NN'
419 +-0.065687 O b'-1:postag:VBN'
420 +-0.067931 O b'-1:lemma:o2'
421 +-0.068731 O b'+1:lemma:5'
422 +-0.069126 O b'+1:lemma:shake'
423 +-0.069173 O b'-1:lemma:25'
424 +-0.071503 Air b'-1:postag:CC'
425 +-0.075779 O b'+1:lemma:0.4'
426 +-0.077365 Air b'-1:lemma:or'
427 +-0.080349 O b'lemma:30'
428 +-0.087136 O b'-1:lemma:mm'
429 +-0.089014 pH b'postag:NN'
430 +-0.089177 O b'lemma:o2'
431 +-0.097371 Supp b'lemma:and'
432 +-0.101711 Gtype b'-1:postag:DT'
433 +-0.104337 O b'+1:lemma:antibody'
434 +-0.113298 O b'-1:lemma:um'
435 +-0.113298 O b'+1:lemma:paraquat'
436 +-0.118293 Phase b'+1:postag:NN'
437 +-0.119989 O b'-1:lemma:30'
438 +-0.126122 Phase b'-1:lemma:at'
439 +-0.135089 Supp b'postag:CC'
440 +-0.137129 O b'-1:postag:IN'
441 +-0.138660 O b'+1:lemma:arginine'
442 +-0.139143 O b'+1:lemma:until'
443 +-0.162860 O b'lemma:150'
444 +-0.162860 O b'+1:lemma:mg/ml'
445 +-0.164772 OD b'postag:JJ'
446 +-0.171975 Temp b'postag:NN'
447 +-0.183842 O b'lemma:phase'
448 +-0.184042 O b'+1:lemma:-rrb-'
449 +-0.185930 O b'-1:lemma:-lrb-'
450 +-0.185933 O b'-1:lemma:rifampicin'
451 +-0.195035 Med b'-1:postag:IN'
452 +-0.204004 Anti b'+1:postag:JJ'
453 +-0.210272 Gtype b'lemma:delta'
454 +-0.217084 Supp b'+1:postag:VBN'
455 +-0.218515 O b'lemma:co2'
456 +-0.219042 O b'-1:lemma:of'
457 +-0.220276 O b'+1:lemma:%'
458 +-0.228465 Med b'+1:postag:NN'
459 +-0.229909 O b'-1:lemma:n2'
460 +-0.230847 OD b'+1:postag:NN'
461 +-0.248221 O b'-1:lemma:ml'
462 +-0.250359 Anti b'+1:lemma:anti-fur'
463 +-0.279734 O b'+1:lemma:0.3'
464 +-0.283648 O b'-1:postag:-LRB-'
465 +-0.286892 O b'-1:lemma:mid-log'
466 +-0.312152 Gtype b'-1:postag:NN'
467 +-0.322232 O b'-1:lemma:0.1'
468 +-0.331861 O b'+1:postag:-RRB-'
469 +-0.336159 O b'-1:lemma:from'
470 +-0.340286 Med b'+1:postag:IN'
471 +-0.341633 O b'+1:postag:IN'
472 +-0.355453 O b'-1:lemma:cra'
473 +-0.366315 O b'+1:lemma:.'
474 +-0.366315 O b'+1:postag:.'
475 +-0.376920 O b'postag:RB'
476 +-0.405053 O b'lemma:\xe2\x88\x86'
477 +-0.424434 O b'+1:postag:NNS'
478 +-0.430071 O b'lemma:aerobically'
479 +-0.440727 Air b'postag:CD'
480 +-0.446825 O b'lemma:anaerobically'
481 +-0.447447 O b'-1:lemma:od600'
482 +-0.465924 O b'+1:lemma:cell'
483 +-0.468267 O b'lemma:od600'
484 +-0.472878 O b'lemma:dissolve'
485 +-0.502409 O b'lemma:20'
486 +-0.508433 O b'-1:lemma:~'
487 +-0.524904 Air b'-1:postag:JJ'
488 +-0.543671 O b'+1:lemma:fecl2'
489 +-0.554628 O b'-1:lemma:0.2'
490 +-0.556138 Temp b'+1:lemma:to'
491 +-0.556138 Temp b'+1:postag:TO'
492 +-0.589220 Supp b'+1:lemma:rifampicin'
493 +-0.610212 O b'lemma:0.3'
494 +-0.622831 O b'lemma:media'
495 +-0.657974 O b'-1:lemma:37'
496 +-0.677014 Supp b'-1:lemma:%'
497 +-0.678005 O b'+1:lemma:\xc2\xb0c'
498 +-0.707832 O b'lemma:of'
499 +-0.722898 O b'-1:lemma:rpob'
500 +-0.723244 O b'+1:lemma:+'
501 +-0.755790 OD b'lemma:-lrb-'
502 +-0.789730 O b'lemma:0.2'
503 +-0.814406 O b'-1:lemma:grow'
504 +-0.909402 O b'-1:postag::'
505 +-0.915529 O b'-1:postag:VBG'
506 +-0.926818 Technique b'-1:postag::'
507 +-0.933497 OD b'postag:-LRB-'
508 +-0.980631 O b'lemma:fecl2'
509 +-1.017727 O b'-1:lemma:dissolve'
510 +-1.017727 O b'+1:lemma:methanol'
511 +-1.020513 O b'lemma:2h'
512 +-1.020513 O b'-1:lemma:additional'
513 +-1.034839 O b'lemma:37'
514 +-1.121637 O b'lemma:0.1'
515 +-1.205775 O b'+1:lemma:supplement'
516 +-1.230075 Supp b'+1:lemma:acetate'
517 +-1.250120 O b'+1:postag:VBG'
518 +-1.256564 O b'lemma:wt'
519 +-1.276371 O b'+1:lemma:g/l'
520 +-1.304341 O b'-1:lemma:IP'
521 +-1.328183 O b'-1:lemma:co2'
522 +-1.391087 O b'-1:lemma:ompr'
523 +-1.452522 Anti b'postag:NNP'
524 +-1.489210 O b'+1:lemma:at'
525 +-1.603327 Air b'+1:postag:JJ'
526 +-1.726036 O b'lemma:mid-log'
527 +-1.737094 O b'lemma:rifampicin'
528 +-1.758957 O b'-1:lemma:nsrr'
529 +-1.762287 Supp b'postag:JJ'
530 +-1.770399 Air b'postag:NN'
531 +-1.810971 O b'+1:lemma:hour'
532 +-2.081711 Phase b'postag:JJ'
533 +-2.115674 O b'lemma:methanol'
534 +-2.200974 O b'+1:lemma:in'
535 +-2.263160 O b'+1:lemma:2'
536 +-2.388655 O b'-1:lemma:2'
537 +-2.590523 O b'+1:lemma:1'
538 +-4.545984 O b'-1:lemma::'
539 +-4.681648 O b'-1:lemma:_'
540 +
1 +********** TRAINING AND TESTING REPORT **********
2 +Training file: training-data-set-70_v4.txt
3 +
4 +best params:{'c1': 0.2964346107181682, 'c2': 0.033092970074011845}
5 +best CV score:0.7978231610325259
6 +model size: 0.07M
7 +
8 +Flat F1: 0.8182231777447608
9 + precision recall f1-score support
10 +
11 + OD 0.769 0.400 0.526 25
12 + pH 1.000 1.000 1.000 12
13 + Technique 1.000 0.909 0.952 22
14 + Med 0.897 0.912 0.904 57
15 + Temp 0.818 1.000 0.900 18
16 + Vess 0.000 0.000 0.000 0
17 + Agit 0.000 0.000 0.000 0
18 + Phase 1.000 0.789 0.882 19
19 + Air 0.763 0.763 0.763 59
20 + Anti 0.875 0.778 0.824 9
21 + Strain 1.000 1.000 1.000 1
22 + Gtype 0.810 0.833 0.821 102
23 + Substrain 0.000 0.000 0.000 1
24 + Supp 0.879 0.740 0.803 127
25 + Gversion 0.000 0.000 0.000 0
26 +
27 +avg / total 0.854 0.794 0.818 452
28 +
29 +
30 +Top likely transitions:
31 +OD -> OD 6.341340
32 +Agit -> Agit 5.796664
33 +Med -> Med 5.137165
34 +Temp -> Temp 4.995762
35 +Anti -> Anti 4.990782
36 +Gtype -> Gtype 4.240477
37 +Gversion -> Gversion 4.225898
38 +Supp -> Supp 4.204534
39 +Air -> Air 3.999870
40 +Phase -> Phase 3.766897
41 +Technique -> Technique 3.408533
42 +O -> O 3.070414
43 +pH -> pH 2.382635
44 +O -> Supp 1.706269
45 +Gtype -> Supp 1.701317
46 +Phase -> Supp 1.187792
47 +Air -> Temp 1.062230
48 +O -> Gtype 1.028035
49 +O -> Technique 0.914562
50 +Vess -> Temp 0.909296
51 +Substrain -> Gtype 0.750526
52 +Air -> Phase 0.620936
53 +Med -> O 0.613845
54 +Temp -> Agit 0.571012
55 +Air -> Med 0.331698
56 +O -> Anti 0.318145
57 +O -> Gversion 0.070972
58 +Med -> Supp 0.067887
59 +Technique -> Air 0.035349
60 +Air -> O 0.020752
61 +Supp -> O 0.013814
62 +Supp -> Technique -0.037404
63 +Vess -> O -0.060438
64 +Gtype -> Phase -0.068194
65 +O -> Phase -0.091450
66 +OD -> O -0.114114
67 +Supp -> Gtype -0.122317
68 +Gtype -> OD -0.173122
69 +Anti -> O -0.230490
70 +Gtype -> Med -0.240559
71 +O -> Air -0.241130
72 +O -> Temp -0.243070
73 +Technique -> pH -0.267697
74 +Phase -> OD -0.299766
75 +Supp -> Med -0.355333
76 +Gversion -> O -0.394835
77 +OD -> Med -0.450024
78 +Gtype -> Anti -0.546414
79 +O -> Agit -0.704801
80 +Agit -> O -0.801535
81 +
82 +
83 +Top unlikely transitions:
84 +Temp -> Temp 4.995762
85 +Anti -> Anti 4.990782
86 +Gtype -> Gtype 4.240477
87 +Gversion -> Gversion 4.225898
88 +Supp -> Supp 4.204534
89 +Air -> Air 3.999870
90 +Phase -> Phase 3.766897
91 +Technique -> Technique 3.408533
92 +O -> O 3.070414
93 +pH -> pH 2.382635
94 +O -> Supp 1.706269
95 +Gtype -> Supp 1.701317
96 +Phase -> Supp 1.187792
97 +Air -> Temp 1.062230
98 +O -> Gtype 1.028035
99 +O -> Technique 0.914562
100 +Vess -> Temp 0.909296
101 +Substrain -> Gtype 0.750526
102 +Air -> Phase 0.620936
103 +Med -> O 0.613845
104 +Temp -> Agit 0.571012
105 +Air -> Med 0.331698
106 +O -> Anti 0.318145
107 +O -> Gversion 0.070972
108 +Med -> Supp 0.067887
109 +Technique -> Air 0.035349
110 +Air -> O 0.020752
111 +Supp -> O 0.013814
112 +Supp -> Technique -0.037404
113 +Vess -> O -0.060438
114 +Gtype -> Phase -0.068194
115 +O -> Phase -0.091450
116 +OD -> O -0.114114
117 +Supp -> Gtype -0.122317
118 +Gtype -> OD -0.173122
119 +Anti -> O -0.230490
120 +Gtype -> Med -0.240559
121 +O -> Air -0.241130
122 +O -> Temp -0.243070
123 +Technique -> pH -0.267697
124 +Phase -> OD -0.299766
125 +Supp -> Med -0.355333
126 +Gversion -> O -0.394835
127 +OD -> Med -0.450024
128 +Gtype -> Anti -0.546414
129 +O -> Agit -0.704801
130 +Agit -> O -0.801535
131 +Gtype -> O -0.877514
132 +Substrain -> O -0.942951
133 +Technique -> O -1.070076
134 +
135 +
136 +Top positive:
137 +5.560165 Technique b'lemma[:2]:Ch'
138 +3.867275 O b'lemma[:2]:re'
139 +3.624712 Air b'lemma:anaerobic'
140 +3.330563 Phase b'lemma:mid-log'
141 +3.251308 O b'-1:lemma:tag'
142 +3.248997 O b'+1:lemma:m63'
143 +3.228929 Gtype b'-1:lemma:\xe2\x88\x86'
144 +3.210040 O b'lemma:1'
145 +3.210040 O b'lemma[:2]:1'
146 +3.159131 OD b'lemma[:2]:od'
147 +3.135910 Gtype b'+1:lemma:flagtag'
148 +3.047581 Supp b'+1:lemma:\xc2\xb5m'
149 +2.949460 Med b'+1:lemma:0.4'
150 +2.916942 Air b'lemma:aerobic'
151 +2.901664 Gtype b'lemma[:1]:\xce\xb4'
152 +2.895390 Gtype b'lemma:arca8myc'
153 +2.891054 O b'+1:lemma:od600'
154 +2.884994 Gtype b'lemma[:2]:cr'
155 +2.868477 Phase b'lemma:stationary'
156 +2.758780 Supp b'lemma:Iron'
157 +2.758780 Supp b'lemma[:2]:Ir'
158 +2.751464 Supp b'+1:lemma:1'
159 +2.749032 Air b'lemma[:1]:a'
160 +2.739001 Air b'-1:lemma:ChIP-Seq'
161 +2.637207 Supp b'+1:lemma:2'
162 +2.611400 Anti b'+1:lemma:antibody'
163 +2.538029 Substrain b'lemma:mg1655'
164 +2.532452 O b'-1:lemma:ChIP-exo'
165 +2.530749 Gversion b'-1:lemma:nc'
166 +2.522114 O b'lemma:rpob'
167 +2.497323 O b'lemma:_'
168 +2.497323 O b'lemma[:1]:_'
169 +2.497323 O b'lemma[:2]:_'
170 +2.467576 O b'lemma:2'
171 +2.467576 O b'lemma[:2]:2'
172 +2.451387 Technique b'lemma[:2]:rn'
173 +2.450652 O b'lemma:c-terminal'
174 +2.450652 O b'+1:lemma:flag-tag'
175 +2.450652 O b'lemma[:2]:c-'
176 +2.442475 Gtype b'lemma[:2]:de'
177 +2.418091 O b'lemma:3'
178 +2.418091 O b'lemma[:2]:3'
179 +2.408360 OD b'+1:lemma:stationary'
180 +2.385694 Temp b'-1:lemma:sample'
181 +2.372903 Gtype b'-1:lemma:rpob'
182 +2.354163 Gtype b'lemma:type'
183 +2.354163 Gtype b'lemma[:2]:ty'
184 +2.346327 O b'lemma[:2]:ge'
185 +2.344928 Technique b'lemma:chipseq'
186 +2.307523 Gtype b'lemma[:1]:w'
187 +2.303430 Med b'lemma[:1]:L'
188 +2.301957 Technique b'lemma[:1]:C'
189 +2.289093 Supp b'lemma:arginine'
190 +2.273052 O b'-1:lemma:Aerobic'
191 +2.264869 Med b'+1:lemma:2.0'
192 +2.254065 O b'-1:lemma:anaerobic'
193 +2.249649 Gtype b'lemma[:1]:W'
194 +2.162423 Anti b'lemma[:2]:an'
195 +2.160242 Strain b'lemma:k-12'
196 +2.160242 Strain b'lemma[:2]:k-'
197 +2.144723 Supp b'lemma:acetate'
198 +2.129184 Gtype b'lemma:flag-tag'
199 +2.129184 Gtype b'-1:lemma:c-terminal'
200 +2.111214 Air b'lemma[:1]:A'
201 +2.098945 Gtype b'lemma:nsrr'
202 +2.098945 Gtype b'lemma[:2]:ns'
203 +2.095022 Supp b'-1:lemma:supplement'
204 +2.082819 O b'-1:lemma:fructose'
205 +2.076750 pH b'lemma:ph5'
206 +2.076750 pH b'+1:lemma:.5'
207 +2.072261 Temp b'-1:lemma:\xcf\x8332'
208 +2.065755 O b'lemma:b'
209 +2.065755 O b'lemma[:2]:b'
210 +2.065419 Gtype b'+1:lemma:type'
211 +2.051267 Air b'lemma[:2]:an'
212 +2.036209 Technique b'lemma[:2]:ch'
213 +2.031416 O b'+1:lemma:pq'
214 +2.016412 Temp b'lemma[:1]:3'
215 +2.004704 Supp b'+1:lemma:mid-log'
216 +1.984448 O b'-1:lemma:0.3-0.35'
217 +1.967808 Supp b'lemma[:2]:0.'
218 +1.947363 Supp b'lemma:nitrate'
219 +1.947363 Supp b'lemma[:2]:ni'
220 +1.937420 Gversion b'lemma:chip-seq'
221 +1.913900 Gversion b'lemma:nc'
222 +1.913900 Gversion b'lemma[:2]:nc'
223 +1.912878 O b'lemma:0.4'
224 +1.907802 Strain b'+1:lemma:substr'
225 +1.897924 Air b'lemma[:2]:ae'
226 +1.871109 Phase b'-1:lemma:mid-log'
227 +1.845125 Air b'lemma:Aerobic'
228 +1.845125 Air b'lemma[:2]:Ae'
229 +1.827855 Supp b'-1:lemma:Cra'
230 +1.824332 Supp b'lemma:Fe'
231 +1.824332 Supp b'lemma[:2]:Fe'
232 +1.824175 Supp b'lemma:pq'
233 +1.824175 Supp b'lemma[:2]:pq'
234 +1.809404 Supp b'+1:lemma:_'
235 +1.808766 OD b'lemma[:1]:o'
236 +1.806302 Med b'isUpper'
237 +1.798647 Gtype b'lemma[:2]:fl'
238 +1.798534 Supp b'lemma[:2]:gl'
239 +1.795820 O b'lemma:Custom'
240 +1.795820 O b'lemma[:2]:Cu'
241 +1.790374 Air b'+1:lemma:37'
242 +1.790074 Gversion b'lemma[:2]:00'
243 +1.785485 Supp b'lemma[:2]:fe'
244 +1.780403 O b'+1:postag:RB'
245 +1.756454 Supp b'lemma:iptg'
246 +1.735997 Supp b'lemma[:1]:I'
247 +1.733781 Gtype b'lemma:fnr8myc'
248 +1.728269 Gversion b'lemma:asm584v2'
249 +1.728269 Gversion b'lemma[:2]:as'
250 +1.720876 Supp b'+1:lemma:hour'
251 +1.715461 Temp b'-1:lemma:37'
252 +1.694020 Substrain b'lemma[:2]:mg'
253 +1.689985 Strain b'lemma[:1]:k'
254 +1.673566 O b'postag::'
255 +1.673566 O b'postag[:1]::'
256 +1.673566 O b'postag[:2]::'
257 +1.669711 Supp b'lemma[:1]:2'
258 +1.661627 O b'+1:postag:VBD'
259 +1.659863 Vess b'lemma:flask'
260 +1.659863 Vess b'-1:lemma:warm'
261 +1.656423 Gtype b'lemma[:2]:ar'
262 +1.655250 Supp b'lemma[:1]:1'
263 +1.654055 Med b'lemma[:1]:m'
264 +1.646225 Med b'lemma:MOPS'
265 +1.646225 Med b'lemma[:1]:M'
266 +1.646225 Med b'lemma[:2]:MO'
267 +1.643933 Vess b'lemma[:2]:fl'
268 +1.636408 O b'-1:lemma:medium'
269 +1.620415 O b'postag[:1]:V'
270 +1.620415 O b'postag[:2]:VB'
271 +1.611470 Supp b'lemma:fructose'
272 +1.588047 Temp b'+1:lemma:\xc2\xb0c'
273 +1.571785 Technique b'symb'
274 +1.567665 Supp b'lemma:dpd'
275 +1.567665 Supp b'lemma[:2]:dp'
276 +1.561904 Gtype b'+1:lemma:_'
277 +1.558404 O b'+1:lemma:grow'
278 +1.556080 Technique b'-1:lemma:input'
279 +1.531102 Gtype b'+1:lemma::'
280 +1.516656 O b'isNumber'
281 +1.513499 Technique b'+1:lemma:chip-exo'
282 +1.484457 pH b'+1:postag:CD'
283 +1.479000 Gtype b'lemma[:2]:wi'
284 +1.463363 O b'+1:lemma:sparging'
285 +1.455056 Med b'+1:lemma:supplement'
286 +1.451942 Anti b'-1:lemma::'
287 +1.445199 Gtype b'+1:postag::'
288 +1.418029 Gtype b'-1:lemma:_'
289 +1.414884 Gtype b'hGreek'
290 +1.409332 Supp b'lemma[:2]:ac'
291 +1.384832 Med b'lemma:lb'
292 +1.384832 Med b'lemma[:2]:lb'
293 +1.381767 Gtype b'lemma:wt'
294 +1.381767 Gtype b'lemma[:2]:wt'
295 +1.376804 O b'isLower'
296 +1.370539 O b'-1:lemma:wt'
297 +1.369037 Supp b'-1:lemma:media'
298 +1.368164 O b'lemma:chip'
299 +1.362593 Temp b'-1:lemma:43'
300 +1.357457 O b'lemma:.'
301 +1.357457 O b'postag:.'
302 +1.357457 O b'postag[:1]:.'
303 +1.357457 O b'postag[:2]:.'
304 +1.357457 O b'lemma[:2]:.'
305 +1.343506 Gtype b'-1:lemma:vector'
306 +1.332982 Gtype b'lemma:pk4854'
307 +1.332982 Gtype b'lemma[:2]:pk'
308 +1.331836 O b'-1:lemma:antibody'
309 +1.322840 Gtype b'lemma[:1]:f'
310 +1.319451 Gtype b'-1:lemma:dna'
311 +1.319338 Med b'+1:lemma:g/l'
312 +1.314272 O b'lemma:\xcf\x8332'
313 +1.314272 O b'lemma[:1]:\xcf\x83'
314 +1.314272 O b'lemma[:2]:\xcf\x833'
315 +1.313977 Technique b'-1:lemma:chip-exo'
316 +1.307181 O b'-1:lemma:mg/ml'
317 +1.305428 Supp b'-1:lemma:+'
318 +1.296639 O b'lemma:purr'
319 +1.293394 Supp b'-1:lemma:0.2'
320 +1.292494 O b'-1:lemma:l1'
321 +1.282743 Supp b'lemma[:2]:fr'
322 +1.274232 Supp b'lemma:rifampicin'
323 +1.268226 Air b'+1:lemma:L'
324 +1.266974 Supp b'lemma:nh4cl'
325 +1.266974 Supp b'lemma[:2]:nh'
326 +1.264907 Supp b'lemma[:2]:ri'
327 +1.262307 Supp b'-1:postag:CD'
328 +1.260796 O b'+1:lemma:acetate'
329 +1.259575 OD b'lemma[:2]:0.'
330 +1.259142 Technique b'-1:lemma:_'
331 +1.246498 Gversion b'postag:NN'
332 +1.244033 Gtype b'symb'
333 +1.240305 Supp b'lemma[:2]:ip'
334 +1.236703 Temp b'lemma:43'
335 +1.236703 Temp b'lemma[:2]:43'
336 +1.228007 Gtype b'+1:lemma:control'
337 +
338 +
339 +Top negative:
340 +-0.001047 O b'+1:lemma:0.4'
341 +-0.001061 Supp b'postag[:1]:C'
342 +-0.001343 O b'lemma[:2]:co'
343 +-0.001801 Phase b'+1:postag:NN'
344 +-0.001816 Supp b'lemma[:1]:r'
345 +-0.003273 Supp b'+1:lemma:-rrb-'
346 +-0.004514 Agit b'isUpper'
347 +-0.005651 Supp b'+1:postag:-RRB-'
348 +-0.007968 O b'lemma[:2]:0.'
349 +-0.008508 Air b'isUpper'
350 +-0.008563 Supp b'-1:postag::'
351 +-0.008695 Air b'postag:-RRB-'
352 +-0.008695 Air b'postag[:2]:-R'
353 +-0.008695 Air b'lemma[:2]:-r'
354 +-0.009571 OD b'hUpper'
355 +-0.009571 OD b'hLower'
356 +-0.011349 Supp b'hUpper'
357 +-0.011349 Supp b'hLower'
358 +-0.011606 O b'-1:postag:JJ'
359 +-0.011683 pH b'postag[:1]:N'
360 +-0.011683 pH b'postag[:2]:NN'
361 +-0.012286 Supp b'isNumber'
362 +-0.014704 Air b'lemma:-rrb-'
363 +-0.017854 O b'-1:lemma:control'
364 +-0.021700 Med b'postag:NN'
365 +-0.024044 O b'lemma:sample'
366 +-0.026147 Supp b'postag:CD'
367 +-0.026147 Supp b'postag[:2]:CD'
368 +-0.028829 Air b'-1:postag:RB'
369 +-0.029219 Gtype b'lemma[:1]:n'
370 +-0.031150 Agit b'symb'
371 +-0.031420 O b'-1:postag:VBN'
372 +-0.031634 Air b'lemma[:1]:n'
373 +-0.032172 O b'+1:lemma:rep1'
374 +-0.032713 O b'+1:postag:JJ'
375 +-0.033674 OD b'postag:CD'
376 +-0.033674 OD b'postag[:2]:CD'
377 +-0.033755 OD b'postag[:1]:N'
378 +-0.033755 OD b'postag[:2]:NN'
379 +-0.035292 Air b'+1:lemma:anaerobically'
380 +-0.038458 Temp b'postag:NN'
381 +-0.040319 Supp b'-1:lemma:%'
382 +-0.041803 Temp b'hGreek'
383 +-0.050279 Med b'-1:lemma:lb'
384 +-0.052295 O b'-1:lemma:ml'
385 +-0.053032 O b'+1:lemma:sample'
386 +-0.061146 Gtype b'lemma[:1]:m'
387 +-0.063531 Vess b'hUpper'
388 +-0.063531 Vess b'hLower'
389 +-0.067513 Supp b'isLower'
390 +-0.080308 Temp b'postag[:1]:N'
391 +-0.080308 Temp b'postag[:2]:NN'
392 +-0.082384 Supp b'isUpper'
393 +-0.086496 Supp b'+1:lemma:acetate'
394 +-0.089008 O b'lemma[:2]:30'
395 +-0.093208 O b'lemma[:2]:fe'
396 +-0.093816 O b'lemma[:1]:m'
397 +-0.096992 O b'lemma[:1]:d'
398 +-0.099835 Gtype b'+1:lemma:-rrb-'
399 +-0.103697 O b'lemma[:2]:od'
400 +-0.106617 Anti b'symb'
401 +-0.111217 Phase b'postag:JJ'
402 +-0.111217 Phase b'postag[:1]:J'
403 +-0.111217 Phase b'postag[:2]:JJ'
404 +-0.115123 Gversion b'+1:postag:NN'
405 +-0.120980 Air b'-1:postag:-RRB-'
406 +-0.124184 Gtype b'lemma[:2]:rp'
407 +-0.124385 Air b'-1:lemma:-rrb-'
408 +-0.134523 O b'lemma:phase'
409 +-0.137832 Air b'isLower'
410 +-0.138018 O b'-1:postag:RB'
411 +-0.142066 Anti b'+1:lemma:anti-fur'
412 +-0.154765 O b'+1:lemma:hour'
413 +-0.162942 O b'lemma[:1]:n'
414 +-0.164561 O b'lemma:20'
415 +-0.167133 OD b'postag[:1]:C'
416 +-0.167374 O b'lemma:2h'
417 +-0.167374 O b'-1:lemma:additional'
418 +-0.167374 O b'lemma[:2]:2h'
419 +-0.168348 OD b'isLower'
420 +-0.175723 O b'-1:lemma:stir'
421 +-0.176493 O b'+1:lemma:0.3'
422 +-0.176731 O b'+1:lemma:culture'
423 +-0.176925 O b'-1:lemma:dna'
424 +-0.178939 O b'+1:lemma:delta'
425 +-0.190332 Air b'symb'
426 +-0.197832 Anti b'+1:postag:JJ'
427 +-0.203484 Supp b'lemma[:2]:mi'
428 +-0.206368 Air b'postag[:1]:N'
429 +-0.206368 Air b'postag[:2]:NN'
430 +-0.207030 Gtype b'-1:lemma:,'
431 +-0.207030 Gtype b'-1:postag:,'
432 +-0.209582 O b'-1:lemma:IP'
433 +-0.213265 Gtype b'+1:lemma:\xe2\x88\x86'
434 +-0.215249 Gtype b'-1:lemma:mg1655'
435 +-0.217685 O b'lemma:fecl2'
436 +-0.220159 O b'+1:lemma:-rrb-'
437 +-0.220935 Gtype b'-1:postag:NN'
438 +-0.227404 Technique b'postag:NN'
439 +-0.230401 Technique b'-1:lemma::'
440 +-0.232009 Air b'lemma:aerobically'
441 +-0.236029 O b'+1:lemma:phase'
442 +-0.238310 O b'lemma:wt'
443 +-0.238310 O b'lemma[:2]:wt'
444 +-0.243118 Temp b'-1:postag:NN'
445 +-0.244383 O b'lemma[:2]:ph'
446 +-0.246532 O b'-1:lemma:-lrb-'
447 +-0.249897 O b'-1:lemma:37'
448 +-0.254341 O b'lemma[:1]:L'
449 +-0.257191 Gtype b'-1:postag:SYM'
450 +-0.263503 O b'lemma[:2]:di'
451 +-0.264603 O b'lemma[:1]:w'
452 +-0.264928 O b'-1:lemma:grow'
453 +-0.265388 O b'+1:lemma:antibody'
454 +-0.273738 O b'lemma[:2]:ce'
455 +-0.277706 O b'lemma[:1]:k'
456 +-0.279240 O b'lemma[:1]:4'
457 +-0.290259 Supp b'postag:JJ'
458 +-0.290259 Supp b'postag[:1]:J'
459 +-0.290259 Supp b'postag[:2]:JJ'
460 +-0.292361 O b'-1:lemma:rna'
461 +-0.304576 Technique b'isUpper'
462 +-0.307541 Technique b'isLower'
463 +-0.310027 O b'lemma[:2]:me'
464 +-0.339328 O b'-1:lemma:ompr'
465 +-0.339987 O b'-1:postag:-LRB-'
466 +-0.345773 O b'lemma:150'
467 +-0.345773 O b'-1:lemma:concentration'
468 +-0.345773 O b'+1:lemma:mg/ml'
469 +-0.345773 O b'lemma[:2]:15'
470 +-0.347446 Med b'-1:postag:NN'
471 +-0.348906 Supp b'lemma:2'
472 +-0.348906 Supp b'lemma[:2]:2'
473 +-0.351429 Air b'+1:lemma:-lrb-'
474 +-0.352484 Air b'+1:postag:-LRB-'
475 +-0.353556 O b'-1:lemma:cra'
476 +-0.356329 Supp b'+1:lemma:dpd'
477 +-0.369302 Agit b'hUpper'
478 +-0.369302 Agit b'hLower'
479 +-0.381952 Technique b'-1:postag::'
480 +-0.384746 O b'-1:lemma:rifampicin'
481 +-0.399014 O b'lemma:cell'
482 +-0.402687 O b'+1:lemma:dissolve'
483 +-0.409688 Gtype b'isNumber'
484 +-0.436191 O b'lemma[:2]:gl'
485 +-0.440734 O b'+1:lemma:cell'
486 +-0.442622 O b'+1:postag:NNS'
487 +-0.452887 O b'lemma[:1]:p'
488 +-0.455834 O b'+1:lemma:.'
489 +-0.455834 O b'+1:postag:.'
490 +-0.459545 O b'lemma[:2]:mg'
491 +-0.481182 Air b'-1:postag:JJ'
492 +-0.482835 OD b'+1:postag:NN'
493 +-0.494269 Air b'postag:NN'
494 +-0.509176 O b'+1:postag:-RRB-'
495 +-0.510299 Anti b'postag:NNP'
496 +-0.510777 Air b'+1:postag:RB'
497 +-0.521353 Gtype b'lemma[:1]:g'
498 +-0.528030 O b'lemma:dissolve'
499 +-0.528030 O b'+1:lemma:methanol'
500 +-0.544300 Med b'-1:postag:VBN'
501 +-0.544878 O b'lemma:mid-log'
502 +-0.558725 Med b'+1:postag:NN'
503 +-0.565193 O b'lemma[:1]:0'
504 +-0.574181 OD b'+1:postag:CD'
505 +-0.594752 Supp b'-1:lemma:glucose'
506 +-0.616524 O b'-1:lemma:sample'
507 +-0.632529 Phase b'hUpper'
508 +-0.632529 Phase b'hLower'
509 +-0.635261 Gtype b'lemma:delta'
510 +-0.674300 O b'lemma:methanol'
511 +-0.674300 O b'-1:lemma:dissolve'
512 +-0.705737 Med b'-1:postag:CD'
513 +-0.728901 O b'+1:lemma:supplement'
514 +-0.740907 O b'lemma[:2]:ri'
515 +-0.792014 Phase b'-1:postag:NN'
516 +-0.804108 O b'lemma[:1]:A'
517 +-0.910559 OD b'isNumber'
518 +-0.914108 O b'+1:lemma:g/l'
519 +-0.932717 O b'lemma:rifampicin'
520 +-0.954851 O b'-1:lemma:supplement'
521 +-0.966998 Gtype b'lemma[:1]:a'
522 +-0.969405 Supp b'lemma[:1]:c'
523 +-1.019136 O b'lemma[:1]:\xce\xb4'
524 +-1.110821 Med b'symb'
525 +-1.170611 Gversion b'isLower'
526 +-1.220694 Gtype b'lemma[:1]:c'
527 +-1.221441 Gtype b'isUpper'
528 +-1.282188 O b'+1:postag:VBG'
529 +-1.331537 OD b'postag[:1]:-'
530 +-1.377314 Supp b'symb'
531 +-1.414732 Supp b'hGreek'
532 +-1.450462 OD b'lemma[:1]:-'
533 +-1.661998 O b'-1:lemma:2'
534 +-1.785964 O b'+1:lemma:2'
535 +-1.952351 O b'-1:postag::'
536 +-1.971208 O b'+1:lemma:1'
537 +-2.697878 O b'-1:lemma::'
538 +-2.771750 O b'-1:postag:VBG'
539 +-4.091719 O b'-1:lemma:_'
540 +