Estefani Gaytan Nunez

upload

Showing 24 changed files with 4282 additions and 0 deletions
1 +********** TRAINING AND TESTING REPORT **********
2 +Training file: training-data-set-70.txt
3 +
4 +best params:{'c1': 0.20934163148847484, 'c2': 5.972574594521125e-05}
5 +best CV score:0.8647885676297223
6 +model size: 0.05M
7 +
8 +Flat F1: 0.7568876974674016
9 + precision recall f1-score support
10 +
11 + OD 1.000 0.818 0.900 22
12 + pH 1.000 1.000 1.000 8
13 + Technique 0.955 0.913 0.933 23
14 + Med 1.000 0.925 0.961 53
15 + Temp 1.000 0.690 0.816 29
16 + Vess 1.000 1.000 1.000 1
17 + Agit 0.000 0.000 0.000 0
18 + Phase 0.882 1.000 0.938 15
19 + Air 0.543 0.362 0.435 69
20 + Anti 0.786 1.000 0.880 11
21 + Strain 0.000 0.000 0.000 1
22 + Gtype 0.863 0.812 0.836 85
23 + Substrain 0.000 0.000 0.000 0
24 + Supp 0.607 0.784 0.684 134
25 + Gversion 0.000 0.000 0.000 0
26 +
27 +avg / total 0.774 0.758 0.757 451
28 +
29 +
30 +Top likely transitions:
31 +Temp -> Temp 6.372271
32 +Agit -> Agit 6.336087
33 +Med -> Med 5.775362
34 +Supp -> Supp 5.599916
35 +OD -> OD 5.514939
36 +Anti -> Anti 5.400862
37 +Phase -> Phase 5.214849
38 +Air -> Air 4.870530
39 +Gtype -> Gtype 4.826805
40 +O -> O 4.601602
41 +Gversion -> Gversion 4.343416
42 +Technique -> Technique 4.055366
43 +pH -> pH 3.102886
44 +Substrain -> Gtype 1.934887
45 +Air -> O 1.597978
46 +O -> Supp 1.582569
47 +Gtype -> Supp 1.508336
48 +O -> Technique 1.406325
49 +O -> Gtype 1.205559
50 +O -> Temp 0.833133
51 +Supp -> O 0.751090
52 +O -> Phase 0.676762
53 +Temp -> O 0.670412
54 +Gtype -> Air 0.616010
55 +Med -> O 0.606543
56 +O -> Anti 0.311020
57 +Technique -> Air 0.225375
58 +OD -> O 0.192356
59 +O -> Med 0.168459
60 +Phase -> O 0.168068
61 +O -> Gversion 0.143784
62 +O -> pH 0.078839
63 +O -> OD 0.075883
64 +Strain -> O 0.069008
65 +Gtype -> Med -0.229150
66 +Gtype -> O -0.660221
67 +O -> Air -0.746238
68 +Substrain -> O -0.760215
69 +Phase -> OD -0.885028
70 +Technique -> Gtype -1.154393
71 +Med -> Supp -1.787447
72 +
73 +
74 +Top unlikely transitions:
75 +Temp -> Temp 6.372271
76 +Agit -> Agit 6.336087
77 +Med -> Med 5.775362
78 +Supp -> Supp 5.599916
79 +OD -> OD 5.514939
80 +Anti -> Anti 5.400862
81 +Phase -> Phase 5.214849
82 +Air -> Air 4.870530
83 +Gtype -> Gtype 4.826805
84 +O -> O 4.601602
85 +Gversion -> Gversion 4.343416
86 +Technique -> Technique 4.055366
87 +pH -> pH 3.102886
88 +Substrain -> Gtype 1.934887
89 +Air -> O 1.597978
90 +O -> Supp 1.582569
91 +Gtype -> Supp 1.508336
92 +O -> Technique 1.406325
93 +O -> Gtype 1.205559
94 +O -> Temp 0.833133
95 +Supp -> O 0.751090
96 +O -> Phase 0.676762
97 +Temp -> O 0.670412
98 +Gtype -> Air 0.616010
99 +Med -> O 0.606543
100 +O -> Anti 0.311020
101 +Technique -> Air 0.225375
102 +OD -> O 0.192356
103 +O -> Med 0.168459
104 +Phase -> O 0.168068
105 +O -> Gversion 0.143784
106 +O -> pH 0.078839
107 +O -> OD 0.075883
108 +Strain -> O 0.069008
109 +Gtype -> Med -0.229150
110 +Gtype -> O -0.660221
111 +O -> Air -0.746238
112 +Substrain -> O -0.760215
113 +Phase -> OD -0.885028
114 +Technique -> Gtype -1.154393
115 +Med -> Supp -1.787447
116 +
117 +
118 +Top positive:
119 +10.190054 Supp b'lemma:Iron'
120 +9.112733 O b'lemma:_'
121 +7.880021 Air b'lemma:anaerobic'
122 +7.549281 Technique b'lemma:ChIP-exo'
123 +7.465093 Strain b'+1:lemma:substr'
124 +7.422565 Med b'lemma:MOPS'
125 +7.415432 Phase b'lemma:exponential'
126 +7.415432 Phase b'lemma:stationary'
127 +7.398082 Air b'-1:lemma:ChIP-Seq'
128 +7.166683 O b'lemma:2'
129 +7.146221 O b'lemma:-'
130 +7.132564 Supp b'lemma:pq'
131 +6.977885 Air b'lemma:aerobic'
132 +6.744626 Gtype b'lemma:wt'
133 +6.631092 Phase b'lemma:mid-log'
134 +6.560555 O b'lemma:1'
135 +6.447344 Technique b'lemma:chipseq'
136 +6.389162 Gtype b'lemma:\xce\xb4cra'
137 +6.371591 Gversion b'lemma:asm584v2'
138 +6.280253 Supp b'+1:lemma:\xc2\xb5m'
139 +6.224426 O b'lemma:3'
140 +6.224046 O b'-1:lemma:tag'
141 +6.202217 Air b'lemma:Aerobic'
142 +6.046396 O b'lemma:rpob'
143 +5.857469 Gversion b'lemma:nc'
144 +5.848042 O b'lemma:Custom'
145 +5.738285 Gtype b'lemma:wild-type'
146 +5.702974 O b'lemma:Cra'
147 +5.644004 O b'lemma:b'
148 +5.533358 Gtype b'lemma:flag-tag'
149 +5.533358 Gtype b'-1:lemma:c-terminal'
150 +5.521522 O b'lemma:a'
151 +5.328505 Gtype b'lemma:type'
152 +5.276884 Supp b'lemma:nacl'
153 +5.252181 O b'-1:lemma:ChIP-exo'
154 +5.206802 Gtype b'+1:lemma:ph5'
155 +5.150774 Technique b'lemma:ChIP-Seq'
156 +5.068213 O b'postag:IN'
157 +5.035817 O b'lemma:rep1'
158 +4.983308 Vess b'lemma:flask'
159 +4.983308 Vess b'-1:lemma:warm'
160 +4.943797 O b'lemma:rep3'
161 +4.854929 Supp b'-1:lemma:Cra'
162 +4.845807 Gtype b'lemma:\xe2\x88\x86'
163 +4.794055 Gtype b'+1:lemma:type'
164 +4.645526 Gtype b'lemma:arca8myc'
165 +4.620794 Gtype b'lemma:\xce\xb4fur'
166 +4.559495 O b'lemma:\xcf\x8332'
167 +4.549068 O b'-1:lemma:type'
168 +4.420072 Supp b'lemma:rifampicin'
169 +4.386336 Technique b'lemma:rna-seq'
170 +4.375116 Supp b'lemma:acetate'
171 +4.337376 O b'lemma:ompr'
172 +4.249331 O b'postag:CC'
173 +4.232009 Supp b'lemma:nitrate'
174 +4.189729 Supp b'lemma:glucose'
175 +4.169856 Substrain b'lemma:mg1655'
176 +4.152149 Gtype b'lemma:dfnr'
177 +4.143746 Supp b'lemma:nh4cl'
178 +4.056334 O b'+1:lemma:pq'
179 +4.052385 Gtype b'lemma:pk4854'
180 +4.037199 Gtype b'lemma:fnr8myc'
181 +3.985929 Anti b'lemma:none'
182 +3.957986 Gtype b'lemma:delta-fnr'
183 +3.954172 Med b'lemma:lb'
184 +3.939606 Supp b'-1:lemma:+'
185 +3.935266 Technique b'lemma:chip-seq'
186 +3.929436 O b'postag:VBN'
187 +3.908650 Med b'-1:lemma:ml'
188 +3.884593 Gversion b'-1:lemma:nc'
189 +3.878464 Anti b'lemma:\xcf\x8332'
190 +3.863072 O b'lemma:.'
191 +3.863072 O b'postag:.'
192 +3.860626 O b'lemma:chip'
193 +3.854556 Technique b'lemma:rnaseq'
194 +3.854116 O b'postag::'
195 +3.783932 Agit b'+1:lemma:rpm'
196 +3.781020 Gtype b'lemma:\xce\xb4ompr'
197 +3.721025 Air b'-1:lemma:-'
198 +3.718911 Agit b'lemma:rpm'
199 +3.658315 O b'-1:lemma:glucose'
200 +3.650822 Gtype b'-1:lemma:\xe2\x88\x86'
201 +3.626592 Gtype b'lemma:nsrr'
202 +3.625435 Supp b'-1:lemma:with'
203 +3.621959 Med b'+1:lemma:0.4'
204 +3.534472 Gtype b'lemma:\xce\xb4soxr'
205 +3.534031 Supp b'lemma:no3'
206 +3.532340 O b'-1:lemma:0.3'
207 +3.529130 Temp b'-1:lemma:sample'
208 +3.523066 O b'lemma:with'
209 +3.479777 OD b'lemma:od450'
210 +3.451557 Gtype b'lemma:delta-arca'
211 +3.421135 Anti b'lemma:seqa'
212 +3.408004 O b'lemma:rep2'
213 +3.394045 O b'-1:lemma:Aerobic'
214 +3.391719 Gversion b'lemma:chip-seq'
215 +3.391021 Supp b'lemma:dpd'
216 +3.352795 O b'-1:lemma:lb'
217 +3.334642 Technique b'-1:lemma:IP'
218 +3.303675 O b'-1:lemma:0.3-0.35'
219 +3.272998 Air b'lemma:anaerobically'
220 +3.245394 Supp b'lemma:Leu'
221 +3.235692 Supp b'lemma:Fe'
222 +3.223774 Technique b'-1:lemma:chip-exo'
223 +3.209091 Air b'lemma:aerobically'
224 +3.198219 O b'postag:VBG'
225 +3.170084 O b'lemma:harbor'
226 +3.168019 Temp b'-1:lemma:43'
227 +3.163951 Supp b'lemma:arginine'
228 +3.160003 Gtype b'+1:lemma:pq'
229 +3.153015 Anti b'+1:lemma:antibody'
230 +3.115121 Strain b'lemma:k-12'
231 +3.074034 Technique b'+1:lemma:chip-exo'
232 +3.071794 Air b'postag:RB'
233 +3.069425 Substrain b'lemma:mg1655star'
234 +3.062133 Supp b'lemma:Adenine'
235 +3.060892 Air b'lemma:anaeroibc'
236 +3.056193 Temp b'-1:lemma:\xcf\x8332'
237 +3.042361 Supp b'+1:lemma:mm'
238 +3.019482 Gtype b'-1:lemma:ptac'
239 +2.998564 O b'-1:lemma:into'
240 +2.994519 pH b'lemma:5.5'
241 +2.973677 Gversion b'lemma:.2'
242 +2.973677 Gversion b'-1:lemma:u00096'
243 +2.973445 O b'-1:lemma:\xc2\xb0c'
244 +2.958977 Med b'lemma:media'
245 +2.930356 O b'+1:lemma:chip-seq'
246 +2.896285 Gtype b'+1:lemma:with'
247 +2.895621 Gtype b'-1:lemma:rpob'
248 +2.894434 Gtype b'+1:lemma:flagtag'
249 +2.893824 O b'lemma:CEL'
250 +2.871221 O b'+1:lemma:arca-8myc'
251 +2.854072 Temp b'+1:lemma:in'
252 +2.819779 Gtype b'-1:lemma:delta'
253 +2.816913 O b'+1:lemma:250'
254 +2.810671 O b'+1:lemma:od600'
255 +2.799953 O b'+1:lemma:mid-log'
256 +2.791088 Supp b'-1:lemma:vol'
257 +2.782754 Gversion b'lemma:u00096'
258 +2.782754 Gversion b'+1:lemma:.2'
259 +2.762169 O b'lemma:ml'
260 +2.714953 O b'+1:postag:NNP'
261 +2.698964 Gtype b'lemma:\xce\xb4oxyr'
262 +2.673462 Gtype b'-1:postag:VBG'
263 +2.672861 Med b'lemma:L'
264 +2.672861 Med b'+1:lemma:broth'
265 +2.665253 O b'+1:lemma:acetate'
266 +2.647014 Phase b'-1:lemma:mid-log'
267 +2.641349 Med b'lemma:m63'
268 +2.626973 Substrain b'+1:lemma:phtpg'
269 +2.622910 Gversion b'lemma:000913'
270 +2.605384 O b'lemma::'
271 +2.594999 pH b'+1:postag:CD'
272 +2.538219 O b'lemma:s'
273 +2.537207 Med b'lemma:broth'
274 +2.537207 Med b'-1:lemma:L'
275 +2.527821 pH b'lemma:ph5'
276 +2.527821 pH b'+1:lemma:.5'
277 +2.515586 O b'+1:lemma:coli'
278 +2.509864 Supp b'+1:lemma:1'
279 +2.508452 Med b'-1:lemma:fresh'
280 +2.494059 Supp b'lemma:fructose'
281 +2.475120 Temp b'lemma:43'
282 +2.463892 O b'-1:lemma:rpm'
283 +2.434009 Anti b'lemma:anti-myc'
284 +2.426928 O b'+1:postag:RB'
285 +2.422956 Gtype b'+1:lemma:_'
286 +2.396151 Med b'+1:lemma:2.0'
287 +2.362312 O b'lemma:condition'
288 +2.343697 O b'+1:lemma:or'
289 +2.310123 Med b'-1:lemma:glucose'
290 +2.304110 Temp b'lemma:\xc2\xb0c'
291 +2.250756 Gtype b'+1:postag::'
292 +2.243158 Med b'lemma:minimal'
293 +2.236131 Temp b'-1:lemma:37'
294 +2.232349 Temp b'+1:lemma:\xc2\xb0c'
295 +2.215545 Gtype b'lemma:deltaseqa'
296 +2.215545 Gtype b'-1:lemma:old'
297 +2.168823 O b'postag:NNS'
298 +2.142782 pH b'+1:lemma:5.5'
299 +2.133425 O b'lemma:at'
300 +2.127108 Gtype b'-1:lemma:factor'
301 +2.118652 O b'lemma:agitation'
302 +2.100263 Med b'+1:lemma:minimal'
303 +2.099378 Supp b'+1:lemma:min'
304 +2.099095 Phase b'lemma:phase'
305 +2.087589 Supp b'+1:lemma:and'
306 +2.072277 O b'+1:lemma:sparging'
307 +2.066002 Supp b'+1:lemma:Deficient'
308 +2.054354 O b'+1:lemma:anti-fur'
309 +2.053062 O b'lemma:genotype/variation'
310 +2.041875 O b'-1:lemma:l1'
311 +2.039194 Gtype b'-1:lemma::'
312 +2.031026 Supp b'-1:lemma:\xc2\xb5m'
313 +2.010199 O b'postag:DT'
314 +2.007491 O b'lemma:culture'
315 +2.000485 O b'postag:VBD'
316 +1.989764 OD b'lemma:0.3-0.35'
317 +1.982940 Supp b'-1:lemma:sodium'
318 +1.980905 Gversion b'postag:CD'
319 +
320 +
321 +Top negative:
322 +0.039807 OD b'+1:postag:,'
323 +0.039185 O b'lemma:um'
324 +0.035254 O b'+1:postag:JJ'
325 +0.030051 O b'+1:postag:SYM'
326 +0.027873 Supp b'-1:lemma:m'
327 +0.025396 O b"lemma:'s"
328 +0.025396 O b'postag:POS'
329 +0.025396 O b'-1:lemma:manufacturer'
330 +0.025396 O b'+1:lemma:instruction'
331 +0.024333 Supp b'-1:lemma:rifampicin'
332 +0.023672 O b'+1:lemma:more'
333 +0.023672 O b'+1:postag:JJR'
334 +0.017097 O b'lemma:final'
335 +0.014163 O b'+1:lemma:_'
336 +0.013275 O b'lemma:mm'
337 +0.012005 Anti b'lemma:subunit'
338 +0.012005 Anti b'+1:lemma:\xce\xb2'
339 +0.003898 Gtype b'lemma:Fur'
340 +0.002851 Gtype b'+1:postag:NNP'
341 +0.002190 O b'lemma:%'
342 +0.001946 Gtype b'lemma:transcription'
343 +0.001946 Gtype b'+1:lemma:factor'
344 +0.001569 O b'postag:VBZ'
345 +0.001233 O b'lemma:short'
346 +0.001233 O b'+1:lemma:rnase'
347 +0.000698 Air b'-1:lemma:anaerobically'
348 +0.000218 Med b'+1:postag:VBG'
349 +0.000187 Temp b'-1:lemma:control'
350 +0.000185 Gversion b'-1:lemma::'
351 +0.000177 Phase b'-1:postag::'
352 +0.000113 Supp b'+1:postag:RB'
353 +0.000102 O b'+1:lemma:37'
354 +0.000083 Supp b'lemma:ph'
355 +0.000058 Phase b'+1:lemma:.'
356 +0.000058 Phase b'+1:postag:.'
357 +0.000058 Supp b'-1:lemma:ph'
358 +0.000040 O b'-1:lemma:m63'
359 +0.000031 Gtype b'+1:lemma:,'
360 +0.000031 Gtype b'+1:postag:,'
361 +0.000023 O b'lemma:IP'
362 +0.000019 Supp b'lemma:300'
363 +0.000019 Supp b'+1:lemma:\xc2\xb5l'
364 +0.000009 Temp b'lemma:sample'
365 +0.000007 Temp b'-1:lemma:see'
366 +0.000007 Supp b'lemma:feso4'
367 +0.000004 O b'lemma:acetate'
368 +0.000002 O b'lemma:1:500'
369 +0.000002 O b'-1:lemma:back'
370 +0.000001 Temp b'lemma:control'
371 +-0.000014 O b'+1:lemma:7.6'
372 +-0.000065 O b'lemma:aerobic'
373 +-0.000130 O b'-1:lemma:rpob'
374 +-0.000160 Temp b'postag:JJ'
375 +-0.000216 O b'+1:lemma:contain'
376 +-0.000303 O b'-1:lemma:minimal'
377 +-0.000303 O b'+1:lemma:of'
378 +-0.000650 O b'+1:lemma:95'
379 +-0.002197 O b'-1:lemma:m'
380 +-0.002284 O b'-1:postag:JJ'
381 +-0.002326 O b'+1:lemma:,'
382 +-0.002326 O b'+1:postag:,'
383 +-0.003392 Gtype b'+1:postag:NN'
384 +-0.004717 O b'+1:lemma:fnr'
385 +-0.005411 Gversion b'+1:postag:NN'
386 +-0.005829 Air b'-1:postag:VBN'
387 +-0.010227 Supp b'+1:lemma:of'
388 +-0.011686 O b'+1:lemma:~'
389 +-0.011760 Gtype b'+1:postag:CD'
390 +-0.011855 O b'-1:lemma:5'
391 +-0.012174 O b'-1:lemma:.'
392 +-0.012174 O b'-1:postag:.'
393 +-0.014528 O b'lemma:20'
394 +-0.016459 Agit b'postag:NN'
395 +-0.020390 Technique b'-1:lemma::'
396 +-0.022295 O b'+1:lemma:-lrb-'
397 +-0.023051 O b'lemma:od600'
398 +-0.027871 Gtype b'postag:VBG'
399 +-0.033411 O b'-1:postag:-RRB-'
400 +-0.035506 O b'lemma:m63'
401 +-0.038491 Supp b'-1:lemma:10'
402 +-0.043161 O b'+1:lemma:%'
403 +-0.051673 Gtype b'-1:postag:NN'
404 +-0.053797 Supp b'lemma:mm'
405 +-0.055546 Air b'+1:postag:CD'
406 +-0.061075 O b'lemma:1m'
407 +-0.068387 O b'+1:lemma:for'
408 +-0.069911 O b'+1:lemma:-rrb-'
409 +-0.075578 O b'-1:lemma:37'
410 +-0.091100 O b'-1:lemma:of'
411 +-0.094169 O b'-1:lemma:until'
412 +-0.099146 Supp b'postag:CD'
413 +-0.114939 O b'+1:lemma:ph'
414 +-0.122454 O b'lemma:wt'
415 +-0.126051 O b'-1:postag:DT'
416 +-0.132413 Anti b'+1:postag:JJ'
417 +-0.132521 O b'-1:lemma:iptg'
418 +-0.138791 O b'-1:lemma:from'
419 +-0.142787 Supp b'+1:postag:-RRB-'
420 +-0.148089 Gtype b'postag:CD'
421 +-0.160312 O b'-1:lemma:-lrb-'
422 +-0.163006 O b'+1:postag:-LRB-'
423 +-0.163639 O b'lemma:medium'
424 +-0.171523 Supp b'+1:lemma:fructose'
425 +-0.175650 Air b'postag:CD'
426 +-0.176789 Gtype b'-1:lemma:mg1655'
427 +-0.209413 Supp b'lemma:10'
428 +-0.229904 Agit b'-1:postag:NN'
429 +-0.233855 Supp b'postag:NN'
430 +-0.254205 Supp b'-1:lemma:dpd'
431 +-0.270474 O b'-1:lemma:\xe2\x88\x86'
432 +-0.322402 O b'-1:lemma:1m'
433 +-0.327143 Supp b'postag:CC'
434 +-0.327477 O b'lemma:aerobically'
435 +-0.334210 Med b'-1:postag:CD'
436 +-0.337947 O b'lemma:mid-log'
437 +-0.351998 O b'lemma:minimal'
438 +-0.358452 Phase b'-1:lemma:at'
439 +-0.378720 O b'-1:lemma:final'
440 +-0.390763 Supp b'+1:lemma:dpd'
441 +-0.395432 Med b'postag:CD'
442 +-0.398943 O b'lemma:37'
443 +-0.413061 O b'+1:lemma:+'
444 +-0.428486 Gtype b'+1:lemma:-lrb-'
445 +-0.435167 O b'lemma:\xce\xb4fur'
446 +-0.445119 O b'-1:lemma:delta'
447 +-0.454315 O b'+1:lemma:.'
448 +-0.454315 O b'+1:postag:.'
449 +-0.455581 Supp b'-1:lemma:-lrb-'
450 +-0.463177 Phase b'+1:postag:NN'
451 +-0.463568 Med b'+1:postag:NN'
452 +-0.470619 Supp b'+1:postag:VBN'
453 +-0.474915 O b'-1:lemma:sample'
454 +-0.477379 Med b'-1:postag:NN'
455 +-0.481602 O b'lemma:nitrogen'
456 +-0.484766 O b'lemma:ph'
457 +-0.504002 O b'-1:postag:-LRB-'
458 +-0.509161 Supp b'-1:postag:-LRB-'
459 +-0.510610 Air b'-1:lemma:or'
460 +-0.519487 O b'+1:lemma:supplement'
461 +-0.524200 O b'-1:lemma:n2'
462 +-0.543542 O b'+1:postag:IN'
463 +-0.594418 pH b'postag:NN'
464 +-0.615404 O b'-1:postag:IN'
465 +-0.619607 O b'-1:lemma:cra'
466 +-0.656333 O b'-1:lemma:mm'
467 +-0.688334 O b'+1:postag:-RRB-'
468 +-0.710435 O b'-1:lemma:ml'
469 +-0.716844 O b'lemma:anaerobically'
470 +-0.730927 O b'+1:lemma:until'
471 +-0.747298 O b'lemma:media'
472 +-0.754688 O b'+1:lemma:mm'
473 +-0.768772 Med b'-1:postag:IN'
474 +-0.775892 Supp b'+1:lemma:acetate'
475 +-0.788215 Technique b'-1:postag::'
476 +-0.802700 O b'-1:lemma:co2'
477 +-0.820146 O b'lemma:methanol'
478 +-0.863797 Supp b'-1:postag:NNP'
479 +-0.897516 O b'+1:lemma:at'
480 +-0.907339 O b'+1:lemma:2.0'
481 +-0.918505 O b'-1:postag::'
482 +-0.945238 O b'-1:lemma:dissolve'
483 +-0.945238 O b'+1:lemma:methanol'
484 +-0.959379 Supp b'+1:lemma:rifampicin'
485 +-0.986744 O b'-1:lemma:nsrr'
486 +-0.993158 O b'+1:lemma:g/l'
487 +-1.104395 O b'lemma:2h'
488 +-1.104395 O b'-1:lemma:additional'
489 +-1.107657 O b'+1:lemma:1m'
490 +-1.131265 O b'+1:postag:VBG'
491 +-1.178961 O b'lemma:of'
492 +-1.201973 O b'-1:lemma:fresh'
493 +-1.209225 O b'postag:VBP'
494 +-1.212055 O b'-1:lemma:ph'
495 +-1.241153 Phase b'-1:postag:JJ'
496 +-1.329472 O b'lemma:30'
497 +-1.335303 Temp b'postag:NN'
498 +-1.335403 Supp b'+1:lemma:-lrb-'
499 +-1.347076 O b'-1:lemma:IP'
500 +-1.360555 Air b'+1:postag:JJ'
501 +-1.373057 Supp b'+1:postag:-LRB-'
502 +-1.494837 O b'-1:lemma:30'
503 +-1.531593 Anti b'postag:NNP'
504 +-1.575468 Phase b'postag:JJ'
505 +-1.599957 Temp b'+1:postag:IN'
506 +-1.732683 Air b'postag:NN'
507 +-1.747609 OD b'+1:postag:NN'
508 +-1.793039 O b'lemma:rifampicin'
509 +-1.853309 Supp b'postag:JJ'
510 +-1.930986 O b'+1:lemma:1'
511 +-1.993172 Supp b'+1:lemma:,'
512 +-1.993172 Supp b'+1:postag:,'
513 +-2.085464 O b'-1:lemma:2'
514 +-2.086023 O b'+1:lemma:in'
515 +-2.172547 O b'lemma:0.3'
516 +-2.181747 O b'-1:postag:VBG'
517 +-2.236719 O b'-1:lemma:1'
518 +-2.380060 O b'-1:lemma:vol'
519 +-2.454507 O b'+1:lemma:2'
520 +-4.266439 O b'-1:lemma:_'
521 +-4.856613 O b'-1:lemma::'
522 +
1 +********** TRAINING AND TESTING REPORT **********
2 +Training file: training-data-set-70.txt
3 +
4 +best params:{'c1': 0.09054223875215395, 'c2': 0.0023518182968725273}
5 +best CV score:0.8689308276485085
6 +model size: 0.08M
7 +
8 +Flat F1: 0.7754340877116509
9 + precision recall f1-score support
10 +
11 + OD 1.000 0.818 0.900 22
12 + pH 1.000 1.000 1.000 8
13 + Technique 1.000 0.913 0.955 23
14 + Med 1.000 0.943 0.971 53
15 + Temp 1.000 0.690 0.816 29
16 + Vess 1.000 1.000 1.000 1
17 + Agit 0.000 0.000 0.000 0
18 + Phase 0.882 1.000 0.938 15
19 + Air 0.543 0.362 0.435 69
20 + Anti 1.000 1.000 1.000 11
21 + Strain 0.000 0.000 0.000 1
22 + Gtype 0.868 0.776 0.820 85
23 + Substrain 0.000 0.000 0.000 0
24 + Supp 0.700 0.784 0.739 134
25 + Gversion 0.000 0.000 0.000 0
26 +
27 +avg / total 0.810 0.754 0.775 451
28 +
29 +
30 +Top likely transitions:
31 +Agit -> Agit 7.626632
32 +Temp -> Temp 7.045132
33 +Supp -> Supp 6.489590
34 +Med -> Med 6.142134
35 +Anti -> Anti 5.984670
36 +Phase -> Phase 5.518817
37 +OD -> OD 5.345555
38 +Gtype -> Gtype 5.176791
39 +Gversion -> Gversion 5.166638
40 +Air -> Air 4.592159
41 +O -> O 4.393244
42 +pH -> pH 3.826126
43 +Technique -> Technique 3.732727
44 +O -> Supp 1.143221
45 +O -> Technique 0.922279
46 +Air -> O 0.769652
47 +Substrain -> Gtype 0.641263
48 +O -> Gtype 0.629642
49 +Gtype -> Supp 0.604798
50 +O -> Temp 0.290538
51 +O -> Phase 0.232866
52 +Temp -> O 0.222973
53 +Supp -> O 0.152297
54 +Gtype -> Air 0.133118
55 +Phase -> O 0.090499
56 +O -> Anti 0.049867
57 +Strain -> O 0.017575
58 +Phase -> Air 0.001750
59 +O -> pH 0.000507
60 +Med -> O 0.000355
61 +pH -> O 0.000352
62 +Air -> Agit -0.001348
63 +Phase -> OD -0.008421
64 +Air -> Supp -0.050545
65 +Agit -> Air -0.052125
66 +Anti -> O -0.059732
67 +OD -> O -0.120587
68 +Technique -> pH -0.229765
69 +O -> Med -0.258685
70 +Technique -> OD -0.288848
71 +Air -> Phase -0.339706
72 +O -> OD -0.391929
73 +Gtype -> Med -0.529268
74 +Air -> Temp -0.898446
75 +Supp -> Med -0.919358
76 +Technique -> O -1.026211
77 +OD -> Air -1.149284
78 +O -> Air -1.334781
79 +Gtype -> O -1.407268
80 +Med -> Supp -1.495850
81 +
82 +
83 +Top unlikely transitions:
84 +Temp -> Temp 7.045132
85 +Supp -> Supp 6.489590
86 +Med -> Med 6.142134
87 +Anti -> Anti 5.984670
88 +Phase -> Phase 5.518817
89 +OD -> OD 5.345555
90 +Gtype -> Gtype 5.176791
91 +Gversion -> Gversion 5.166638
92 +Air -> Air 4.592159
93 +O -> O 4.393244
94 +pH -> pH 3.826126
95 +Technique -> Technique 3.732727
96 +O -> Supp 1.143221
97 +O -> Technique 0.922279
98 +Air -> O 0.769652
99 +Substrain -> Gtype 0.641263
100 +O -> Gtype 0.629642
101 +Gtype -> Supp 0.604798
102 +O -> Temp 0.290538
103 +O -> Phase 0.232866
104 +Temp -> O 0.222973
105 +Supp -> O 0.152297
106 +Gtype -> Air 0.133118
107 +Phase -> O 0.090499
108 +O -> Anti 0.049867
109 +Strain -> O 0.017575
110 +Phase -> Air 0.001750
111 +O -> pH 0.000507
112 +Med -> O 0.000355
113 +pH -> O 0.000352
114 +Air -> Agit -0.001348
115 +Phase -> OD -0.008421
116 +Air -> Supp -0.050545
117 +Agit -> Air -0.052125
118 +Anti -> O -0.059732
119 +OD -> O -0.120587
120 +Technique -> pH -0.229765
121 +O -> Med -0.258685
122 +Technique -> OD -0.288848
123 +Air -> Phase -0.339706
124 +O -> OD -0.391929
125 +Gtype -> Med -0.529268
126 +Air -> Temp -0.898446
127 +Supp -> Med -0.919358
128 +Technique -> O -1.026211
129 +OD -> Air -1.149284
130 +O -> Air -1.334781
131 +Gtype -> O -1.407268
132 +Med -> Supp -1.495850
133 +Substrain -> O -2.598977
134 +
135 +
136 +Top positive:
137 +10.185301 Technique b'lemma[:2]:Ch'
138 +8.021086 O b'-1:lemma:tag'
139 +6.996653 Gtype b'lemma[:1]:\xce\xb4'
140 +6.769097 O b'lemma:2'
141 +6.193177 O b'lemma:1'
142 +6.092689 Air b'lemma:anaerobic'
143 +5.980202 Phase b'lemma:stationary'
144 +5.955259 Supp b'+1:lemma:\xc2\xb5m'
145 +5.876491 O b'lemma:3'
146 +5.874142 O b'lemma:with'
147 +5.804642 O b'lemma:-'
148 +5.578233 Supp b'lemma:Iron'
149 +5.578233 Supp b'lemma[:2]:Ir'
150 +5.487985 O b'lemma[:2]:re'
151 +5.341246 Temp b'+1:lemma:in'
152 +5.328334 Supp b'-1:lemma:Cra'
153 +5.243720 Strain b'+1:lemma:substr'
154 +5.241759 Phase b'lemma:mid-log'
155 +5.207769 Substrain b'lemma[:2]:mg'
156 +5.202431 O b'lemma:b'
157 +5.093704 Supp b'-1:lemma:+'
158 +4.978877 Supp b'-1:lemma:vol'
159 +4.960498 O b'lemma:_'
160 +4.960498 O b'lemma[:1]:_'
161 +4.785889 Gtype b'-1:lemma:\xe2\x88\x86'
162 +4.773191 Technique b'lemma:chipseq'
163 +4.718733 O b'-1:lemma:lb'
164 +4.679807 O b'lemma:a'
165 +4.628908 Gversion b'-1:lemma:nc'
166 +4.467825 O b'+1:lemma:pq'
167 +4.445709 Air b'-1:lemma:ChIP-Seq'
168 +4.378030 O b'-1:lemma:glucose'
169 +4.329558 Air b'lemma[:2]:ae'
170 +4.328211 O b'lemma:rpob'
171 +4.198740 O b'lemma:delta'
172 +4.173979 Med b'-1:lemma:ml'
173 +4.095801 Supp b'lemma:acetate'
174 +4.022526 O b'lemma:Custom'
175 +4.022526 O b'lemma[:2]:Cu'
176 +4.014705 O b'-1:lemma:0.3'
177 +4.002057 Supp b'-1:lemma:with'
178 +3.969838 Med b'+1:lemma:0.4'
179 +3.874806 Supp b'lemma:pq'
180 +3.874806 Supp b'lemma[:2]:pq'
181 +3.827929 Gtype b'lemma:type'
182 +3.827929 Gtype b'lemma[:2]:ty'
183 +3.798612 O b'lemma:chip'
184 +3.738737 Supp b'lemma:arginine'
185 +3.738664 O b'lemma:ompr'
186 +3.673537 Med b'+1:lemma:2.0'
187 +3.658221 Agit b'+1:lemma:rpm'
188 +3.493765 O b'-1:lemma:into'
189 +3.452810 Air b'lemma:Aerobic'
190 +3.452810 Air b'lemma[:2]:Ae'
191 +3.452043 Temp b'-1:lemma:\xcf\x8332'
192 +3.425411 Temp b'-1:lemma:sample'
193 +3.422615 Gversion b'lemma:chip-seq'
194 +3.364562 Gtype b'lemma:fnr8myc'
195 +3.351216 O b'-1:lemma:type'
196 +3.349721 Gtype b'lemma:arca8myc'
197 +3.319200 O b'+1:lemma:od600'
198 +3.296763 Gtype b'lemma[:2]:pk'
199 +3.278704 O b'-1:lemma:Aerobic'
200 +3.266869 O b'+1:lemma:mid-log'
201 +3.263095 Gtype b'lemma[:2]:cr'
202 +3.249020 Supp b'lemma:rifampicin'
203 +3.239748 O b'+1:lemma:sparging'
204 +3.237458 Supp b'lemma[:2]:ri'
205 +3.210269 O b'-1:lemma:0.3-0.35'
206 +3.205436 Technique b'lemma[:2]:rn'
207 +3.180683 O b'lemma[:1]:h'
208 +3.156042 Phase b'lemma[:2]:ex'
209 +3.140201 Air b'lemma:aerobic'
210 +3.137123 O b'+1:lemma:250'
211 +3.129608 Anti b'lemma[:2]:an'
212 +3.121981 Supp b'-1:lemma:final'
213 +3.117574 O b'lemma[:2]:ge'
214 +3.100954 Air b'-1:lemma:-'
215 +3.095232 Technique b'-1:lemma:input'
216 +3.095018 Med b'-1:lemma:fresh'
217 +3.052910 O b'lemma:.'
218 +3.052910 O b'postag:.'
219 +3.052910 O b'postag[:1]:.'
220 +3.041093 Technique b'lemma[:2]:ch'
221 +3.027376 Supp b'+1:lemma:1'
222 +3.017384 Phase b'lemma:exponential'
223 +3.011507 Gtype b'lemma[:1]:W'
224 +3.000727 Supp b'lemma[:2]:gl'
225 +3.000345 Med b'lemma:MOPS'
226 +3.000345 Med b'lemma[:1]:M'
227 +3.000345 Med b'lemma[:2]:MO'
228 +2.946850 Anti b'lemma:none'
229 +2.913481 Supp b'lemma:fructose'
230 +2.906897 Gtype b'lemma:flag-tag'
231 +2.906897 Gtype b'-1:lemma:c-terminal'
232 +2.891578 Phase b'-1:lemma:until'
233 +2.885138 O b'lemma:n'
234 +2.870143 O b'+1:lemma:43'
235 +2.866970 O b'+1:postag:RB'
236 +2.866043 Gtype b'+1:lemma::'
237 +2.843685 O b'postag::'
238 +2.843685 O b'postag[:1]::'
239 +2.823299 Air b'lemma[:2]:an'
240 +2.803059 Gtype b'lemma[:1]:w'
241 +2.801040 Gtype b'lemma[:2]:de'
242 +2.793153 Gtype b'+1:lemma:flagtag'
243 +2.782454 Gversion b'lemma:asm584v2'
244 +2.771209 Gversion b'lemma[:2]:as'
245 +2.728848 O b'+1:postag:NNP'
246 +2.710278 O b'-1:lemma:ChIP-exo'
247 +2.685529 O b'+1:lemma:or'
248 +2.672360 Med b'lemma:broth'
249 +2.672360 Med b'-1:lemma:L'
250 +2.672360 Med b'lemma[:2]:br'
251 +2.653066 Gtype b'lemma:nsrr'
252 +2.653066 Gtype b'lemma[:2]:ns'
253 +2.644949 Gtype b'lemma:wt'
254 +2.644949 Gtype b'lemma[:2]:wt'
255 +2.643481 O b'+1:lemma:acetate'
256 +2.640438 Gtype b'lemma[:2]:ar'
257 +2.625663 Gversion b'lemma:nc'
258 +2.625663 Gversion b'lemma[:2]:nc'
259 +2.605563 Anti b'+1:lemma:antibody'
260 +2.591652 Technique b'-1:lemma:chip-exo'
261 +2.575198 O b'lemma[:2]:fo'
262 +2.565649 Gtype b'-1:lemma:vector'
263 +2.556891 Gtype b'-1:postag:VBG'
264 +2.531858 Substrain b'+1:lemma:phtpg'
265 +2.530034 O b'lemma:0.4'
266 +2.515928 OD b'lemma:od450'
267 +2.506637 O b'lemma[:1]:C'
268 +2.500720 Supp b'lemma:sodium'
269 +2.401085 O b'+1:lemma:coli'
270 +2.393881 Supp b'+1:lemma:phosphate'
271 +2.376464 O b'-1:postag:NNS'
272 +2.374397 Temp b'-1:lemma:43'
273 +2.366322 Supp b'+1:lemma:_'
274 +2.344507 Supp b'lemma[:2]:ni'
275 +2.318294 O b'lemma[:2]:Cr'
276 +2.313838 O b'lemma[:2]:om'
277 +2.312761 O b'-1:lemma:anaerobic'
278 +2.297530 Gtype b'-1:lemma:rpob'
279 +2.286218 O b'-1:lemma:phase'
280 +2.285928 Gversion b'lemma[:2]:00'
281 +2.270036 Supp b'lemma:iptg'
282 +2.264276 Supp b'lemma[:1]:I'
283 +2.248929 O b'+1:lemma:30'
284 +2.245626 O b'lemma:oxyr'
285 +2.240334 O b'lemma:Cra'
286 +2.237370 Gtype b'-1:lemma:_'
287 +2.220905 Med b'lemma:L'
288 +2.220905 Med b'+1:lemma:broth'
289 +2.217317 Supp b'lemma:nacl'
290 +2.217317 Supp b'lemma[:2]:na'
291 +2.191402 Temp b'lemma:43'
292 +2.191402 Temp b'lemma[:2]:43'
293 +2.169633 Supp b'lemma:Fe'
294 +2.169633 Supp b'lemma[:2]:Fe'
295 +2.169099 pH b'lemma[:2]:ph'
296 +2.167053 O b'lemma:ml'
297 +2.167053 O b'lemma[:2]:ml'
298 +2.163570 Gtype b'lemma:dfnr'
299 +2.163570 Gtype b'lemma[:2]:df'
300 +2.115282 OD b'lemma[:1]:o'
301 +2.113233 OD b'lemma:0.3-0.35'
302 +2.096796 Supp b'lemma:no3'
303 +2.092494 O b'-1:lemma:aerobically'
304 +2.080983 Phase b'lemma[:1]:e'
305 +2.077238 Anti b'-1:lemma::'
306 +2.069956 O b'lemma[:2]:ha'
307 +2.068054 Supp b'lemma:dpd'
308 +2.068054 Supp b'lemma[:2]:dp'
309 +2.065958 Supp b'lemma[:2]:ac'
310 +2.065024 Gtype b'+1:lemma:type'
311 +2.034166 O b'+1:lemma:nitrate'
312 +2.033879 Gtype b'lemma[:1]:f'
313 +2.029691 Gtype b'-1:lemma:knock-out'
314 +2.028831 pH b'+1:postag:CD'
315 +2.022928 Gtype b'+1:lemma:_'
316 +1.993507 Supp b'lemma:Leu'
317 +1.993507 Supp b'lemma[:2]:Le'
318 +1.979381 O b'-1:lemma:stpa'
319 +1.969036 Technique b'lemma[:1]:C'
320 +1.953152 Technique b'+1:lemma:chip-exo'
321 +1.940011 Air b'+1:postag:IN'
322 +1.924352 Supp b'lemma[:2]:30'
323 +1.903191 O b'lemma:purr'
324 +1.902904 Air b'lemma[:1]:a'
325 +1.895935 O b'lemma:A'
326 +1.889757 Gtype b'+1:lemma:with'
327 +1.884874 O b'+1:lemma:dfnr'
328 +1.875072 Supp b'-1:lemma:30'
329 +1.870747 Supp b'-1:lemma::'
330 +1.864656 Technique b'-1:lemma:rna-seq'
331 +1.858198 Gtype b'lemma[:1]:t'
332 +1.836001 O b'lemma[:2]:in'
333 +1.821777 pH b'lemma:ph5'
334 +1.821777 pH b'+1:lemma:.5'
335 +1.813827 Supp b'lemma:Adenine'
336 +1.813827 Supp b'lemma[:2]:Ad'
337 +
338 +
339 +Top negative:
340 +-0.122503 O b'+1:lemma:vol'
341 +-0.122503 O b'lemma[:2]:1/'
342 +-0.125682 Supp b'+1:postag:NNS'
343 +-0.128364 O b'lemma[:1]:5'
344 +-0.128783 Med b'postag[:1]:C'
345 +-0.131277 Supp b'postag:CD'
346 +-0.131277 Supp b'postag[:2]:CD'
347 +-0.131348 Supp b'-1:postag::'
348 +-0.133721 Gtype b'lemma:-lrb-'
349 +-0.135824 Phase b'lemma[:2]:pa'
350 +-0.136598 O b'lemma:grow'
351 +-0.140727 Air b'+1:lemma:-lrb-'
352 +-0.148264 O b'lemma:ph'
353 +-0.150736 O b'+1:lemma:ph'
354 +-0.159027 O b'+1:postag:CD'
355 +-0.160597 OD b'+1:lemma:0.4'
356 +-0.165913 O b'+1:lemma:phosphate'
357 +-0.169625 Air b'-1:postag:CC'
358 +-0.171481 OD b'postag[:1]:N'
359 +-0.171481 OD b'postag[:2]:NN'
360 +-0.171501 O b'lemma:1m'
361 +-0.171501 O b'lemma[:2]:1m'
362 +-0.176117 Supp b'-1:lemma:.'
363 +-0.176117 Supp b'-1:postag:.'
364 +-0.177236 Temp b'postag[:1]:N'
365 +-0.177236 Temp b'postag[:2]:NN'
366 +-0.184780 Supp b'+1:postag:-RRB-'
367 +-0.189482 Gversion b'+1:postag:NN'
368 +-0.191580 O b'+1:lemma:1/100'
369 +-0.194798 Med b'lemma[:1]:c'
370 +-0.201698 Gtype b'+1:postag:CD'
371 +-0.208826 O b'lemma:150'
372 +-0.208826 O b'+1:lemma:mg/ml'
373 +-0.208826 O b'lemma[:2]:15'
374 +-0.209680 O b'+1:lemma:supplement'
375 +-0.211723 O b'lemma[:1]:p'
376 +-0.211762 O b'-1:lemma:g/l'
377 +-0.218211 O b'lemma:30'
378 +-0.218501 Temp b'-1:lemma:\xc2\xb0c'
379 +-0.223383 O b'lemma:7.6'
380 +-0.223383 O b'+1:lemma:;'
381 +-0.223383 O b'lemma[:2]:7.'
382 +-0.230315 O b'-1:postag:-LRB-'
383 +-0.234210 Air b'-1:postag:VBN'
384 +-0.234317 Air b'+1:postag:-LRB-'
385 +-0.238132 O b'lemma:\xe2\x88\x86'
386 +-0.238132 O b'lemma[:1]:\xe2\x88\x86'
387 +-0.239813 O b'lemma[:2]:20'
388 +-0.243759 Supp b'lemma[:2]:ph'
389 +-0.268591 O b'-1:lemma:-lrb-'
390 +-0.273129 O b'+1:lemma:.'
391 +-0.273129 O b'+1:postag:.'
392 +-0.274515 Gtype b'-1:postag:NN'
393 +-0.276640 O b'lemma:aerobic'
394 +-0.280353 pH b'postag[:1]:N'
395 +-0.280353 pH b'postag[:2]:NN'
396 +-0.281176 O b'+1:lemma:co2'
397 +-0.284309 OD b'hUpper'
398 +-0.284309 OD b'hLower'
399 +-0.299607 O b'+1:lemma:rep2'
400 +-0.305286 Supp b'+1:postag::'
401 +-0.305442 O b'-1:lemma:1m'
402 +-0.307656 O b'lemma[:2]:an'
403 +-0.311346 Supp b'lemma[:1]:s'
404 +-0.313198 O b'-1:lemma:IP'
405 +-0.327309 O b'+1:lemma:1m'
406 +-0.328461 Med b'+1:postag:NN'
407 +-0.331739 Supp b'-1:lemma:dpd'
408 +-0.332765 O b'-1:lemma:n2'
409 +-0.333171 Supp b'postag[:1]:C'
410 +-0.334484 O b'+1:lemma:-rrb-'
411 +-0.335881 Technique b'postag:NN'
412 +-0.336835 Supp b'-1:lemma:%'
413 +-0.343844 O b'lemma:37'
414 +-0.343844 O b'lemma[:2]:37'
415 +-0.352890 O b'lemma:co2'
416 +-0.366891 O b'-1:lemma:rpob'
417 +-0.372918 O b'-1:lemma:of'
418 +-0.379129 O b'lemma[:1]:k'
419 +-0.385300 Supp b'lemma:10'
420 +-0.411833 O b'lemma[:2]:od'
421 +-0.414482 O b'lemma:20'
422 +-0.419047 Supp b'-1:lemma:-lrb-'
423 +-0.423807 Air b'postag:NN'
424 +-0.429435 O b'lemma:anaerobically'
425 +-0.430690 Med b'-1:postag:NN'
426 +-0.435460 Gtype b'postag[:1]:V'
427 +-0.435460 Gtype b'postag[:2]:VB'
428 +-0.436108 O b'-1:lemma:with'
429 +-0.436725 Supp b'postag:JJ'
430 +-0.448963 O b'-1:lemma:from'
431 +-0.449583 Supp b'postag[:1]:J'
432 +-0.449583 Supp b'postag[:2]:JJ'
433 +-0.452505 O b'lemma:wt'
434 +-0.452505 O b'lemma[:2]:wt'
435 +-0.456297 Med b'+1:postag:IN'
436 +-0.460912 O b'lemma:of'
437 +-0.460912 O b'lemma[:2]:of'
438 +-0.464084 O b'lemma[:2]:ph'
439 +-0.476123 Phase b'+1:postag:NN'
440 +-0.478646 O b'-1:lemma:37'
441 +-0.479822 O b'lemma[:2]:mg'
442 +-0.490640 O b'lemma:glucose'
443 +-0.494749 O b'-1:lemma:mm'
444 +-0.513617 O b'+1:postag:IN'
445 +-0.520527 O b'+1:lemma:2.0'
446 +-0.524819 Phase b'+1:lemma:pahse'
447 +-0.528372 O b'lemma:media'
448 +-0.534673 Temp b'postag:NN'
449 +-0.544692 Air b'-1:lemma:or'
450 +-0.551287 Air b'-1:postag:JJ'
451 +-0.559988 O b'-1:lemma:\xe2\x88\x86'
452 +-0.572775 Air b'+1:postag:JJ'
453 +-0.575164 O b'+1:postag:VBG'
454 +-0.579204 Air b'postag[:1]:N'
455 +-0.579204 Air b'postag[:2]:NN'
456 +-0.579634 Supp b'+1:lemma:fructose'
457 +-0.613266 O b'lemma[:2]:ae'
458 +-0.626767 Anti b'symb'
459 +-0.633791 Med b'+1:postag:NNS'
460 +-0.643773 O b'+1:lemma:+'
461 +-0.653623 O b'lemma[:2]:me'
462 +-0.686630 O b'-1:lemma:delta'
463 +-0.691758 Phase b'postag[:1]:J'
464 +-0.691758 Phase b'postag[:2]:JJ'
465 +-0.695582 Gtype b'lemma[:1]:c'
466 +-0.707615 Agit b'symb'
467 +-0.710847 O b'lemma[:1]:n'
468 +-0.719133 O b'lemma:mid-log'
469 +-0.720783 O b'lemma[:2]:0.'
470 +-0.727230 O b'lemma:0.3'
471 +-0.730648 Supp b'-1:postag:-LRB-'
472 +-0.737779 Gtype b'lemma[:1]:r'
473 +-0.743565 Med b'-1:postag:CD'
474 +-0.750347 O b'+1:lemma:for'
475 +-0.760505 Air b'symb'
476 +-0.775646 O b'lemma[:2]:30'
477 +-0.777433 O b'lemma:2h'
478 +-0.777433 O b'-1:lemma:additional'
479 +-0.777433 O b'lemma[:2]:2h'
480 +-0.811526 O b'-1:lemma:final'
481 +-0.812240 O b'lemma[:1]:0'
482 +-0.816482 Gtype b'-1:lemma:-lrb-'
483 +-0.821802 O b'-1:lemma:30'
484 +-0.824390 O b'lemma[:2]:gl'
485 +-0.845131 Supp b'lemma[:1]:a'
486 +-0.845884 O b'lemma:methanol'
487 +-0.854110 Technique b'-1:postag::'
488 +-0.866570 O b'+1:lemma:until'
489 +-0.885272 Supp b'+1:lemma:rifampicin'
490 +-0.885699 O b'-1:lemma:nsrr'
491 +-0.888698 O b'-1:lemma:cra'
492 +-0.904476 O b'+1:lemma:at'
493 +-0.932299 Med b'symb'
494 +-0.942332 O b'-1:lemma:co2'
495 +-0.962304 O b'+1:lemma:g/l'
496 +-0.962951 O b'+1:lemma:mm'
497 +-0.972177 O b'lemma:soxs-8myc'
498 +-0.976305 O b'-1:lemma:dissolve'
499 +-0.976305 O b'+1:lemma:methanol'
500 +-1.026438 O b'lemma[:2]:ar'
501 +-1.071623 Gtype b'lemma[:1]:a'
502 +-1.074211 Gtype b'+1:lemma:-rrb-'
503 +-1.085129 Phase b'postag:JJ'
504 +-1.096666 O b'-1:lemma:ph'
505 +-1.111621 Supp b'-1:postag:NNP'
506 +-1.112692 Phase b'hUpper'
507 +-1.112692 Phase b'hLower'
508 +-1.151925 Anti b'postag:NNP'
509 +-1.166449 Supp b'symb'
510 +-1.186999 Phase b'-1:postag:JJ'
511 +-1.188170 O b'-1:lemma:until'
512 +-1.246650 Agit b'hUpper'
513 +-1.246650 Agit b'hLower'
514 +-1.262971 O b'+1:lemma:in'
515 +-1.269087 O b'-1:lemma:ml'
516 +-1.380405 OD b'+1:postag:NN'
517 +-1.438101 Supp b'+1:postag:VBN'
518 +-1.535115 Supp b'hGreek'
519 +-1.573856 Supp b'+1:lemma:-lrb-'
520 +-1.599754 Supp b'+1:postag:-LRB-'
521 +-1.625658 O b'-1:postag:VBG'
522 +-1.641325 O b'-1:postag::'
523 +-1.657440 O b'lemma[:2]:ri'
524 +-1.696650 O b'-1:lemma:1'
525 +-1.882679 O b'+1:lemma:1'
526 +-1.947502 O b'lemma:rifampicin'
527 +-2.109295 O b'-1:lemma:sample'
528 +-2.179742 O b'-1:lemma:fresh'
529 +-2.704413 Supp b'+1:lemma:,'
530 +-2.704413 Supp b'+1:postag:,'
531 +-2.995590 Supp b'lemma[:1]:c'
532 +-3.044999 O b'+1:lemma:2'
533 +-3.183494 O b'lemma[:2]:fl'
534 +-3.486906 O b'-1:lemma:vol'
535 +-3.568659 O b'-1:lemma:2'
536 +-3.575405 Temp b'+1:postag:IN'
537 +-4.334558 O b'postag:VBP'
538 +-5.276604 O b'-1:lemma:_'
539 +-5.933765 O b'-1:lemma::'
540 +
1 +********** TRAINING AND TESTING REPORT **********
2 +Training file: training-data-set-70.txt
3 +
4 +best params:{'c1': 0.11486416926458794, 'c2': 0.02680674100354409}
5 +best CV score:0.8644252350498997
6 +model size: 0.12M
7 +
8 +Flat F1: 0.7889552451646998
9 + precision recall f1-score support
10 +
11 + OD 1.000 0.818 0.900 22
12 + pH 1.000 1.000 1.000 8
13 + Technique 0.955 0.913 0.933 23
14 + Med 1.000 0.943 0.971 53
15 + Temp 1.000 0.690 0.816 29
16 + Vess 1.000 1.000 1.000 1
17 + Agit 0.000 0.000 0.000 0
18 + Phase 0.875 0.933 0.903 15
19 + Air 0.556 0.362 0.439 69
20 + Anti 0.579 1.000 0.733 11
21 + Strain 0.000 0.000 0.000 1
22 + Gtype 0.877 0.753 0.810 85
23 + Substrain 0.000 0.000 0.000 0
24 + Supp 0.848 0.791 0.819 134
25 + Gversion 0.000 0.000 0.000 0
26 +
27 +avg / total 0.845 0.749 0.789 451
28 +
29 +
30 +Top likely transitions:
31 +Temp -> Temp 5.434090
32 +Agit -> Agit 5.083855
33 +Anti -> Anti 5.066839
34 +OD -> OD 4.944534
35 +Supp -> Supp 4.921053
36 +Med -> Med 4.814443
37 +Gtype -> Gtype 4.724645
38 +Gversion -> Gversion 4.371417
39 +O -> O 4.134917
40 +Phase -> Phase 4.076535
41 +Air -> Air 4.013049
42 +Technique -> Technique 3.791769
43 +pH -> pH 2.691203
44 +Substrain -> Gtype 1.373548
45 +Gtype -> Supp 1.262860
46 +O -> Gtype 1.231509
47 +O -> Technique 1.057715
48 +O -> Supp 0.927276
49 +Air -> O 0.926745
50 +Gtype -> pH 0.675182
51 +Gtype -> Air 0.618804
52 +Technique -> Air 0.558242
53 +Supp -> O 0.412087
54 +Temp -> O 0.355896
55 +O -> Temp 0.211297
56 +Med -> O 0.203763
57 +O -> Anti 0.188101
58 +Strain -> O 0.110948
59 +O -> Phase 0.067802
60 +O -> OD 0.026887
61 +Phase -> O -0.000033
62 +OD -> O -0.000624
63 +O -> Agit -0.005237
64 +Anti -> O -0.013260
65 +Technique -> OD -0.015560
66 +Supp -> Air -0.017152
67 +Air -> Supp -0.041258
68 +Technique -> O -0.086710
69 +Agit -> O -0.090919
70 +Air -> Med -0.201293
71 +O -> Med -0.213366
72 +Air -> Phase -0.347298
73 +Air -> Temp -0.357907
74 +Phase -> Supp -0.382883
75 +OD -> Air -0.397558
76 +Phase -> OD -0.415065
77 +Gtype -> Anti -0.434583
78 +Gtype -> Med -0.515634
79 +Technique -> pH -0.532638
80 +Gtype -> O -0.589161
81 +
82 +
83 +Top unlikely transitions:
84 +Med -> Med 4.814443
85 +Gtype -> Gtype 4.724645
86 +Gversion -> Gversion 4.371417
87 +O -> O 4.134917
88 +Phase -> Phase 4.076535
89 +Air -> Air 4.013049
90 +Technique -> Technique 3.791769
91 +pH -> pH 2.691203
92 +Substrain -> Gtype 1.373548
93 +Gtype -> Supp 1.262860
94 +O -> Gtype 1.231509
95 +O -> Technique 1.057715
96 +O -> Supp 0.927276
97 +Air -> O 0.926745
98 +Gtype -> pH 0.675182
99 +Gtype -> Air 0.618804
100 +Technique -> Air 0.558242
101 +Supp -> O 0.412087
102 +Temp -> O 0.355896
103 +O -> Temp 0.211297
104 +Med -> O 0.203763
105 +O -> Anti 0.188101
106 +Strain -> O 0.110948
107 +O -> Phase 0.067802
108 +O -> OD 0.026887
109 +Phase -> O -0.000033
110 +OD -> O -0.000624
111 +O -> Agit -0.005237
112 +Anti -> O -0.013260
113 +Technique -> OD -0.015560
114 +Supp -> Air -0.017152
115 +Air -> Supp -0.041258
116 +Technique -> O -0.086710
117 +Agit -> O -0.090919
118 +Air -> Med -0.201293
119 +O -> Med -0.213366
120 +Air -> Phase -0.347298
121 +Air -> Temp -0.357907
122 +Phase -> Supp -0.382883
123 +OD -> Air -0.397558
124 +Phase -> OD -0.415065
125 +Gtype -> Anti -0.434583
126 +Gtype -> Med -0.515634
127 +Technique -> pH -0.532638
128 +Gtype -> O -0.589161
129 +O -> Air -0.833200
130 +Supp -> Med -0.863491
131 +Technique -> Gtype -0.999961
132 +Substrain -> O -1.154679
133 +Med -> Supp -1.746454
134 +
135 +
136 +Top positive:
137 +5.242070 Air b'word:Aerobic'
138 +4.810824 O b'lemma:_'
139 +4.810824 O b'word:_'
140 +4.702604 Air b'lemma:anaerobic'
141 +4.379183 O b'word:Cra'
142 +3.811896 Technique b'word:ChIP-Seq'
143 +3.692913 Gtype b'lemma:wild-type'
144 +3.658772 Air b'postag:RB'
145 +3.551778 O b'postag:IN'
146 +3.496331 O b'-1:lemma:ChIP-exo'
147 +3.382322 Gtype b'lemma:wt'
148 +3.353140 O b'postag::'
149 +3.300897 Air b'word:Anaerobic'
150 +3.260825 Technique b'lemma:ChIP-exo'
151 +3.157424 Technique b'word:ChIP-exo'
152 +3.138715 Supp b'lemma:Iron'
153 +3.138715 Supp b'word:Iron'
154 +3.138715 Supp b'+1:word:Deficient'
155 +3.135138 Gtype b'word:WT'
156 +3.125987 Technique b'word:ChIPSeq'
157 +3.117161 Supp b'lemma:nh4cl'
158 +3.113447 Supp b'lemma:pq'
159 +3.113447 Supp b'word:PQ'
160 +3.089898 O b'lemma:2'
161 +3.089898 O b'word:2'
162 +2.955558 Technique b'lemma:rna-seq'
163 +2.907748 Supp b'-1:word:Cra'
164 +2.858859 O b'lemma:-'
165 +2.858859 O b'word:-'
166 +2.779905 O b'lemma:1'
167 +2.779905 O b'word:1'
168 +2.749191 O b'postag:CC'
169 +2.719373 O b'lemma:rpob'
170 +2.719373 O b'word:RpoB'
171 +2.681464 O b'lemma:3'
172 +2.681464 O b'word:3'
173 +2.680679 Gversion b'lemma:asm584v2'
174 +2.680679 Gversion b'word:ASM584v2'
175 +2.675651 Phase b'lemma:mid-log'
176 +2.675651 Phase b'word:mid-log'
177 +2.644118 O b'lemma:chip'
178 +2.643576 Strain b'+1:lemma:substr'
179 +2.643576 Strain b'+1:word:substr'
180 +2.573762 Gtype b'lemma:\xce\xb4cra'
181 +2.570718 Gtype b'word:\xce\x94cra'
182 +2.569025 Med b'lemma:MOPS'
183 +2.569025 Med b'word:MOPS'
184 +2.564545 Supp b'+1:lemma:\xc2\xb5m'
185 +2.564545 Supp b'+1:word:\xc2\xb5M'
186 +2.540412 O b'lemma:.'
187 +2.540412 O b'postag:.'
188 +2.540412 O b'word:.'
189 +2.523030 O b'-1:word:tag'
190 +2.521842 Gtype b'lemma:type'
191 +2.521842 Gtype b'word:type'
192 +2.501449 Technique b'lemma:chipseq'
193 +2.486934 O b'lemma:Custom'
194 +2.486934 O b'word:Custom'
195 +2.456959 Gtype b'+1:lemma:type'
196 +2.456959 Gtype b'+1:word:type'
197 +2.442001 O b'lemma:a'
198 +2.306156 Phase b'lemma:exponential'
199 +2.306156 Phase b'word:exponential'
200 +2.306156 Phase b'lemma:stationary'
201 +2.306156 Phase b'word:stationary'
202 +2.278404 O b'-1:lemma:tag'
203 +2.248178 O b'lemma:b'
204 +2.248178 O b'word:B'
205 +2.236935 Air b'word:anaerobic'
206 +2.220325 Supp b'lemma:arginine'
207 +2.200862 pH b'+1:postag:CD'
208 +2.198724 Supp b'lemma:glucose'
209 +2.198724 Supp b'word:glucose'
210 +2.169027 O b'+1:postag:RB'
211 +2.146306 Supp b'lemma:nacl'
212 +2.146306 Supp b'word:NaCl'
213 +2.141305 O b'lemma:ompr'
214 +2.141305 O b'word:OmpR'
215 +2.134893 Gtype b'lemma:flag-tag'
216 +2.134893 Gtype b'-1:lemma:c-terminal'
217 +2.134893 Gtype b'word:Flag-tag'
218 +2.134893 Gtype b'-1:word:C-terminal'
219 +2.115770 Gversion b'lemma:nc'
220 +2.115770 Gversion b'word:NC'
221 +2.102355 Substrain b'lemma:mg1655'
222 +2.102355 Substrain b'word:MG1655'
223 +2.096735 O b'+1:lemma:pq'
224 +2.096735 O b'+1:word:PQ'
225 +2.076016 Gtype b'postag:-LRB-'
226 +2.067392 O b'-1:word:Aerobic'
227 +2.067015 Gtype b'lemma:delta-fnr'
228 +2.067015 Gtype b'word:delta-fnr'
229 +2.020747 O b'word:A'
230 +2.011558 Supp b'lemma:rifampicin'
231 +2.011558 Supp b'word:rifampicin'
232 +2.009828 Technique b'lemma:rnaseq'
233 +2.009828 Technique b'word:RNASeq'
234 +1.995883 O b'+1:postag:NNP'
235 +1.979049 Gtype b'-1:postag:VBG'
236 +1.964073 O b'+1:word:were'
237 +1.938262 O b'lemma:rep1'
238 +1.938262 O b'word:rep1'
239 +1.933374 Supp b'lemma:acetate'
240 +1.933374 Supp b'word:acetate'
241 +1.929172 O b'postag:VBN'
242 +1.923161 Strain b'lemma:k-12'
243 +1.923161 Strain b'word:K-12'
244 +1.913934 Technique b'-1:lemma:chip-exo'
245 +1.905918 O b'lemma:rep3'
246 +1.905918 O b'word:rep3'
247 +1.902460 Gtype b'-1:lemma:\xe2\x88\x86'
248 +1.902460 Gtype b'-1:word:\xe2\x88\x86'
249 +1.888089 Supp b'lemma:nitrate'
250 +1.888089 Supp b'word:nitrate'
251 +1.880361 Supp b'-1:lemma:with'
252 +1.880361 Supp b'-1:word:with'
253 +1.879119 O b'+1:word:ChIP-Seq'
254 +1.873439 Gtype b'lemma:\xe2\x88\x86'
255 +1.873439 Gtype b'word:\xe2\x88\x86'
256 +1.869636 O b'-1:lemma:lb'
257 +1.869636 O b'-1:word:LB'
258 +1.865650 Gversion b'-1:lemma:nc'
259 +1.865650 Gversion b'-1:word:NC'
260 +1.865606 Gtype b'lemma:arca8myc'
261 +1.865606 Gtype b'word:ArcA8myc'
262 +1.864036 Agit b'+1:lemma:rpm'
263 +1.864036 Agit b'+1:word:rpm'
264 +1.856334 Agit b'lemma:rpm'
265 +1.856334 Agit b'word:rpm'
266 +1.839596 Anti b'lemma:seqa'
267 +1.839596 Anti b'word:SeqA'
268 +1.836163 Supp b'-1:postag:CC'
269 +1.835200 O b'lemma:with'
270 +1.835200 O b'word:with'
271 +1.814125 O b'lemma:culture'
272 +1.811651 Supp b'-1:lemma:Cra'
273 +1.792538 Air b'-1:lemma:ChIP-Seq'
274 +1.792538 Air b'-1:word:ChIP-Seq'
275 +1.768358 Air b'lemma:aerobic'
276 +1.762953 O b'isLower'
277 +1.736496 Gtype b'word:cra'
278 +1.714381 Gversion b'word:ChIP-Seq'
279 +1.710946 Air b'-1:postag::'
280 +1.709986 Gtype b'postag:NN'
281 +1.698044 O b'lemma:at'
282 +1.695726 Gtype b'lemma:\xce\xb4soxr'
283 +1.695726 Gtype b'word:\xce\x94soxR'
284 +1.678182 Vess b'lemma:flask'
285 +1.678182 Vess b'-1:lemma:warm'
286 +1.678182 Vess b'word:flask'
287 +1.678182 Vess b'-1:word:warmed'
288 +1.674493 Gtype b'lemma:pk4854'
289 +1.674493 Gtype b'word:PK4854'
290 +1.651636 Anti b'lemma:none'
291 +1.651636 Anti b'word:none'
292 +1.644746 O b'-1:lemma:0.3'
293 +1.644746 O b'-1:word:0.3'
294 +1.636558 Supp b'lemma:no3'
295 +1.636558 Supp b'word:NO3'
296 +1.624682 Gtype b'+1:postag::'
297 +1.623664 Phase b'-1:lemma:mid-log'
298 +1.623664 Phase b'-1:word:mid-log'
299 +1.618042 Supp b'lemma:dpd'
300 +1.618042 Supp b'word:DPD'
301 +1.614435 Gtype b'lemma:\xce\xb4fur'
302 +1.614435 Gtype b'word:\xce\x94fur'
303 +1.613699 Gtype b'+1:lemma:ph5'
304 +1.613699 Gtype b'+1:word:pH5'
305 +1.607121 Gversion b'lemma:chip-seq'
306 +1.597322 Anti b'lemma:\xcf\x8332'
307 +1.597322 Anti b'word:\xcf\x8332'
308 +1.592709 O b'lemma:Cra'
309 +1.582551 Med b'lemma:lb'
310 +1.582551 Med b'word:LB'
311 +1.568363 Supp b'-1:lemma:vol'
312 +1.568363 Supp b'-1:word:vol'
313 +1.559468 Air b'lemma:Aerobic'
314 +1.556342 Technique b'word:RNA-Seq'
315 +1.553167 Med b'-1:lemma:ml'
316 +1.553167 Med b'-1:word:ml'
317 +1.553041 Med b'+1:lemma:0.4'
318 +1.553041 Med b'+1:word:0.4'
319 +1.551077 Supp b'lemma:Leu'
320 +1.551077 Supp b'word:Leu'
321 +1.543969 Air b'lemma:anaerobically'
322 +1.543969 Air b'word:anaerobically'
323 +1.543172 O b'lemma::'
324 +1.543172 O b'word::'
325 +1.540510 Anti b'+1:lemma:antibody'
326 +1.540510 Anti b'+1:word:antibody'
327 +1.537669 Air b'lemma:anaeroibc'
328 +1.537669 Air b'word:Anaeroibc'
329 +1.525613 OD b'-1:postag:IN'
330 +1.524837 Supp b'lemma:Fe'
331 +1.524837 Supp b'word:Fe'
332 +1.524330 Gtype b'postag:-RRB-'
333 +1.516209 O b'-1:lemma:glucose'
334 +1.516209 O b'-1:word:glucose'
335 +1.504553 Gtype b'+1:lemma:with'
336 +1.504553 Gtype b'+1:word:with'
337 +
338 +
339 +Top negative:
340 +-0.149499 Anti b'+1:word:anti-Fur'
341 +-0.152520 O b'+1:lemma:95'
342 +-0.152520 O b'+1:word:95'
343 +-0.156326 Supp b'+1:lemma:dpd'
344 +-0.156326 Supp b'+1:word:DPD'
345 +-0.162658 Supp b'lemma:10'
346 +-0.162658 Supp b'word:10'
347 +-0.163808 O b'+1:lemma:for'
348 +-0.165890 Phase b'+1:postag:NN'
349 +-0.166827 O b'-1:lemma:dfnr'
350 +-0.166827 O b'-1:word:dFNR'
351 +-0.171844 O b'+1:word:was'
352 +-0.175747 O b'-1:lemma:of'
353 +-0.175747 O b'-1:word:of'
354 +-0.193766 O b'-1:word:from'
355 +-0.208529 O b'-1:lemma:1m'
356 +-0.208529 O b'-1:word:1M'
357 +-0.210506 Air b'isLower'
358 +-0.210844 Technique b'-1:postag::'
359 +-0.216830 O b'+1:lemma:.'
360 +-0.216830 O b'+1:postag:.'
361 +-0.216830 O b'+1:word:.'
362 +-0.223522 O b'-1:lemma:final'
363 +-0.223522 O b'-1:word:final'
364 +-0.223753 Med b'-1:postag:CD'
365 +-0.233131 O b'-1:postag:JJ'
366 +-0.239249 OD b'isNumber'
367 +-0.243733 O b'lemma:20'
368 +-0.243733 O b'word:20'
369 +-0.246275 Air b'-1:postag:CC'
370 +-0.247467 O b'lemma:k-12'
371 +-0.247467 O b'word:K-12'
372 +-0.265976 O b'lemma:glucose'
373 +-0.265976 O b'word:glucose'
374 +-0.271369 O b'+1:word:C'
375 +-0.274901 O b'-1:lemma:the'
376 +-0.284581 O b'-1:word:the'
377 +-0.284785 O b'+1:lemma:supplement'
378 +-0.284785 O b'+1:word:supplemented'
379 +-0.285764 pH b'postag:NN'
380 +-0.293864 pH b'isUpper'
381 +-0.308118 Air b'-1:lemma:or'
382 +-0.308118 Air b'-1:word:or'
383 +-0.310139 Gtype b'-1:postag:CD'
384 +-0.322263 Supp b'postag:CC'
385 +-0.324104 O b'-1:lemma:cra'
386 +-0.328286 O b'lemma:37'
387 +-0.328286 O b'word:37'
388 +-0.330168 O b'word:ChIP-exo'
389 +-0.334656 O b'-1:lemma:37'
390 +-0.334656 O b'-1:word:37'
391 +-0.340095 Anti b'isUpper'
392 +-0.342430 O b'-1:lemma:n2'
393 +-0.342430 O b'-1:word:N2'
394 +-0.348188 O b'-1:lemma:mm'
395 +-0.348188 O b'-1:word:mM'
396 +-0.350821 Supp b'+1:lemma:fructose'
397 +-0.350821 Supp b'+1:word:fructose'
398 +-0.352833 O b'+1:postag:IN'
399 +-0.355440 O b'-1:postag:-LRB-'
400 +-0.358948 O b'+1:postag:-LRB-'
401 +-0.368508 O b'lemma:fructose'
402 +-0.368508 O b'word:fructose'
403 +-0.369578 Gtype b'+1:postag:CD'
404 +-0.370939 O b'lemma:aerobically'
405 +-0.370939 O b'word:aerobically'
406 +-0.372723 O b'lemma:\xce\xb4fur'
407 +-0.372723 O b'word:\xce\x94fur'
408 +-0.374885 Supp b'+1:lemma:acetate'
409 +-0.374885 Supp b'+1:word:acetate'
410 +-0.383718 O b'-1:lemma:ph'
411 +-0.383718 O b'-1:word:pH'
412 +-0.388799 O b'-1:lemma:rpob'
413 +-0.388799 O b'-1:word:RpoB'
414 +-0.388879 O b'word:cells'
415 +-0.392268 Supp b'-1:lemma:-lrb-'
416 +-0.392268 Supp b'-1:word:-LRB-'
417 +-0.394884 O b'+1:postag:-RRB-'
418 +-0.401778 O b'lemma:minimal'
419 +-0.401778 O b'word:minimal'
420 +-0.405443 O b'+1:lemma:2.0'
421 +-0.405443 O b'+1:word:2.0'
422 +-0.409365 O b'lemma:medium'
423 +-0.409365 O b'word:medium'
424 +-0.416217 O b'-1:lemma:until'
425 +-0.416217 O b'-1:word:until'
426 +-0.422801 O b'+1:lemma:until'
427 +-0.422801 O b'+1:word:until'
428 +-0.422973 O b'lemma:nitrate'
429 +-0.422973 O b'word:nitrate'
430 +-0.424570 O b'-1:lemma:co2'
431 +-0.424570 O b'-1:word:CO2'
432 +-0.426175 Supp b'-1:postag:-LRB-'
433 +-0.430390 O b'-1:lemma:dissolve'
434 +-0.430390 O b'+1:lemma:methanol'
435 +-0.430390 O b'-1:word:dissolved'
436 +-0.430390 O b'+1:word:methanol'
437 +-0.431326 O b'-1:lemma:chip-exo'
438 +-0.434536 Med b'-1:postag:NN'
439 +-0.437879 Supp b'+1:lemma:nacl'
440 +-0.437879 Supp b'+1:word:NaCl'
441 +-0.438271 Gtype b'+1:lemma:-lrb-'
442 +-0.438271 Gtype b'+1:word:-LRB-'
443 +-0.517777 O b'lemma:0.3'
444 +-0.517777 O b'word:0.3'
445 +-0.554787 O b'+1:lemma:+'
446 +-0.554787 O b'+1:word:+'
447 +-0.556538 Med b'+1:postag:IN'
448 +-0.560443 Med b'-1:postag:IN'
449 +-0.564073 Phase b'isUpper'
450 +-0.567749 O b'lemma:mid-log'
451 +-0.567749 O b'word:mid-log'
452 +-0.569865 O b'+1:lemma:mm'
453 +-0.569865 O b'+1:word:mM'
454 +-0.574929 O b'+1:lemma:at'
455 +-0.574929 O b'+1:word:at'
456 +-0.576720 O b'lemma:methanol'
457 +-0.576720 O b'word:methanol'
458 +-0.582128 O b'-1:lemma:\xe2\x88\x86'
459 +-0.582128 O b'-1:word:\xe2\x88\x86'
460 +-0.586115 O b'lemma:anaerobically'
461 +-0.586115 O b'word:anaerobically'
462 +-0.586317 pH b'isLower'
463 +-0.602158 O b'lemma:30'
464 +-0.602158 O b'word:30'
465 +-0.604997 O b'+1:lemma:g/l'
466 +-0.604997 O b'+1:word:g/L'
467 +-0.607721 O b'+1:lemma:1m'
468 +-0.607721 O b'+1:word:1M'
469 +-0.633619 O b'-1:postag:IN'
470 +-0.635683 O b'-1:lemma:30'
471 +-0.635683 O b'-1:word:30'
472 +-0.637769 O b'-1:lemma:ml'
473 +-0.637769 O b'-1:word:ml'
474 +-0.669793 Air b'postag:NN'
475 +-0.682250 O b'+1:word:ChIP-exo'
476 +-0.688742 O b'lemma:of'
477 +-0.688742 O b'word:of'
478 +-0.712089 Air b'+1:postag:JJ'
479 +-0.712952 O b'-1:lemma:1'
480 +-0.712952 O b'-1:word:1'
481 +-0.726205 O b'lemma:2h'
482 +-0.726205 O b'-1:lemma:additional'
483 +-0.726205 O b'word:2h'
484 +-0.726205 O b'-1:word:additional'
485 +-0.727554 O b'-1:postag::'
486 +-0.740726 O b'-1:lemma:nsrr'
487 +-0.740726 O b'-1:word:NsrR'
488 +-0.747068 Temp b'postag:NN'
489 +-0.802318 O b'lemma:nitrogen'
490 +-0.802318 O b'word:nitrogen'
491 +-0.820825 Supp b'+1:postag:VBN'
492 +-0.826345 O b'lemma:media'
493 +-0.826345 O b'word:media'
494 +-0.827286 O b'-1:lemma:2'
495 +-0.827286 O b'-1:word:2'
496 +-0.831918 O b'-1:lemma:IP'
497 +-0.831918 O b'-1:word:IP'
498 +-0.838607 O b'-1:lemma:fresh'
499 +-0.838607 O b'-1:word:fresh'
500 +-0.854504 O b'lemma:wt'
501 +-0.854588 Supp b'-1:postag:NNP'
502 +-0.874101 O b'postag:RB'
503 +-0.937975 Agit b'isUpper'
504 +-0.956352 O b'+1:lemma:in'
505 +-0.956352 O b'+1:word:in'
506 +-0.979766 O b'+1:lemma:1'
507 +-0.979766 O b'+1:word:1'
508 +-0.982287 Gtype b'isLower'
509 +-0.999251 Technique b'isNumber'
510 +-1.074766 O b'postag:VBP'
511 +-1.110753 Gtype b'isUpper'
512 +-1.116498 O b'+1:postag:VBG'
513 +-1.181571 Supp b'+1:postag:-LRB-'
514 +-1.192346 O b'-1:lemma:sample'
515 +-1.216569 Supp b'+1:lemma:-lrb-'
516 +-1.216569 Supp b'+1:word:-LRB-'
517 +-1.249356 Technique b'isLower'
518 +-1.267812 O b'+1:lemma:2'
519 +-1.267812 O b'+1:word:2'
520 +-1.275807 O b'lemma:rifampicin'
521 +-1.275807 O b'word:rifampicin'
522 +-1.320279 Gtype b'isNumber'
523 +-1.328236 Gversion b'isLower'
524 +-1.369132 O b'-1:lemma:vol'
525 +-1.369132 O b'-1:word:vol'
526 +-1.376089 Supp b'+1:lemma:,'
527 +-1.376089 Supp b'+1:postag:,'
528 +-1.376089 Supp b'+1:word:,'
529 +-1.376957 Phase b'postag:JJ'
530 +-1.401753 OD b'+1:postag:NN'
531 +-1.485274 Anti b'postag:NNP'
532 +-1.787019 Supp b'postag:JJ'
533 +-1.891720 Temp b'+1:postag:IN'
534 +-2.179250 O b'-1:lemma:_'
535 +-2.179250 O b'-1:word:_'
536 +-2.181717 Phase b'-1:postag:JJ'
537 +-2.190768 O b'-1:postag:VBG'
538 +-2.226803 O b'-1:lemma::'
539 +-2.226803 O b'-1:word::'
540 +
1 +********** TRAINING AND TESTING REPORT **********
2 +Training file: training-data-set-70.txt
3 +
4 +best params:{'c1': 0.04937325798482469, 'c2': 0.021347060592283952}
5 +best CV score:0.8727912197138052
6 +model size: 0.16M
7 +
8 +Flat F1: 0.7834470810208732
9 + precision recall f1-score support
10 +
11 + OD 0.857 0.818 0.837 22
12 + pH 1.000 1.000 1.000 8
13 + Technique 1.000 0.913 0.955 23
14 + Med 1.000 0.943 0.971 53
15 + Temp 1.000 0.621 0.766 29
16 + Vess 1.000 1.000 1.000 1
17 + Agit 0.000 0.000 0.000 0
18 + Phase 0.882 1.000 0.938 15
19 + Air 0.556 0.362 0.439 69
20 + Anti 1.000 1.000 1.000 11
21 + Strain 0.000 0.000 0.000 1
22 + Gtype 0.885 0.812 0.847 85
23 + Substrain 0.000 0.000 0.000 0
24 + Supp 0.735 0.806 0.769 134
25 + Gversion 0.000 0.000 0.000 0
26 +
27 +avg / total 0.818 0.763 0.783 451
28 +
29 +
30 +Top likely transitions:
31 +Temp -> Temp 5.697885
32 +Med -> Med 5.203676
33 +Supp -> Supp 5.023623
34 +Anti -> Anti 4.919423
35 +Agit -> Agit 4.878209
36 +OD -> OD 4.490452
37 +Gtype -> Gtype 4.342655
38 +Phase -> Phase 4.265502
39 +Gversion -> Gversion 4.228866
40 +O -> O 3.927755
41 +Air -> Air 3.904248
42 +Technique -> Technique 3.337823
43 +pH -> pH 3.047130
44 +Substrain -> Gtype 0.928380
45 +O -> Technique 0.767492
46 +O -> Supp 0.345238
47 +O -> Gtype 0.335711
48 +Technique -> Air 0.302367
49 +Air -> O 0.238748
50 +Gtype -> Supp 0.106918
51 +Temp -> O 0.099174
52 +O -> Anti 0.028918
53 +O -> Temp 0.024359
54 +Gtype -> Air 0.000872
55 +O -> Strain -0.006968
56 +OD -> Med -0.016869
57 +Vess -> O -0.025460
58 +Air -> Agit -0.030216
59 +Technique -> Supp -0.054732
60 +Gtype -> Temp -0.066953
61 +Air -> Technique -0.074730
62 +Phase -> Gtype -0.080334
63 +O -> Phase -0.082037
64 +Anti -> Supp -0.094817
65 +O -> Agit -0.122548
66 +Anti -> Gtype -0.136175
67 +Agit -> Air -0.139559
68 +Phase -> Technique -0.151160
69 +Gtype -> Gversion -0.170913
70 +Supp -> Phase -0.196758
71 +Supp -> O -0.208933
72 +Agit -> O -0.223185
73 +Supp -> Technique -0.223482
74 +Phase -> OD -0.285779
75 +Supp -> Gversion -0.304736
76 +Technique -> OD -0.319741
77 +Supp -> Anti -0.336780
78 +Med -> O -0.359992
79 +Temp -> Med -0.365995
80 +Supp -> Air -0.395049
81 +
82 +
83 +Top unlikely transitions:
84 +Vess -> O -0.025460
85 +Air -> Agit -0.030216
86 +Technique -> Supp -0.054732
87 +Gtype -> Temp -0.066953
88 +Air -> Technique -0.074730
89 +Phase -> Gtype -0.080334
90 +O -> Phase -0.082037
91 +Anti -> Supp -0.094817
92 +O -> Agit -0.122548
93 +Anti -> Gtype -0.136175
94 +Agit -> Air -0.139559
95 +Phase -> Technique -0.151160
96 +Gtype -> Gversion -0.170913
97 +Supp -> Phase -0.196758
98 +Supp -> O -0.208933
99 +Agit -> O -0.223185
100 +Supp -> Technique -0.223482
101 +Phase -> OD -0.285779
102 +Supp -> Gversion -0.304736
103 +Technique -> OD -0.319741
104 +Supp -> Anti -0.336780
105 +Med -> O -0.359992
106 +Temp -> Med -0.365995
107 +Supp -> Air -0.395049
108 +Anti -> Temp -0.433807
109 +Air -> Med -0.443745
110 +Technique -> Gtype -0.461980
111 +Anti -> O -0.477920
112 +Phase -> Med -0.496773
113 +Phase -> Supp -0.504111
114 +Supp -> Gtype -0.542703
115 +Air -> Supp -0.549017
116 +OD -> Air -0.590463
117 +Gtype -> Technique -0.617626
118 +OD -> O -0.633869
119 +Gversion -> O -0.665626
120 +Technique -> pH -0.667545
121 +Phase -> O -0.674680
122 +Air -> Phase -0.701860
123 +Air -> Temp -0.825208
124 +Gtype -> Anti -0.861670
125 +O -> Med -0.887587
126 +O -> OD -1.001120
127 +Technique -> O -1.008992
128 +Gtype -> Med -1.115268
129 +Supp -> Med -1.155087
130 +Gtype -> O -1.446717
131 +O -> Air -1.553604
132 +Substrain -> O -1.560176
133 +Med -> Supp -1.895924
134 +
135 +
136 +Top positive:
137 +4.079246 Gtype b'lemma[:1]:\xce\xb4'
138 +3.875929 Technique b'lemma[:2]:Ch'
139 +3.769086 O b'lemma[:2]:re'
140 +3.421852 Air b'word:Aerobic'
141 +3.297057 O b'word:Cra'
142 +3.280691 Air b'lemma:anaerobic'
143 +3.279696 O b'lemma:_'
144 +3.279696 O b'lemma[:1]:_'
145 +3.279696 O b'word:_'
146 +3.054016 O b'-1:word:tag'
147 +2.950170 O b'-1:lemma:tag'
148 +2.923311 Supp b'-1:word:Cra'
149 +2.917103 O b'lemma:-'
150 +2.917103 O b'word:-'
151 +2.673438 O b'lemma:2'
152 +2.673438 O b'word:2'
153 +2.601916 Anti b'lemma[:2]:an'
154 +2.548804 Air b'word:Anaerobic'
155 +2.540901 O b'lemma:with'
156 +2.540901 O b'word:with'
157 +2.527977 Supp b'+1:lemma:\xc2\xb5m'
158 +2.527977 Supp b'+1:word:\xc2\xb5M'
159 +2.513465 Gtype b'lemma[:2]:pk'
160 +2.467865 Air b'lemma[:2]:ae'
161 +2.465142 Supp b'-1:lemma:vol'
162 +2.465142 Supp b'-1:word:vol'
163 +2.430821 O b'word:A'
164 +2.414676 O b'+1:lemma:pq'
165 +2.414676 O b'+1:word:PQ'
166 +2.381594 Supp b'lemma:arginine'
167 +2.365113 Strain b'+1:lemma:substr'
168 +2.365113 Strain b'+1:word:substr'
169 +2.358430 O b'lemma:3'
170 +2.358430 O b'word:3'
171 +2.281060 O b'+1:postag:RB'
172 +2.274077 Gtype b'lemma:wt'
173 +2.274077 Gtype b'lemma[:2]:wt'
174 +2.271330 Gtype b'lemma[:1]:w'
175 +2.262507 Gtype b'lemma[:2]:de'
176 +2.230752 O b'-1:word:Aerobic'
177 +2.223212 Gtype b'lemma[:2]:ar'
178 +2.204027 Gtype b'word:WT'
179 +2.203708 Supp b'lemma:pq'
180 +2.203708 Supp b'lemma[:2]:pq'
181 +2.203708 Supp b'word:PQ'
182 +2.195032 Technique b'lemma[:2]:rn'
183 +2.177901 Air b'lemma[:2]:an'
184 +2.152688 O b'lemma[:2]:ge'
185 +2.152187 O b'lemma:1'
186 +2.152187 O b'word:1'
187 +2.147937 Phase b'lemma:stationary'
188 +2.147937 Phase b'word:stationary'
189 +2.139876 Phase b'lemma:mid-log'
190 +2.139876 Phase b'word:mid-log'
191 +2.082317 O b'lemma:Custom'
192 +2.082317 O b'lemma[:2]:Cu'
193 +2.082317 O b'word:Custom'
194 +2.081417 Technique b'lemma[:1]:C'
195 +2.059825 O b'lemma:b'
196 +2.059825 O b'word:B'
197 +2.051640 Technique b'word:ChIPSeq'
198 +2.044050 Supp b'lemma:Iron'
199 +2.044050 Supp b'lemma[:2]:Ir'
200 +2.044050 Supp b'word:Iron'
201 +2.044050 Supp b'+1:word:Deficient'
202 +2.034480 O b'-1:lemma:ChIP-exo'
203 +2.021616 Technique b'lemma[:2]:ch'
204 +2.004928 Substrain b'lemma[:2]:mg'
205 +1.992585 Gtype b'lemma[:1]:f'
206 +1.988583 Gtype b'lemma[:2]:cr'
207 +1.983400 O b'+1:postag:NNP'
208 +1.976657 O b'lemma[:1]:h'
209 +1.975191 O b'-1:lemma:lb'
210 +1.975191 O b'-1:word:LB'
211 +1.973175 Supp b'lemma[:1]:I'
212 +1.969412 O b'lemma:.'
213 +1.969412 O b'postag:.'
214 +1.969412 O b'postag[:1]:.'
215 +1.969412 O b'word:.'
216 +1.954569 Phase b'lemma[:2]:ex'
217 +1.946311 O b'lemma:delta'
218 +1.946311 O b'word:delta'
219 +1.942336 Supp b'postag:VBP'
220 +1.917047 Technique b'word:ChIP-Seq'
221 +1.916217 Supp b'-1:lemma:Cra'
222 +1.899702 Supp b'lemma:rifampicin'
223 +1.899702 Supp b'word:rifampicin'
224 +1.898554 Supp b'-1:lemma:with'
225 +1.898554 Supp b'-1:word:with'
226 +1.892222 Air b'lemma[:1]:a'
227 +1.892185 Supp b'lemma[:2]:ri'
228 +1.887353 Technique b'lemma:chipseq'
229 +1.852638 O b'-1:lemma:0.3'
230 +1.852638 O b'-1:word:0.3'
231 +1.839355 pH b'lemma[:2]:ph'
232 +1.838181 Gtype b'hGreek'
233 +1.837034 O b'-1:lemma:glucose'
234 +1.837034 O b'-1:word:glucose'
235 +1.821428 Gtype b'lemma:arca8myc'
236 +1.821428 Gtype b'word:ArcA8myc'
237 +1.821045 Med b'lemma:MOPS'
238 +1.821045 Med b'lemma[:1]:M'
239 +1.821045 Med b'lemma[:2]:MO'
240 +1.821045 Med b'word:MOPS'
241 +1.813601 Technique b'-1:lemma:chip-exo'
242 +1.811851 Gversion b'word:ChIP-Seq'
243 +1.810109 O b'lemma:ompr'
244 +1.810109 O b'word:OmpR'
245 +1.807989 Supp b'-1:lemma:+'
246 +1.807989 Supp b'-1:word:+'
247 +1.805457 OD b'lemma[:1]:o'
248 +1.789186 Gtype b'lemma[:1]:t'
249 +1.787157 Supp b'lemma[:2]:gl'
250 +1.784556 Gtype b'symb'
251 +1.769736 Gversion b'lemma:chip-seq'
252 +1.769144 O b'lemma[:2]:om'
253 +1.756478 Temp b'+1:lemma:in'
254 +1.756478 Temp b'+1:word:in'
255 +1.750825 O b'lemma:rpob'
256 +1.750825 O b'word:RpoB'
257 +1.744454 Gversion b'-1:lemma:nc'
258 +1.744454 Gversion b'-1:word:NC'
259 +1.743983 Technique b'lemma:ChIP-exo'
260 +1.738988 Gtype b'-1:lemma:\xe2\x88\x86'
261 +1.738988 Gtype b'-1:word:\xe2\x88\x86'
262 +1.737321 Supp b'lemma[:2]:30'
263 +1.719994 Gversion b'lemma[:2]:00'
264 +1.691622 Med b'+1:lemma:0.4'
265 +1.691622 Med b'+1:word:0.4'
266 +1.690183 Supp b'lemma:acetate'
267 +1.690183 Supp b'word:acetate'
268 +1.680098 Gtype b'lemma[:1]:W'
269 +1.669697 Phase b'lemma[:1]:e'
270 +1.654569 O b'-1:lemma:into'
271 +1.654569 O b'-1:word:into'
272 +1.646155 Agit b'+1:lemma:rpm'
273 +1.646155 Agit b'+1:word:rpm'
274 +1.639372 Supp b'lemma[:2]:ni'
275 +1.637838 Supp b'-1:postag:CC'
276 +1.633069 Med b'-1:lemma:ml'
277 +1.633069 Med b'-1:word:ml'
278 +1.626970 Gtype b'-1:postag:VBG'
279 +1.625020 O b'+1:word:ChIP-Seq'
280 +1.592935 Air b'lemma[:1]:A'
281 +1.581837 Supp b'lemma[:2]:ac'
282 +1.581166 Gversion b'lemma:asm584v2'
283 +1.581166 Gversion b'word:ASM584v2'
284 +1.576758 Gversion b'lemma[:2]:as'
285 +1.566133 Air b'lemma:Aerobic'
286 +1.566133 Air b'lemma[:2]:Ae'
287 +1.556942 Air b'+1:postag:IN'
288 +1.540738 Air b'postag:RB'
289 +1.540738 Air b'postag[:1]:R'
290 +1.540738 Air b'postag[:2]:RB'
291 +1.537191 O b'-1:lemma:phase'
292 +1.537191 O b'-1:word:phase'
293 +1.528090 Air b'-1:lemma:-'
294 +1.528090 Air b'-1:word:-'
295 +1.527198 Gtype b'lemma:type'
296 +1.527198 Gtype b'lemma[:2]:ty'
297 +1.527198 Gtype b'word:type'
298 +1.520984 O b'lemma[:1]:C'
299 +1.511967 Air b'-1:postag::'
300 +1.501971 O b'postag::'
301 +1.501971 O b'postag[:1]::'
302 +1.500818 Gtype b'lemma[:2]:wi'
303 +1.494237 O b'+1:word:were'
304 +1.488912 O b'postag:CC'
305 +1.488912 O b'postag[:2]:CC'
306 +1.488215 Agit b'lemma:rpm'
307 +1.488215 Agit b'word:rpm'
308 +1.486647 Supp b'+1:lemma:1'
309 +1.486647 Supp b'+1:word:1'
310 +1.483688 Temp b'-1:lemma:\xcf\x8332'
311 +1.483688 Temp b'-1:word:\xcf\x8332'
312 +1.483025 Air b'word:anaerobic'
313 +1.481168 O b'lemma:a'
314 +1.480233 Gversion b'lemma:nc'
315 +1.480233 Gversion b'lemma[:2]:nc'
316 +1.480233 Gversion b'word:NC'
317 +1.465788 O b'-1:lemma:anaerobic'
318 +1.464120 O b'+1:lemma:od600'
319 +1.464120 O b'+1:word:OD600'
320 +1.459126 O b'+1:postag:VBN'
321 +1.449825 Phase b'lemma:exponential'
322 +1.449825 Phase b'word:exponential'
323 +1.443892 O b'-1:lemma:0.3-0.35'
324 +1.443892 O b'-1:word:0.3-0.35'
325 +1.429105 Technique b'symb'
326 +1.428375 O b'lemma[:2]:ga'
327 +1.423820 Technique b'word:ChIP-exo'
328 +1.422037 O b'lemma:chip'
329 +1.420614 O b'isNumber'
330 +1.417035 pH b'+1:postag:CD'
331 +1.414508 Phase b'lemma[:2]:st'
332 +1.410028 O b'isLower'
333 +1.399604 Med b'+1:postag:CD'
334 +1.398736 Med b'+1:lemma:2.0'
335 +1.398736 Med b'+1:word:2.0'
336 +1.374106 O b'-1:lemma:wt'
337 +
338 +
339 +Top negative:
340 +-0.314411 Supp b'postag[:1]:N'
341 +-0.314411 Supp b'postag[:2]:NN'
342 +-0.316666 OD b'+1:lemma:0.4'
343 +-0.316666 OD b'+1:word:0.4'
344 +-0.319400 O b'+1:word:for'
345 +-0.319616 Substrain b'isLower'
346 +-0.329221 O b'+1:lemma:supplement'
347 +-0.329221 O b'+1:word:supplemented'
348 +-0.334710 Gtype b'+1:lemma:-rrb-'
349 +-0.334710 Gtype b'+1:word:-RRB-'
350 +-0.335324 Supp b'+1:lemma:nacl'
351 +-0.335324 Supp b'+1:word:NaCl'
352 +-0.338195 Phase b'lemma[:2]:pa'
353 +-0.343360 Med b'+1:postag:NNS'
354 +-0.343534 O b'+1:postag:IN'
355 +-0.344316 Temp b'postag:NN'
356 +-0.345297 O b'lemma:wt'
357 +-0.345297 O b'lemma[:2]:wt'
358 +-0.355887 Air b'isLower'
359 +-0.356867 O b'-1:lemma:from'
360 +-0.358831 O b'lemma:c'
361 +-0.359856 O b'lemma:37'
362 +-0.359856 O b'lemma[:2]:37'
363 +-0.359856 O b'word:37'
364 +-0.360624 O b'-1:lemma:final'
365 +-0.360624 O b'-1:word:final'
366 +-0.361530 Med b'-1:postag:NN'
367 +-0.362367 Technique b'-1:postag::'
368 +-0.362612 O b'lemma:of'
369 +-0.362612 O b'lemma[:2]:of'
370 +-0.362612 O b'word:of'
371 +-0.368278 O b'-1:word:from'
372 +-0.374267 O b'-1:lemma:30'
373 +-0.374267 O b'-1:word:30'
374 +-0.376045 O b'lemma:glucose'
375 +-0.376045 O b'word:glucose'
376 +-0.379443 Gtype b'lemma[:2]:rp'
377 +-0.382310 Gtype b'lemma[:1]:s'
378 +-0.383855 Supp b'-1:lemma:-lrb-'
379 +-0.383855 Supp b'-1:word:-LRB-'
380 +-0.385029 O b'-1:lemma:delta'
381 +-0.385029 O b'-1:word:delta'
382 +-0.390702 O b'-1:lemma:dissolve'
383 +-0.390702 O b'+1:lemma:methanol'
384 +-0.390702 O b'-1:word:dissolved'
385 +-0.390702 O b'+1:word:methanol'
386 +-0.395103 O b'-1:lemma:nsrr'
387 +-0.395103 O b'-1:word:NsrR'
388 +-0.399202 Supp b'lemma[:1]:a'
389 +-0.403898 O b'-1:lemma:1'
390 +-0.403898 O b'-1:word:1'
391 +-0.404014 Temp b'hGreek'
392 +-0.406161 O b'-1:lemma:\xe2\x88\x86'
393 +-0.406161 O b'-1:word:\xe2\x88\x86'
394 +-0.417535 O b'-1:postag:IN'
395 +-0.418404 Supp b'-1:postag:-LRB-'
396 +-0.428053 O b'-1:lemma:ph'
397 +-0.428053 O b'-1:word:pH'
398 +-0.432185 Supp b'postag:JJ'
399 +-0.432252 O b'lemma[:1]:L'
400 +-0.435862 Air b'-1:lemma:or'
401 +-0.435862 Air b'-1:word:or'
402 +-0.437167 Supp b'postag[:1]:J'
403 +-0.437167 Supp b'postag[:2]:JJ'
404 +-0.439673 Vess b'hUpper'
405 +-0.439673 Vess b'hLower'
406 +-0.443285 O b'lemma[:2]:gl'
407 +-0.444141 Temp b'isLower'
408 +-0.449813 Supp b'lemma[:2]:an'
409 +-0.455745 O b'-1:lemma:37'
410 +-0.455745 O b'-1:word:37'
411 +-0.465238 O b'lemma[:2]:ni'
412 +-0.469627 Supp b'lemma[:1]:s'
413 +-0.472157 Air b'+1:postag:JJ'
414 +-0.477980 O b'lemma:2h'
415 +-0.477980 O b'-1:lemma:additional'
416 +-0.477980 O b'lemma[:2]:2h'
417 +-0.477980 O b'word:2h'
418 +-0.477980 O b'-1:word:additional'
419 +-0.493105 Agit b'symb'
420 +-0.493489 O b'lemma:methanol'
421 +-0.493489 O b'word:methanol'
422 +-0.495166 O b'lemma:mid-log'
423 +-0.495166 O b'word:mid-log'
424 +-0.497445 Air b'-1:postag:JJ'
425 +-0.499724 O b'+1:lemma:g/l'
426 +-0.499724 O b'+1:word:g/L'
427 +-0.501348 O b'+1:lemma:mm'
428 +-0.501348 O b'+1:word:mM'
429 +-0.507557 O b'lemma[:2]:me'
430 +-0.508454 Gtype b'postag[:1]:V'
431 +-0.508454 Gtype b'postag[:2]:VB'
432 +-0.508460 Gtype b'postag::'
433 +-0.508460 Gtype b'postag[:1]::'
434 +-0.529784 O b'+1:lemma:at'
435 +-0.529784 O b'+1:word:at'
436 +-0.537465 O b'lemma[:1]:0'
437 +-0.542594 Supp b'-1:lemma:.'
438 +-0.542594 Supp b'-1:postag:.'
439 +-0.542594 Supp b'-1:word:.'
440 +-0.546062 O b'lemma[:1]:4'
441 +-0.548259 O b'-1:lemma:rpob'
442 +-0.548259 O b'-1:word:RpoB'
443 +-0.552281 O b'-1:lemma:IP'
444 +-0.552281 O b'-1:word:IP'
445 +-0.553960 Phase b'postag:JJ'
446 +-0.562367 O b'lemma:0.3'
447 +-0.562367 O b'word:0.3'
448 +-0.573524 Med b'-1:postag:CD'
449 +-0.574613 Technique b'postag:NN'
450 +-0.578197 O b'-1:lemma:chip-exo'
451 +-0.579399 O b'+1:lemma:1m'
452 +-0.579399 O b'+1:word:1M'
453 +-0.584655 O b'word:ChIP-exo'
454 +-0.585801 O b'lemma:soxs-8myc'
455 +-0.585801 O b'word:soxS-8myc'
456 +-0.599558 O b'lemma[:2]:ar'
457 +-0.600604 O b'word:cells'
458 +-0.606899 O b'+1:lemma:until'
459 +-0.606899 O b'+1:word:until'
460 +-0.607019 Air b'symb'
461 +-0.640639 pH b'isNumber'
462 +-0.656724 O b'+1:word:ChIP-exo'
463 +-0.664247 O b'+1:lemma:2.0'
464 +-0.664247 O b'+1:word:2.0'
465 +-0.664520 Med b'+1:postag:IN'
466 +-0.664576 O b'-1:lemma:co2'
467 +-0.664576 O b'-1:word:CO2'
468 +-0.665370 O b'+1:lemma:rep2'
469 +-0.665370 O b'+1:word:rep2'
470 +-0.671212 O b'lemma[:2]:mg'
471 +-0.724471 Agit b'hUpper'
472 +-0.724471 Agit b'hLower'
473 +-0.739501 O b'lemma[:1]:I'
474 +-0.755281 O b'lemma[:2]:30'
475 +-0.762653 pH b'isLower'
476 +-0.780673 Air b'postag:NN'
477 +-0.793295 Supp b'symb'
478 +-0.826875 O b'-1:postag::'
479 +-0.853583 O b'lemma[:2]:ri'
480 +-0.880894 O b'-1:lemma:until'
481 +-0.880894 O b'-1:word:until'
482 +-0.884934 Supp b'+1:postag:VBN'
483 +-0.893108 O b'lemma[:1]:k'
484 +-0.902922 O b'lemma[:2]:ae'
485 +-0.926126 O b'lemma:rifampicin'
486 +-0.926126 O b'word:rifampicin'
487 +-0.929148 Med b'-1:postag:IN'
488 +-0.936796 Anti b'postag:NNP'
489 +-0.952559 Supp b'-1:postag:NNP'
490 +-0.962974 Med b'symb'
491 +-0.981592 Technique b'isNumber'
492 +-1.019994 O b'-1:lemma:2'
493 +-1.019994 O b'-1:word:2'
494 +-1.023240 O b'+1:lemma:1'
495 +-1.023240 O b'+1:word:1'
496 +-1.023642 O b'-1:lemma:ml'
497 +-1.023642 O b'-1:word:ml'
498 +-1.035572 Phase b'hUpper'
499 +-1.035572 Phase b'hLower'
500 +-1.036376 O b'lemma[:1]:n'
501 +-1.044060 Gtype b'lemma[:1]:c'
502 +-1.051632 Phase b'postag[:1]:J'
503 +-1.051632 Phase b'postag[:2]:JJ'
504 +-1.074102 Gtype b'lemma[:1]:a'
505 +-1.077239 O b'+1:lemma:+'
506 +-1.077239 O b'+1:word:+'
507 +-1.084820 O b'+1:lemma:in'
508 +-1.084820 O b'+1:word:in'
509 +-1.107631 Gtype b'isNumber'
510 +-1.117617 Supp b'+1:postag:-LRB-'
511 +-1.129026 Technique b'isLower'
512 +-1.149735 Supp b'hGreek'
513 +-1.156144 Supp b'+1:lemma:-lrb-'
514 +-1.156144 Supp b'+1:word:-LRB-'
515 +-1.229045 Gtype b'lemma[:1]:r'
516 +-1.231933 O b'+1:postag:VBG'
517 +-1.262559 Gversion b'isLower'
518 +-1.277633 O b'-1:lemma:fresh'
519 +-1.277633 O b'-1:word:fresh'
520 +-1.381763 OD b'+1:postag:NN'
521 +-1.472372 O b'+1:lemma:2'
522 +-1.472372 O b'+1:word:2'
523 +-1.571807 Gtype b'isUpper'
524 +-1.677845 Supp b'lemma[:1]:c'
525 +-1.687951 O b'-1:lemma:vol'
526 +-1.687951 O b'-1:word:vol'
527 +-1.695736 Supp b'+1:lemma:,'
528 +-1.695736 Supp b'+1:postag:,'
529 +-1.695736 Supp b'+1:word:,'
530 +-1.723752 O b'-1:lemma:sample'
531 +-1.785262 Phase b'-1:postag:JJ'
532 +-1.895238 O b'postag:VBP'
533 +-1.908071 O b'-1:postag:VBG'
534 +-2.171310 O b'-1:lemma:_'
535 +-2.171310 O b'-1:word:_'
536 +-2.491839 O b'lemma[:2]:fl'
537 +-2.929187 O b'-1:lemma::'
538 +-2.929187 O b'-1:word::'
539 +-2.957042 Temp b'+1:postag:IN'
540 +
1 +********** TRAINING AND TESTING REPORT **********
2 +Training file: training-data-set-70.txt
3 +
4 +best params:{'c1': 0.14521637026904505, 'c2': 0.02160263268998293}
5 +best CV score:0.8705560896194018
6 +model size: 0.08M
7 +
8 +Flat F1: 0.7637642434421422
9 + precision recall f1-score support
10 +
11 + OD 1.000 0.818 0.900 22
12 + pH 1.000 1.000 1.000 8
13 + Technique 0.952 0.870 0.909 23
14 + Med 1.000 0.925 0.961 53
15 + Temp 0.923 0.828 0.873 29
16 + Vess 1.000 1.000 1.000 1
17 + Agit 0.000 0.000 0.000 0
18 + Phase 0.875 0.933 0.903 15
19 + Air 0.545 0.348 0.425 69
20 + Anti 1.000 1.000 1.000 11
21 + Strain 0.000 0.000 0.000 1
22 + Gtype 0.857 0.847 0.852 85
23 + Substrain 0.000 0.000 0.000 0
24 + Supp 0.609 0.791 0.688 134
25 + Gversion 0.000 0.000 0.000 0
26 +
27 +avg / total 0.774 0.769 0.764 451
28 +
29 +
30 +Top likely transitions:
31 +Temp -> Temp 5.770024
32 +Agit -> Agit 5.591940
33 +Med -> Med 5.238745
34 +OD -> OD 5.208128
35 +Supp -> Supp 5.009566
36 +Anti -> Anti 4.435661
37 +Phase -> Phase 4.266553
38 +Air -> Air 4.183832
39 +Gversion -> Gversion 4.181832
40 +Gtype -> Gtype 4.159817
41 +O -> O 4.029919
42 +Technique -> Technique 3.397241
43 +pH -> pH 2.835139
44 +Substrain -> Gtype 1.746378
45 +Gtype -> Supp 1.354961
46 +Air -> O 1.123193
47 +O -> Technique 0.956245
48 +O -> Supp 0.860304
49 +Technique -> Air 0.720188
50 +Gtype -> Air 0.705752
51 +Supp -> O 0.675087
52 +Med -> O 0.607714
53 +O -> Gtype 0.602507
54 +Temp -> O 0.586988
55 +Gtype -> pH 0.132889
56 +O -> Phase 0.103874
57 +O -> Anti 0.059111
58 +O -> Med 0.043976
59 +O -> Temp 0.037475
60 +Phase -> O 0.007325
61 +OD -> O 0.001147
62 +Temp -> Med -0.004820
63 +O -> OD -0.007911
64 +Air -> Agit -0.018394
65 +Technique -> O -0.037749
66 +Air -> Supp -0.052877
67 +Supp -> Gtype -0.118891
68 +Gtype -> Technique -0.146943
69 +Air -> Gtype -0.162470
70 +OD -> Air -0.407752
71 +Supp -> Med -0.519051
72 +Gtype -> Med -0.520149
73 +Gtype -> O -0.569447
74 +O -> Air -0.679735
75 +Phase -> OD -0.846881
76 +Substrain -> O -1.190575
77 +Technique -> Gtype -1.334054
78 +Med -> Supp -1.751616
79 +
80 +
81 +Top unlikely transitions:
82 +Temp -> Temp 5.770024
83 +Agit -> Agit 5.591940
84 +Med -> Med 5.238745
85 +OD -> OD 5.208128
86 +Supp -> Supp 5.009566
87 +Anti -> Anti 4.435661
88 +Phase -> Phase 4.266553
89 +Air -> Air 4.183832
90 +Gversion -> Gversion 4.181832
91 +Gtype -> Gtype 4.159817
92 +O -> O 4.029919
93 +Technique -> Technique 3.397241
94 +pH -> pH 2.835139
95 +Substrain -> Gtype 1.746378
96 +Gtype -> Supp 1.354961
97 +Air -> O 1.123193
98 +O -> Technique 0.956245
99 +O -> Supp 0.860304
100 +Technique -> Air 0.720188
101 +Gtype -> Air 0.705752
102 +Supp -> O 0.675087
103 +Med -> O 0.607714
104 +O -> Gtype 0.602507
105 +Temp -> O 0.586988
106 +Gtype -> pH 0.132889
107 +O -> Phase 0.103874
108 +O -> Anti 0.059111
109 +O -> Med 0.043976
110 +O -> Temp 0.037475
111 +Phase -> O 0.007325
112 +OD -> O 0.001147
113 +Temp -> Med -0.004820
114 +O -> OD -0.007911
115 +Air -> Agit -0.018394
116 +Technique -> O -0.037749
117 +Air -> Supp -0.052877
118 +Supp -> Gtype -0.118891
119 +Gtype -> Technique -0.146943
120 +Air -> Gtype -0.162470
121 +OD -> Air -0.407752
122 +Supp -> Med -0.519051
123 +Gtype -> Med -0.520149
124 +Gtype -> O -0.569447
125 +O -> Air -0.679735
126 +Phase -> OD -0.846881
127 +Substrain -> O -1.190575
128 +Technique -> Gtype -1.334054
129 +Med -> Supp -1.751616
130 +
131 +
132 +Top positive:
133 +6.295982 O b'lemma:2'
134 +6.088107 O b'lemma:1'
135 +5.499374 Phase b'lemma:mid-log'
136 +5.468207 Anti b'-2:lemma:antibody'
137 +5.389740 O b'-2:lemma:_'
138 +5.310900 O b'lemma:_'
139 +5.124562 Air b'lemma:anaerobic'
140 +5.038711 Gtype b'lemma:wt'
141 +4.975908 Supp b'lemma:pq'
142 +4.858798 O b'lemma:3'
143 +4.706721 Technique b'lemma:chipseq'
144 +4.694852 Air b'lemma:Aerobic'
145 +4.682998 Gtype b'lemma:type'
146 +4.617298 O b'postag:IN'
147 +4.544190 O b'-2:lemma:flagtag'
148 +4.540916 Technique b'lemma:ChIP-exo'
149 +4.378201 Gtype b'lemma:\xce\xb4cra'
150 +4.141678 Air b'lemma:aerobic'
151 +4.102798 Gtype b'lemma:\xe2\x88\x86'
152 +4.083658 Gtype b'+1:lemma:type'
153 +4.039276 Med b'lemma:MOPS'
154 +3.938693 Technique b'lemma:rna-seq'
155 +3.794686 O b'lemma:rpob'
156 +3.792227 O b'-1:lemma:ChIP-exo'
157 +3.784855 O b'lemma:-'
158 +3.774100 O b'lemma:b'
159 +3.764975 Supp b'lemma:acetate'
160 +3.691796 Gtype b'-2:lemma:genotype/variation'
161 +3.672526 Supp b'lemma:Iron'
162 +3.672526 Supp b'-2:lemma:Anaerobic'
163 +3.617838 Technique b'lemma:chip-seq'
164 +3.604759 Supp b'lemma:no3'
165 +3.568114 Supp b'lemma:nacl'
166 +3.558608 Phase b'-2:lemma:phase'
167 +3.526017 O b'lemma:.'
168 +3.526017 O b'postag:.'
169 +3.449878 Gtype b'-1:lemma:\xe2\x88\x86'
170 +3.446438 Supp b'lemma:glucose'
171 +3.403359 Med b'lemma:lb'
172 +3.365642 Gtype b'lemma:wild-type'
173 +3.350130 Air b'postag:RB'
174 +3.342253 O b'+2:lemma:\xc2\xb0c'
175 +3.335770 Supp b'lemma:nh4cl'
176 +3.326472 O b'lemma:Cra'
177 +3.318880 Gtype b'lemma:\xce\xb4fur'
178 +3.295001 O b'postag:CC'
179 +3.262632 O b'postag:VBN'
180 +3.206029 O b'lemma:a'
181 +3.195357 Gtype b'-2:lemma:genotype'
182 +3.193335 Technique b'+2:lemma:ph5'
183 +3.187683 O b'postag::'
184 +3.127353 Gtype b'lemma:flag-tag'
185 +3.127353 Gtype b'-1:lemma:c-terminal'
186 +3.121057 Supp b'lemma:nitrate'
187 +3.112065 Gtype b'lemma:\xce\xb4soxr'
188 +3.087827 Substrain b'lemma:mg1655'
189 +3.047270 Supp b'+2:lemma:iptg'
190 +3.025373 Supp b'-1:lemma:Cra'
191 +2.966835 Air b'-1:lemma:ChIP-Seq'
192 +2.963423 Supp b'-1:lemma:with'
193 +2.920292 Gtype b'lemma:dfnr'
194 +2.878192 O b'lemma:with'
195 +2.811966 O b'+1:lemma:arca-8myc'
196 +2.806521 O b'-2:lemma:medium'
197 +2.792197 Gtype b'-2:lemma:affyexp'
198 +2.777524 Gversion b'lemma:chip-seq'
199 +2.764298 Technique b'lemma:rnaseq'
200 +2.751592 O b'-1:lemma:tag'
201 +2.744091 Air b'-2:lemma:IP'
202 +2.741300 Supp b'+1:lemma:1'
203 +2.735516 O b'lemma:CEL'
204 +2.729762 O b'lemma:rep2'
205 +2.702452 Med b'lemma:m63'
206 +2.699796 O b'lemma:harbor'
207 +2.669950 Supp b'lemma:arginine'
208 +2.661545 O b'-1:lemma:0.3'
209 +2.602799 Gtype b'+1:lemma:with'
210 +2.599935 O b'-1:lemma:\xc2\xb0c'
211 +2.587031 Technique b'lemma:ChIP-Seq'
212 +2.568848 Gtype b'-2:lemma:delta'
213 +2.543114 Gtype b'lemma:nsrr'
214 +2.539452 Substrain b'-2:lemma:substr'
215 +2.534370 O b'postag:DT'
216 +2.513411 Gtype b'lemma:pk4854'
217 +2.512728 O b'lemma:chip'
218 +2.505650 O b'+1:postag:RB'
219 +2.493029 Temp b'-2:lemma:\xcf\x8332'
220 +2.454541 Anti b'+1:lemma:antibody'
221 +2.447213 Anti b'+2:lemma:antibody'
222 +2.436325 Gtype b'-1:lemma:rpob'
223 +2.430821 pH b'lemma:ph5'
224 +2.430821 pH b'+1:lemma:.5'
225 +2.411045 O b'postag:VBG'
226 +2.347885 Supp b'-2:lemma:media'
227 +2.345716 pH b'+1:postag:CD'
228 +2.342949 Technique b'-2:lemma:Fur'
229 +2.340323 O b'-1:lemma:glucose'
230 +2.326562 O b'-1:lemma:lb'
231 +2.318033 Gtype b'+1:lemma:ph5'
232 +2.318033 Gtype b'+2:lemma:.5'
233 +2.312502 Temp b'lemma:43'
234 +2.307908 Supp b'lemma:Leu'
235 +2.307908 Supp b'-2:lemma:Lrp'
236 +2.283291 O b'-1:lemma:media'
237 +2.278223 Gtype b'lemma:\xce\xb4oxyr'
238 +2.277778 Air b'-1:lemma:-'
239 +2.269087 Substrain b'+1:lemma:phtpg'
240 +2.262780 Supp b'-2:lemma:agent'
241 +2.259842 Temp b'-1:lemma:43'
242 +2.259677 O b'lemma:ompr'
243 +2.241905 Supp b'lemma:Adenine'
244 +2.238893 Gtype b'lemma:\xce\xb4ompr'
245 +2.238698 Supp b'+1:lemma:\xc2\xb5m'
246 +2.212658 Med b'+2:lemma:b2'
247 +2.198189 O b'-2:lemma:myc'
248 +2.163443 Phase b'lemma:exponential'
249 +2.163443 Phase b'lemma:stationary'
250 +2.159726 O b'+1:postag:NNP'
251 +2.156490 Phase b'-1:lemma:mid-log'
252 +2.151371 Gversion b'lemma:.2'
253 +2.151371 Gversion b'-1:lemma:u00096'
254 +2.140308 O b'-1:lemma:type'
255 +2.138372 O b'lemma:ml'
256 +2.128755 Med b'+2:postag:CC'
257 +2.123732 Technique b'-1:lemma:chip-exo'
258 +2.118518 Technique b'-1:lemma:IP'
259 +2.117337 Strain b'+1:lemma:substr'
260 +2.117337 Strain b'-2:lemma:str'
261 +2.099392 Agit b'lemma:rpm'
262 +2.096807 Supp b'+1:lemma:2'
263 +2.096794 Strain b'lemma:k-12'
264 +2.091407 O b'+2:lemma:cra'
265 +2.063564 O b'lemma:\xcf\x8332'
266 +2.050789 Supp b'lemma:rifampicin'
267 +2.049024 Gversion b'lemma:nc'
268 +2.033164 O b'+1:lemma:pq'
269 +2.013358 Temp b'-1:lemma:\xcf\x8332'
270 +1.966874 Temp b'+1:lemma:\xc2\xb0c'
271 +1.966521 O b'-1:lemma:anaerobic'
272 +1.957027 O b'lemma:culture'
273 +1.952247 O b'-2:lemma:min'
274 +1.929194 O b'+1:lemma:condition'
275 +1.923035 Supp b'-1:postag:CC'
276 +1.911344 Temp b'lemma:\xc2\xb0c'
277 +1.902035 Gversion b'+2:lemma:000913'
278 +1.892926 Technique b'-1:lemma:input'
279 +1.877608 Supp b'lemma:fructose'
280 +1.876135 O b'+1:lemma:250'
281 +1.868225 O b'lemma:Custom'
282 +1.867543 Air b'lemma:anaeroibc'
283 +1.865515 O b'+2:postag:JJ'
284 +1.861583 Supp b'-1:lemma:+'
285 +1.856696 O b'lemma:s'
286 +1.856578 Supp b'+1:lemma:_'
287 +1.851722 Gversion b'lemma:u00096'
288 +1.851722 Gversion b'+1:lemma:.2'
289 +1.838273 Phase b'lemma:phase'
290 +1.825249 Med b'lemma:broth'
291 +1.825249 Med b'-1:lemma:L'
292 +1.817848 O b'+1:lemma:coli'
293 +1.805545 Med b'+1:lemma:0.4'
294 +1.803317 O b'-1:lemma:into'
295 +1.801603 Agit b'+1:lemma:rpm'
296 +1.799565 Gtype b'-1:postag:VBG'
297 +1.796090 Supp b'+2:lemma:rifampicin'
298 +1.784603 O b'+1:lemma:chip-seq'
299 +1.777806 Med b'lemma:media'
300 +1.769076 O b'lemma:trpr'
301 +1.768678 Gtype b'lemma:ptac'
302 +1.760760 Gtype b'+1:lemma:flagtag'
303 +1.757550 Gversion b'-2:lemma:nc'
304 +1.745784 O b'lemma:soxs'
305 +1.745784 O b'lemma:soxr'
306 +1.739107 O b'-2:lemma:~'
307 +1.736949 Med b'+2:lemma:b1'
308 +1.736832 OD b'+1:lemma:of'
309 +1.735882 O b'postag:NNS'
310 +1.709175 O b'+2:lemma:70'
311 +1.708616 O b'postag:VBD'
312 +1.705680 O b'-1:lemma:aerobically'
313 +1.700488 O b'lemma:argr'
314 +1.696537 Temp b'-1:lemma:sample'
315 +1.694740 O b'+1:lemma:acetate'
316 +1.690209 O b'lemma:affyexp'
317 +1.689269 Med b'lemma:minimal'
318 +1.687607 Agit b'+2:lemma:at'
319 +1.683096 O b'lemma:purr'
320 +1.680453 Gversion b'-2:lemma:build'
321 +1.674723 O b'+2:lemma:fructose'
322 +1.673798 Vess b'lemma:flask'
323 +1.673798 Vess b'-1:lemma:warm'
324 +1.673798 Vess b'-2:lemma:pre'
325 +1.673798 Vess b'+2:lemma:43'
326 +1.671743 O b'-1:lemma:Aerobic'
327 +1.670303 O b'+2:lemma:polyclonal'
328 +1.644434 O b'+1:lemma:wt'
329 +1.641217 Gtype b'+2:lemma:glucose'
330 +1.620167 O b'lemma:genotype/variation'
331 +1.613899 Air b'lemma:aerobically'
332 +1.603327 Temp b'-1:lemma:37'
333 +
334 +
335 +Top negative:
336 +-0.098715 O b'-1:lemma:g/l'
337 +-0.098977 O b'+2:lemma:0.2'
338 +-0.099326 O b'lemma:m63'
339 +-0.099882 O b'-1:lemma:rifampicin'
340 +-0.106371 O b'-1:lemma:37'
341 +-0.107922 Med b'-1:postag:CD'
342 +-0.110064 O b'-2:lemma:-lrb-'
343 +-0.117142 O b'+1:lemma:of'
344 +-0.123918 Supp b'+2:lemma:.'
345 +-0.123918 Supp b'+2:postag:.'
346 +-0.131710 O b'+1:lemma:culture'
347 +-0.132661 O b'+1:postag:-LRB-'
348 +-0.136103 O b'+1:lemma:95'
349 +-0.137008 Technique b'-1:postag::'
350 +-0.138812 O b'-1:lemma:final'
351 +-0.138847 O b'lemma:7.6'
352 +-0.138847 O b'+1:lemma:;'
353 +-0.139546 Supp b'+1:lemma:-rrb-'
354 +-0.140317 Supp b'+2:lemma:dpd'
355 +-0.142690 O b'postag:RB'
356 +-0.143532 O b'lemma:;'
357 +-0.143532 O b'-1:lemma:7.6'
358 +-0.144217 O b'+2:lemma:reference'
359 +-0.145656 Temp b'-2:postag:NN'
360 +-0.146454 O b'lemma:anaerobic'
361 +-0.147525 Air b'-1:postag:JJ'
362 +-0.149095 O b'-1:lemma:0.2'
363 +-0.151176 O b'-1:lemma:contain'
364 +-0.151476 Air b'+2:postag:IN'
365 +-0.152942 O b'+1:lemma:fecl2'
366 +-0.154443 Med b'postag:CD'
367 +-0.158960 Supp b'-1:lemma:-lrb-'
368 +-0.169851 Med b'-1:postag:NN'
369 +-0.176135 OD b'postag:NN'
370 +-0.178701 Supp b'-1:postag:-LRB-'
371 +-0.180805 O b'lemma:methanol'
372 +-0.180805 O b'-2:lemma:dissolve'
373 +-0.182601 Supp b'+2:lemma:glucose'
374 +-0.185072 Supp b'postag:CC'
375 +-0.189120 O b'+2:lemma:-rrb-'
376 +-0.192885 O b'+1:lemma:dissolve'
377 +-0.199989 O b'-1:lemma:1m'
378 +-0.199989 O b'+2:lemma:7.6'
379 +-0.201669 O b'-2:lemma:nh4cl'
380 +-0.201904 O b'-2:lemma:the'
381 +-0.202079 O b'-1:lemma:of'
382 +-0.202589 Supp b'+1:lemma:acetate'
383 +-0.204371 Supp b'+1:lemma:rifampicin'
384 +-0.207950 Gtype b'-2:postag:CD'
385 +-0.215109 O b'-1:lemma:iptg'
386 +-0.217533 O b'lemma:nitrogen'
387 +-0.221912 O b'lemma:37'
388 +-0.229060 O b'-1:lemma:n2'
389 +-0.236563 Air b'-2:postag:CC'
390 +-0.241693 O b'-2:lemma:IP'
391 +-0.248054 O b'-2:lemma:aerobically'
392 +-0.248583 Med b'+1:postag:NN'
393 +-0.249693 Phase b'+1:postag:NN'
394 +-0.251280 O b'+2:lemma:tag'
395 +-0.260336 O b'+1:lemma:10'
396 +-0.266374 O b'-1:lemma:until'
397 +-0.269589 O b'lemma:ph'
398 +-0.275513 O b'-1:lemma:dfnr'
399 +-0.277525 Supp b'+1:postag:VBN'
400 +-0.277770 O b'-1:lemma:fresh'
401 +-0.287656 O b'+2:lemma:250'
402 +-0.295216 O b'lemma:nh4cl'
403 +-0.296463 O b'-2:lemma:mm'
404 +-0.296923 O b'lemma:\xe2\x88\x86'
405 +-0.301390 O b'-1:postag:IN'
406 +-0.303644 O b'+1:postag:IN'
407 +-0.305259 O b'lemma:fecl2'
408 +-0.310628 Gtype b'-1:postag:NN'
409 +-0.311296 O b'lemma:k-12'
410 +-0.316958 O b'+1:lemma:%'
411 +-0.317571 O b'+2:lemma:.'
412 +-0.317571 O b'+2:postag:.'
413 +-0.317988 Phase b'-2:postag:NN'
414 +-0.321822 O b'lemma:minimal'
415 +-0.322289 O b'-1:lemma:minimal'
416 +-0.322560 O b'-2:lemma:genome'
417 +-0.332944 O b'-2:postag::'
418 +-0.333295 O b'lemma:dissolve'
419 +-0.333295 O b'+2:lemma:methanol'
420 +-0.338620 O b'-2:lemma:anaerobically'
421 +-0.345174 O b'-1:postag::'
422 +-0.363808 Gtype b'-2:lemma:\xe2\x88\x86'
423 +-0.382684 O b'-2:lemma:rpob'
424 +-0.387944 Supp b'+1:postag:-RRB-'
425 +-0.394192 O b'-2:lemma:fresh'
426 +-0.395680 O b'+1:lemma:minimal'
427 +-0.396198 O b'-2:lemma:phase'
428 +-0.398092 O b'-2:lemma:dpd'
429 +-0.402236 Supp b'+2:lemma:-rrb-'
430 +-0.410787 O b'+1:lemma:1m'
431 +-0.410787 O b'-2:lemma:vol'
432 +-0.419351 O b'-1:lemma:cra'
433 +-0.439916 Med b'+1:postag:IN'
434 +-0.447763 Supp b'+2:postag:-RRB-'
435 +-0.451257 O b'-2:lemma:until'
436 +-0.454676 Supp b'-2:lemma:treat'
437 +-0.461514 Supp b'+1:postag:NNS'
438 +-0.473439 O b'+2:lemma:at'
439 +-0.489525 Supp b'-2:postag:NNS'
440 +-0.490480 O b'-2:lemma:a'
441 +-0.491484 Supp b'+1:lemma:,'
442 +-0.491484 Supp b'+1:postag:,'
443 +-0.497187 O b'lemma:aerobically'
444 +-0.503626 Supp b'-1:postag:NNP'
445 +-0.509023 Supp b'lemma:10'
446 +-0.516721 O b'+1:lemma:+'
447 +-0.521588 O b'lemma:nitrate'
448 +-0.522828 O b'-2:lemma::'
449 +-0.525160 O b'+1:lemma:g/l'
450 +-0.528490 O b'+1:lemma:supplement'
451 +-0.536875 Anti b'+2:postag:JJ'
452 +-0.541770 O b'-1:lemma:ml'
453 +-0.549347 O b'+1:lemma:mm'
454 +-0.554922 O b'-1:lemma:rpob'
455 +-0.555832 O b'-1:lemma:dissolve'
456 +-0.555832 O b'+1:lemma:methanol'
457 +-0.556682 O b'+1:postag:VBG'
458 +-0.560238 O b'-1:lemma:co2'
459 +-0.588187 O b'+2:lemma:a'
460 +-0.589107 Anti b'+1:lemma:anti-fur'
461 +-0.592147 Med b'-2:postag:VBN'
462 +-0.594320 O b'-2:lemma:2'
463 +-0.596090 O b'+2:lemma:add'
464 +-0.599346 O b'-2:lemma:supplement'
465 +-0.602638 pH b'postag:NN'
466 +-0.603217 O b'-2:lemma:glucose'
467 +-0.605916 O b'lemma:mid-log'
468 +-0.609002 O b'+2:lemma:10'
469 +-0.610140 O b'-1:lemma:mm'
470 +-0.614692 Supp b'+1:lemma:-lrb-'
471 +-0.622267 Med b'+2:postag:VBN'
472 +-0.631151 O b'-1:lemma:vol'
473 +-0.631151 O b'-2:lemma:1/100'
474 +-0.631151 O b'+2:lemma:1m'
475 +-0.635308 O b'-2:lemma:media'
476 +-0.643298 Supp b'+1:postag:-LRB-'
477 +-0.667302 O b'-1:lemma:grow'
478 +-0.675604 O b'lemma:anaerobically'
479 +-0.678285 O b'postag:VBP'
480 +-0.712921 O b'-2:postag:DT'
481 +-0.743302 O b'-2:postag:RB'
482 +-0.797815 O b'-1:lemma:2'
483 +-0.803294 Supp b'-2:postag:JJ'
484 +-0.804094 O b'lemma:wt'
485 +-0.827192 O b'+2:lemma:+'
486 +-0.827898 O b'+1:lemma:until'
487 +-0.831996 O b'+2:lemma:mid-log'
488 +-0.837044 O b'lemma:2h'
489 +-0.837044 O b'-1:lemma:additional'
490 +-0.853094 O b'lemma:of'
491 +-0.881861 O b'lemma:aerobic'
492 +-0.919135 O b'-1:lemma:30'
493 +-0.952995 Air b'postag:NN'
494 +-0.973697 O b'+2:postag:-RRB-'
495 +-0.983024 Air b'+1:postag:JJ'
496 +-1.013906 O b'+2:lemma:fnr'
497 +-1.021974 O b'-2:postag:SYM'
498 +-1.037571 O b'+2:lemma:then'
499 +-1.048639 Med b'-2:lemma:grow'
500 +-1.054249 O b'lemma:media'
501 +-1.064365 Temp b'postag:NN'
502 +-1.088097 O b'-1:lemma:nsrr'
503 +-1.141026 Phase b'-1:postag:JJ'
504 +-1.155260 O b'+2:lemma:b'
505 +-1.165858 O b'-2:lemma:0.3'
506 +-1.175657 O b'-1:lemma:ph'
507 +-1.186408 O b'lemma:\xce\xb4fur'
508 +-1.229030 O b'+1:lemma:in'
509 +-1.249314 O b'+1:lemma:at'
510 +-1.257039 O b'lemma:rifampicin'
511 +-1.261781 Gtype b'+2:lemma:cra'
512 +-1.267291 O b'+1:lemma:2.0'
513 +-1.293131 O b'-2:lemma:rifampicin'
514 +-1.297426 O b'-1:lemma:sample'
515 +-1.317920 Phase b'postag:JJ'
516 +-1.321832 O b'-1:lemma:1'
517 +-1.346398 OD b'+2:lemma:aerobically'
518 +-1.353123 Supp b'+2:lemma:2'
519 +-1.353839 OD b'+1:postag:NN'
520 +-1.379126 Anti b'+2:lemma:polyclonal'
521 +-1.433804 O b'+2:lemma:rifampicin'
522 +-1.437034 Supp b'+2:postag:CD'
523 +-1.439161 Supp b'+2:lemma:1'
524 +-1.489609 Supp b'+2:lemma:fructose'
525 +-1.512780 O b'lemma:30'
526 +-1.553364 Supp b'postag:JJ'
527 +-1.601724 O b'-1:postag:VBG'
528 +-1.789724 O b'-1:lemma:IP'
529 +-1.863826 Gtype b'postag:VBG'
530 +-2.000634 Anti b'postag:NNP'
531 +-2.029905 O b'lemma:0.3'
532 +-2.144948 O b'+1:lemma:1'
533 +-2.735063 O b'+1:lemma:2'
534 +-3.945319 O b'-1:lemma::'
535 +-4.420879 O b'-1:lemma:_'
536 +
1 +********** TRAINING AND TESTING REPORT **********
2 +Training file: training-data-set-70.txt
3 +
4 +best params:{'c1': 0.16617627893415826, 'c2': 0.016246283722594547}
5 +best CV score:0.8677299702871124
6 +model size: 0.09M
7 +
8 +Flat F1: 0.8100263815531699
9 + precision recall f1-score support
10 +
11 + OD 1.000 0.818 0.900 22
12 + pH 1.000 1.000 1.000 8
13 + Technique 1.000 0.913 0.955 23
14 + Med 1.000 0.943 0.971 53
15 + Temp 0.923 0.828 0.873 29
16 + Vess 1.000 1.000 1.000 1
17 + Agit 0.000 0.000 0.000 0
18 + Phase 0.882 1.000 0.938 15
19 + Air 0.565 0.377 0.452 69
20 + Anti 1.000 1.000 1.000 11
21 + Strain 0.000 0.000 0.000 1
22 + Gtype 0.889 0.847 0.867 85
23 + Substrain 0.000 0.000 0.000 0
24 + Supp 0.811 0.799 0.805 134
25 + Gversion 0.000 0.000 0.000 0
26 +
27 +avg / total 0.845 0.783 0.810 451
28 +
29 +
30 +Top likely transitions:
31 +Agit -> Agit 6.716949
32 +Temp -> Temp 6.219197
33 +OD -> OD 5.656758
34 +Med -> Med 5.510885
35 +Supp -> Supp 5.091598
36 +Anti -> Anti 4.770668
37 +Gversion -> Gversion 4.530576
38 +Phase -> Phase 4.343162
39 +Air -> Air 4.176421
40 +Gtype -> Gtype 3.872949
41 +O -> O 3.576273
42 +Technique -> Technique 3.464421
43 +pH -> pH 3.115258
44 +O -> Technique 0.821082
45 +Substrain -> Gtype 0.704368
46 +O -> Supp 0.589038
47 +Air -> O 0.511427
48 +Gtype -> Supp 0.510592
49 +Gtype -> Air 0.474170
50 +Temp -> O 0.189513
51 +O -> Gtype 0.176860
52 +Supp -> O 0.145242
53 +Technique -> Air 0.118103
54 +Med -> O 0.093609
55 +O -> Anti 0.090099
56 +Phase -> O 0.002296
57 +O -> Temp 0.002073
58 +O -> Phase 0.000043
59 +O -> Med -0.015745
60 +Gtype -> Med -0.042610
61 +Supp -> Med -0.045443
62 +O -> OD -0.113818
63 +Air -> Supp -0.115743
64 +Technique -> pH -0.124518
65 +OD -> Air -0.194892
66 +Gversion -> O -0.196158
67 +Gtype -> Technique -0.579025
68 +Technique -> O -0.829124
69 +O -> Air -0.902346
70 +Gtype -> O -0.925779
71 +Med -> Supp -1.463243
72 +Substrain -> O -1.475265
73 +
74 +
75 +Top unlikely transitions:
76 +Agit -> Agit 6.716949
77 +Temp -> Temp 6.219197
78 +OD -> OD 5.656758
79 +Med -> Med 5.510885
80 +Supp -> Supp 5.091598
81 +Anti -> Anti 4.770668
82 +Gversion -> Gversion 4.530576
83 +Phase -> Phase 4.343162
84 +Air -> Air 4.176421
85 +Gtype -> Gtype 3.872949
86 +O -> O 3.576273
87 +Technique -> Technique 3.464421
88 +pH -> pH 3.115258
89 +O -> Technique 0.821082
90 +Substrain -> Gtype 0.704368
91 +O -> Supp 0.589038
92 +Air -> O 0.511427
93 +Gtype -> Supp 0.510592
94 +Gtype -> Air 0.474170
95 +Temp -> O 0.189513
96 +O -> Gtype 0.176860
97 +Supp -> O 0.145242
98 +Technique -> Air 0.118103
99 +Med -> O 0.093609
100 +O -> Anti 0.090099
101 +Phase -> O 0.002296
102 +O -> Temp 0.002073
103 +O -> Phase 0.000043
104 +O -> Med -0.015745
105 +Gtype -> Med -0.042610
106 +Supp -> Med -0.045443
107 +O -> OD -0.113818
108 +Air -> Supp -0.115743
109 +Technique -> pH -0.124518
110 +OD -> Air -0.194892
111 +Gversion -> O -0.196158
112 +Gtype -> Technique -0.579025
113 +Technique -> O -0.829124
114 +O -> Air -0.902346
115 +Gtype -> O -0.925779
116 +Med -> Supp -1.463243
117 +Substrain -> O -1.475265
118 +
119 +
120 +Top positive:
121 +6.433973 Technique b'lemma[:2]:Ch'
122 +6.360815 O b'lemma:2'
123 +5.662452 Anti b'-2:lemma:antibody'
124 +5.635583 O b'lemma:1'
125 +5.375548 O b'-2:lemma:_'
126 +5.334062 O b'lemma:3'
127 +4.328011 Gtype b'lemma[:1]:\xce\xb4'
128 +4.274267 O b'lemma[:2]:re'
129 +4.227305 Phase b'lemma:mid-log'
130 +4.196090 Air b'lemma[:2]:ae'
131 +3.698810 Gtype b'-1:lemma:\xe2\x88\x86'
132 +3.689545 Substrain b'lemma[:2]:mg'
133 +3.405547 Air b'lemma:anaerobic'
134 +3.375450 Technique b'lemma:chipseq'
135 +3.355381 O b'-2:lemma:medium'
136 +3.263554 O b'lemma:with'
137 +3.228684 O b'lemma:b'
138 +3.212301 Technique b'lemma[:2]:rn'
139 +3.141015 Supp b'lemma:acetate'
140 +3.091356 Phase b'-2:lemma:phase'
141 +3.034572 Gtype b'lemma:type'
142 +3.034572 Gtype b'lemma[:2]:ty'
143 +3.019205 O b'-1:lemma:tag'
144 +2.976419 Air b'lemma[:2]:an'
145 +2.958977 O b'+2:lemma:\xc2\xb0c'
146 +2.937672 O b'lemma[:2]:ge'
147 +2.921253 O b'lemma[:1]:h'
148 +2.903272 Gtype b'-2:lemma:genotype/variation'
149 +2.893999 Supp b'lemma:arginine'
150 +2.891850 O b'lemma:-'
151 +2.870735 Supp b'-1:lemma:with'
152 +2.863673 Gtype b'+1:lemma:type'
153 +2.803093 Supp b'lemma:pq'
154 +2.803093 Supp b'lemma[:2]:pq'
155 +2.800209 O b'lemma:a'
156 +2.752078 Med b'+2:postag:CC'
157 +2.708393 Gtype b'lemma[:2]:pk'
158 +2.703014 Supp b'-1:lemma:Cra'
159 +2.629891 O b'lemma:_'
160 +2.629891 O b'lemma[:1]:_'
161 +2.596183 Supp b'lemma:Iron'
162 +2.596183 Supp b'lemma[:2]:Ir'
163 +2.596183 Supp b'-2:lemma:Anaerobic'
164 +2.569027 O b'+2:lemma:70'
165 +2.544018 Supp b'+2:lemma:iptg'
166 +2.496912 OD b'lemma[:1]:o'
167 +2.467329 Supp b'+1:lemma:\xc2\xb5m'
168 +2.456900 O b'lemma:.'
169 +2.456900 O b'postag:.'
170 +2.456900 O b'postag[:1]:.'
171 +2.453094 O b'+1:lemma:pq'
172 +2.442390 O b'-1:lemma:ChIP-exo'
173 +2.433484 Technique b'lemma[:2]:ch'
174 +2.399879 Anti b'+2:lemma:antibody'
175 +2.363621 Air b'lemma:Aerobic'
176 +2.363621 Air b'lemma[:2]:Ae'
177 +2.358981 Gtype b'-2:lemma:delta'
178 +2.342573 O b'-2:lemma:myc'
179 +2.331321 Supp b'lemma:no3'
180 +2.313202 Supp b'lemma:rifampicin'
181 +2.312753 Anti b'+1:lemma:antibody'
182 +2.297741 Supp b'lemma[:2]:ri'
183 +2.281621 Air b'-2:lemma:IP'
184 +2.278366 Med b'lemma:MOPS'
185 +2.278366 Med b'lemma[:1]:M'
186 +2.278366 Med b'lemma[:2]:MO'
187 +2.270655 OD b'lemma[:2]:od'
188 +2.263438 Gversion b'lemma:chip-seq'
189 +2.231180 O b'-1:lemma:glucose'
190 +2.206950 Gtype b'lemma:wt'
191 +2.206950 Gtype b'lemma[:2]:wt'
192 +2.187730 Supp b'lemma[:2]:gl'
193 +2.186178 pH b'+1:postag:CD'
194 +2.183027 Technique b'-1:lemma:input'
195 +2.179683 O b'lemma[:1]:-'
196 +2.172484 Air b'-1:lemma:ChIP-Seq'
197 +2.160518 Phase b'lemma:stationary'
198 +2.150044 Phase b'lemma[:2]:ex'
199 +2.145423 O b'postag:IN'
200 +2.145423 O b'postag[:1]:I'
201 +2.145423 O b'postag[:2]:IN'
202 +2.135783 O b'-1:lemma:lb'
203 +2.115167 O b'-1:lemma:anaerobic'
204 +2.106797 O b'lemma:rpob'
205 +2.094643 O b'+2:lemma:cra'
206 +2.046836 Gtype b'-2:postag:DT'
207 +2.035645 Gtype b'lemma:nsrr'
208 +2.035645 Gtype b'lemma[:2]:ns'
209 +2.034890 Technique b'lemma[:1]:C'
210 +2.033641 O b'+1:postag:RB'
211 +2.013660 Supp b'+1:lemma:1'
212 +2.011345 Supp b'lemma:fructose'
213 +2.002182 Supp b'-2:lemma:media'
214 +1.998148 Gtype b'hGreek'
215 +1.992371 Temp b'-1:lemma:\xcf\x8332'
216 +1.980657 O b'postag::'
217 +1.980657 O b'postag[:1]::'
218 +1.979866 Supp b'-1:lemma:final'
219 +1.974647 Med b'+2:lemma:b2'
220 +1.964961 Temp b'-1:lemma:sample'
221 +1.959365 Strain b'+1:lemma:substr'
222 +1.959365 Strain b'-2:lemma:str'
223 +1.931532 Temp b'-2:lemma:\xcf\x8332'
224 +1.926204 pH b'lemma[:2]:ph'
225 +1.924918 Supp b'-2:lemma:agent'
226 +1.911949 Supp b'lemma[:1]:n'
227 +1.895649 Supp b'+2:lemma:rifampicin'
228 +1.895108 Temp b'+2:postag:DT'
229 +1.894074 Air b'-1:lemma:-'
230 +1.892359 Gtype b'lemma:\xe2\x88\x86'
231 +1.892359 Gtype b'lemma[:1]:\xe2\x88\x86'
232 +1.886561 O b'-1:lemma:media'
233 +1.878120 Gtype b'lemma[:1]:w'
234 +1.849654 Air b'-1:lemma:co2'
235 +1.819415 Supp b'-1:lemma:+'
236 +1.819234 Med b'lemma:L'
237 +1.819234 Med b'+1:lemma:broth'
238 +1.811008 Anti b'lemma[:2]:an'
239 +1.807476 O b'-1:lemma:0.3'
240 +1.795731 O b'-1:postag:NNS'
241 +1.794762 O b'postag:CC'
242 +1.794762 O b'postag[:2]:CC'
243 +1.793009 Gtype b'-1:postag:VBG'
244 +1.774172 O b'lemma[:1]:C'
245 +1.769427 Temp b'lemma[:1]:3'
246 +1.750065 Med b'lemma:broth'
247 +1.750065 Med b'-1:lemma:L'
248 +1.750065 Med b'lemma[:2]:br'
249 +1.731860 Air b'lemma[:1]:A'
250 +1.729863 Supp b'-1:postag:CC'
251 +1.711219 Gtype b'+1:lemma:with'
252 +1.710940 O b'+1:postag:NNP'
253 +1.706914 O b'+1:lemma:coli'
254 +1.705143 Phase b'lemma[:1]:e'
255 +1.701557 O b'lemma:chip'
256 +1.688302 Gtype b'lemma[:2]:ar'
257 +1.687398 Gtype b'-1:lemma:_'
258 +1.679707 Supp b'-2:lemma:induce'
259 +1.674594 O b'+1:lemma:250'
260 +1.667872 O b'-2:lemma:ChIP-Seq'
261 +1.653279 Gtype b'+1:lemma::'
262 +1.627724 O b'lemma:A'
263 +1.622722 Supp b'lemma[:2]:ac'
264 +1.617166 O b'postag:VBN'
265 +1.614130 Substrain b'lemma[:1]:m'
266 +1.610355 Gtype b'-2:lemma:genotype'
267 +1.606189 O b'+2:postag:JJ'
268 +1.605995 O b'lemma:0.4'
269 +1.600992 Substrain b'+1:lemma:phtpg'
270 +1.591094 O b'-1:lemma:Aerobic'
271 +1.589843 Supp b'-1:lemma:_'
272 +1.579672 Air b'+1:postag:IN'
273 +1.566124 Temp b'lemma:43'
274 +1.566124 Temp b'lemma[:2]:43'
275 +1.563526 O b'-1:lemma:type'
276 +1.558846 Supp b'lemma[:2]:30'
277 +1.552158 O b'-1:lemma:aerobically'
278 +1.551882 Gversion b'lemma:nc'
279 +1.551882 Gversion b'lemma[:2]:nc'
280 +1.536623 Supp b'+1:lemma:_'
281 +1.536256 O b'-1:lemma:\xc2\xb0c'
282 +1.524621 Air b'lemma[:1]:a'
283 +1.511586 Temp b'-1:lemma:43'
284 +1.506306 O b'lemma[:1]:b'
285 +1.506103 Gtype b'lemma[:2]:wi'
286 +1.498210 Supp b'+1:lemma:2'
287 +1.491309 Gtype b'+1:lemma:flagtag'
288 +1.490390 Med b'+1:lemma:0.4'
289 +1.485550 O b'+2:lemma:fructose'
290 +1.479904 Supp b'lemma:Leu'
291 +1.479904 Supp b'lemma[:2]:Le'
292 +1.479904 Supp b'-2:lemma:Lrp'
293 +1.475818 Supp b'lemma:glucose'
294 +1.472393 Gtype b'lemma[:2]:fl'
295 +1.471237 Gtype b'-1:lemma:rpob'
296 +1.464668 Med b'lemma:lb'
297 +1.464668 Med b'lemma[:2]:lb'
298 +1.462458 O b'+1:lemma:mid-log'
299 +1.460719 Med b'+2:lemma:b1'
300 +1.459376 O b'lemma[:2]:ha'
301 +1.450388 Supp b'lemma[:1]:I'
302 +1.442333 Gtype b'-1:lemma:vector'
303 +1.442007 Med b'+1:lemma:g/l'
304 +1.435558 Supp b'lemma[:2]:ni'
305 +1.434864 O b'lemma:ml'
306 +1.434864 O b'lemma[:2]:ml'
307 +1.431207 Technique b'symb'
308 +1.424801 O b'lemma[:1]:r'
309 +1.421918 Supp b'+2:lemma:mid-log'
310 +1.413708 O b'lemma[:1]:s'
311 +1.383477 O b'-2:postag:FW'
312 +1.383257 O b'-1:lemma:into'
313 +1.382919 O b'lemma[:2]:ga'
314 +1.354991 O b'lemma[:1]:c'
315 +1.346926 Med b'+1:lemma:minimal'
316 +1.336028 Gtype b'lemma[:1]:f'
317 +1.335008 O b'+1:postag:VBN'
318 +1.329569 pH b'lemma:ph5'
319 +1.329569 pH b'+1:lemma:.5'
320 +1.326705 Gtype b'-2:lemma:affyexp'
321 +
322 +
323 +Top negative:
324 +-0.155596 Supp b'-1:lemma:10'
325 +-0.157470 O b'+2:lemma:ph'
326 +-0.158631 Supp b'lemma[:1]:s'
327 +-0.159053 O b'-1:lemma:from'
328 +-0.160270 Supp b'-1:lemma:-lrb-'
329 +-0.160524 Gtype b'+2:postag:CD'
330 +-0.161112 Air b'postag[:1]:N'
331 +-0.161112 Air b'postag[:2]:NN'
332 +-0.161768 pH b'postag:NN'
333 +-0.161818 OD b'symb'
334 +-0.167771 Supp b'-2:lemma:treat'
335 +-0.176292 O b'lemma:co2'
336 +-0.177197 O b'-1:lemma:g/l'
337 +-0.181198 O b'-1:lemma:1m'
338 +-0.181198 O b'+2:lemma:7.6'
339 +-0.186508 O b'-2:lemma:genome'
340 +-0.193058 Supp b'+2:lemma:-rrb-'
341 +-0.194306 Gtype b'lemma[:1]:h'
342 +-0.194450 Supp b'-1:postag:-LRB-'
343 +-0.195692 Anti b'symb'
344 +-0.197135 O b'-2:lemma:of'
345 +-0.201423 O b'-2:lemma:-lrb-'
346 +-0.201718 O b'-1:lemma:0.2'
347 +-0.207096 Temp b'postag[:1]:N'
348 +-0.207096 Temp b'postag[:2]:NN'
349 +-0.208637 Temp b'-2:postag:NN'
350 +-0.210816 Technique b'-2:postag:NN'
351 +-0.212139 O b'lemma:in'
352 +-0.215720 Supp b'+2:postag:-RRB-'
353 +-0.227268 O b'+2:lemma:-rrb-'
354 +-0.229192 O b'lemma:anaerobic'
355 +-0.229556 O b'+1:lemma:.'
356 +-0.229556 O b'+1:postag:.'
357 +-0.229949 Vess b'hUpper'
358 +-0.229949 Vess b'hLower'
359 +-0.232026 O b'lemma[:1]:n'
360 +-0.239837 O b'lemma:30'
361 +-0.244559 O b'lemma:anaerobically'
362 +-0.245601 Phase b'+1:postag:NN'
363 +-0.249673 O b'-2:lemma:aerobically'
364 +-0.251037 O b'-2:lemma:a'
365 +-0.251989 O b'lemma[:1]:d'
366 +-0.252202 Supp b'lemma[:1]:a'
367 +-0.253068 O b'-2:postag:-LRB-'
368 +-0.258564 O b'+1:lemma:mm'
369 +-0.260363 O b'-1:lemma:fresh'
370 +-0.261060 O b'-2:lemma:anaerobically'
371 +-0.262340 Air b'-1:lemma:or'
372 +-0.263528 pH b'postag[:1]:N'
373 +-0.263528 pH b'postag[:2]:NN'
374 +-0.265074 O b'-2:postag::'
375 +-0.265217 O b'-2:lemma:IP'
376 +-0.269304 Med b'+1:postag:NN'
377 +-0.285401 O b'-2:lemma:rpob'
378 +-0.295781 Supp b'lemma[:2]:an'
379 +-0.296045 O b'lemma[:2]:ar'
380 +-0.296575 O b'-2:lemma:fresh'
381 +-0.301913 O b'lemma:media'
382 +-0.307515 O b'lemma[:2]:gl'
383 +-0.308163 O b'-1:postag:IN'
384 +-0.311903 O b'-1:lemma:mm'
385 +-0.312159 O b'-2:lemma:nh4cl'
386 +-0.322974 Anti b'+2:postag:JJ'
387 +-0.325164 Supp b'-2:postag:JJ'
388 +-0.328654 Supp b'-1:postag:NNP'
389 +-0.332535 O b'-1:lemma:of'
390 +-0.341258 O b'lemma:methanol'
391 +-0.341258 O b'-2:lemma:dissolve'
392 +-0.346421 Supp b'+2:lemma:glucose'
393 +-0.353210 Technique b'-1:postag::'
394 +-0.357081 O b'+1:lemma:supplement'
395 +-0.362827 O b'lemma:2h'
396 +-0.362827 O b'-1:lemma:additional'
397 +-0.362827 O b'lemma[:2]:2h'
398 +-0.373258 O b'-1:lemma:final'
399 +-0.373355 O b'+2:lemma:at'
400 +-0.376466 Supp b'+1:postag:NNS'
401 +-0.379434 O b'-1:lemma:IP'
402 +-0.379949 O b'lemma[:1]:0'
403 +-0.380927 O b'-2:lemma:phase'
404 +-0.384075 O b'lemma:glucose'
405 +-0.384929 Med b'-1:postag:NN'
406 +-0.387171 O b'lemma[:1]:k'
407 +-0.388425 O b'lemma:\xe2\x88\x86'
408 +-0.388425 O b'lemma[:1]:\xe2\x88\x86'
409 +-0.390156 Temp b'postag:NN'
410 +-0.390965 O b'+1:lemma:g/l'
411 +-0.410576 Med b'-1:postag:CD'
412 +-0.411791 O b'+1:lemma:1m'
413 +-0.411791 O b'-2:lemma:vol'
414 +-0.418447 Gtype b'+1:lemma:-rrb-'
415 +-0.418498 O b'-1:lemma:rpob'
416 +-0.422355 O b'-2:lemma:until'
417 +-0.434852 Technique b'postag:NN'
418 +-0.435566 Supp b'postag:JJ'
419 +-0.439076 Supp b'postag[:1]:J'
420 +-0.439076 Supp b'postag[:2]:JJ'
421 +-0.442452 O b'-1:lemma:30'
422 +-0.455337 Med b'+2:postag:VBN'
423 +-0.455366 Air b'postag:NN'
424 +-0.461874 O b'+2:lemma:fnr'
425 +-0.467110 O b'-2:lemma:supplement'
426 +-0.468087 O b'-1:lemma:dissolve'
427 +-0.468087 O b'+1:lemma:methanol'
428 +-0.486893 Gtype b'-2:lemma:\xe2\x88\x86'
429 +-0.493653 O b'lemma[:1]:L'
430 +-0.497715 O b'+2:lemma:10'
431 +-0.498822 O b'lemma:of'
432 +-0.498822 O b'lemma[:2]:of'
433 +-0.507436 O b'lemma[:2]:0.'
434 +-0.510139 Med b'+1:postag:IN'
435 +-0.513295 O b'+2:lemma:tag'
436 +-0.514818 O b'+1:lemma:+'
437 +-0.532828 O b'-1:lemma:\xe2\x88\x86'
438 +-0.536622 O b'+2:lemma:mid-log'
439 +-0.538561 Supp b'+1:lemma:-lrb-'
440 +-0.543996 O b'+2:lemma:.'
441 +-0.543996 O b'+2:postag:.'
442 +-0.557797 Supp b'+1:postag:-LRB-'
443 +-0.568152 O b'+2:lemma:a'
444 +-0.585265 O b'-2:lemma:pahse'
445 +-0.590510 O b'-2:lemma:glucose'
446 +-0.590875 O b'+1:lemma:until'
447 +-0.605869 O b'-1:lemma:nsrr'
448 +-0.609882 O b'-2:lemma:dpd'
449 +-0.616188 Phase b'-1:postag:JJ'
450 +-0.618132 Air b'-1:postag:JJ'
451 +-0.624517 O b'-2:postag:SYM'
452 +-0.626955 Gtype b'lemma[:1]:c'
453 +-0.627961 Anti b'+2:lemma:polyclonal'
454 +-0.635434 Med b'symb'
455 +-0.635949 Phase b'postag[:1]:J'
456 +-0.635949 Phase b'postag[:2]:JJ'
457 +-0.636258 OD b'hUpper'
458 +-0.636258 OD b'hLower'
459 +-0.636750 O b'lemma[:2]:ri'
460 +-0.647062 O b'+1:postag:IN'
461 +-0.654730 O b'-2:lemma:2'
462 +-0.655657 Phase b'postag:JJ'
463 +-0.655964 Supp b'symb'
464 +-0.660356 O b'+2:lemma:+'
465 +-0.661231 O b'+1:lemma:2.0'
466 +-0.662145 O b'-2:lemma:media'
467 +-0.666872 O b'+2:lemma:b'
468 +-0.675694 Anti b'+1:lemma:anti-fur'
469 +-0.688911 O b'-1:lemma:co2'
470 +-0.690102 Gtype b'postag[:1]:V'
471 +-0.690102 Gtype b'postag[:2]:VB'
472 +-0.694375 Gtype b'lemma[:1]:r'
473 +-0.706144 O b'lemma[:2]:me'
474 +-0.718038 O b'lemma:mid-log'
475 +-0.724228 O b'lemma:rifampicin'
476 +-0.735529 O b'-2:lemma::'
477 +-0.743194 Med b'-2:postag:VBN'
478 +-0.786989 Supp b'+1:lemma:,'
479 +-0.786989 Supp b'+1:postag:,'
480 +-0.789461 O b'-2:postag:RB'
481 +-0.815790 Agit b'hUpper'
482 +-0.815790 Agit b'hLower'
483 +-0.824637 O b'+2:postag:-RRB-'
484 +-0.828411 O b'-1:lemma:cra'
485 +-0.834312 O b'+1:lemma:at'
486 +-0.864833 O b'-1:lemma:vol'
487 +-0.864833 O b'-2:lemma:1/100'
488 +-0.864833 O b'+2:lemma:1m'
489 +-0.896137 O b'-2:postag:DT'
490 +-0.903408 Phase b'hUpper'
491 +-0.903408 Phase b'hLower'
492 +-0.954592 Gtype b'lemma[:1]:a'
493 +-0.958219 Gtype b'+2:lemma:cra'
494 +-0.959188 O b'-1:lemma:ph'
495 +-1.006021 O b'-2:lemma:0.3'
496 +-1.010964 O b'lemma[:2]:ae'
497 +-1.026041 Med b'-2:lemma:grow'
498 +-1.037767 O b'-1:postag::'
499 +-1.062839 O b'-1:lemma:sample'
500 +-1.105188 Supp b'lemma[:1]:c'
501 +-1.107601 OD b'+1:postag:NN'
502 +-1.109127 Agit b'symb'
503 +-1.130040 Supp b'+2:postag:CD'
504 +-1.149221 O b'lemma:0.3'
505 +-1.151210 O b'-1:lemma:1'
506 +-1.172944 O b'lemma[:2]:30'
507 +-1.211332 O b'+2:lemma:then'
508 +-1.263528 Supp b'+2:lemma:fructose'
509 +-1.346921 O b'+2:lemma:rifampicin'
510 +-1.394447 O b'postag:VBP'
511 +-1.459260 Supp b'+2:lemma:1'
512 +-1.504864 Anti b'postag:NNP'
513 +-1.616873 O b'-1:lemma:2'
514 +-1.627873 Supp b'+2:lemma:2'
515 +-1.790611 O b'-1:postag:VBG'
516 +-1.925758 O b'-2:lemma:rifampicin'
517 +-2.014829 O b'+1:lemma:in'
518 +-2.028945 OD b'+2:lemma:aerobically'
519 +-2.287808 O b'+1:lemma:1'
520 +-2.798978 O b'+1:lemma:2'
521 +-2.902688 O b'lemma[:2]:fl'
522 +-3.487913 O b'-1:lemma::'
523 +-4.880856 O b'-1:lemma:_'
524 +
1 +********** TRAINING AND TESTING REPORT **********
2 +Training file: training-data-set-70.txt
3 +
4 +best params:{'c1': 0.07838162718183349, 'c2': 0.05887606797757319}
5 +best CV score:0.87030448615518
6 +model size: 0.16M
7 +
8 +Flat F1: 0.7823412170507693
9 + precision recall f1-score support
10 +
11 + OD 1.000 0.818 0.900 22
12 + pH 1.000 1.000 1.000 8
13 + Technique 1.000 0.913 0.955 23
14 + Med 1.000 0.925 0.961 53
15 + Temp 0.917 0.759 0.830 29
16 + Vess 1.000 1.000 1.000 1
17 + Agit 0.000 0.000 0.000 0
18 + Phase 0.875 0.933 0.903 15
19 + Air 0.556 0.362 0.439 69
20 + Anti 1.000 1.000 1.000 11
21 + Strain 0.000 0.000 0.000 1
22 + Gtype 0.849 0.859 0.854 85
23 + Substrain 0.000 0.000 0.000 0
24 + Supp 0.686 0.813 0.744 134
25 + Gversion 0.000 0.000 0.000 0
26 +
27 +avg / total 0.798 0.778 0.782 451
28 +
29 +
30 +Top likely transitions:
31 +Temp -> Temp 4.911417
32 +OD -> OD 4.541853
33 +Med -> Med 4.405893
34 +Supp -> Supp 4.305413
35 +Agit -> Agit 4.272648
36 +Anti -> Anti 3.837258
37 +O -> O 3.787660
38 +Gtype -> Gtype 3.752687
39 +Gversion -> Gversion 3.628661
40 +Air -> Air 3.521236
41 +Phase -> Phase 3.320009
42 +Technique -> Technique 2.966375
43 +pH -> pH 2.362986
44 +Substrain -> Gtype 1.121355
45 +O -> Technique 0.896285
46 +Gtype -> Supp 0.860650
47 +O -> Supp 0.689540
48 +O -> Gtype 0.595827
49 +Gtype -> Air 0.530941
50 +Technique -> Air 0.501834
51 +Air -> O 0.490782
52 +Gtype -> pH 0.392258
53 +Temp -> O 0.195983
54 +Med -> O 0.164949
55 +Supp -> O 0.068384
56 +O -> Temp 0.032340
57 +O -> pH 0.007653
58 +O -> Strain -0.003076
59 +Gversion -> Gtype -0.013131
60 +Gversion -> Air -0.016755
61 +Supp -> Anti -0.028664
62 +Vess -> O -0.036195
63 +Agit -> Supp -0.042183
64 +Gtype -> Phase -0.047540
65 +Supp -> Technique -0.060640
66 +Supp -> pH -0.062714
67 +O -> Phase -0.068478
68 +Technique -> OD -0.069783
69 +Supp -> OD -0.073453
70 +O -> Agit -0.079631
71 +Phase -> Air -0.079705
72 +Anti -> Med -0.089699
73 +Anti -> Gtype -0.095119
74 +Gtype -> Gversion -0.100232
75 +Temp -> Air -0.116744
76 +OD -> O -0.129801
77 +Phase -> Med -0.135688
78 +Air -> Agit -0.138019
79 +Phase -> O -0.139580
80 +pH -> Supp -0.141124
81 +
82 +
83 +Top unlikely transitions:
84 +Supp -> Technique -0.060640
85 +Supp -> pH -0.062714
86 +O -> Phase -0.068478
87 +Technique -> OD -0.069783
88 +Supp -> OD -0.073453
89 +O -> Agit -0.079631
90 +Phase -> Air -0.079705
91 +Anti -> Med -0.089699
92 +Anti -> Gtype -0.095119
93 +Gtype -> Gversion -0.100232
94 +Temp -> Air -0.116744
95 +OD -> O -0.129801
96 +Phase -> Med -0.135688
97 +Air -> Agit -0.138019
98 +Phase -> O -0.139580
99 +pH -> Supp -0.141124
100 +Air -> Temp -0.146478
101 +OD -> Supp -0.177929
102 +Gversion -> O -0.195235
103 +Med -> Air -0.218392
104 +Technique -> Supp -0.218928
105 +O -> OD -0.220859
106 +Anti -> O -0.227867
107 +Anti -> Supp -0.241653
108 +Temp -> Med -0.265069
109 +Air -> Gtype -0.270959
110 +Gversion -> Supp -0.271472
111 +Phase -> OD -0.278093
112 +O -> Med -0.292957
113 +Supp -> Phase -0.310262
114 +Agit -> O -0.315551
115 +Technique -> O -0.317964
116 +Agit -> Air -0.327189
117 +Air -> Phase -0.332685
118 +Supp -> Gtype -0.386391
119 +Gtype -> Technique -0.394676
120 +Supp -> Air -0.403362
121 +Gtype -> Anti -0.428993
122 +Phase -> Supp -0.440098
123 +Air -> Supp -0.449326
124 +Technique -> pH -0.462944
125 +Air -> Med -0.469469
126 +OD -> Air -0.509283
127 +Gtype -> Med -0.621751
128 +Supp -> Med -0.659958
129 +Gtype -> O -0.671881
130 +O -> Air -0.712510
131 +Technique -> Gtype -1.006943
132 +Substrain -> O -1.077790
133 +Med -> Supp -1.486790
134 +
135 +
136 +Top positive:
137 +4.124249 Anti b'-2:lemma:antibody'
138 +3.641687 Air b'word:Aerobic'
139 +3.497811 O b'-2:lemma:_'
140 +3.258366 Air b'lemma:anaerobic'
141 +3.145552 O b'lemma:_'
142 +3.145552 O b'word:_'
143 +3.024671 Gtype b'-2:lemma:genotype/variation'
144 +2.928771 Air b'postag:RB'
145 +2.783339 Technique b'word:ChIP-Seq'
146 +2.697083 O b'lemma:2'
147 +2.697083 O b'word:2'
148 +2.670728 Phase b'-2:lemma:phase'
149 +2.637125 O b'postag::'
150 +2.618748 O b'word:Cra'
151 +2.610104 Technique b'word:ChIP-exo'
152 +2.571728 O b'lemma:1'
153 +2.571728 O b'word:1'
154 +2.511177 O b'postag:IN'
155 +2.472685 Gtype b'-2:lemma:genotype'
156 +2.396386 Supp b'lemma:pq'
157 +2.396386 Supp b'word:PQ'
158 +2.337247 O b'lemma:.'
159 +2.337247 O b'postag:.'
160 +2.337247 O b'word:.'
161 +2.332296 Technique b'lemma:rna-seq'
162 +2.320371 Gtype b'word:WT'
163 +2.309222 Gtype b'lemma:wt'
164 +2.274976 O b'-2:lemma:flagtag'
165 +2.273949 Technique b'word:ChIPSeq'
166 +2.258997 Technique b'lemma:ChIP-exo'
167 +2.199264 Air b'word:Anaerobic'
168 +2.142635 O b'-1:lemma:ChIP-exo'
169 +2.096906 Supp b'lemma:nh4cl'
170 +2.072818 Phase b'lemma:mid-log'
171 +2.072818 Phase b'word:mid-log'
172 +2.053563 Gtype b'lemma:wild-type'
173 +2.044128 O b'lemma:rpob'
174 +2.044128 O b'word:RpoB'
175 +2.020270 O b'postag:VBN'
176 +2.020107 O b'lemma:3'
177 +2.020107 O b'word:3'
178 +2.011865 Supp b'-1:word:Cra'
179 +1.983566 Gtype b'lemma:type'
180 +1.983566 Gtype b'word:type'
181 +1.975796 O b'lemma:-'
182 +1.975796 O b'word:-'
183 +1.959537 O b'-2:lemma:medium'
184 +1.955236 Gtype b'-2:lemma:affyexp'
185 +1.942868 Gtype b'lemma:\xce\xb4cra'
186 +1.939375 Gtype b'word:\xce\x94cra'
187 +1.938173 Technique b'lemma:chipseq'
188 +1.890561 Supp b'lemma:Iron'
189 +1.890561 Supp b'word:Iron'
190 +1.890561 Supp b'+1:word:Deficient'
191 +1.890561 Supp b'-2:lemma:Anaerobic'
192 +1.874088 Supp b'lemma:acetate'
193 +1.874088 Supp b'word:acetate'
194 +1.819674 pH b'+1:postag:CD'
195 +1.788257 O b'lemma:b'
196 +1.788257 O b'word:B'
197 +1.782368 O b'word:A'
198 +1.774988 O b'+2:lemma:\xc2\xb0c'
199 +1.768402 O b'+2:postag:JJ'
200 +1.762269 Gtype b'lemma:\xe2\x88\x86'
201 +1.762269 Gtype b'word:\xe2\x88\x86'
202 +1.746520 Med b'lemma:MOPS'
203 +1.746520 Med b'word:MOPS'
204 +1.723641 Supp b'+2:lemma:iptg'
205 +1.722034 Air b'lemma:Aerobic'
206 +1.705697 O b'-1:word:Aerobic'
207 +1.703316 Anti b'+2:lemma:antibody'
208 +1.702891 O b'postag:CC'
209 +1.699990 Supp b'-1:lemma:with'
210 +1.699990 Supp b'-1:word:with'
211 +1.688533 Supp b'lemma:glucose'
212 +1.688533 Supp b'word:glucose'
213 +1.674465 Supp b'lemma:no3'
214 +1.674465 Supp b'word:NO3'
215 +1.666479 Supp b'lemma:nacl'
216 +1.666479 Supp b'word:NaCl'
217 +1.641938 Supp b'lemma:arginine'
218 +1.638237 Substrain b'lemma:mg1655'
219 +1.638237 Substrain b'word:MG1655'
220 +1.637725 Supp b'+1:lemma:\xc2\xb5m'
221 +1.637725 Supp b'+1:word:\xc2\xb5M'
222 +1.636903 Med b'isUpper'
223 +1.611838 O b'+1:postag:NNP'
224 +1.611343 O b'lemma:a'
225 +1.607795 Technique b'+2:lemma:ph5'
226 +1.602510 Air b'word:anaerobic'
227 +1.599293 O b'+1:postag:RB'
228 +1.590988 Gtype b'-1:lemma:\xe2\x88\x86'
229 +1.590988 Gtype b'-1:word:\xe2\x88\x86'
230 +1.589631 Gtype b'-2:postag:DT'
231 +1.583352 Med b'+2:postag:CC'
232 +1.579250 O b'isLower'
233 +1.578231 Gtype b'+2:lemma:glucose'
234 +1.552685 Gtype b'+1:lemma:type'
235 +1.552685 Gtype b'+1:word:type'
236 +1.528892 Substrain b'-2:lemma:substr'
237 +1.515887 Supp b'lemma:nitrate'
238 +1.515887 Supp b'word:nitrate'
239 +1.503178 O b'-1:lemma:tag'
240 +1.473485 Gtype b'lemma:\xce\xb4soxr'
241 +1.473485 Gtype b'word:\xce\x94soxR'
242 +1.456677 O b'-1:word:tag'
243 +1.455346 Gtype b'lemma:flag-tag'
244 +1.455346 Gtype b'-1:lemma:c-terminal'
245 +1.455346 Gtype b'word:Flag-tag'
246 +1.455346 Gtype b'-1:word:C-terminal'
247 +1.451462 Technique b'word:RNA-Seq'
248 +1.434331 Supp b'-1:lemma:Cra'
249 +1.432730 O b'+1:word:ChIP-Seq'
250 +1.426572 O b'-1:lemma:anaerobic'
251 +1.425108 Gtype b'lemma:\xce\xb4fur'
252 +1.425108 Gtype b'word:\xce\x94fur'
253 +1.425025 Technique b'lemma:rnaseq'
254 +1.425025 Technique b'word:RNASeq'
255 +1.416012 O b'postag:VBG'
256 +1.415750 Gversion b'-2:lemma:nc'
257 +1.408974 Gversion b'lemma:chip-seq'
258 +1.379843 O b'+1:lemma:arca-8myc'
259 +1.379843 O b'+1:word:ArcA-8myc'
260 +1.374672 O b'lemma:with'
261 +1.374672 O b'word:with'
262 +1.374577 O b'lemma:Cra'
263 +1.373182 Gversion b'word:ChIP-Seq'
264 +1.372454 Gtype b'-2:lemma:delta'
265 +1.369100 O b'+2:lemma:cra'
266 +1.367463 O b'-1:lemma:0.3'
267 +1.367463 O b'-1:word:0.3'
268 +1.366576 O b'-2:lemma:myc'
269 +1.365144 O b'lemma:ompr'
270 +1.365144 O b'word:OmpR'
271 +1.358116 Supp b'-1:postag:CC'
272 +1.351356 Gtype b'postag:JJ'
273 +1.340661 Strain b'+1:lemma:substr'
274 +1.340661 Strain b'+1:word:substr'
275 +1.340661 Strain b'-2:lemma:str'
276 +1.337333 Gtype b'+1:lemma:with'
277 +1.337333 Gtype b'+1:word:with'
278 +1.331237 Supp b'-2:lemma:agent'
279 +1.319017 Air b'+1:postag:IN'
280 +1.313717 O b'lemma:harbor'
281 +1.313717 O b'word:harboring'
282 +1.311281 Technique b'-1:lemma:chip-exo'
283 +1.310088 Air b'lemma:aerobic'
284 +1.301908 Temp b'isNumber'
285 +1.296057 O b'lemma:chip'
286 +1.294021 Med b'+2:lemma:b2'
287 +1.290231 O b'+1:lemma:pq'
288 +1.290231 O b'+1:word:PQ'
289 +1.288968 Supp b'-2:lemma:media'
290 +1.285093 O b'-1:lemma:lb'
291 +1.285093 O b'-1:word:LB'
292 +1.284396 O b'-1:lemma:glucose'
293 +1.284396 O b'-1:word:glucose'
294 +1.281083 O b'-1:lemma:media'
295 +1.281083 O b'-1:word:media'
296 +1.280357 Med b'lemma:lb'
297 +1.280357 Med b'word:LB'
298 +1.277491 O b'+2:lemma:70'
299 +1.276083 Supp b'lemma:Leu'
300 +1.276083 Supp b'word:Leu'
301 +1.276083 Supp b'-2:lemma:Lrp'
302 +1.266271 Supp b'-2:lemma:induce'
303 +1.263127 Phase b'lemma:exponential'
304 +1.263127 Phase b'word:exponential'
305 +1.263127 Phase b'lemma:stationary'
306 +1.263127 Phase b'word:stationary'
307 +1.250254 Air b'-1:lemma:ChIP-Seq'
308 +1.250254 Air b'-1:word:ChIP-Seq'
309 +1.245392 Air b'-2:lemma:IP'
310 +1.237417 OD b'postag:CD'
311 +1.227048 Supp b'-1:lemma:+'
312 +1.227048 Supp b'-1:word:+'
313 +1.225001 Gtype b'-1:postag:VBG'
314 +1.222768 O b'lemma:Custom'
315 +1.222768 O b'word:Custom'
316 +1.222690 Strain b'lemma:k-12'
317 +1.222690 Strain b'word:K-12'
318 +1.211762 Gversion b'-2:lemma:build'
319 +1.207468 Supp b'+1:lemma:1'
320 +1.207468 Supp b'+1:word:1'
321 +1.200932 Air b'-1:lemma:-'
322 +1.200932 Air b'-1:word:-'
323 +1.200071 Gversion b'lemma:nc'
324 +1.200071 Gversion b'word:NC'
325 +1.192535 O b'+2:lemma:fructose'
326 +1.191869 Gtype b'postag:NN'
327 +1.189235 pH b'lemma:ph5'
328 +1.189235 pH b'+1:lemma:.5'
329 +1.189235 pH b'word:pH5'
330 +1.189235 pH b'+1:word:.5'
331 +1.180856 O b'lemma:at'
332 +1.174453 Supp b'lemma:rifampicin'
333 +1.174453 Supp b'word:rifampicin'
334 +1.166521 O b'lemma:argr'
335 +1.166521 O b'word:ArgR'
336 +1.162681 OD b'-1:postag:IN'
337 +
338 +
339 +Top negative:
340 +-0.311652 O b'+1:word:supplemented'
341 +-0.312640 O b'-2:lemma:control'
342 +-0.323532 O b'lemma:minimal'
343 +-0.323532 O b'word:minimal'
344 +-0.323844 O b'-1:lemma:ph'
345 +-0.323844 O b'-1:word:pH'
346 +-0.324956 O b'lemma:37'
347 +-0.324956 O b'word:37'
348 +-0.328081 O b'-1:lemma:cra'
349 +-0.328436 O b'lemma:methanol'
350 +-0.328436 O b'word:methanol'
351 +-0.328436 O b'-2:lemma:dissolve'
352 +-0.332352 O b'-1:lemma:37'
353 +-0.332352 O b'-1:word:37'
354 +-0.337835 Anti b'+1:lemma:anti-fur'
355 +-0.337835 Anti b'+1:word:anti-Fur'
356 +-0.338497 O b'+1:lemma:1m'
357 +-0.338497 O b'+1:word:1M'
358 +-0.338497 O b'-2:lemma:vol'
359 +-0.340286 O b'+2:lemma:add'
360 +-0.348253 Supp b'-1:lemma:10'
361 +-0.348253 Supp b'-1:word:10'
362 +-0.355580 O b'-2:lemma:10'
363 +-0.356547 O b'-2:lemma:minimal'
364 +-0.359288 O b'-1:lemma:ml'
365 +-0.359288 O b'-1:word:ml'
366 +-0.361424 O b'-1:lemma:dissolve'
367 +-0.361424 O b'+1:lemma:methanol'
368 +-0.361424 O b'-1:word:dissolved'
369 +-0.361424 O b'+1:word:methanol'
370 +-0.367262 Substrain b'isLower'
371 +-0.367703 Supp b'postag:CC'
372 +-0.367998 O b'-1:lemma:co2'
373 +-0.367998 O b'-1:word:CO2'
374 +-0.370364 O b'+2:lemma:-rrb-'
375 +-0.374065 O b'-2:lemma:genome'
376 +-0.375208 Supp b'-2:lemma:.'
377 +-0.375208 Supp b'-2:postag:.'
378 +-0.378770 O b'+2:lemma:at'
379 +-0.385912 O b'-1:lemma:fresh'
380 +-0.385912 O b'-1:word:fresh'
381 +-0.389093 Strain b'isLower'
382 +-0.389489 Temp b'-2:postag:NN'
383 +-0.390296 O b'lemma:aerobic'
384 +-0.391758 O b'-2:lemma:aerobically'
385 +-0.393460 O b'-1:lemma:2'
386 +-0.393460 O b'-1:word:2'
387 +-0.397723 O b'+1:lemma:mm'
388 +-0.397723 O b'+1:word:mM'
389 +-0.398729 O b'-1:lemma:rpob'
390 +-0.398729 O b'-1:word:RpoB'
391 +-0.401643 O b'-1:lemma:mm'
392 +-0.401643 O b'-1:word:mM'
393 +-0.402690 Supp b'-2:lemma:grow'
394 +-0.403521 O b'-2:lemma:pahse'
395 +-0.404104 O b'-1:lemma:grow'
396 +-0.411213 O b'lemma:nitrogen'
397 +-0.411213 O b'word:nitrogen'
398 +-0.411373 O b'+1:lemma:+'
399 +-0.411373 O b'+1:word:+'
400 +-0.415137 Gtype b'-2:postag:CD'
401 +-0.424176 O b'+1:word:ChIP-exo'
402 +-0.437712 O b'-2:postag:SYM'
403 +-0.445165 Supp b'+1:lemma:,'
404 +-0.445165 Supp b'+1:postag:,'
405 +-0.445165 Supp b'+1:word:,'
406 +-0.453707 O b'lemma:fructose'
407 +-0.453707 O b'word:fructose'
408 +-0.457985 O b'-2:lemma:nh4cl'
409 +-0.460272 O b'lemma:anaerobically'
410 +-0.460272 O b'word:anaerobically'
411 +-0.460899 O b'+2:lemma:reference'
412 +-0.461152 O b'lemma:2h'
413 +-0.461152 O b'-1:lemma:additional'
414 +-0.461152 O b'word:2h'
415 +-0.461152 O b'-1:word:additional'
416 +-0.465651 O b'-2:lemma:rpob'
417 +-0.467210 O b'lemma:anaerobic'
418 +-0.482841 Supp b'+1:lemma:-lrb-'
419 +-0.482841 Supp b'+1:word:-LRB-'
420 +-0.487123 O b'-1:lemma:\xe2\x88\x86'
421 +-0.487123 O b'-1:word:\xe2\x88\x86'
422 +-0.490214 O b'lemma:aerobically'
423 +-0.490214 O b'word:aerobically'
424 +-0.493650 O b'lemma:of'
425 +-0.493650 O b'word:of'
426 +-0.498710 O b'+2:lemma:mid-log'
427 +-0.499349 O b'-1:lemma:30'
428 +-0.499349 O b'-1:word:30'
429 +-0.499545 O b'+2:lemma:fnr'
430 +-0.506778 Supp b'+1:postag:-LRB-'
431 +-0.511585 O b'word:ChIP-exo'
432 +-0.513406 O b'-1:lemma:chip-exo'
433 +-0.517504 Supp b'+2:lemma:glucose'
434 +-0.518174 O b'+1:lemma:g/l'
435 +-0.518174 O b'+1:word:g/L'
436 +-0.520916 O b'-2:lemma:anaerobically'
437 +-0.530292 O b'+2:postag:-RRB-'
438 +-0.531458 Med b'-1:postag:NN'
439 +-0.532473 O b'lemma:nh4cl'
440 +-0.535293 O b'-2:lemma:IP'
441 +-0.538050 Gtype b'-2:lemma:\xe2\x88\x86'
442 +-0.542753 pH b'isUpper'
443 +-0.559582 O b'lemma:glucose'
444 +-0.559582 O b'word:glucose'
445 +-0.563071 O b'-2:lemma:dpd'
446 +-0.568245 O b'+1:lemma:until'
447 +-0.568245 O b'+1:word:until'
448 +-0.569455 Supp b'-2:postag:NNS'
449 +-0.569843 Air b'+1:postag:JJ'
450 +-0.573996 O b'lemma:\xce\xb4fur'
451 +-0.573996 O b'word:\xce\x94fur'
452 +-0.574100 O b'+1:postag:IN'
453 +-0.583141 O b'+1:lemma:in'
454 +-0.583141 O b'+1:word:in'
455 +-0.584211 O b'lemma:30'
456 +-0.584211 O b'word:30'
457 +-0.587080 Agit b'isUpper'
458 +-0.591540 Supp b'-1:postag:NNP'
459 +-0.595806 O b'-1:postag::'
460 +-0.599170 O b'-2:lemma:phase'
461 +-0.601235 O b'+2:lemma:b'
462 +-0.602048 O b'+2:lemma:then'
463 +-0.608335 O b'-1:lemma:1'
464 +-0.608335 O b'-1:word:1'
465 +-0.608384 Anti b'isUpper'
466 +-0.617575 O b'-2:lemma:until'
467 +-0.617630 O b'lemma:mid-log'
468 +-0.617630 O b'word:mid-log'
469 +-0.621155 O b'-2:lemma:fresh'
470 +-0.626295 O b'-2:lemma:media'
471 +-0.635986 Phase b'isUpper'
472 +-0.639041 O b'+2:lemma:250'
473 +-0.644310 Supp b'-2:postag:JJ'
474 +-0.653651 O b'+1:lemma:at'
475 +-0.653651 O b'+1:word:at'
476 +-0.658042 Supp b'-2:lemma:treat'
477 +-0.664028 Med b'+2:postag:VBN'
478 +-0.670366 O b'-1:lemma:nsrr'
479 +-0.670366 O b'-1:word:NsrR'
480 +-0.670591 O b'-2:postag:DT'
481 +-0.674143 O b'-1:lemma:vol'
482 +-0.674143 O b'-1:word:vol'
483 +-0.674143 O b'-2:lemma:1/100'
484 +-0.674143 O b'+2:lemma:1m'
485 +-0.698342 Med b'-2:postag:VBN'
486 +-0.715097 O b'+1:lemma:2.0'
487 +-0.715097 O b'+1:word:2.0'
488 +-0.721531 pH b'isLower'
489 +-0.742327 O b'-1:lemma:sample'
490 +-0.752266 O b'lemma:nitrate'
491 +-0.752266 O b'word:nitrate'
492 +-0.767047 O b'-2:lemma::'
493 +-0.787116 O b'lemma:rifampicin'
494 +-0.787116 O b'word:rifampicin'
495 +-0.792673 O b'+2:lemma:rifampicin'
496 +-0.797812 O b'+1:postag:VBG'
497 +-0.816000 O b'lemma:wt'
498 +-0.816563 O b'-1:lemma:IP'
499 +-0.816563 O b'-1:word:IP'
500 +-0.834409 O b'lemma:0.3'
501 +-0.834409 O b'word:0.3'
502 +-0.837034 Supp b'+2:postag:CD'
503 +-0.838689 O b'postag:VBP'
504 +-0.840231 Technique b'isNumber'
505 +-0.846346 Air b'postag:NN'
506 +-0.850091 Gtype b'postag:VBG'
507 +-0.854432 O b'-2:postag:RB'
508 +-0.883296 Gtype b'isLower'
509 +-0.888784 O b'lemma:media'
510 +-0.888784 O b'word:media'
511 +-0.895754 Gversion b'isLower'
512 +-0.904522 Temp b'postag:NN'
513 +-0.924495 O b'+2:lemma:+'
514 +-0.935152 Gtype b'+2:lemma:cra'
515 +-0.943613 Gtype b'isNumber'
516 +-0.975059 O b'postag:RB'
517 +-1.021612 Med b'-2:lemma:grow'
518 +-1.024421 Anti b'+2:lemma:polyclonal'
519 +-1.031484 O b'-2:lemma:rifampicin'
520 +-1.059929 O b'+1:lemma:1'
521 +-1.059929 O b'+1:word:1'
522 +-1.084068 Supp b'+2:lemma:fructose'
523 +-1.123084 Technique b'isLower'
524 +-1.132053 O b'-2:lemma:0.3'
525 +-1.168749 Supp b'+2:lemma:1'
526 +-1.183772 OD b'+1:postag:NN'
527 +-1.263288 OD b'+2:lemma:aerobically'
528 +-1.270174 Supp b'+2:lemma:2'
529 +-1.329893 Anti b'postag:NNP'
530 +-1.366300 O b'+1:lemma:2'
531 +-1.366300 O b'+1:word:2'
532 +-1.389405 Phase b'-1:postag:JJ'
533 +-1.607302 Supp b'postag:JJ'
534 +-1.676382 O b'-1:postag:VBG'
535 +-1.773723 O b'-1:lemma::'
536 +-1.773723 O b'-1:word::'
537 +-1.857508 Phase b'postag:JJ'
538 +-1.945074 O b'-1:lemma:_'
539 +-1.945074 O b'-1:word:_'
540 +
1 +********** TRAINING AND TESTING REPORT **********
2 +Training file: training-data-set-70.txt
3 +
4 +best params:{'c1': 0.04223966754804299, 'c2': 0.014836666503726496}
5 +best CV score:0.879077327666518
6 +model size: 0.17M
7 +
8 +Flat F1: 0.7889011067174645
9 + precision recall f1-score support
10 +
11 + OD 1.000 0.818 0.900 22
12 + pH 1.000 1.000 1.000 8
13 + Technique 1.000 0.913 0.955 23
14 + Med 1.000 0.962 0.981 53
15 + Temp 0.923 0.828 0.873 29
16 + Vess 1.000 1.000 1.000 1
17 + Agit 0.000 0.000 0.000 0
18 + Phase 0.882 1.000 0.938 15
19 + Air 0.556 0.362 0.439 69
20 + Anti 1.000 1.000 1.000 11
21 + Strain 0.000 0.000 0.000 1
22 + Gtype 0.864 0.824 0.843 85
23 + Substrain 0.000 0.000 0.000 0
24 + Supp 0.716 0.791 0.752 134
25 + Gversion 0.000 0.000 0.000 0
26 +
27 +avg / total 0.811 0.776 0.789 451
28 +
29 +
30 +Top likely transitions:
31 +Temp -> Temp 6.132401
32 +Agit -> Agit 5.867978
33 +Med -> Med 5.493382
34 +Anti -> Anti 4.899201
35 +Gversion -> Gversion 4.720775
36 +OD -> OD 4.580439
37 +Gtype -> Gtype 4.270714
38 +Supp -> Supp 4.146870
39 +O -> O 4.001305
40 +Phase -> Phase 3.777096
41 +Air -> Air 3.280795
42 +Technique -> Technique 3.042386
43 +pH -> pH 2.638890
44 +Substrain -> Gtype 0.639428
45 +O -> Technique 0.597359
46 +O -> Gtype 0.301586
47 +Med -> O 0.113620
48 +Gtype -> Air 0.032970
49 +Air -> O 0.023112
50 +Temp -> O 0.007528
51 +O -> Temp 0.000729
52 +Agit -> O -0.000016
53 +Supp -> Technique -0.000031
54 +O -> Strain -0.002771
55 +Phase -> Air -0.018013
56 +Technique -> OD -0.019361
57 +Supp -> OD -0.031281
58 +pH -> Supp -0.038004
59 +Air -> Gtype -0.060170
60 +OD -> Supp -0.078565
61 +Anti -> Gtype -0.100971
62 +Gtype -> Phase -0.118162
63 +Gtype -> Anti -0.120765
64 +Air -> Agit -0.147345
65 +Air -> Temp -0.149520
66 +Anti -> Air -0.164013
67 +Gversion -> Supp -0.166044
68 +Anti -> O -0.170392
69 +Gtype -> OD -0.173948
70 +Anti -> Supp -0.206647
71 +Air -> Technique -0.220913
72 +Technique -> Gtype -0.276956
73 +Supp -> Phase -0.297344
74 +Air -> Phase -0.306210
75 +O -> Supp -0.348583
76 +O -> Med -0.368131
77 +Phase -> OD -0.382985
78 +O -> Phase -0.399249
79 +OD -> Air -0.479222
80 +Phase -> Supp -0.503166
81 +
82 +
83 +Top unlikely transitions:
84 +Agit -> O -0.000016
85 +Supp -> Technique -0.000031
86 +O -> Strain -0.002771
87 +Phase -> Air -0.018013
88 +Technique -> OD -0.019361
89 +Supp -> OD -0.031281
90 +pH -> Supp -0.038004
91 +Air -> Gtype -0.060170
92 +OD -> Supp -0.078565
93 +Anti -> Gtype -0.100971
94 +Gtype -> Phase -0.118162
95 +Gtype -> Anti -0.120765
96 +Air -> Agit -0.147345
97 +Air -> Temp -0.149520
98 +Anti -> Air -0.164013
99 +Gversion -> Supp -0.166044
100 +Anti -> O -0.170392
101 +Gtype -> OD -0.173948
102 +Anti -> Supp -0.206647
103 +Air -> Technique -0.220913
104 +Technique -> Gtype -0.276956
105 +Supp -> Phase -0.297344
106 +Air -> Phase -0.306210
107 +O -> Supp -0.348583
108 +O -> Med -0.368131
109 +Phase -> OD -0.382985
110 +O -> Phase -0.399249
111 +OD -> Air -0.479222
112 +Phase -> Supp -0.503166
113 +Supp -> Air -0.513142
114 +Phase -> O -0.522452
115 +Air -> Med -0.532689
116 +Gtype -> Med -0.539650
117 +Technique -> Supp -0.572989
118 +Gtype -> Technique -0.595348
119 +Temp -> Med -0.595579
120 +Supp -> Gtype -0.613937
121 +Agit -> Air -0.625302
122 +OD -> O -0.641071
123 +Gversion -> O -0.641193
124 +Technique -> pH -0.782400
125 +Supp -> O -0.786822
126 +Supp -> Med -0.932343
127 +Air -> Supp -0.964410
128 +Technique -> O -1.106191
129 +O -> OD -1.374945
130 +Gtype -> O -1.475612
131 +Substrain -> O -1.620808
132 +Med -> Supp -1.753990
133 +O -> Air -1.843623
134 +
135 +
136 +Top positive:
137 +5.458764 Anti b'-2:lemma:antibody'
138 +4.034933 O b'-2:lemma:_'
139 +3.995360 Technique b'lemma[:2]:Ch'
140 +3.737588 O b'lemma[:2]:re'
141 +3.686472 Gtype b'lemma[:1]:\xce\xb4'
142 +3.552793 Phase b'-2:lemma:phase'
143 +3.469041 Air b'word:Aerobic'
144 +3.439462 O b'lemma:2'
145 +3.439462 O b'word:2'
146 +3.243564 Air b'lemma:anaerobic'
147 +3.182295 Supp b'-1:word:Cra'
148 +3.047566 Gtype b'-2:lemma:genotype/variation'
149 +3.006963 O b'-2:lemma:medium'
150 +2.984615 O b'lemma:1'
151 +2.984615 O b'word:1'
152 +2.738121 O b'lemma:-'
153 +2.738121 O b'word:-'
154 +2.718196 O b'+1:postag:RB'
155 +2.700586 O b'word:Cra'
156 +2.653876 Supp b'+2:lemma:iptg'
157 +2.620648 O b'lemma:with'
158 +2.620648 O b'word:with'
159 +2.599865 O b'lemma:_'
160 +2.599865 O b'lemma[:1]:_'
161 +2.599865 O b'word:_'
162 +2.575314 Med b'+2:postag:CC'
163 +2.571913 Supp b'lemma:arginine'
164 +2.568347 O b'word:A'
165 +2.546240 Gtype b'lemma[:2]:pk'
166 +2.527855 O b'lemma:3'
167 +2.527855 O b'word:3'
168 +2.379121 O b'-1:lemma:ChIP-exo'
169 +2.355972 O b'lemma[:2]:ge'
170 +2.355054 Technique b'lemma[:2]:rn'
171 +2.341062 Supp b'lemma:pq'
172 +2.341062 Supp b'lemma[:2]:pq'
173 +2.341062 Supp b'word:PQ'
174 +2.297430 O b'+2:lemma:cra'
175 +2.280338 O b'+2:lemma:\xc2\xb0c'
176 +2.251142 O b'+2:lemma:70'
177 +2.245883 Gtype b'word:WT'
178 +2.241264 Air b'word:Anaerobic'
179 +2.237739 Supp b'+2:lemma:for'
180 +2.234179 Anti b'+2:lemma:antibody'
181 +2.153589 O b'-1:lemma:tag'
182 +2.117142 O b'lemma[:1]:h'
183 +2.088934 Air b'lemma[:2]:ae'
184 +2.076795 Technique b'word:ChIPSeq'
185 +2.068935 Gtype b'hGreek'
186 +2.047132 O b'-2:lemma:mid-log'
187 +2.041924 O b'lemma:.'
188 +2.041924 O b'postag:.'
189 +2.041924 O b'postag[:1]:.'
190 +2.041924 O b'word:.'
191 +2.032495 Gtype b'-1:lemma:\xe2\x88\x86'
192 +2.032495 Gtype b'-1:word:\xe2\x88\x86'
193 +2.016938 Gtype b'lemma[:1]:w'
194 +2.000225 O b'+2:lemma:fructose'
195 +1.992930 Supp b'-1:lemma:with'
196 +1.992930 Supp b'-1:word:with'
197 +1.991763 Gtype b'-2:lemma:delta'
198 +1.990954 Substrain b'lemma[:2]:mg'
199 +1.983708 Phase b'+2:lemma:o.d.'
200 +1.977886 Supp b'-1:lemma:Cra'
201 +1.960916 Phase b'lemma:mid-log'
202 +1.960916 Phase b'word:mid-log'
203 +1.958437 Phase b'lemma[:2]:ex'
204 +1.945077 Supp b'-1:postag:CC'
205 +1.944452 Substrain b'lemma[:1]:m'
206 +1.940793 Gtype b'lemma:type'
207 +1.940793 Gtype b'lemma[:2]:ty'
208 +1.940793 Gtype b'word:type'
209 +1.931978 Technique b'lemma:chipseq'
210 +1.906924 O b'+1:postag:NNP'
211 +1.904357 O b'lemma:b'
212 +1.904357 O b'word:B'
213 +1.900770 O b'-1:word:tag'
214 +1.894287 Technique b'lemma:ChIP-exo'
215 +1.866276 Technique b'lemma[:1]:C'
216 +1.856571 O b'-1:word:Aerobic'
217 +1.849985 Supp b'lemma:Iron'
218 +1.849985 Supp b'lemma[:2]:Ir'
219 +1.849985 Supp b'word:Iron'
220 +1.849985 Supp b'+1:word:Deficient'
221 +1.849985 Supp b'-2:lemma:Anaerobic'
222 +1.843831 Supp b'lemma:acetate'
223 +1.843831 Supp b'word:acetate'
224 +1.843349 Air b'lemma[:1]:A'
225 +1.840981 Supp b'-2:lemma:media'
226 +1.831812 Technique b'lemma[:2]:ch'
227 +1.820538 Gtype b'-2:postag:DT'
228 +1.819950 Supp b'-1:lemma:+'
229 +1.819950 Supp b'-1:word:+'
230 +1.813584 Air b'lemma[:1]:a'
231 +1.788160 Gversion b'word:ChIP-Seq'
232 +1.786490 Gtype b'-2:lemma:genotype'
233 +1.771090 Gtype b'+1:lemma:type'
234 +1.771090 Gtype b'+1:word:type'
235 +1.765859 Gtype b'-2:lemma:affyexp'
236 +1.729836 Gversion b'lemma:chip-seq'
237 +1.715972 Air b'lemma[:2]:an'
238 +1.702637 O b'-2:lemma:myc'
239 +1.701777 O b'-2:lemma:flagtag'
240 +1.685559 Supp b'-2:lemma:agent'
241 +1.676248 Supp b'lemma[:1]:n'
242 +1.665235 Technique b'symb'
243 +1.650041 Technique b'-1:lemma:chip-exo'
244 +1.645472 O b'-1:lemma:anaerobic'
245 +1.628175 Gtype b'lemma[:2]:ar'
246 +1.620209 Supp b'postag:VBP'
247 +1.612275 O b'isLower'
248 +1.611690 O b'+1:lemma:pq'
249 +1.611690 O b'+1:word:PQ'
250 +1.609561 Supp b'lemma[:1]:I'
251 +1.608561 O b'-1:lemma:media'
252 +1.608561 O b'-1:word:media'
253 +1.597175 Med b'isUpper'
254 +1.593363 Gtype b'lemma:wt'
255 +1.593363 Gtype b'lemma[:2]:wt'
256 +1.593216 Med b'+2:lemma:b2'
257 +1.592123 Supp b'-2:lemma:induce'
258 +1.590184 Air b'lemma:Aerobic'
259 +1.590184 Air b'lemma[:2]:Ae'
260 +1.571628 Phase b'lemma:stationary'
261 +1.571628 Phase b'word:stationary'
262 +1.555960 Gtype b'lemma[:1]:f'
263 +1.552005 Med b'lemma:MOPS'
264 +1.552005 Med b'lemma[:1]:M'
265 +1.552005 Med b'lemma[:2]:MO'
266 +1.552005 Med b'word:MOPS'
267 +1.545220 Technique b'word:ChIP-Seq'
268 +1.544051 Supp b'lemma[:2]:gl'
269 +1.540691 O b'-1:lemma:0.3'
270 +1.540691 O b'-1:word:0.3'
271 +1.540357 Supp b'-2:lemma:argr'
272 +1.535458 Gtype b'symb'
273 +1.531560 O b'-2:lemma:fructose'
274 +1.530754 O b'-1:lemma:lb'
275 +1.530754 O b'-1:word:LB'
276 +1.528067 Supp b'-1:lemma:final'
277 +1.528067 Supp b'-1:word:final'
278 +1.513494 Phase b'-2:lemma:until'
279 +1.493341 Supp b'lemma:fructose'
280 +1.493341 Supp b'word:fructose'
281 +1.491583 Technique b'-1:lemma:input'
282 +1.491583 Technique b'-1:word:Input'
283 +1.487376 Supp b'lemma[:2]:ac'
284 +1.472895 pH b'+1:postag:CD'
285 +1.458550 O b'-1:lemma:glucose'
286 +1.458550 O b'-1:word:glucose'
287 +1.450710 Gtype b'-1:postag:VBG'
288 +1.450118 Air b'-1:postag::'
289 +1.449701 Supp b'+1:lemma:\xc2\xb5m'
290 +1.449701 Supp b'+1:word:\xc2\xb5M'
291 +1.430882 O b'+1:lemma:mid-log'
292 +1.430882 O b'+1:word:mid-log'
293 +1.420013 Strain b'+1:lemma:substr'
294 +1.420013 Strain b'+1:word:substr'
295 +1.420013 Strain b'-2:lemma:str'
296 +1.408799 O b'lemma:Custom'
297 +1.408799 O b'lemma[:2]:Cu'
298 +1.408799 O b'word:Custom'
299 +1.401545 O b'lemma:chip'
300 +1.400074 O b'lemma[:1]:C'
301 +1.398389 Gtype b'lemma[:2]:wi'
302 +1.394023 O b'lemma:rpob'
303 +1.394023 O b'word:RpoB'
304 +1.389762 Supp b'-2:lemma:supplement'
305 +1.388021 Supp b'lemma[:2]:ni'
306 +1.387197 Phase b'lemma[:1]:e'
307 +1.380149 Air b'postag:RB'
308 +1.380149 Air b'postag[:1]:R'
309 +1.380149 Air b'postag[:2]:RB'
310 +1.378405 O b'lemma:a'
311 +1.377531 O b'+1:word:ChIP-Seq'
312 +1.375054 Technique b'+2:lemma:ph5'
313 +1.371660 Anti b'+1:lemma:antibody'
314 +1.371660 Anti b'+1:word:antibody'
315 +1.355865 O b'postag:VBN'
316 +1.352947 OD b'lemma:0.3'
317 +1.352947 OD b'word:0.3'
318 +1.351689 Phase b'+1:lemma:phase'
319 +1.351689 Phase b'+1:word:phase'
320 +1.350446 pH b'lemma[:2]:ph'
321 +1.341756 O b'isNumber'
322 +1.330917 Supp b'+1:lemma:1'
323 +1.330917 Supp b'+1:word:1'
324 +1.327538 Gversion b'-2:lemma:build'
325 +1.323732 O b'lemma:ompr'
326 +1.323732 O b'word:OmpR'
327 +1.322881 Air b'+1:postag:IN'
328 +1.322130 Temp b'-1:lemma:43'
329 +1.322130 Temp b'-1:word:43'
330 +1.298940 O b'-1:lemma:aerobically'
331 +1.298940 O b'-1:word:aerobically'
332 +1.297300 OD b'lemma[:1]:o'
333 +1.295797 Med b'+1:lemma:0.4'
334 +1.295797 Med b'+1:word:0.4'
335 +1.282176 O b'+2:postag:JJ'
336 +1.271164 O b'+2:lemma:polyclonal'
337 +
338 +
339 +Top negative:
340 +-0.367697 Supp b'-1:postag:-LRB-'
341 +-0.371915 O b'-2:lemma:fresh'
342 +-0.376710 O b'+2:lemma:fnr'
343 +-0.377206 O b'lemma:2h'
344 +-0.377206 O b'-1:lemma:additional'
345 +-0.377206 O b'lemma[:2]:2h'
346 +-0.377206 O b'word:2h'
347 +-0.377206 O b'-1:word:additional'
348 +-0.379729 O b'-2:postag:-LRB-'
349 +-0.380849 Air b'-1:postag:JJ'
350 +-0.381928 Strain b'isLower'
351 +-0.382657 Med b'-1:postag:IN'
352 +-0.383403 O b'-2:lemma:at'
353 +-0.384281 Technique b'postag:NN'
354 +-0.389220 O b'-1:lemma:until'
355 +-0.389220 O b'-1:word:until'
356 +-0.393047 O b'lemma[:1]:4'
357 +-0.393058 O b'+2:lemma:follow'
358 +-0.393999 O b'-1:lemma:control'
359 +-0.393999 O b'-1:word:control'
360 +-0.395395 O b'lemma[:2]:0.'
361 +-0.396201 O b'-2:lemma:minimal'
362 +-0.397167 O b'lemma:37'
363 +-0.397167 O b'lemma[:2]:37'
364 +-0.397167 O b'word:37'
365 +-0.397928 Gtype b'lemma[:1]:g'
366 +-0.404533 O b'lemma:glucose'
367 +-0.404533 O b'word:glucose'
368 +-0.408273 O b'lemma:30'
369 +-0.408273 O b'word:30'
370 +-0.409885 O b'-2:lemma:supplement'
371 +-0.411648 O b'+1:lemma:mm'
372 +-0.411648 O b'+1:word:mM'
373 +-0.412683 O b'+2:lemma:at'
374 +-0.413401 O b'-2:lemma:of'
375 +-0.419317 O b'-2:postag:RB'
376 +-0.420417 Temp b'isLower'
377 +-0.424626 O b'+1:lemma:phase'
378 +-0.424626 O b'+1:word:phase'
379 +-0.426973 Supp b'lemma[:1]:s'
380 +-0.427251 O b'lemma:wt'
381 +-0.427251 O b'lemma[:2]:wt'
382 +-0.434566 O b'+2:lemma:mid-log'
383 +-0.434970 O b'lemma[:1]:0'
384 +-0.440663 O b'-1:lemma:37'
385 +-0.440663 O b'-1:word:37'
386 +-0.441568 Temp b'-2:postag:NN'
387 +-0.443930 O b'lemma[:1]:\xce\xb4'
388 +-0.458877 O b'-1:lemma:mm'
389 +-0.458877 O b'-1:word:mM'
390 +-0.463660 Supp b'+1:postag:NNS'
391 +-0.463834 O b'lemma:media'
392 +-0.463834 O b'word:media'
393 +-0.464229 O b'-2:lemma:a'
394 +-0.482105 O b'-1:lemma:\xe2\x88\x86'
395 +-0.482105 O b'-1:word:\xe2\x88\x86'
396 +-0.485832 O b'+1:word:ChIP-exo'
397 +-0.487845 Supp b'hGreek'
398 +-0.488218 O b'-2:lemma:nh4cl'
399 +-0.497873 Supp b'+2:postag:NNP'
400 +-0.503699 O b'-2:lemma:genome'
401 +-0.505194 O b'+2:lemma:b'
402 +-0.506273 O b'-1:lemma:IP'
403 +-0.506273 O b'-1:word:IP'
404 +-0.506359 O b'-1:lemma:nsrr'
405 +-0.506359 O b'-1:word:NsrR'
406 +-0.506668 O b'lemma[:1]:L'
407 +-0.507329 O b'+1:postag:VBG'
408 +-0.511337 O b'+1:lemma:2.0'
409 +-0.511337 O b'+1:word:2.0'
410 +-0.514855 Gtype b'lemma[:1]:h'
411 +-0.521122 Med b'-1:postag:NN'
412 +-0.525468 O b'+1:lemma:until'
413 +-0.525468 O b'+1:word:until'
414 +-0.529295 Med b'postag[:1]:C'
415 +-0.531677 O b'+1:lemma:+'
416 +-0.531677 O b'+1:word:+'
417 +-0.535215 O b'lemma[:2]:ar'
418 +-0.537907 O b'lemma[:2]:gl'
419 +-0.550679 Air b'isLower'
420 +-0.551345 O b'-1:lemma:ml'
421 +-0.551345 O b'-1:word:ml'
422 +-0.555528 Supp b'+1:lemma:,'
423 +-0.555528 Supp b'+1:postag:,'
424 +-0.555528 Supp b'+1:word:,'
425 +-0.562680 Gtype b'lemma[:1]:s'
426 +-0.564487 O b'lemma[:1]:p'
427 +-0.574109 Agit b'symb'
428 +-0.582388 O b'-2:postag:DT'
429 +-0.587178 O b'-2:lemma:rpob'
430 +-0.589072 O b'+2:lemma:.'
431 +-0.589072 O b'+2:postag:.'
432 +-0.595658 O b'+1:lemma:g/l'
433 +-0.595658 O b'+1:word:g/L'
434 +-0.596954 O b'-2:postag:SYM'
435 +-0.599195 O b'-1:lemma:rpob'
436 +-0.599195 O b'-1:word:RpoB'
437 +-0.604186 O b'+2:lemma:250'
438 +-0.608815 Supp b'+1:lemma:-lrb-'
439 +-0.608815 Supp b'+1:word:-LRB-'
440 +-0.609845 Anti b'isUpper'
441 +-0.610591 Technique b'isNumber'
442 +-0.617650 O b'-1:lemma:co2'
443 +-0.617650 O b'-1:word:CO2'
444 +-0.622948 Supp b'+1:postag:-LRB-'
445 +-0.641001 Agit b'hUpper'
446 +-0.641001 Agit b'hLower'
447 +-0.645491 O b'+1:postag:IN'
448 +-0.647159 Gtype b'postag[:1]:V'
449 +-0.647159 Gtype b'postag[:2]:VB'
450 +-0.659968 O b'lemma[:2]:ri'
451 +-0.661117 Anti b'+2:postag:JJ'
452 +-0.661585 O b'-1:lemma:1'
453 +-0.661585 O b'-1:word:1'
454 +-0.666026 O b'lemma:mid-log'
455 +-0.666026 O b'word:mid-log'
456 +-0.667310 O b'-2:lemma:pahse'
457 +-0.680963 Anti b'+2:lemma:polyclonal'
458 +-0.685918 Supp b'-1:postag:NNP'
459 +-0.698925 Supp b'-2:postag:JJ'
460 +-0.704054 O b'lemma[:1]:d'
461 +-0.707980 Gtype b'-2:lemma:\xe2\x88\x86'
462 +-0.710771 O b'+2:postag:-RRB-'
463 +-0.713877 O b'+1:lemma:at'
464 +-0.713877 O b'+1:word:at'
465 +-0.722406 O b'-2:lemma::'
466 +-0.722643 O b'lemma:rifampicin'
467 +-0.722643 O b'word:rifampicin'
468 +-0.728674 Med b'-2:postag:VBN'
469 +-0.732685 Supp b'-2:lemma:treat'
470 +-0.742976 Anti b'+1:lemma:anti-fur'
471 +-0.742976 Anti b'+1:word:anti-Fur'
472 +-0.744610 O b'lemma[:1]:k'
473 +-0.745118 O b'-2:lemma:aerobically'
474 +-0.745353 O b'lemma:anaerobic'
475 +-0.747402 O b'+1:lemma:in'
476 +-0.747402 O b'+1:word:in'
477 +-0.757568 O b'+2:lemma:tag'
478 +-0.766874 O b'lemma[:1]:I'
479 +-0.768100 O b'-1:postag::'
480 +-0.770179 O b'-1:lemma:2'
481 +-0.770179 O b'-1:word:2'
482 +-0.779152 Gtype b'isNumber'
483 +-0.782788 Technique b'isLower'
484 +-0.789896 Phase b'hUpper'
485 +-0.789896 Phase b'hLower'
486 +-0.799453 O b'lemma[:1]:n'
487 +-0.803431 pH b'isLower'
488 +-0.809089 O b'-2:postag::'
489 +-0.822848 O b'-2:lemma:dpd'
490 +-0.835295 Supp b'lemma[:2]:an'
491 +-0.862215 O b'-2:lemma:phase'
492 +-0.867939 Gtype b'lemma[:1]:c'
493 +-0.884275 Supp b'+2:lemma:glucose'
494 +-0.904915 Gtype b'lemma[:1]:a'
495 +-0.913465 O b'-1:lemma:vol'
496 +-0.913465 O b'-1:word:vol'
497 +-0.913465 O b'-2:lemma:1/100'
498 +-0.913465 O b'+2:lemma:1m'
499 +-0.930521 Supp b'+2:postag:CD'
500 +-0.948465 O b'-2:lemma:rifampicin'
501 +-0.960959 O b'-2:lemma:until'
502 +-0.973929 O b'+2:lemma:+'
503 +-0.989863 OD b'+1:postag:NN'
504 +-0.993812 Med b'symb'
505 +-1.066464 Gtype b'+2:lemma:cra'
506 +-1.076108 Gtype b'isUpper'
507 +-1.103829 O b'lemma[:2]:ae'
508 +-1.145933 Supp b'-2:lemma:grow'
509 +-1.165009 O b'+2:lemma:then'
510 +-1.184152 Phase b'postag[:1]:J'
511 +-1.184152 Phase b'postag[:2]:JJ'
512 +-1.216149 Gtype b'lemma[:1]:r'
513 +-1.241717 O b'+2:lemma:rifampicin'
514 +-1.245965 Gversion b'isLower'
515 +-1.248268 O b'lemma[:2]:30'
516 +-1.250676 O b'+1:lemma:1'
517 +-1.250676 O b'+1:word:1'
518 +-1.261488 Phase b'postag:JJ'
519 +-1.293329 Anti b'postag:NNP'
520 +-1.311043 Supp b'lemma[:1]:c'
521 +-1.361957 O b'-1:lemma:sample'
522 +-1.486502 Phase b'-1:postag:JJ'
523 +-1.506572 Supp b'+2:lemma:1'
524 +-1.518267 Supp b'+2:lemma:2'
525 +-1.561744 O b'-2:lemma:0.3'
526 +-1.581023 O b'-2:lemma:media'
527 +-1.615538 Med b'-2:lemma:grow'
528 +-1.662192 O b'+1:lemma:2'
529 +-1.662192 O b'+1:word:2'
530 +-1.765435 Med b'+2:postag:VBN'
531 +-1.789575 Supp b'+2:lemma:fructose'
532 +-1.949737 O b'postag:VBP'
533 +-2.303519 O b'-1:postag:VBG'
534 +-2.349341 O b'lemma[:2]:fl'
535 +-2.351661 OD b'+2:lemma:aerobically'
536 +-2.480527 O b'-1:lemma::'
537 +-2.480527 O b'-1:word::'
538 +-2.674521 O b'-1:lemma:_'
539 +-2.674521 O b'-1:word:_'
540 +