report_training-data-set-70.fStopWords_False.fSymbols_False.txt
16.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
********** TRAINING AND TESTING REPORT **********
Training file: training-data-set-70.txt
best params:{'c1': 0.12296384618516742, 'c2': 0.001984598547992556}
best CV score:0.7223961938319273
model size: 0.07M
Flat F1: 0.6328175528971997
precision recall f1-score support
OD 1.000 0.211 0.348 57
Sample 0.000 0.000 0.000 0
Technique 0.000 0.000 0.000 1
Med 1.000 0.933 0.966 30
Temp 1.000 0.615 0.762 13
Serie 0.000 0.000 0.000 0
Vess 0.000 0.000 0.000 0
Agit 0.000 0.000 0.000 7
Phase 0.750 0.923 0.828 13
Air 0.000 0.000 0.000 0
Anti 1.000 0.778 0.875 18
Strain 0.000 0.000 0.000 0
Gtype 0.884 0.551 0.679 69
Supp 0.582 0.629 0.605 62
Gversion 0.750 1.000 0.857 6
avg / total 0.831 0.569 0.633 276
Top likely transitions:
OD -> OD 6.108207
Supp -> Supp 5.784131
Gtype -> Gtype 5.685374
Med -> Med 5.675327
Temp -> Temp 5.658789
Anti -> Anti 5.031069
Gversion -> Gversion 4.774936
O -> O 4.746806
Phase -> Phase 3.575687
O -> Gtype 1.949659
OD -> Phase 1.047851
O -> Gversion 0.297969
O -> Supp 0.232142
O -> OD 0.059228
O -> Anti 0.035022
O -> Med 0.022142
Supp -> Phase -0.082451
Supp -> Med -0.113938
Gtype -> Anti -0.177348
Vess -> O -0.195425
Phase -> O -0.308432
Anti -> O -0.319054
Phase -> Supp -0.418345
O -> Technique -0.434100
Temp -> O -0.447292
Gtype -> O -0.546043
Supp -> O -0.756881
OD -> O -0.842201
Technique -> O -0.857966
Med -> O -0.892055
Substrain -> O -0.970661
O -> Phase -1.142533
Temp -> Med -1.246193
O -> Temp -1.298927
Phase -> OD -1.766899
Med -> Supp -2.805768
Top unlikely transitions:
OD -> OD 6.108207
Supp -> Supp 5.784131
Gtype -> Gtype 5.685374
Med -> Med 5.675327
Temp -> Temp 5.658789
Anti -> Anti 5.031069
Gversion -> Gversion 4.774936
O -> O 4.746806
Phase -> Phase 3.575687
O -> Gtype 1.949659
OD -> Phase 1.047851
O -> Gversion 0.297969
O -> Supp 0.232142
O -> OD 0.059228
O -> Anti 0.035022
O -> Med 0.022142
Supp -> Phase -0.082451
Supp -> Med -0.113938
Gtype -> Anti -0.177348
Vess -> O -0.195425
Phase -> O -0.308432
Anti -> O -0.319054
Phase -> Supp -0.418345
O -> Technique -0.434100
Temp -> O -0.447292
Gtype -> O -0.546043
Supp -> O -0.756881
OD -> O -0.842201
Technique -> O -0.857966
Med -> O -0.892055
Substrain -> O -0.970661
O -> Phase -1.142533
Temp -> Med -1.246193
O -> Temp -1.298927
Phase -> OD -1.766899
Med -> Supp -2.805768
Top positive:
9.769521 OD b'+1:lemma:stationary'
6.573116 Gtype b'-1:lemma:express'
6.473813 O b'lemma:at'
6.401320 Gtype b'+1:lemma:chip'
5.580269 Med b'word:MOPS'
5.580269 Med b'lemma:MOPS'
5.309599 Med b'-1:lemma:ml'
5.105098 Gtype b'+1:lemma:knock-out'
5.053371 Anti b'+1:lemma:antibody'
4.833626 O b'+1:lemma:until'
4.685736 Supp b'word:nitrate'
4.685736 Supp b'lemma:nitrate'
4.578724 Temp b'-1:lemma:sample'
4.338845 O b'+1:postag:VBP'
4.326050 Gversion b'word:NC'
4.326050 Gversion b'lemma:nc'
4.207716 O b'word:.'
4.207716 O b'lemma:.'
3.987856 O b'lemma:chip'
3.914979 Phase b'-1:lemma:mid-log'
3.836351 O b'+1:postag:NNP'
3.819222 O b'word:-'
3.819222 O b'lemma:-'
3.774330 Gtype b'-1:lemma:wild'
3.733417 Supp b'word:2h'
3.733417 Supp b'lemma:2h'
3.733417 Supp b'-1:lemma:additional'
3.729860 O b'word::'
3.729860 O b'lemma::'
3.704485 O b'-1:lemma:glucose'
3.673897 O b'-1:lemma:-'
3.584033 Phase b'word:mid-log'
3.584033 Phase b'lemma:mid-log'
3.580641 Gtype b'-1:lemma:k-12'
3.329333 Gtype b'word:Flag-tag'
3.329333 Gtype b'lemma:flag-tag'
3.329333 Gtype b'-1:lemma:c-terminal'
3.313956 O b'+1:lemma:coli'
3.198151 Supp b'-1:lemma:vol'
3.190691 O b'-1:lemma:lb'
3.168840 Supp b'-1:lemma:with'
3.134166 Gtype b'word:wild'
3.134166 Gtype b'lemma:wild'
3.092894 O b'word:K-12'
3.092894 O b'lemma:k-12'
3.060219 O b'word:MG1655'
3.060219 O b'lemma:mg1655'
3.020191 Supp b'+1:lemma:\xc2\xb5m'
2.959207 Gversion b'word:ASM584v2'
2.959207 Gversion b'lemma:asm584v2'
2.895464 Gversion b'word:000913'
2.895464 Gversion b'lemma:000913'
2.892382 Vess b'word:flask'
2.892382 Vess b'lemma:flask'
2.892382 Vess b'-1:lemma:warm'
2.889484 Temp b'+1:lemma:and'
2.864457 OD b'word:OD450'
2.864457 OD b'lemma:od450'
2.844361 O b'+1:lemma:od600'
2.760592 O b'word:Custom'
2.760592 O b'lemma:Custom'
2.760592 O b'+1:lemma:anti-fur'
2.749995 Supp b'-1:lemma:without'
2.744252 Phase b'word:exponential'
2.744252 Phase b'lemma:exponential'
2.724404 Phase b'word:stationary'
2.724404 Phase b'lemma:stationary'
2.704170 Supp b'word:glucose'
2.704170 Supp b'lemma:glucose'
2.702637 Med b'-1:lemma:LB'
2.698915 Med b'word:LB'
2.648788 O b'+1:lemma:fructose'
2.522068 Supp b'word:arginine'
2.522068 Supp b'lemma:arginine'
2.521702 O b'+1:lemma:0.2'
2.510135 Gtype b'-1:postag:VBG'
2.500086 Med b'word:L'
2.500086 Med b'lemma:L'
2.500086 Med b'+1:lemma:broth'
2.438147 Gversion b'-1:lemma:nc'
2.435933 O b'word:with'
2.435933 O b'lemma:with'
2.424051 OD b'word:OD600'
2.424051 OD b'lemma:od600'
2.397049 Gversion b'word:U00096'
2.397049 Gversion b'lemma:u00096'
2.397049 Gversion b'+1:lemma:.2'
2.393316 Temp b'-1:lemma:37'
2.384921 Med b'+1:lemma:g/l'
2.343273 O b'lemma:for'
2.287420 O b'-1:lemma:\xc2\xb0c'
2.277217 Supp b'word:acetate'
2.277217 Supp b'lemma:acetate'
2.240391 Gtype b'word:WT'
2.227767 O b'word:Cells'
2.223138 Temp b'+1:lemma:\xc2\xb0c'
2.207530 Gtype b'word:PK4854'
2.207530 Gtype b'lemma:pk4854'
2.204777 Technique b'word:RNA-Seq'
2.200781 Anti b'-1:lemma:polymerase'
2.180743 O b'-1:lemma:phase'
2.168855 Supp b'+1:lemma:at'
2.168107 Supp b'+1:lemma:arginine'
2.152384 O b'word:Crosslink'
2.152384 O b'lemma:Crosslink'
2.151825 O b'word:for'
2.146611 Supp b'+1:lemma:hour'
2.145369 Phase b'+1:lemma:for'
2.130614 Supp b'+1:lemma:and'
2.127595 Temp b'word:\xc2\xb0C'
2.127595 Temp b'lemma:\xc2\xb0c'
2.118493 Med b'word:broth'
2.118493 Med b'lemma:broth'
2.118493 Med b'-1:lemma:L'
2.100174 Supp b'word:Adenine'
2.100174 Supp b'lemma:Adenine'
2.099073 Supp b'word:hours'
2.099073 Supp b'lemma:hour'
2.091461 Technique b'lemma:rna-seq'
2.081270 O b'+1:lemma:0.4'
2.069166 OD b'word:0.3'
2.069166 OD b'lemma:0.3'
2.025934 O b'-1:lemma:tag'
2.016740 Med b'word:M63'
2.016740 Med b'lemma:m63'
1.951269 Supp b'-1:lemma:final'
1.951255 Anti b'+1:lemma:polyclonal'
1.940092 O b'+1:postag:CD'
1.937643 O b'word:supplemented'
1.937643 O b'lemma:supplement'
1.928437 Gtype b'word:oxyR-8myc'
1.928437 Gtype b'lemma:oxyr-8myc'
1.925371 Gtype b'word:soxR-8myc'
1.925371 Gtype b'lemma:soxr-8myc'
1.905981 Supp b'-1:lemma:+'
1.904247 Gversion b'word:.2'
1.904247 Gversion b'lemma:.2'
1.904247 Gversion b'-1:lemma:u00096'
1.895301 Supp b'-1:lemma::'
1.888517 Med b'+1:lemma:minimal'
1.888285 O b'+1:lemma:nitrate'
1.887992 O b'+1:lemma:anaerobic'
1.887095 Phase b'word:phase'
1.887095 Phase b'lemma:phase'
1.873844 Anti b'word:\xcf\x8332'
1.873844 Anti b'lemma:\xcf\x8332'
1.858813 Med b'lemma:LB'
1.856133 O b'-1:lemma:-80'
1.850543 Supp b'+1:lemma:be'
1.822229 Supp b'word:leucine'
1.822229 Supp b'lemma:leucine'
1.818630 O b'-1:lemma:flag-tag'
1.810937 O b'-1:lemma:_'
1.808829 Supp b'word:fructose'
1.808829 Supp b'lemma:fructose'
1.805654 OD b'word:A'
1.799136 Gversion b'lemma:chip-seq'
1.792100 Phase b'-1:lemma:until'
1.787880 Anti b'-1:lemma:monoclonal'
1.786790 Anti b'word:anti-myc'
1.786790 Anti b'lemma:anti-myc'
1.784685 Substrain b'word:MG1655star'
1.784685 Substrain b'lemma:mg1655star'
1.778678 Technique b'lemma:chip-seq'
1.778353 O b'word:<Air>'
1.751251 Supp b'-1:lemma:1mm'
1.746035 Supp b'+1:lemma:1/100'
1.733606 Anti b'+1:lemma:from'
1.729502 Technique b'word:ChIP-Seq'
1.725373 O b'+1:lemma:mid-log'
1.712485 Gversion b'word:ChIP-Seq'
1.709357 O b'-1:postag:NNS'
1.693666 O b'word:-RRB-'
1.693666 O b'lemma:-rrb-'
1.656579 Supp b'word:rifampicin'
1.656579 Supp b'lemma:rifampicin'
1.654463 O b'lemma:to'
1.646916 Anti b'word:anti-RpoB'
1.646916 Anti b'lemma:anti-rpob'
1.641270 Vess b'-1:postag:VBN'
1.634955 Anti b'word:SeqA'
1.634955 Anti b'lemma:seqa'
1.626876 Med b'+1:lemma:-lrb-'
1.621826 O b'lemma:<air>'
1.616807 Supp b'word:tryptophan'
1.616807 Supp b'lemma:tryptophan'
1.616807 Supp b'-1:lemma:mg/l'
1.584459 O b'lemma:anaerobic'
1.571612 Med b'word:medium'
1.571612 Med b'lemma:medium'
1.550300 Vess b'+1:lemma:at'
1.549343 O b'word:B'
1.547972 Gtype b'-1:lemma:\xe2\x88\x86'
1.546575 OD b'+1:lemma:oxyr-8myc'
1.546406 O b'+1:lemma:aerobic'
1.528557 Supp b'+1:lemma:300'
1.505854 Temp b'-1:lemma:at'
1.482950 Supp b'-1:postag:IN'
1.477298 Med b'-1:lemma:glucose'
1.476212 O b'-1:lemma:media'
Top negative:
-0.033522 O b'lemma:20'
-0.038038 Anti b'isupper()'
-0.038347 OD b'isupper()'
-0.045438 O b'word:phosphate'
-0.045438 O b'lemma:phosphate'
-0.046287 O b'word:LB'
-0.047486 Gtype b'-1:lemma:,'
-0.047486 Gtype b'-1:postag:,'
-0.047973 O b'+1:lemma:culture'
-0.048635 Med b'-1:postag:NN'
-0.048786 Supp b'-1:lemma:,'
-0.048786 Supp b'-1:postag:,'
-0.049214 Temp b'-1:postag:NN'
-0.055607 Supp b'word:vol'
-0.055607 Supp b'lemma:vol'
-0.055607 Supp b'-1:lemma:1/100'
-0.057967 O b'word:150'
-0.057967 O b'lemma:150'
-0.057967 O b'+1:lemma:mg/ml'
-0.065780 Med b'+1:lemma:media'
-0.067419 O b'-1:lemma:rifampicin'
-0.068390 O b'+1:lemma:medium'
-0.069566 Gtype b'+1:postag:JJ'
-0.069740 Supp b'word:10'
-0.069740 Supp b'lemma:10'
-0.072097 Supp b'-1:lemma:;'
-0.074848 Supp b'-1:lemma:.'
-0.074848 Supp b'-1:postag:.'
-0.075753 O b'word:min'
-0.075753 O b'lemma:min'
-0.076584 Supp b'-1:lemma:-lrb-'
-0.079109 Supp b'word:uM'
-0.079109 Supp b'lemma:um'
-0.079109 Supp b'-1:lemma:250'
-0.083627 O b'-1:lemma:m9'
-0.084086 Anti b'-1:postag:NN'
-0.084792 Supp b'-1:postag:-LRB-'
-0.085223 Med b'-1:postag:IN'
-0.090978 O b'-1:postag:VBN'
-0.091287 Supp b'-1:lemma:10'
-0.093086 Gtype b'isupper()'
-0.097632 O b'word:1M'
-0.097632 O b'lemma:1m'
-0.098995 O b'-1:lemma:-lrb-'
-0.102411 O b'-1:lemma:Fur'
-0.109527 Gtype b'+1:postag:IN'
-0.110624 O b'word:7.6'
-0.110624 O b'lemma:7.6'
-0.110624 O b'-1:lemma:ph'
-0.110624 O b'+1:lemma:;'
-0.115984 O b'-1:lemma:and'
-0.116488 O b'-1:lemma:soxr-8myc'
-0.125176 O b'+1:lemma:min'
-0.130318 Phase b'-1:lemma:at'
-0.132219 O b'word:crp'
-0.132219 O b'lemma:crp'
-0.135378 Supp b'word:,'
-0.135378 Supp b'lemma:,'
-0.135758 Supp b'word:250'
-0.135758 Supp b'lemma:250'
-0.135758 Supp b'+1:lemma:um'
-0.145733 Supp b'word:and'
-0.145733 Supp b'lemma:and'
-0.148126 O b'-1:lemma:.'
-0.148126 O b'-1:postag:.'
-0.149156 O b'-1:lemma:until'
-0.157548 O b'+1:lemma:\xc2\xb5m'
-0.160930 O b'+1:lemma:tag'
-0.162355 O b'+1:lemma:phase'
-0.193262 O b'-1:lemma:2'
-0.194523 O b'-1:lemma:43'
-0.194585 OD b'+1:lemma:~'
-0.205225 O b'-1:postag:CC'
-0.217098 Temp b'+1:postag:IN'
-0.217465 O b'word:%'
-0.217465 O b'lemma:%'
-0.221369 Supp b'+1:lemma:acetate'
-0.229086 O b'word:43'
-0.229086 O b'lemma:43'
-0.230011 O b'-1:lemma:delta'
-0.230917 O b'+1:postag:NNS'
-0.233042 Supp b'+1:postag:VBN'
-0.234678 Med b'+1:postag:CC'
-0.239379 O b'word:pH'
-0.239379 O b'lemma:ph'
-0.239379 O b'+1:lemma:7.6'
-0.240087 O b'+1:lemma:mg1655'
-0.243533 O b'word:0.1'
-0.243533 O b'lemma:0.1'
-0.247022 Anti b'+1:postag:JJ'
-0.254824 O b'-1:lemma:sodium'
-0.257080 O b'+1:lemma:dissolve'
-0.258767 Supp b'-1:postag:NN'
-0.262289 O b'+1:postag:IN'
-0.268651 O b'-1:lemma:od600'
-0.269420 O b'-1:postag:-LRB-'
-0.278544 O b'-1:lemma:0.2'
-0.280204 O b'word:of'
-0.280204 O b'lemma:of'
-0.289904 Gtype b'word:,'
-0.289904 Gtype b'lemma:,'
-0.293848 O b'+1:lemma:sample'
-0.302903 O b'-1:lemma:mid-log'
-0.308508 O b'+1:lemma:1m'
-0.316766 O b'lemma:sample'
-0.317700 O b'-1:lemma:1m'
-0.317700 O b'+1:lemma:ph'
-0.325320 O b'-1:postag:IN'
-0.347306 O b'-1:lemma:contain'
-0.353650 O b'word:media'
-0.353650 O b'lemma:media'
-0.361308 O b'+1:lemma:-rrb-'
-0.363822 O b'-1:lemma:mm'
-0.370286 O b'-1:lemma:analyze'
-0.371370 O b'word:medium'
-0.371370 O b'lemma:medium'
-0.371526 O b'word:0.2'
-0.371526 O b'lemma:0.2'
-0.388795 Med b'+1:lemma:m9'
-0.389065 O b'-1:lemma:fresh'
-0.396824 Phase b'isupper()'
-0.407829 O b'word:methanol'
-0.407829 O b'lemma:methanol'
-0.409652 O b'word:dissolved'
-0.409652 O b'lemma:dissolve'
-0.420228 Supp b'word:.'
-0.420228 Supp b'lemma:.'
-0.420585 O b'-1:lemma:final'
-0.435371 O b'word:mid-log'
-0.435371 O b'lemma:mid-log'
-0.449869 Med b'+1:postag:CD'
-0.453920 O b'-1:lemma:or'
-0.462008 O b'+1:lemma:in'
-0.464988 O b'word:rifampicin'
-0.464988 O b'lemma:rifampicin'
-0.468866 O b'word:30'
-0.468866 O b'lemma:30'
-0.490303 O b'+1:lemma:-lrb-'
-0.521128 O b'-1:lemma:of'
-0.549971 O b'+1:postag:-RRB-'
-0.552220 O b'-1:lemma:minimal'
-0.554112 Supp b'+1:postag:CD'
-0.554855 Gtype b'islower()'
-0.556777 O b'word:OD600'
-0.556777 O b'lemma:od600'
-0.558779 O b'+1:lemma:antibody'
-0.581083 Med b'islower()'
-0.588156 Supp b'+1:postag:JJ'
-0.596135 O b'+1:lemma:.'
-0.596135 O b'+1:postag:.'
-0.603800 O b'word:glucose'
-0.603800 O b'lemma:glucose'
-0.604194 Temp b'-1:postag:CD'
-0.610067 O b'word:minimal'
-0.610067 O b'lemma:minimal'
-0.613419 Supp b'isupper()'
-0.618968 O b'word:phase'
-0.618968 O b'lemma:phase'
-0.635118 O b'-1:lemma:ml'
-0.635236 O b'-1:lemma:dissolve'
-0.635236 O b'+1:lemma:methanol'
-0.649664 O b'word:37'
-0.649664 O b'lemma:37'
-0.653888 Med b'+1:postag:NN'
-0.672366 Temp b'islower()'
-0.679024 O b'word:cra'
-0.697555 OD b'+1:postag:NN'
-0.726875 Supp b'islower()'
-0.736892 O b'-1:lemma:growth'
-0.744187 O b'-1:lemma:phosphate'
-0.779776 O b'+1:lemma:minimal'
-0.789382 O b'-1:lemma:rna'
-0.925215 O b'+1:lemma:supplement'
-0.938635 O b'+1:lemma:contain'
-0.959966 O b'+1:lemma:1/100'
-1.005759 Gtype b'isNumber()'
-1.020571 O b'+1:postag:-LRB-'
-1.023100 O b'+1:lemma:g/l'
-1.034129 O b'+1:lemma:chip'
-1.057527 O b'+1:lemma:0.3'
-1.097957 O b'-1:lemma:37'
-1.274500 O b'lemma:tag'
-1.286152 O b'-1:postag::'
-1.303774 O b'-1:lemma:the'
-1.380700 O b'+1:lemma:oxyr-8myc'
-1.396643 O b'-1:lemma:k-12'
-1.508771 O b'+1:lemma:at'
-1.634199 Supp b'+1:lemma:,'
-1.634199 Supp b'+1:postag:,'
-1.651060 O b'+1:lemma:for'
-1.669291 O b'-1:lemma:30'
-1.845334 Gversion b'islower()'
-1.912631 Supp b'+1:postag:-LRB-'
-1.916490 Supp b'+1:lemma:-lrb-'
-2.376915 O b'+1:lemma:<air>'
-2.666827 O b'-1:lemma:vol'
-2.728484 OD b'-1:postag:CD'
-3.072458 O b'-1:lemma:sample'
-3.939533 O b'-1:lemma::'
-4.411137 Phase b'-1:postag:JJ'