report_Run7_v2.txt
16.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
********** TRAINING AND TESTING REPORT **********
Training file: training-data-set-70_v4.txt
best params:{'c1': 0.34336652460986966, 'c2': 0.023606219975479584}
best CV score:0.8168890593472544
model size: 0.09M
Flat F1: 0.8128223676488701
precision recall f1-score support
OD 0.652 0.405 0.500 37
pH 1.000 1.000 1.000 12
Technique 0.917 1.000 0.957 22
Med 1.000 0.860 0.925 57
Temp 0.818 1.000 0.900 18
Vess 0.000 0.000 0.000 0
Agit 0.000 0.000 0.000 0
Phase 1.000 0.789 0.882 19
Air 0.793 0.742 0.767 62
Anti 0.900 1.000 0.947 9
Strain 1.000 1.000 1.000 1
Gtype 0.933 0.783 0.851 106
Substrain 0.000 0.000 0.000 1
Supp 0.887 0.691 0.777 136
Gversion 0.000 0.000 0.000 0
avg / total 0.885 0.758 0.813 480
Top likely transitions:
OD -> OD 6.265824
Agit -> Agit 5.869478
Temp -> Temp 5.488519
Med -> Med 5.144823
Air -> Air 4.771777
Supp -> Supp 4.593228
Anti -> Anti 4.550151
Gversion -> Gversion 4.348826
Gtype -> Gtype 4.225492
Technique -> Technique 3.988233
O -> O 3.774876
Phase -> Phase 3.702756
Gtype -> Supp 1.903024
pH -> pH 1.848213
O -> Supp 1.594685
O -> Technique 0.980723
Air -> O 0.980311
O -> Gtype 0.683784
Med -> O 0.581107
Substrain -> Gtype 0.524869
Technique -> Air 0.483765
OD -> Phase 0.447540
Supp -> O 0.422763
Temp -> O 0.383324
Gtype -> pH 0.198547
O -> Anti 0.179783
Gtype -> Air 0.052812
O -> Agit -0.004958
Technique -> pH -0.041609
Anti -> O -0.097695
Gtype -> O -0.117083
Technique -> O -0.151878
Phase -> OD -0.322970
O -> Air -0.457974
OD -> O -0.476767
Substrain -> O -0.946090
Med -> Supp -0.949223
Agit -> O -1.116660
Top unlikely transitions:
OD -> OD 6.265824
Agit -> Agit 5.869478
Temp -> Temp 5.488519
Med -> Med 5.144823
Air -> Air 4.771777
Supp -> Supp 4.593228
Anti -> Anti 4.550151
Gversion -> Gversion 4.348826
Gtype -> Gtype 4.225492
Technique -> Technique 3.988233
O -> O 3.774876
Phase -> Phase 3.702756
Gtype -> Supp 1.903024
pH -> pH 1.848213
O -> Supp 1.594685
O -> Technique 0.980723
Air -> O 0.980311
O -> Gtype 0.683784
Med -> O 0.581107
Substrain -> Gtype 0.524869
Technique -> Air 0.483765
OD -> Phase 0.447540
Supp -> O 0.422763
Temp -> O 0.383324
Gtype -> pH 0.198547
O -> Anti 0.179783
Gtype -> Air 0.052812
O -> Agit -0.004958
Technique -> pH -0.041609
Anti -> O -0.097695
Gtype -> O -0.117083
Technique -> O -0.151878
Phase -> OD -0.322970
O -> Air -0.457974
OD -> O -0.476767
Substrain -> O -0.946090
Med -> Supp -0.949223
Agit -> O -1.116660
Top positive:
6.357696 Air b'word:Aerobic'
5.077540 Air b'lemma:anaerobic'
4.552250 Anti b'-2:lemma:antibody'
4.360948 O b'-2:lemma:_'
4.109534 Supp b'lemma:nh4cl'
4.096460 Technique b'word:ChIP-Seq'
3.897866 O b'-2:lemma:flagtag'
3.611278 Gtype b'-2:lemma:genotype/variation'
3.568906 O b'postag:IN'
3.460895 O b'lemma:_'
3.460895 O b'word:_'
3.428391 O b'lemma:1'
3.428391 O b'word:1'
3.423275 Technique b'lemma:rna-seq'
3.314745 Med b'word:LB'
3.281392 Technique b'word:ChIP-exo'
3.278176 O b'-1:lemma:ChIP-exo'
3.237196 O b'lemma:rpob'
3.237196 O b'word:RpoB'
3.225531 Supp b'lemma:nitrate'
3.225531 Supp b'word:nitrate'
3.161794 O b'postag::'
3.139068 Air b'word:Anaerobic'
3.139066 O b'+2:lemma:\xc2\xb0c'
3.077701 Gtype b'lemma:type'
3.077701 Gtype b'word:type'
3.075219 Technique b'word:ChIPSeq'
2.840391 O b'lemma:2'
2.840391 O b'word:2'
2.815117 Gtype b'-2:lemma:delta'
2.722376 O b'lemma:3'
2.722376 O b'word:3'
2.706348 O b'word:Cra'
2.634028 O b'+1:postag:RB'
2.495581 Substrain b'lemma:mg1655'
2.495581 Substrain b'word:MG1655'
2.448736 Med b'+2:lemma:with'
2.443674 Gtype b'-2:lemma:affyexp'
2.381962 O b'lemma:.'
2.381962 O b'postag:.'
2.381962 O b'word:.'
2.367230 Strain b'lemma:k-12'
2.367230 Strain b'word:K-12'
2.361973 Gtype b'lemma:wild-type'
2.355120 Phase b'lemma:mid-log'
2.355120 Phase b'word:mid-log'
2.319375 Gtype b'-2:lemma:genotype'
2.302990 Supp b'lemma:no3'
2.302990 Supp b'word:NO3'
2.291837 Med b'lemma:MOPS'
2.291837 Med b'word:MOPS'
2.228985 Technique b'lemma:ChIP-exo'
2.205076 Supp b'-2:lemma:supplement'
2.202918 Supp b'+1:lemma:\xc2\xb5m'
2.202918 Supp b'+1:word:\xc2\xb5M'
2.172606 Gtype b'+1:lemma:type'
2.172606 Gtype b'+1:word:type'
2.164705 Gtype b'+2:lemma:glucose'
2.129804 Gtype b'-1:lemma:\xe2\x88\x86'
2.129804 Gtype b'-1:word:\xe2\x88\x86'
2.103445 Anti b'+2:lemma:antibody'
2.089681 Air b'postag:RB'
2.074019 Technique b'+2:lemma:ph5'
2.072624 O b'+2:lemma:fructose'
2.067658 OD b'lemma:od600'
2.067658 OD b'word:OD600'
2.038104 Supp b'-2:lemma:shake'
2.013357 Supp b'lemma:Iron'
2.013357 Supp b'word:Iron'
2.013357 Supp b'+1:word:Deficient'
2.013357 Supp b'-2:lemma:Anaerobic'
1.980749 O b'+1:word:ChIP-Seq'
1.962748 O b'postag:CC'
1.930693 Supp b'lemma:pq'
1.930693 Supp b'word:PQ'
1.924374 O b'-1:word:Aerobic'
1.923374 O b'word:A'
1.922702 O b'lemma:b'
1.922702 O b'word:B'
1.917919 Supp b'-2:lemma:purr'
1.903045 O b'+1:postag:NNP'
1.888802 O b'+1:postag:VBP'
1.886279 Gtype b'lemma:wt'
1.882619 Phase b'-2:lemma:phase'
1.877260 Phase b'lemma:stationary'
1.877260 Phase b'word:stationary'
1.875788 O b'-2:lemma:~'
1.867119 O b'postag:DT'
1.857025 Technique b'lemma:chipseq'
1.833096 O b'postag:VBN'
1.829071 Gversion b'-2:lemma:nc'
1.818881 Gtype b'word:WT'
1.787639 O b'lemma:culture'
1.786961 O b'-1:lemma:tag'
1.786604 Gtype b'postag:-RRB-'
1.737297 Supp b'-1:word:Cra'
1.734300 O b'-2:lemma:mg/ml'
1.722597 O b'-2:lemma:medium'
1.711409 Technique b'lemma:rnaseq'
1.711409 Technique b'word:RNASeq'
1.696030 O b'lemma:chip'
1.694223 O b'lemma:-'
1.694223 O b'word:-'
1.694171 Supp b'-2:lemma:for'
1.691174 O b'lemma:Custom'
1.691174 O b'word:Custom'
1.681320 O b'word:Lrp'
1.673790 Gtype b'postag:JJ'
1.647891 O b'isLower'
1.643397 Gtype b'lemma:nsrr'
1.643397 Gtype b'word:NsrR'
1.632308 Supp b'lemma:arginine'
1.604616 O b'lemma:a'
1.600294 Gtype b'lemma:\xce\xb4cra'
1.596463 Gtype b'word:\xce\x94cra'
1.580527 Med b'+2:lemma:b2'
1.575286 O b'-1:lemma:lb'
1.556401 Temp b'-1:word:sample'
1.544845 O b'+2:lemma:polyclonal'
1.543849 Med b'isUpper'
1.541896 Supp b'+2:lemma:rifampicin'
1.535125 Supp b'lemma:glucose'
1.535125 Supp b'word:glucose'
1.524619 pH b'+1:postag:CD'
1.521913 Supp b'-1:postag:CC'
1.517190 O b'+2:lemma:chipseq'
1.503740 Supp b'-2:lemma:Lrp'
1.457179 O b'-2:lemma:co2'
1.447917 O b'-1:lemma:anaerobic'
1.447594 Temp b'-1:lemma:sample'
1.439709 Gtype b'lemma:\xe2\x88\x86'
1.439709 Gtype b'word:\xe2\x88\x86'
1.437966 Temp b'isNumber'
1.431128 Supp b'lemma:acetate'
1.431128 Supp b'word:acetate'
1.427889 Supp b'+1:lemma:2'
1.427889 Supp b'+1:word:2'
1.392397 Gversion b'lemma:nc'
1.392397 Gversion b'word:NC'
1.390556 pH b'lemma:ph5'
1.390556 pH b'+1:lemma:.5'
1.390556 pH b'word:pH5'
1.390556 pH b'+1:word:.5'
1.384470 Supp b'lemma:iptg'
1.384470 Supp b'word:IPTG'
1.365812 O b'-2:lemma:\xe2\x88\x86'
1.364153 Air b'+2:postag:NNP'
1.333446 Gtype b'+1:lemma:with'
1.333446 Gtype b'+1:word:with'
1.324361 Gtype b'-2:postag::'
1.306165 O b'+2:postag:JJ'
1.289867 Gtype b'-2:postag:DT'
1.281577 Gversion b'lemma:chip-seq'
1.270110 Gversion b'+2:lemma:000913'
1.267124 Vess b'lemma:flask'
1.267124 Vess b'-1:lemma:warm'
1.267124 Vess b'word:flask'
1.267124 Vess b'-1:word:warmed'
1.267124 Vess b'-2:lemma:pre'
1.267124 Vess b'+2:lemma:43'
1.263332 O b'+1:postag::'
1.249814 O b'isNumber'
1.246752 Air b'-2:postag:CD'
1.242966 Supp b'+1:lemma:1'
1.242966 Supp b'+1:word:1'
1.216506 Supp b'lemma:fructose'
1.216506 Supp b'word:fructose'
1.209634 Med b'lemma:media'
1.209634 Med b'word:media'
1.202005 O b'-1:lemma:media'
1.202005 O b'-1:word:media'
1.199774 Med b'+2:lemma:medium'
1.187094 Gtype b'-1:lemma:rpob'
1.187094 Gtype b'-1:word:RpoB'
1.183792 Supp b'lemma:dpd'
1.183792 Supp b'word:DPD'
1.180639 Air b'+2:lemma:37'
1.176560 Anti b'+1:lemma:antibody'
1.176560 Anti b'+1:word:antibody'
1.169611 O b'-1:word:tag'
1.165769 Gtype b'lemma:flag-tag'
1.165769 Gtype b'-1:lemma:c-terminal'
1.165769 Gtype b'word:Flag-tag'
1.165769 Gtype b'-1:word:C-terminal'
1.164325 Agit b'-2:postag:IN'
1.148652 O b'-2:lemma:mid-log'
1.147844 Gtype b'postag:NN'
1.145961 Phase b'isLower'
1.133253 O b'-2:lemma:ChIP-Seq'
1.129704 Temp b'-2:lemma:\xcf\x8332'
1.125804 Med b'+1:lemma:0.4'
1.125804 Med b'+1:word:0.4'
1.122954 Temp b'isUpper'
1.117074 Temp b'lemma:43'
1.117074 Temp b'word:43'
1.113876 Med b'+1:postag:JJ'
1.094450 Air b'-1:lemma:ChIP-Seq'
1.094450 Air b'-1:word:ChIP-Seq'
1.088461 Air b'-2:lemma:IP'
1.083576 Supp b'+1:lemma:_'
Top negative:
-0.031330 O b'-1:word:~'
-0.033235 Air b'+2:lemma:and'
-0.033826 Air b'-2:lemma:n2'
-0.033868 Temp b'+1:lemma:to'
-0.033868 Temp b'+1:postag:TO'
-0.033868 Temp b'+1:word:to'
-0.034239 O b'-2:lemma:-lrb-'
-0.034758 OD b'+2:postag:CD'
-0.034948 O b'+2:lemma:reference'
-0.046088 Air b'-1:postag:RB'
-0.047770 Temp b'+2:lemma:to'
-0.047770 Temp b'+2:postag:TO'
-0.048579 O b'-1:lemma:rpob'
-0.048579 O b'-1:word:RpoB'
-0.051599 Supp b'+1:lemma:-rrb-'
-0.051599 Supp b'+1:word:-RRB-'
-0.055182 O b'+2:lemma:anaerobically'
-0.055535 Air b'-1:lemma:,'
-0.055535 Air b'-1:postag:,'
-0.055535 Air b'-1:word:,'
-0.056326 OD b'+1:postag:CD'
-0.059220 O b'+1:word:cells'
-0.061552 O b'+1:word:ChIP-exo'
-0.064402 O b'lemma:fecl2'
-0.064402 O b'word:FeCl2'
-0.066340 Supp b'+1:postag:-RRB-'
-0.068654 Air b'-2:postag:CC'
-0.073218 Temp b'isLower'
-0.075683 O b'-2:lemma:%'
-0.080063 Supp b'postag:CC'
-0.081265 O b'-1:lemma:the'
-0.081974 Air b'postag:CD'
-0.082604 O b'-2:lemma:anaerobically'
-0.084445 Gtype b'-2:postag:IN'
-0.085948 O b'lemma:grow'
-0.086016 O b'lemma:20'
-0.086016 O b'word:20'
-0.087663 OD b'+2:postag:-LRB-'
-0.088450 pH b'postag:NN'
-0.089437 O b'-1:word:the'
-0.090217 O b'+1:lemma:mg1655'
-0.090217 O b'+1:word:MG1655'
-0.090497 O b'+1:lemma:300'
-0.090497 O b'+1:word:300'
-0.091900 O b'+1:lemma:fecl2'
-0.091900 O b'+1:word:FeCl2'
-0.091900 O b'-2:lemma:0.1'
-0.095208 Air b'+2:postag:IN'
-0.097435 O b'+1:postag:-RRB-'
-0.102615 Gtype b'+2:lemma:,'
-0.102615 Gtype b'+2:postag:,'
-0.104429 O b'-1:lemma:iptg'
-0.104429 O b'-1:word:IPTG'
-0.104998 O b'+1:lemma:.'
-0.104998 O b'+1:postag:.'
-0.104998 O b'+1:word:.'
-0.107677 O b'lemma:k-12'
-0.107677 O b'word:K-12'
-0.113262 O b'-2:lemma:cell'
-0.116418 Supp b'-2:postag:IN'
-0.122812 O b'+2:lemma:-rrb-'
-0.124242 Supp b'-1:postag:NNP'
-0.126377 Anti b'+1:lemma:anti-fur'
-0.126377 Anti b'+1:word:anti-Fur'
-0.154570 O b'-1:postag:-LRB-'
-0.156192 O b'-2:postag:VBG'
-0.158047 O b'-2:postag:-LRB-'
-0.163530 Phase b'-2:postag:NN'
-0.165038 O b'-1:postag:VBN'
-0.172203 O b'-2:lemma:to'
-0.172203 O b'-2:postag:TO'
-0.175281 O b'+2:lemma:b'
-0.177331 O b'-2:lemma::'
-0.178997 O b'-1:lemma:37'
-0.178997 O b'-1:word:37'
-0.180982 Supp b'isUpper'
-0.196878 O b'lemma:dissolve'
-0.196878 O b'word:dissolved'
-0.196878 O b'+2:lemma:methanol'
-0.207471 Gtype b'-1:postag:NN'
-0.209158 Air b'isLower'
-0.211564 O b'+2:lemma:mg1655'
-0.216055 O b'+1:lemma:phase'
-0.216055 O b'+1:word:phase'
-0.218804 Anti b'+2:lemma:polyclonal'
-0.225020 Temp b'-2:postag:NN'
-0.226672 O b'+1:lemma:g/l'
-0.226672 O b'+1:word:g/L'
-0.236399 O b'-2:lemma:minimal'
-0.242119 Technique b'isNumber'
-0.243045 Air b'+1:postag:JJ'
-0.244256 Supp b'-2:postag:NN'
-0.247335 O b'lemma:2h'
-0.247335 O b'-1:lemma:additional'
-0.247335 O b'word:2h'
-0.247335 O b'-1:word:additional'
-0.249741 Air b'-1:postag:JJ'
-0.253398 O b'+1:word:hours'
-0.261331 O b'-2:lemma:50'
-0.267379 O b'+1:lemma:supplement'
-0.267379 O b'+1:word:supplemented'
-0.268909 O b'word:WT'
-0.273891 O b'+2:postag:RB'
-0.274312 O b'lemma:anaerobically'
-0.274312 O b'word:anaerobically'
-0.295438 Phase b'isUpper'
-0.297401 O b'+2:lemma:+'
-0.299315 O b'-1:lemma:co2'
-0.299315 O b'-1:word:CO2'
-0.310768 O b'-1:lemma:IP'
-0.310768 O b'-1:word:IP'
-0.319418 Anti b'+2:postag:JJ'
-0.325955 Supp b'+2:lemma:-rrb-'
-0.334787 Med b'+1:postag:NN'
-0.341329 O b'lemma:37'
-0.341329 O b'word:37'
-0.342615 O b'-1:lemma:dissolve'
-0.342615 O b'+1:lemma:methanol'
-0.342615 O b'-1:word:dissolved'
-0.342615 O b'+1:word:methanol'
-0.346343 O b'+1:postag:NNS'
-0.346399 Agit b'isUpper'
-0.361710 O b'lemma:methanol'
-0.361710 O b'word:methanol'
-0.361710 O b'-2:lemma:dissolve'
-0.365689 O b'+2:postag:-RRB-'
-0.371703 O b'lemma:media'
-0.371703 O b'word:media'
-0.375913 Med b'+1:postag:IN'
-0.375941 O b'-2:lemma:supplement'
-0.389209 O b'-1:lemma:nsrr'
-0.389209 O b'-1:word:NsrR'
-0.391437 Phase b'postag:JJ'
-0.407412 O b'-2:lemma:IP'
-0.420031 Supp b'+2:postag:NNS'
-0.428280 O b'lemma:glucose'
-0.428280 O b'word:glucose'
-0.431094 O b'-2:postag:RB'
-0.438906 Supp b'+2:postag:-RRB-'
-0.441532 O b'lemma:wt'
-0.462990 OD b'lemma:-lrb-'
-0.462990 OD b'word:-LRB-'
-0.468593 O b'-2:lemma:a'
-0.475940 O b'+1:postag:VBG'
-0.483738 Gtype b'isUpper'
-0.485302 OD b'-2:postag:JJ'
-0.500326 O b'+2:lemma:medium'
-0.515103 O b'-2:postag:DT'
-0.547995 O b'+1:lemma:in'
-0.547995 O b'+1:word:in'
-0.556608 O b'lemma:rifampicin'
-0.556608 O b'word:rifampicin'
-0.582146 OD b'isNumber'
-0.583100 O b'-1:postag:IN'
-0.592764 O b'lemma:phase'
-0.592764 O b'word:phase'
-0.595239 O b'lemma:mid-log'
-0.595239 O b'word:mid-log'
-0.605400 O b'lemma:of'
-0.605400 O b'word:of'
-0.607669 O b'-1:lemma:grow'
-0.614149 Temp b'postag:NN'
-0.632028 Supp b'+2:lemma:2'
-0.641055 Supp b'-2:postag:JJ'
-0.646532 OD b'+1:postag:NN'
-0.649552 O b'-1:lemma:sample'
-0.673065 O b'-2:lemma:rpob'
-0.685360 Supp b'+2:postag:NN'
-0.718536 Gversion b'isLower'
-0.726489 O b'+2:lemma:rifampicin'
-0.728168 Med b'-2:postag:VBN'
-0.728191 O b'-1:postag::'
-0.732227 O b'-1:lemma:2'
-0.732227 O b'-1:word:2'
-0.751907 Technique b'isLower'
-0.756257 O b'+1:postag:IN'
-0.788494 O b'postag:RB'
-0.793328 O b'+1:lemma:at'
-0.793328 O b'+1:word:at'
-0.803395 Med b'-1:postag:NN'
-0.840583 O b'-1:postag:VBG'
-0.849074 O b'word:cells'
-0.860641 O b'+2:lemma:mid-log'
-0.885639 Gtype b'isLower'
-0.907497 OD b'postag:-LRB-'
-0.937466 O b'-2:lemma:rifampicin'
-0.975829 Gtype b'isNumber'
-1.088007 O b'-2:lemma:phase'
-1.095557 Air b'postag:NN'
-1.113735 O b'+1:lemma:2'
-1.113735 O b'+1:word:2'
-1.264849 Anti b'postag:NNP'
-1.412293 O b'+1:lemma:1'
-1.412293 O b'+1:word:1'
-1.493591 Supp b'postag:JJ'
-1.991234 O b'-1:lemma::'
-1.991234 O b'-1:word::'
-2.151194 Supp b'+2:postag:CD'
-2.386492 O b'-1:lemma:_'
-2.386492 O b'-1:word:_'