Estefani Gaytan Nunez

upload

Showing 24 changed files with 4282 additions and 0 deletions
********** TRAINING AND TESTING REPORT **********
Training file: training-data-set-70.txt
best params:{'c1': 0.20934163148847484, 'c2': 5.972574594521125e-05}
best CV score:0.8647885676297223
model size: 0.05M
Flat F1: 0.7568876974674016
precision recall f1-score support
OD 1.000 0.818 0.900 22
pH 1.000 1.000 1.000 8
Technique 0.955 0.913 0.933 23
Med 1.000 0.925 0.961 53
Temp 1.000 0.690 0.816 29
Vess 1.000 1.000 1.000 1
Agit 0.000 0.000 0.000 0
Phase 0.882 1.000 0.938 15
Air 0.543 0.362 0.435 69
Anti 0.786 1.000 0.880 11
Strain 0.000 0.000 0.000 1
Gtype 0.863 0.812 0.836 85
Substrain 0.000 0.000 0.000 0
Supp 0.607 0.784 0.684 134
Gversion 0.000 0.000 0.000 0
avg / total 0.774 0.758 0.757 451
Top likely transitions:
Temp -> Temp 6.372271
Agit -> Agit 6.336087
Med -> Med 5.775362
Supp -> Supp 5.599916
OD -> OD 5.514939
Anti -> Anti 5.400862
Phase -> Phase 5.214849
Air -> Air 4.870530
Gtype -> Gtype 4.826805
O -> O 4.601602
Gversion -> Gversion 4.343416
Technique -> Technique 4.055366
pH -> pH 3.102886
Substrain -> Gtype 1.934887
Air -> O 1.597978
O -> Supp 1.582569
Gtype -> Supp 1.508336
O -> Technique 1.406325
O -> Gtype 1.205559
O -> Temp 0.833133
Supp -> O 0.751090
O -> Phase 0.676762
Temp -> O 0.670412
Gtype -> Air 0.616010
Med -> O 0.606543
O -> Anti 0.311020
Technique -> Air 0.225375
OD -> O 0.192356
O -> Med 0.168459
Phase -> O 0.168068
O -> Gversion 0.143784
O -> pH 0.078839
O -> OD 0.075883
Strain -> O 0.069008
Gtype -> Med -0.229150
Gtype -> O -0.660221
O -> Air -0.746238
Substrain -> O -0.760215
Phase -> OD -0.885028
Technique -> Gtype -1.154393
Med -> Supp -1.787447
Top unlikely transitions:
Temp -> Temp 6.372271
Agit -> Agit 6.336087
Med -> Med 5.775362
Supp -> Supp 5.599916
OD -> OD 5.514939
Anti -> Anti 5.400862
Phase -> Phase 5.214849
Air -> Air 4.870530
Gtype -> Gtype 4.826805
O -> O 4.601602
Gversion -> Gversion 4.343416
Technique -> Technique 4.055366
pH -> pH 3.102886
Substrain -> Gtype 1.934887
Air -> O 1.597978
O -> Supp 1.582569
Gtype -> Supp 1.508336
O -> Technique 1.406325
O -> Gtype 1.205559
O -> Temp 0.833133
Supp -> O 0.751090
O -> Phase 0.676762
Temp -> O 0.670412
Gtype -> Air 0.616010
Med -> O 0.606543
O -> Anti 0.311020
Technique -> Air 0.225375
OD -> O 0.192356
O -> Med 0.168459
Phase -> O 0.168068
O -> Gversion 0.143784
O -> pH 0.078839
O -> OD 0.075883
Strain -> O 0.069008
Gtype -> Med -0.229150
Gtype -> O -0.660221
O -> Air -0.746238
Substrain -> O -0.760215
Phase -> OD -0.885028
Technique -> Gtype -1.154393
Med -> Supp -1.787447
Top positive:
10.190054 Supp b'lemma:Iron'
9.112733 O b'lemma:_'
7.880021 Air b'lemma:anaerobic'
7.549281 Technique b'lemma:ChIP-exo'
7.465093 Strain b'+1:lemma:substr'
7.422565 Med b'lemma:MOPS'
7.415432 Phase b'lemma:exponential'
7.415432 Phase b'lemma:stationary'
7.398082 Air b'-1:lemma:ChIP-Seq'
7.166683 O b'lemma:2'
7.146221 O b'lemma:-'
7.132564 Supp b'lemma:pq'
6.977885 Air b'lemma:aerobic'
6.744626 Gtype b'lemma:wt'
6.631092 Phase b'lemma:mid-log'
6.560555 O b'lemma:1'
6.447344 Technique b'lemma:chipseq'
6.389162 Gtype b'lemma:\xce\xb4cra'
6.371591 Gversion b'lemma:asm584v2'
6.280253 Supp b'+1:lemma:\xc2\xb5m'
6.224426 O b'lemma:3'
6.224046 O b'-1:lemma:tag'
6.202217 Air b'lemma:Aerobic'
6.046396 O b'lemma:rpob'
5.857469 Gversion b'lemma:nc'
5.848042 O b'lemma:Custom'
5.738285 Gtype b'lemma:wild-type'
5.702974 O b'lemma:Cra'
5.644004 O b'lemma:b'
5.533358 Gtype b'lemma:flag-tag'
5.533358 Gtype b'-1:lemma:c-terminal'
5.521522 O b'lemma:a'
5.328505 Gtype b'lemma:type'
5.276884 Supp b'lemma:nacl'
5.252181 O b'-1:lemma:ChIP-exo'
5.206802 Gtype b'+1:lemma:ph5'
5.150774 Technique b'lemma:ChIP-Seq'
5.068213 O b'postag:IN'
5.035817 O b'lemma:rep1'
4.983308 Vess b'lemma:flask'
4.983308 Vess b'-1:lemma:warm'
4.943797 O b'lemma:rep3'
4.854929 Supp b'-1:lemma:Cra'
4.845807 Gtype b'lemma:\xe2\x88\x86'
4.794055 Gtype b'+1:lemma:type'
4.645526 Gtype b'lemma:arca8myc'
4.620794 Gtype b'lemma:\xce\xb4fur'
4.559495 O b'lemma:\xcf\x8332'
4.549068 O b'-1:lemma:type'
4.420072 Supp b'lemma:rifampicin'
4.386336 Technique b'lemma:rna-seq'
4.375116 Supp b'lemma:acetate'
4.337376 O b'lemma:ompr'
4.249331 O b'postag:CC'
4.232009 Supp b'lemma:nitrate'
4.189729 Supp b'lemma:glucose'
4.169856 Substrain b'lemma:mg1655'
4.152149 Gtype b'lemma:dfnr'
4.143746 Supp b'lemma:nh4cl'
4.056334 O b'+1:lemma:pq'
4.052385 Gtype b'lemma:pk4854'
4.037199 Gtype b'lemma:fnr8myc'
3.985929 Anti b'lemma:none'
3.957986 Gtype b'lemma:delta-fnr'
3.954172 Med b'lemma:lb'
3.939606 Supp b'-1:lemma:+'
3.935266 Technique b'lemma:chip-seq'
3.929436 O b'postag:VBN'
3.908650 Med b'-1:lemma:ml'
3.884593 Gversion b'-1:lemma:nc'
3.878464 Anti b'lemma:\xcf\x8332'
3.863072 O b'lemma:.'
3.863072 O b'postag:.'
3.860626 O b'lemma:chip'
3.854556 Technique b'lemma:rnaseq'
3.854116 O b'postag::'
3.783932 Agit b'+1:lemma:rpm'
3.781020 Gtype b'lemma:\xce\xb4ompr'
3.721025 Air b'-1:lemma:-'
3.718911 Agit b'lemma:rpm'
3.658315 O b'-1:lemma:glucose'
3.650822 Gtype b'-1:lemma:\xe2\x88\x86'
3.626592 Gtype b'lemma:nsrr'
3.625435 Supp b'-1:lemma:with'
3.621959 Med b'+1:lemma:0.4'
3.534472 Gtype b'lemma:\xce\xb4soxr'
3.534031 Supp b'lemma:no3'
3.532340 O b'-1:lemma:0.3'
3.529130 Temp b'-1:lemma:sample'
3.523066 O b'lemma:with'
3.479777 OD b'lemma:od450'
3.451557 Gtype b'lemma:delta-arca'
3.421135 Anti b'lemma:seqa'
3.408004 O b'lemma:rep2'
3.394045 O b'-1:lemma:Aerobic'
3.391719 Gversion b'lemma:chip-seq'
3.391021 Supp b'lemma:dpd'
3.352795 O b'-1:lemma:lb'
3.334642 Technique b'-1:lemma:IP'
3.303675 O b'-1:lemma:0.3-0.35'
3.272998 Air b'lemma:anaerobically'
3.245394 Supp b'lemma:Leu'
3.235692 Supp b'lemma:Fe'
3.223774 Technique b'-1:lemma:chip-exo'
3.209091 Air b'lemma:aerobically'
3.198219 O b'postag:VBG'
3.170084 O b'lemma:harbor'
3.168019 Temp b'-1:lemma:43'
3.163951 Supp b'lemma:arginine'
3.160003 Gtype b'+1:lemma:pq'
3.153015 Anti b'+1:lemma:antibody'
3.115121 Strain b'lemma:k-12'
3.074034 Technique b'+1:lemma:chip-exo'
3.071794 Air b'postag:RB'
3.069425 Substrain b'lemma:mg1655star'
3.062133 Supp b'lemma:Adenine'
3.060892 Air b'lemma:anaeroibc'
3.056193 Temp b'-1:lemma:\xcf\x8332'
3.042361 Supp b'+1:lemma:mm'
3.019482 Gtype b'-1:lemma:ptac'
2.998564 O b'-1:lemma:into'
2.994519 pH b'lemma:5.5'
2.973677 Gversion b'lemma:.2'
2.973677 Gversion b'-1:lemma:u00096'
2.973445 O b'-1:lemma:\xc2\xb0c'
2.958977 Med b'lemma:media'
2.930356 O b'+1:lemma:chip-seq'
2.896285 Gtype b'+1:lemma:with'
2.895621 Gtype b'-1:lemma:rpob'
2.894434 Gtype b'+1:lemma:flagtag'
2.893824 O b'lemma:CEL'
2.871221 O b'+1:lemma:arca-8myc'
2.854072 Temp b'+1:lemma:in'
2.819779 Gtype b'-1:lemma:delta'
2.816913 O b'+1:lemma:250'
2.810671 O b'+1:lemma:od600'
2.799953 O b'+1:lemma:mid-log'
2.791088 Supp b'-1:lemma:vol'
2.782754 Gversion b'lemma:u00096'
2.782754 Gversion b'+1:lemma:.2'
2.762169 O b'lemma:ml'
2.714953 O b'+1:postag:NNP'
2.698964 Gtype b'lemma:\xce\xb4oxyr'
2.673462 Gtype b'-1:postag:VBG'
2.672861 Med b'lemma:L'
2.672861 Med b'+1:lemma:broth'
2.665253 O b'+1:lemma:acetate'
2.647014 Phase b'-1:lemma:mid-log'
2.641349 Med b'lemma:m63'
2.626973 Substrain b'+1:lemma:phtpg'
2.622910 Gversion b'lemma:000913'
2.605384 O b'lemma::'
2.594999 pH b'+1:postag:CD'
2.538219 O b'lemma:s'
2.537207 Med b'lemma:broth'
2.537207 Med b'-1:lemma:L'
2.527821 pH b'lemma:ph5'
2.527821 pH b'+1:lemma:.5'
2.515586 O b'+1:lemma:coli'
2.509864 Supp b'+1:lemma:1'
2.508452 Med b'-1:lemma:fresh'
2.494059 Supp b'lemma:fructose'
2.475120 Temp b'lemma:43'
2.463892 O b'-1:lemma:rpm'
2.434009 Anti b'lemma:anti-myc'
2.426928 O b'+1:postag:RB'
2.422956 Gtype b'+1:lemma:_'
2.396151 Med b'+1:lemma:2.0'
2.362312 O b'lemma:condition'
2.343697 O b'+1:lemma:or'
2.310123 Med b'-1:lemma:glucose'
2.304110 Temp b'lemma:\xc2\xb0c'
2.250756 Gtype b'+1:postag::'
2.243158 Med b'lemma:minimal'
2.236131 Temp b'-1:lemma:37'
2.232349 Temp b'+1:lemma:\xc2\xb0c'
2.215545 Gtype b'lemma:deltaseqa'
2.215545 Gtype b'-1:lemma:old'
2.168823 O b'postag:NNS'
2.142782 pH b'+1:lemma:5.5'
2.133425 O b'lemma:at'
2.127108 Gtype b'-1:lemma:factor'
2.118652 O b'lemma:agitation'
2.100263 Med b'+1:lemma:minimal'
2.099378 Supp b'+1:lemma:min'
2.099095 Phase b'lemma:phase'
2.087589 Supp b'+1:lemma:and'
2.072277 O b'+1:lemma:sparging'
2.066002 Supp b'+1:lemma:Deficient'
2.054354 O b'+1:lemma:anti-fur'
2.053062 O b'lemma:genotype/variation'
2.041875 O b'-1:lemma:l1'
2.039194 Gtype b'-1:lemma::'
2.031026 Supp b'-1:lemma:\xc2\xb5m'
2.010199 O b'postag:DT'
2.007491 O b'lemma:culture'
2.000485 O b'postag:VBD'
1.989764 OD b'lemma:0.3-0.35'
1.982940 Supp b'-1:lemma:sodium'
1.980905 Gversion b'postag:CD'
Top negative:
0.039807 OD b'+1:postag:,'
0.039185 O b'lemma:um'
0.035254 O b'+1:postag:JJ'
0.030051 O b'+1:postag:SYM'
0.027873 Supp b'-1:lemma:m'
0.025396 O b"lemma:'s"
0.025396 O b'postag:POS'
0.025396 O b'-1:lemma:manufacturer'
0.025396 O b'+1:lemma:instruction'
0.024333 Supp b'-1:lemma:rifampicin'
0.023672 O b'+1:lemma:more'
0.023672 O b'+1:postag:JJR'
0.017097 O b'lemma:final'
0.014163 O b'+1:lemma:_'
0.013275 O b'lemma:mm'
0.012005 Anti b'lemma:subunit'
0.012005 Anti b'+1:lemma:\xce\xb2'
0.003898 Gtype b'lemma:Fur'
0.002851 Gtype b'+1:postag:NNP'
0.002190 O b'lemma:%'
0.001946 Gtype b'lemma:transcription'
0.001946 Gtype b'+1:lemma:factor'
0.001569 O b'postag:VBZ'
0.001233 O b'lemma:short'
0.001233 O b'+1:lemma:rnase'
0.000698 Air b'-1:lemma:anaerobically'
0.000218 Med b'+1:postag:VBG'
0.000187 Temp b'-1:lemma:control'
0.000185 Gversion b'-1:lemma::'
0.000177 Phase b'-1:postag::'
0.000113 Supp b'+1:postag:RB'
0.000102 O b'+1:lemma:37'
0.000083 Supp b'lemma:ph'
0.000058 Phase b'+1:lemma:.'
0.000058 Phase b'+1:postag:.'
0.000058 Supp b'-1:lemma:ph'
0.000040 O b'-1:lemma:m63'
0.000031 Gtype b'+1:lemma:,'
0.000031 Gtype b'+1:postag:,'
0.000023 O b'lemma:IP'
0.000019 Supp b'lemma:300'
0.000019 Supp b'+1:lemma:\xc2\xb5l'
0.000009 Temp b'lemma:sample'
0.000007 Temp b'-1:lemma:see'
0.000007 Supp b'lemma:feso4'
0.000004 O b'lemma:acetate'
0.000002 O b'lemma:1:500'
0.000002 O b'-1:lemma:back'
0.000001 Temp b'lemma:control'
-0.000014 O b'+1:lemma:7.6'
-0.000065 O b'lemma:aerobic'
-0.000130 O b'-1:lemma:rpob'
-0.000160 Temp b'postag:JJ'
-0.000216 O b'+1:lemma:contain'
-0.000303 O b'-1:lemma:minimal'
-0.000303 O b'+1:lemma:of'
-0.000650 O b'+1:lemma:95'
-0.002197 O b'-1:lemma:m'
-0.002284 O b'-1:postag:JJ'
-0.002326 O b'+1:lemma:,'
-0.002326 O b'+1:postag:,'
-0.003392 Gtype b'+1:postag:NN'
-0.004717 O b'+1:lemma:fnr'
-0.005411 Gversion b'+1:postag:NN'
-0.005829 Air b'-1:postag:VBN'
-0.010227 Supp b'+1:lemma:of'
-0.011686 O b'+1:lemma:~'
-0.011760 Gtype b'+1:postag:CD'
-0.011855 O b'-1:lemma:5'
-0.012174 O b'-1:lemma:.'
-0.012174 O b'-1:postag:.'
-0.014528 O b'lemma:20'
-0.016459 Agit b'postag:NN'
-0.020390 Technique b'-1:lemma::'
-0.022295 O b'+1:lemma:-lrb-'
-0.023051 O b'lemma:od600'
-0.027871 Gtype b'postag:VBG'
-0.033411 O b'-1:postag:-RRB-'
-0.035506 O b'lemma:m63'
-0.038491 Supp b'-1:lemma:10'
-0.043161 O b'+1:lemma:%'
-0.051673 Gtype b'-1:postag:NN'
-0.053797 Supp b'lemma:mm'
-0.055546 Air b'+1:postag:CD'
-0.061075 O b'lemma:1m'
-0.068387 O b'+1:lemma:for'
-0.069911 O b'+1:lemma:-rrb-'
-0.075578 O b'-1:lemma:37'
-0.091100 O b'-1:lemma:of'
-0.094169 O b'-1:lemma:until'
-0.099146 Supp b'postag:CD'
-0.114939 O b'+1:lemma:ph'
-0.122454 O b'lemma:wt'
-0.126051 O b'-1:postag:DT'
-0.132413 Anti b'+1:postag:JJ'
-0.132521 O b'-1:lemma:iptg'
-0.138791 O b'-1:lemma:from'
-0.142787 Supp b'+1:postag:-RRB-'
-0.148089 Gtype b'postag:CD'
-0.160312 O b'-1:lemma:-lrb-'
-0.163006 O b'+1:postag:-LRB-'
-0.163639 O b'lemma:medium'
-0.171523 Supp b'+1:lemma:fructose'
-0.175650 Air b'postag:CD'
-0.176789 Gtype b'-1:lemma:mg1655'
-0.209413 Supp b'lemma:10'
-0.229904 Agit b'-1:postag:NN'
-0.233855 Supp b'postag:NN'
-0.254205 Supp b'-1:lemma:dpd'
-0.270474 O b'-1:lemma:\xe2\x88\x86'
-0.322402 O b'-1:lemma:1m'
-0.327143 Supp b'postag:CC'
-0.327477 O b'lemma:aerobically'
-0.334210 Med b'-1:postag:CD'
-0.337947 O b'lemma:mid-log'
-0.351998 O b'lemma:minimal'
-0.358452 Phase b'-1:lemma:at'
-0.378720 O b'-1:lemma:final'
-0.390763 Supp b'+1:lemma:dpd'
-0.395432 Med b'postag:CD'
-0.398943 O b'lemma:37'
-0.413061 O b'+1:lemma:+'
-0.428486 Gtype b'+1:lemma:-lrb-'
-0.435167 O b'lemma:\xce\xb4fur'
-0.445119 O b'-1:lemma:delta'
-0.454315 O b'+1:lemma:.'
-0.454315 O b'+1:postag:.'
-0.455581 Supp b'-1:lemma:-lrb-'
-0.463177 Phase b'+1:postag:NN'
-0.463568 Med b'+1:postag:NN'
-0.470619 Supp b'+1:postag:VBN'
-0.474915 O b'-1:lemma:sample'
-0.477379 Med b'-1:postag:NN'
-0.481602 O b'lemma:nitrogen'
-0.484766 O b'lemma:ph'
-0.504002 O b'-1:postag:-LRB-'
-0.509161 Supp b'-1:postag:-LRB-'
-0.510610 Air b'-1:lemma:or'
-0.519487 O b'+1:lemma:supplement'
-0.524200 O b'-1:lemma:n2'
-0.543542 O b'+1:postag:IN'
-0.594418 pH b'postag:NN'
-0.615404 O b'-1:postag:IN'
-0.619607 O b'-1:lemma:cra'
-0.656333 O b'-1:lemma:mm'
-0.688334 O b'+1:postag:-RRB-'
-0.710435 O b'-1:lemma:ml'
-0.716844 O b'lemma:anaerobically'
-0.730927 O b'+1:lemma:until'
-0.747298 O b'lemma:media'
-0.754688 O b'+1:lemma:mm'
-0.768772 Med b'-1:postag:IN'
-0.775892 Supp b'+1:lemma:acetate'
-0.788215 Technique b'-1:postag::'
-0.802700 O b'-1:lemma:co2'
-0.820146 O b'lemma:methanol'
-0.863797 Supp b'-1:postag:NNP'
-0.897516 O b'+1:lemma:at'
-0.907339 O b'+1:lemma:2.0'
-0.918505 O b'-1:postag::'
-0.945238 O b'-1:lemma:dissolve'
-0.945238 O b'+1:lemma:methanol'
-0.959379 Supp b'+1:lemma:rifampicin'
-0.986744 O b'-1:lemma:nsrr'
-0.993158 O b'+1:lemma:g/l'
-1.104395 O b'lemma:2h'
-1.104395 O b'-1:lemma:additional'
-1.107657 O b'+1:lemma:1m'
-1.131265 O b'+1:postag:VBG'
-1.178961 O b'lemma:of'
-1.201973 O b'-1:lemma:fresh'
-1.209225 O b'postag:VBP'
-1.212055 O b'-1:lemma:ph'
-1.241153 Phase b'-1:postag:JJ'
-1.329472 O b'lemma:30'
-1.335303 Temp b'postag:NN'
-1.335403 Supp b'+1:lemma:-lrb-'
-1.347076 O b'-1:lemma:IP'
-1.360555 Air b'+1:postag:JJ'
-1.373057 Supp b'+1:postag:-LRB-'
-1.494837 O b'-1:lemma:30'
-1.531593 Anti b'postag:NNP'
-1.575468 Phase b'postag:JJ'
-1.599957 Temp b'+1:postag:IN'
-1.732683 Air b'postag:NN'
-1.747609 OD b'+1:postag:NN'
-1.793039 O b'lemma:rifampicin'
-1.853309 Supp b'postag:JJ'
-1.930986 O b'+1:lemma:1'
-1.993172 Supp b'+1:lemma:,'
-1.993172 Supp b'+1:postag:,'
-2.085464 O b'-1:lemma:2'
-2.086023 O b'+1:lemma:in'
-2.172547 O b'lemma:0.3'
-2.181747 O b'-1:postag:VBG'
-2.236719 O b'-1:lemma:1'
-2.380060 O b'-1:lemma:vol'
-2.454507 O b'+1:lemma:2'
-4.266439 O b'-1:lemma:_'
-4.856613 O b'-1:lemma::'
********** TRAINING AND TESTING REPORT **********
Training file: training-data-set-70.txt
best params:{'c1': 0.09054223875215395, 'c2': 0.0023518182968725273}
best CV score:0.8689308276485085
model size: 0.08M
Flat F1: 0.7754340877116509
precision recall f1-score support
OD 1.000 0.818 0.900 22
pH 1.000 1.000 1.000 8
Technique 1.000 0.913 0.955 23
Med 1.000 0.943 0.971 53
Temp 1.000 0.690 0.816 29
Vess 1.000 1.000 1.000 1
Agit 0.000 0.000 0.000 0
Phase 0.882 1.000 0.938 15
Air 0.543 0.362 0.435 69
Anti 1.000 1.000 1.000 11
Strain 0.000 0.000 0.000 1
Gtype 0.868 0.776 0.820 85
Substrain 0.000 0.000 0.000 0
Supp 0.700 0.784 0.739 134
Gversion 0.000 0.000 0.000 0
avg / total 0.810 0.754 0.775 451
Top likely transitions:
Agit -> Agit 7.626632
Temp -> Temp 7.045132
Supp -> Supp 6.489590
Med -> Med 6.142134
Anti -> Anti 5.984670
Phase -> Phase 5.518817
OD -> OD 5.345555
Gtype -> Gtype 5.176791
Gversion -> Gversion 5.166638
Air -> Air 4.592159
O -> O 4.393244
pH -> pH 3.826126
Technique -> Technique 3.732727
O -> Supp 1.143221
O -> Technique 0.922279
Air -> O 0.769652
Substrain -> Gtype 0.641263
O -> Gtype 0.629642
Gtype -> Supp 0.604798
O -> Temp 0.290538
O -> Phase 0.232866
Temp -> O 0.222973
Supp -> O 0.152297
Gtype -> Air 0.133118
Phase -> O 0.090499
O -> Anti 0.049867
Strain -> O 0.017575
Phase -> Air 0.001750
O -> pH 0.000507
Med -> O 0.000355
pH -> O 0.000352
Air -> Agit -0.001348
Phase -> OD -0.008421
Air -> Supp -0.050545
Agit -> Air -0.052125
Anti -> O -0.059732
OD -> O -0.120587
Technique -> pH -0.229765
O -> Med -0.258685
Technique -> OD -0.288848
Air -> Phase -0.339706
O -> OD -0.391929
Gtype -> Med -0.529268
Air -> Temp -0.898446
Supp -> Med -0.919358
Technique -> O -1.026211
OD -> Air -1.149284
O -> Air -1.334781
Gtype -> O -1.407268
Med -> Supp -1.495850
Top unlikely transitions:
Temp -> Temp 7.045132
Supp -> Supp 6.489590
Med -> Med 6.142134
Anti -> Anti 5.984670
Phase -> Phase 5.518817
OD -> OD 5.345555
Gtype -> Gtype 5.176791
Gversion -> Gversion 5.166638
Air -> Air 4.592159
O -> O 4.393244
pH -> pH 3.826126
Technique -> Technique 3.732727
O -> Supp 1.143221
O -> Technique 0.922279
Air -> O 0.769652
Substrain -> Gtype 0.641263
O -> Gtype 0.629642
Gtype -> Supp 0.604798
O -> Temp 0.290538
O -> Phase 0.232866
Temp -> O 0.222973
Supp -> O 0.152297
Gtype -> Air 0.133118
Phase -> O 0.090499
O -> Anti 0.049867
Strain -> O 0.017575
Phase -> Air 0.001750
O -> pH 0.000507
Med -> O 0.000355
pH -> O 0.000352
Air -> Agit -0.001348
Phase -> OD -0.008421
Air -> Supp -0.050545
Agit -> Air -0.052125
Anti -> O -0.059732
OD -> O -0.120587
Technique -> pH -0.229765
O -> Med -0.258685
Technique -> OD -0.288848
Air -> Phase -0.339706
O -> OD -0.391929
Gtype -> Med -0.529268
Air -> Temp -0.898446
Supp -> Med -0.919358
Technique -> O -1.026211
OD -> Air -1.149284
O -> Air -1.334781
Gtype -> O -1.407268
Med -> Supp -1.495850
Substrain -> O -2.598977
Top positive:
10.185301 Technique b'lemma[:2]:Ch'
8.021086 O b'-1:lemma:tag'
6.996653 Gtype b'lemma[:1]:\xce\xb4'
6.769097 O b'lemma:2'
6.193177 O b'lemma:1'
6.092689 Air b'lemma:anaerobic'
5.980202 Phase b'lemma:stationary'
5.955259 Supp b'+1:lemma:\xc2\xb5m'
5.876491 O b'lemma:3'
5.874142 O b'lemma:with'
5.804642 O b'lemma:-'
5.578233 Supp b'lemma:Iron'
5.578233 Supp b'lemma[:2]:Ir'
5.487985 O b'lemma[:2]:re'
5.341246 Temp b'+1:lemma:in'
5.328334 Supp b'-1:lemma:Cra'
5.243720 Strain b'+1:lemma:substr'
5.241759 Phase b'lemma:mid-log'
5.207769 Substrain b'lemma[:2]:mg'
5.202431 O b'lemma:b'
5.093704 Supp b'-1:lemma:+'
4.978877 Supp b'-1:lemma:vol'
4.960498 O b'lemma:_'
4.960498 O b'lemma[:1]:_'
4.785889 Gtype b'-1:lemma:\xe2\x88\x86'
4.773191 Technique b'lemma:chipseq'
4.718733 O b'-1:lemma:lb'
4.679807 O b'lemma:a'
4.628908 Gversion b'-1:lemma:nc'
4.467825 O b'+1:lemma:pq'
4.445709 Air b'-1:lemma:ChIP-Seq'
4.378030 O b'-1:lemma:glucose'
4.329558 Air b'lemma[:2]:ae'
4.328211 O b'lemma:rpob'
4.198740 O b'lemma:delta'
4.173979 Med b'-1:lemma:ml'
4.095801 Supp b'lemma:acetate'
4.022526 O b'lemma:Custom'
4.022526 O b'lemma[:2]:Cu'
4.014705 O b'-1:lemma:0.3'
4.002057 Supp b'-1:lemma:with'
3.969838 Med b'+1:lemma:0.4'
3.874806 Supp b'lemma:pq'
3.874806 Supp b'lemma[:2]:pq'
3.827929 Gtype b'lemma:type'
3.827929 Gtype b'lemma[:2]:ty'
3.798612 O b'lemma:chip'
3.738737 Supp b'lemma:arginine'
3.738664 O b'lemma:ompr'
3.673537 Med b'+1:lemma:2.0'
3.658221 Agit b'+1:lemma:rpm'
3.493765 O b'-1:lemma:into'
3.452810 Air b'lemma:Aerobic'
3.452810 Air b'lemma[:2]:Ae'
3.452043 Temp b'-1:lemma:\xcf\x8332'
3.425411 Temp b'-1:lemma:sample'
3.422615 Gversion b'lemma:chip-seq'
3.364562 Gtype b'lemma:fnr8myc'
3.351216 O b'-1:lemma:type'
3.349721 Gtype b'lemma:arca8myc'
3.319200 O b'+1:lemma:od600'
3.296763 Gtype b'lemma[:2]:pk'
3.278704 O b'-1:lemma:Aerobic'
3.266869 O b'+1:lemma:mid-log'
3.263095 Gtype b'lemma[:2]:cr'
3.249020 Supp b'lemma:rifampicin'
3.239748 O b'+1:lemma:sparging'
3.237458 Supp b'lemma[:2]:ri'
3.210269 O b'-1:lemma:0.3-0.35'
3.205436 Technique b'lemma[:2]:rn'
3.180683 O b'lemma[:1]:h'
3.156042 Phase b'lemma[:2]:ex'
3.140201 Air b'lemma:aerobic'
3.137123 O b'+1:lemma:250'
3.129608 Anti b'lemma[:2]:an'
3.121981 Supp b'-1:lemma:final'
3.117574 O b'lemma[:2]:ge'
3.100954 Air b'-1:lemma:-'
3.095232 Technique b'-1:lemma:input'
3.095018 Med b'-1:lemma:fresh'
3.052910 O b'lemma:.'
3.052910 O b'postag:.'
3.052910 O b'postag[:1]:.'
3.041093 Technique b'lemma[:2]:ch'
3.027376 Supp b'+1:lemma:1'
3.017384 Phase b'lemma:exponential'
3.011507 Gtype b'lemma[:1]:W'
3.000727 Supp b'lemma[:2]:gl'
3.000345 Med b'lemma:MOPS'
3.000345 Med b'lemma[:1]:M'
3.000345 Med b'lemma[:2]:MO'
2.946850 Anti b'lemma:none'
2.913481 Supp b'lemma:fructose'
2.906897 Gtype b'lemma:flag-tag'
2.906897 Gtype b'-1:lemma:c-terminal'
2.891578 Phase b'-1:lemma:until'
2.885138 O b'lemma:n'
2.870143 O b'+1:lemma:43'
2.866970 O b'+1:postag:RB'
2.866043 Gtype b'+1:lemma::'
2.843685 O b'postag::'
2.843685 O b'postag[:1]::'
2.823299 Air b'lemma[:2]:an'
2.803059 Gtype b'lemma[:1]:w'
2.801040 Gtype b'lemma[:2]:de'
2.793153 Gtype b'+1:lemma:flagtag'
2.782454 Gversion b'lemma:asm584v2'
2.771209 Gversion b'lemma[:2]:as'
2.728848 O b'+1:postag:NNP'
2.710278 O b'-1:lemma:ChIP-exo'
2.685529 O b'+1:lemma:or'
2.672360 Med b'lemma:broth'
2.672360 Med b'-1:lemma:L'
2.672360 Med b'lemma[:2]:br'
2.653066 Gtype b'lemma:nsrr'
2.653066 Gtype b'lemma[:2]:ns'
2.644949 Gtype b'lemma:wt'
2.644949 Gtype b'lemma[:2]:wt'
2.643481 O b'+1:lemma:acetate'
2.640438 Gtype b'lemma[:2]:ar'
2.625663 Gversion b'lemma:nc'
2.625663 Gversion b'lemma[:2]:nc'
2.605563 Anti b'+1:lemma:antibody'
2.591652 Technique b'-1:lemma:chip-exo'
2.575198 O b'lemma[:2]:fo'
2.565649 Gtype b'-1:lemma:vector'
2.556891 Gtype b'-1:postag:VBG'
2.531858 Substrain b'+1:lemma:phtpg'
2.530034 O b'lemma:0.4'
2.515928 OD b'lemma:od450'
2.506637 O b'lemma[:1]:C'
2.500720 Supp b'lemma:sodium'
2.401085 O b'+1:lemma:coli'
2.393881 Supp b'+1:lemma:phosphate'
2.376464 O b'-1:postag:NNS'
2.374397 Temp b'-1:lemma:43'
2.366322 Supp b'+1:lemma:_'
2.344507 Supp b'lemma[:2]:ni'
2.318294 O b'lemma[:2]:Cr'
2.313838 O b'lemma[:2]:om'
2.312761 O b'-1:lemma:anaerobic'
2.297530 Gtype b'-1:lemma:rpob'
2.286218 O b'-1:lemma:phase'
2.285928 Gversion b'lemma[:2]:00'
2.270036 Supp b'lemma:iptg'
2.264276 Supp b'lemma[:1]:I'
2.248929 O b'+1:lemma:30'
2.245626 O b'lemma:oxyr'
2.240334 O b'lemma:Cra'
2.237370 Gtype b'-1:lemma:_'
2.220905 Med b'lemma:L'
2.220905 Med b'+1:lemma:broth'
2.217317 Supp b'lemma:nacl'
2.217317 Supp b'lemma[:2]:na'
2.191402 Temp b'lemma:43'
2.191402 Temp b'lemma[:2]:43'
2.169633 Supp b'lemma:Fe'
2.169633 Supp b'lemma[:2]:Fe'
2.169099 pH b'lemma[:2]:ph'
2.167053 O b'lemma:ml'
2.167053 O b'lemma[:2]:ml'
2.163570 Gtype b'lemma:dfnr'
2.163570 Gtype b'lemma[:2]:df'
2.115282 OD b'lemma[:1]:o'
2.113233 OD b'lemma:0.3-0.35'
2.096796 Supp b'lemma:no3'
2.092494 O b'-1:lemma:aerobically'
2.080983 Phase b'lemma[:1]:e'
2.077238 Anti b'-1:lemma::'
2.069956 O b'lemma[:2]:ha'
2.068054 Supp b'lemma:dpd'
2.068054 Supp b'lemma[:2]:dp'
2.065958 Supp b'lemma[:2]:ac'
2.065024 Gtype b'+1:lemma:type'
2.034166 O b'+1:lemma:nitrate'
2.033879 Gtype b'lemma[:1]:f'
2.029691 Gtype b'-1:lemma:knock-out'
2.028831 pH b'+1:postag:CD'
2.022928 Gtype b'+1:lemma:_'
1.993507 Supp b'lemma:Leu'
1.993507 Supp b'lemma[:2]:Le'
1.979381 O b'-1:lemma:stpa'
1.969036 Technique b'lemma[:1]:C'
1.953152 Technique b'+1:lemma:chip-exo'
1.940011 Air b'+1:postag:IN'
1.924352 Supp b'lemma[:2]:30'
1.903191 O b'lemma:purr'
1.902904 Air b'lemma[:1]:a'
1.895935 O b'lemma:A'
1.889757 Gtype b'+1:lemma:with'
1.884874 O b'+1:lemma:dfnr'
1.875072 Supp b'-1:lemma:30'
1.870747 Supp b'-1:lemma::'
1.864656 Technique b'-1:lemma:rna-seq'
1.858198 Gtype b'lemma[:1]:t'
1.836001 O b'lemma[:2]:in'
1.821777 pH b'lemma:ph5'
1.821777 pH b'+1:lemma:.5'
1.813827 Supp b'lemma:Adenine'
1.813827 Supp b'lemma[:2]:Ad'
Top negative:
-0.122503 O b'+1:lemma:vol'
-0.122503 O b'lemma[:2]:1/'
-0.125682 Supp b'+1:postag:NNS'
-0.128364 O b'lemma[:1]:5'
-0.128783 Med b'postag[:1]:C'
-0.131277 Supp b'postag:CD'
-0.131277 Supp b'postag[:2]:CD'
-0.131348 Supp b'-1:postag::'
-0.133721 Gtype b'lemma:-lrb-'
-0.135824 Phase b'lemma[:2]:pa'
-0.136598 O b'lemma:grow'
-0.140727 Air b'+1:lemma:-lrb-'
-0.148264 O b'lemma:ph'
-0.150736 O b'+1:lemma:ph'
-0.159027 O b'+1:postag:CD'
-0.160597 OD b'+1:lemma:0.4'
-0.165913 O b'+1:lemma:phosphate'
-0.169625 Air b'-1:postag:CC'
-0.171481 OD b'postag[:1]:N'
-0.171481 OD b'postag[:2]:NN'
-0.171501 O b'lemma:1m'
-0.171501 O b'lemma[:2]:1m'
-0.176117 Supp b'-1:lemma:.'
-0.176117 Supp b'-1:postag:.'
-0.177236 Temp b'postag[:1]:N'
-0.177236 Temp b'postag[:2]:NN'
-0.184780 Supp b'+1:postag:-RRB-'
-0.189482 Gversion b'+1:postag:NN'
-0.191580 O b'+1:lemma:1/100'
-0.194798 Med b'lemma[:1]:c'
-0.201698 Gtype b'+1:postag:CD'
-0.208826 O b'lemma:150'
-0.208826 O b'+1:lemma:mg/ml'
-0.208826 O b'lemma[:2]:15'
-0.209680 O b'+1:lemma:supplement'
-0.211723 O b'lemma[:1]:p'
-0.211762 O b'-1:lemma:g/l'
-0.218211 O b'lemma:30'
-0.218501 Temp b'-1:lemma:\xc2\xb0c'
-0.223383 O b'lemma:7.6'
-0.223383 O b'+1:lemma:;'
-0.223383 O b'lemma[:2]:7.'
-0.230315 O b'-1:postag:-LRB-'
-0.234210 Air b'-1:postag:VBN'
-0.234317 Air b'+1:postag:-LRB-'
-0.238132 O b'lemma:\xe2\x88\x86'
-0.238132 O b'lemma[:1]:\xe2\x88\x86'
-0.239813 O b'lemma[:2]:20'
-0.243759 Supp b'lemma[:2]:ph'
-0.268591 O b'-1:lemma:-lrb-'
-0.273129 O b'+1:lemma:.'
-0.273129 O b'+1:postag:.'
-0.274515 Gtype b'-1:postag:NN'
-0.276640 O b'lemma:aerobic'
-0.280353 pH b'postag[:1]:N'
-0.280353 pH b'postag[:2]:NN'
-0.281176 O b'+1:lemma:co2'
-0.284309 OD b'hUpper'
-0.284309 OD b'hLower'
-0.299607 O b'+1:lemma:rep2'
-0.305286 Supp b'+1:postag::'
-0.305442 O b'-1:lemma:1m'
-0.307656 O b'lemma[:2]:an'
-0.311346 Supp b'lemma[:1]:s'
-0.313198 O b'-1:lemma:IP'
-0.327309 O b'+1:lemma:1m'
-0.328461 Med b'+1:postag:NN'
-0.331739 Supp b'-1:lemma:dpd'
-0.332765 O b'-1:lemma:n2'
-0.333171 Supp b'postag[:1]:C'
-0.334484 O b'+1:lemma:-rrb-'
-0.335881 Technique b'postag:NN'
-0.336835 Supp b'-1:lemma:%'
-0.343844 O b'lemma:37'
-0.343844 O b'lemma[:2]:37'
-0.352890 O b'lemma:co2'
-0.366891 O b'-1:lemma:rpob'
-0.372918 O b'-1:lemma:of'
-0.379129 O b'lemma[:1]:k'
-0.385300 Supp b'lemma:10'
-0.411833 O b'lemma[:2]:od'
-0.414482 O b'lemma:20'
-0.419047 Supp b'-1:lemma:-lrb-'
-0.423807 Air b'postag:NN'
-0.429435 O b'lemma:anaerobically'
-0.430690 Med b'-1:postag:NN'
-0.435460 Gtype b'postag[:1]:V'
-0.435460 Gtype b'postag[:2]:VB'
-0.436108 O b'-1:lemma:with'
-0.436725 Supp b'postag:JJ'
-0.448963 O b'-1:lemma:from'
-0.449583 Supp b'postag[:1]:J'
-0.449583 Supp b'postag[:2]:JJ'
-0.452505 O b'lemma:wt'
-0.452505 O b'lemma[:2]:wt'
-0.456297 Med b'+1:postag:IN'
-0.460912 O b'lemma:of'
-0.460912 O b'lemma[:2]:of'
-0.464084 O b'lemma[:2]:ph'
-0.476123 Phase b'+1:postag:NN'
-0.478646 O b'-1:lemma:37'
-0.479822 O b'lemma[:2]:mg'
-0.490640 O b'lemma:glucose'
-0.494749 O b'-1:lemma:mm'
-0.513617 O b'+1:postag:IN'
-0.520527 O b'+1:lemma:2.0'
-0.524819 Phase b'+1:lemma:pahse'
-0.528372 O b'lemma:media'
-0.534673 Temp b'postag:NN'
-0.544692 Air b'-1:lemma:or'
-0.551287 Air b'-1:postag:JJ'
-0.559988 O b'-1:lemma:\xe2\x88\x86'
-0.572775 Air b'+1:postag:JJ'
-0.575164 O b'+1:postag:VBG'
-0.579204 Air b'postag[:1]:N'
-0.579204 Air b'postag[:2]:NN'
-0.579634 Supp b'+1:lemma:fructose'
-0.613266 O b'lemma[:2]:ae'
-0.626767 Anti b'symb'
-0.633791 Med b'+1:postag:NNS'
-0.643773 O b'+1:lemma:+'
-0.653623 O b'lemma[:2]:me'
-0.686630 O b'-1:lemma:delta'
-0.691758 Phase b'postag[:1]:J'
-0.691758 Phase b'postag[:2]:JJ'
-0.695582 Gtype b'lemma[:1]:c'
-0.707615 Agit b'symb'
-0.710847 O b'lemma[:1]:n'
-0.719133 O b'lemma:mid-log'
-0.720783 O b'lemma[:2]:0.'
-0.727230 O b'lemma:0.3'
-0.730648 Supp b'-1:postag:-LRB-'
-0.737779 Gtype b'lemma[:1]:r'
-0.743565 Med b'-1:postag:CD'
-0.750347 O b'+1:lemma:for'
-0.760505 Air b'symb'
-0.775646 O b'lemma[:2]:30'
-0.777433 O b'lemma:2h'
-0.777433 O b'-1:lemma:additional'
-0.777433 O b'lemma[:2]:2h'
-0.811526 O b'-1:lemma:final'
-0.812240 O b'lemma[:1]:0'
-0.816482 Gtype b'-1:lemma:-lrb-'
-0.821802 O b'-1:lemma:30'
-0.824390 O b'lemma[:2]:gl'
-0.845131 Supp b'lemma[:1]:a'
-0.845884 O b'lemma:methanol'
-0.854110 Technique b'-1:postag::'
-0.866570 O b'+1:lemma:until'
-0.885272 Supp b'+1:lemma:rifampicin'
-0.885699 O b'-1:lemma:nsrr'
-0.888698 O b'-1:lemma:cra'
-0.904476 O b'+1:lemma:at'
-0.932299 Med b'symb'
-0.942332 O b'-1:lemma:co2'
-0.962304 O b'+1:lemma:g/l'
-0.962951 O b'+1:lemma:mm'
-0.972177 O b'lemma:soxs-8myc'
-0.976305 O b'-1:lemma:dissolve'
-0.976305 O b'+1:lemma:methanol'
-1.026438 O b'lemma[:2]:ar'
-1.071623 Gtype b'lemma[:1]:a'
-1.074211 Gtype b'+1:lemma:-rrb-'
-1.085129 Phase b'postag:JJ'
-1.096666 O b'-1:lemma:ph'
-1.111621 Supp b'-1:postag:NNP'
-1.112692 Phase b'hUpper'
-1.112692 Phase b'hLower'
-1.151925 Anti b'postag:NNP'
-1.166449 Supp b'symb'
-1.186999 Phase b'-1:postag:JJ'
-1.188170 O b'-1:lemma:until'
-1.246650 Agit b'hUpper'
-1.246650 Agit b'hLower'
-1.262971 O b'+1:lemma:in'
-1.269087 O b'-1:lemma:ml'
-1.380405 OD b'+1:postag:NN'
-1.438101 Supp b'+1:postag:VBN'
-1.535115 Supp b'hGreek'
-1.573856 Supp b'+1:lemma:-lrb-'
-1.599754 Supp b'+1:postag:-LRB-'
-1.625658 O b'-1:postag:VBG'
-1.641325 O b'-1:postag::'
-1.657440 O b'lemma[:2]:ri'
-1.696650 O b'-1:lemma:1'
-1.882679 O b'+1:lemma:1'
-1.947502 O b'lemma:rifampicin'
-2.109295 O b'-1:lemma:sample'
-2.179742 O b'-1:lemma:fresh'
-2.704413 Supp b'+1:lemma:,'
-2.704413 Supp b'+1:postag:,'
-2.995590 Supp b'lemma[:1]:c'
-3.044999 O b'+1:lemma:2'
-3.183494 O b'lemma[:2]:fl'
-3.486906 O b'-1:lemma:vol'
-3.568659 O b'-1:lemma:2'
-3.575405 Temp b'+1:postag:IN'
-4.334558 O b'postag:VBP'
-5.276604 O b'-1:lemma:_'
-5.933765 O b'-1:lemma::'
********** TRAINING AND TESTING REPORT **********
Training file: training-data-set-70.txt
best params:{'c1': 0.11486416926458794, 'c2': 0.02680674100354409}
best CV score:0.8644252350498997
model size: 0.12M
Flat F1: 0.7889552451646998
precision recall f1-score support
OD 1.000 0.818 0.900 22
pH 1.000 1.000 1.000 8
Technique 0.955 0.913 0.933 23
Med 1.000 0.943 0.971 53
Temp 1.000 0.690 0.816 29
Vess 1.000 1.000 1.000 1
Agit 0.000 0.000 0.000 0
Phase 0.875 0.933 0.903 15
Air 0.556 0.362 0.439 69
Anti 0.579 1.000 0.733 11
Strain 0.000 0.000 0.000 1
Gtype 0.877 0.753 0.810 85
Substrain 0.000 0.000 0.000 0
Supp 0.848 0.791 0.819 134
Gversion 0.000 0.000 0.000 0
avg / total 0.845 0.749 0.789 451
Top likely transitions:
Temp -> Temp 5.434090
Agit -> Agit 5.083855
Anti -> Anti 5.066839
OD -> OD 4.944534
Supp -> Supp 4.921053
Med -> Med 4.814443
Gtype -> Gtype 4.724645
Gversion -> Gversion 4.371417
O -> O 4.134917
Phase -> Phase 4.076535
Air -> Air 4.013049
Technique -> Technique 3.791769
pH -> pH 2.691203
Substrain -> Gtype 1.373548
Gtype -> Supp 1.262860
O -> Gtype 1.231509
O -> Technique 1.057715
O -> Supp 0.927276
Air -> O 0.926745
Gtype -> pH 0.675182
Gtype -> Air 0.618804
Technique -> Air 0.558242
Supp -> O 0.412087
Temp -> O 0.355896
O -> Temp 0.211297
Med -> O 0.203763
O -> Anti 0.188101
Strain -> O 0.110948
O -> Phase 0.067802
O -> OD 0.026887
Phase -> O -0.000033
OD -> O -0.000624
O -> Agit -0.005237
Anti -> O -0.013260
Technique -> OD -0.015560
Supp -> Air -0.017152
Air -> Supp -0.041258
Technique -> O -0.086710
Agit -> O -0.090919
Air -> Med -0.201293
O -> Med -0.213366
Air -> Phase -0.347298
Air -> Temp -0.357907
Phase -> Supp -0.382883
OD -> Air -0.397558
Phase -> OD -0.415065
Gtype -> Anti -0.434583
Gtype -> Med -0.515634
Technique -> pH -0.532638
Gtype -> O -0.589161
Top unlikely transitions:
Med -> Med 4.814443
Gtype -> Gtype 4.724645
Gversion -> Gversion 4.371417
O -> O 4.134917
Phase -> Phase 4.076535
Air -> Air 4.013049
Technique -> Technique 3.791769
pH -> pH 2.691203
Substrain -> Gtype 1.373548
Gtype -> Supp 1.262860
O -> Gtype 1.231509
O -> Technique 1.057715
O -> Supp 0.927276
Air -> O 0.926745
Gtype -> pH 0.675182
Gtype -> Air 0.618804
Technique -> Air 0.558242
Supp -> O 0.412087
Temp -> O 0.355896
O -> Temp 0.211297
Med -> O 0.203763
O -> Anti 0.188101
Strain -> O 0.110948
O -> Phase 0.067802
O -> OD 0.026887
Phase -> O -0.000033
OD -> O -0.000624
O -> Agit -0.005237
Anti -> O -0.013260
Technique -> OD -0.015560
Supp -> Air -0.017152
Air -> Supp -0.041258
Technique -> O -0.086710
Agit -> O -0.090919
Air -> Med -0.201293
O -> Med -0.213366
Air -> Phase -0.347298
Air -> Temp -0.357907
Phase -> Supp -0.382883
OD -> Air -0.397558
Phase -> OD -0.415065
Gtype -> Anti -0.434583
Gtype -> Med -0.515634
Technique -> pH -0.532638
Gtype -> O -0.589161
O -> Air -0.833200
Supp -> Med -0.863491
Technique -> Gtype -0.999961
Substrain -> O -1.154679
Med -> Supp -1.746454
Top positive:
5.242070 Air b'word:Aerobic'
4.810824 O b'lemma:_'
4.810824 O b'word:_'
4.702604 Air b'lemma:anaerobic'
4.379183 O b'word:Cra'
3.811896 Technique b'word:ChIP-Seq'
3.692913 Gtype b'lemma:wild-type'
3.658772 Air b'postag:RB'
3.551778 O b'postag:IN'
3.496331 O b'-1:lemma:ChIP-exo'
3.382322 Gtype b'lemma:wt'
3.353140 O b'postag::'
3.300897 Air b'word:Anaerobic'
3.260825 Technique b'lemma:ChIP-exo'
3.157424 Technique b'word:ChIP-exo'
3.138715 Supp b'lemma:Iron'
3.138715 Supp b'word:Iron'
3.138715 Supp b'+1:word:Deficient'
3.135138 Gtype b'word:WT'
3.125987 Technique b'word:ChIPSeq'
3.117161 Supp b'lemma:nh4cl'
3.113447 Supp b'lemma:pq'
3.113447 Supp b'word:PQ'
3.089898 O b'lemma:2'
3.089898 O b'word:2'
2.955558 Technique b'lemma:rna-seq'
2.907748 Supp b'-1:word:Cra'
2.858859 O b'lemma:-'
2.858859 O b'word:-'
2.779905 O b'lemma:1'
2.779905 O b'word:1'
2.749191 O b'postag:CC'
2.719373 O b'lemma:rpob'
2.719373 O b'word:RpoB'
2.681464 O b'lemma:3'
2.681464 O b'word:3'
2.680679 Gversion b'lemma:asm584v2'
2.680679 Gversion b'word:ASM584v2'
2.675651 Phase b'lemma:mid-log'
2.675651 Phase b'word:mid-log'
2.644118 O b'lemma:chip'
2.643576 Strain b'+1:lemma:substr'
2.643576 Strain b'+1:word:substr'
2.573762 Gtype b'lemma:\xce\xb4cra'
2.570718 Gtype b'word:\xce\x94cra'
2.569025 Med b'lemma:MOPS'
2.569025 Med b'word:MOPS'
2.564545 Supp b'+1:lemma:\xc2\xb5m'
2.564545 Supp b'+1:word:\xc2\xb5M'
2.540412 O b'lemma:.'
2.540412 O b'postag:.'
2.540412 O b'word:.'
2.523030 O b'-1:word:tag'
2.521842 Gtype b'lemma:type'
2.521842 Gtype b'word:type'
2.501449 Technique b'lemma:chipseq'
2.486934 O b'lemma:Custom'
2.486934 O b'word:Custom'
2.456959 Gtype b'+1:lemma:type'
2.456959 Gtype b'+1:word:type'
2.442001 O b'lemma:a'
2.306156 Phase b'lemma:exponential'
2.306156 Phase b'word:exponential'
2.306156 Phase b'lemma:stationary'
2.306156 Phase b'word:stationary'
2.278404 O b'-1:lemma:tag'
2.248178 O b'lemma:b'
2.248178 O b'word:B'
2.236935 Air b'word:anaerobic'
2.220325 Supp b'lemma:arginine'
2.200862 pH b'+1:postag:CD'
2.198724 Supp b'lemma:glucose'
2.198724 Supp b'word:glucose'
2.169027 O b'+1:postag:RB'
2.146306 Supp b'lemma:nacl'
2.146306 Supp b'word:NaCl'
2.141305 O b'lemma:ompr'
2.141305 O b'word:OmpR'
2.134893 Gtype b'lemma:flag-tag'
2.134893 Gtype b'-1:lemma:c-terminal'
2.134893 Gtype b'word:Flag-tag'
2.134893 Gtype b'-1:word:C-terminal'
2.115770 Gversion b'lemma:nc'
2.115770 Gversion b'word:NC'
2.102355 Substrain b'lemma:mg1655'
2.102355 Substrain b'word:MG1655'
2.096735 O b'+1:lemma:pq'
2.096735 O b'+1:word:PQ'
2.076016 Gtype b'postag:-LRB-'
2.067392 O b'-1:word:Aerobic'
2.067015 Gtype b'lemma:delta-fnr'
2.067015 Gtype b'word:delta-fnr'
2.020747 O b'word:A'
2.011558 Supp b'lemma:rifampicin'
2.011558 Supp b'word:rifampicin'
2.009828 Technique b'lemma:rnaseq'
2.009828 Technique b'word:RNASeq'
1.995883 O b'+1:postag:NNP'
1.979049 Gtype b'-1:postag:VBG'
1.964073 O b'+1:word:were'
1.938262 O b'lemma:rep1'
1.938262 O b'word:rep1'
1.933374 Supp b'lemma:acetate'
1.933374 Supp b'word:acetate'
1.929172 O b'postag:VBN'
1.923161 Strain b'lemma:k-12'
1.923161 Strain b'word:K-12'
1.913934 Technique b'-1:lemma:chip-exo'
1.905918 O b'lemma:rep3'
1.905918 O b'word:rep3'
1.902460 Gtype b'-1:lemma:\xe2\x88\x86'
1.902460 Gtype b'-1:word:\xe2\x88\x86'
1.888089 Supp b'lemma:nitrate'
1.888089 Supp b'word:nitrate'
1.880361 Supp b'-1:lemma:with'
1.880361 Supp b'-1:word:with'
1.879119 O b'+1:word:ChIP-Seq'
1.873439 Gtype b'lemma:\xe2\x88\x86'
1.873439 Gtype b'word:\xe2\x88\x86'
1.869636 O b'-1:lemma:lb'
1.869636 O b'-1:word:LB'
1.865650 Gversion b'-1:lemma:nc'
1.865650 Gversion b'-1:word:NC'
1.865606 Gtype b'lemma:arca8myc'
1.865606 Gtype b'word:ArcA8myc'
1.864036 Agit b'+1:lemma:rpm'
1.864036 Agit b'+1:word:rpm'
1.856334 Agit b'lemma:rpm'
1.856334 Agit b'word:rpm'
1.839596 Anti b'lemma:seqa'
1.839596 Anti b'word:SeqA'
1.836163 Supp b'-1:postag:CC'
1.835200 O b'lemma:with'
1.835200 O b'word:with'
1.814125 O b'lemma:culture'
1.811651 Supp b'-1:lemma:Cra'
1.792538 Air b'-1:lemma:ChIP-Seq'
1.792538 Air b'-1:word:ChIP-Seq'
1.768358 Air b'lemma:aerobic'
1.762953 O b'isLower'
1.736496 Gtype b'word:cra'
1.714381 Gversion b'word:ChIP-Seq'
1.710946 Air b'-1:postag::'
1.709986 Gtype b'postag:NN'
1.698044 O b'lemma:at'
1.695726 Gtype b'lemma:\xce\xb4soxr'
1.695726 Gtype b'word:\xce\x94soxR'
1.678182 Vess b'lemma:flask'
1.678182 Vess b'-1:lemma:warm'
1.678182 Vess b'word:flask'
1.678182 Vess b'-1:word:warmed'
1.674493 Gtype b'lemma:pk4854'
1.674493 Gtype b'word:PK4854'
1.651636 Anti b'lemma:none'
1.651636 Anti b'word:none'
1.644746 O b'-1:lemma:0.3'
1.644746 O b'-1:word:0.3'
1.636558 Supp b'lemma:no3'
1.636558 Supp b'word:NO3'
1.624682 Gtype b'+1:postag::'
1.623664 Phase b'-1:lemma:mid-log'
1.623664 Phase b'-1:word:mid-log'
1.618042 Supp b'lemma:dpd'
1.618042 Supp b'word:DPD'
1.614435 Gtype b'lemma:\xce\xb4fur'
1.614435 Gtype b'word:\xce\x94fur'
1.613699 Gtype b'+1:lemma:ph5'
1.613699 Gtype b'+1:word:pH5'
1.607121 Gversion b'lemma:chip-seq'
1.597322 Anti b'lemma:\xcf\x8332'
1.597322 Anti b'word:\xcf\x8332'
1.592709 O b'lemma:Cra'
1.582551 Med b'lemma:lb'
1.582551 Med b'word:LB'
1.568363 Supp b'-1:lemma:vol'
1.568363 Supp b'-1:word:vol'
1.559468 Air b'lemma:Aerobic'
1.556342 Technique b'word:RNA-Seq'
1.553167 Med b'-1:lemma:ml'
1.553167 Med b'-1:word:ml'
1.553041 Med b'+1:lemma:0.4'
1.553041 Med b'+1:word:0.4'
1.551077 Supp b'lemma:Leu'
1.551077 Supp b'word:Leu'
1.543969 Air b'lemma:anaerobically'
1.543969 Air b'word:anaerobically'
1.543172 O b'lemma::'
1.543172 O b'word::'
1.540510 Anti b'+1:lemma:antibody'
1.540510 Anti b'+1:word:antibody'
1.537669 Air b'lemma:anaeroibc'
1.537669 Air b'word:Anaeroibc'
1.525613 OD b'-1:postag:IN'
1.524837 Supp b'lemma:Fe'
1.524837 Supp b'word:Fe'
1.524330 Gtype b'postag:-RRB-'
1.516209 O b'-1:lemma:glucose'
1.516209 O b'-1:word:glucose'
1.504553 Gtype b'+1:lemma:with'
1.504553 Gtype b'+1:word:with'
Top negative:
-0.149499 Anti b'+1:word:anti-Fur'
-0.152520 O b'+1:lemma:95'
-0.152520 O b'+1:word:95'
-0.156326 Supp b'+1:lemma:dpd'
-0.156326 Supp b'+1:word:DPD'
-0.162658 Supp b'lemma:10'
-0.162658 Supp b'word:10'
-0.163808 O b'+1:lemma:for'
-0.165890 Phase b'+1:postag:NN'
-0.166827 O b'-1:lemma:dfnr'
-0.166827 O b'-1:word:dFNR'
-0.171844 O b'+1:word:was'
-0.175747 O b'-1:lemma:of'
-0.175747 O b'-1:word:of'
-0.193766 O b'-1:word:from'
-0.208529 O b'-1:lemma:1m'
-0.208529 O b'-1:word:1M'
-0.210506 Air b'isLower'
-0.210844 Technique b'-1:postag::'
-0.216830 O b'+1:lemma:.'
-0.216830 O b'+1:postag:.'
-0.216830 O b'+1:word:.'
-0.223522 O b'-1:lemma:final'
-0.223522 O b'-1:word:final'
-0.223753 Med b'-1:postag:CD'
-0.233131 O b'-1:postag:JJ'
-0.239249 OD b'isNumber'
-0.243733 O b'lemma:20'
-0.243733 O b'word:20'
-0.246275 Air b'-1:postag:CC'
-0.247467 O b'lemma:k-12'
-0.247467 O b'word:K-12'
-0.265976 O b'lemma:glucose'
-0.265976 O b'word:glucose'
-0.271369 O b'+1:word:C'
-0.274901 O b'-1:lemma:the'
-0.284581 O b'-1:word:the'
-0.284785 O b'+1:lemma:supplement'
-0.284785 O b'+1:word:supplemented'
-0.285764 pH b'postag:NN'
-0.293864 pH b'isUpper'
-0.308118 Air b'-1:lemma:or'
-0.308118 Air b'-1:word:or'
-0.310139 Gtype b'-1:postag:CD'
-0.322263 Supp b'postag:CC'
-0.324104 O b'-1:lemma:cra'
-0.328286 O b'lemma:37'
-0.328286 O b'word:37'
-0.330168 O b'word:ChIP-exo'
-0.334656 O b'-1:lemma:37'
-0.334656 O b'-1:word:37'
-0.340095 Anti b'isUpper'
-0.342430 O b'-1:lemma:n2'
-0.342430 O b'-1:word:N2'
-0.348188 O b'-1:lemma:mm'
-0.348188 O b'-1:word:mM'
-0.350821 Supp b'+1:lemma:fructose'
-0.350821 Supp b'+1:word:fructose'
-0.352833 O b'+1:postag:IN'
-0.355440 O b'-1:postag:-LRB-'
-0.358948 O b'+1:postag:-LRB-'
-0.368508 O b'lemma:fructose'
-0.368508 O b'word:fructose'
-0.369578 Gtype b'+1:postag:CD'
-0.370939 O b'lemma:aerobically'
-0.370939 O b'word:aerobically'
-0.372723 O b'lemma:\xce\xb4fur'
-0.372723 O b'word:\xce\x94fur'
-0.374885 Supp b'+1:lemma:acetate'
-0.374885 Supp b'+1:word:acetate'
-0.383718 O b'-1:lemma:ph'
-0.383718 O b'-1:word:pH'
-0.388799 O b'-1:lemma:rpob'
-0.388799 O b'-1:word:RpoB'
-0.388879 O b'word:cells'
-0.392268 Supp b'-1:lemma:-lrb-'
-0.392268 Supp b'-1:word:-LRB-'
-0.394884 O b'+1:postag:-RRB-'
-0.401778 O b'lemma:minimal'
-0.401778 O b'word:minimal'
-0.405443 O b'+1:lemma:2.0'
-0.405443 O b'+1:word:2.0'
-0.409365 O b'lemma:medium'
-0.409365 O b'word:medium'
-0.416217 O b'-1:lemma:until'
-0.416217 O b'-1:word:until'
-0.422801 O b'+1:lemma:until'
-0.422801 O b'+1:word:until'
-0.422973 O b'lemma:nitrate'
-0.422973 O b'word:nitrate'
-0.424570 O b'-1:lemma:co2'
-0.424570 O b'-1:word:CO2'
-0.426175 Supp b'-1:postag:-LRB-'
-0.430390 O b'-1:lemma:dissolve'
-0.430390 O b'+1:lemma:methanol'
-0.430390 O b'-1:word:dissolved'
-0.430390 O b'+1:word:methanol'
-0.431326 O b'-1:lemma:chip-exo'
-0.434536 Med b'-1:postag:NN'
-0.437879 Supp b'+1:lemma:nacl'
-0.437879 Supp b'+1:word:NaCl'
-0.438271 Gtype b'+1:lemma:-lrb-'
-0.438271 Gtype b'+1:word:-LRB-'
-0.517777 O b'lemma:0.3'
-0.517777 O b'word:0.3'
-0.554787 O b'+1:lemma:+'
-0.554787 O b'+1:word:+'
-0.556538 Med b'+1:postag:IN'
-0.560443 Med b'-1:postag:IN'
-0.564073 Phase b'isUpper'
-0.567749 O b'lemma:mid-log'
-0.567749 O b'word:mid-log'
-0.569865 O b'+1:lemma:mm'
-0.569865 O b'+1:word:mM'
-0.574929 O b'+1:lemma:at'
-0.574929 O b'+1:word:at'
-0.576720 O b'lemma:methanol'
-0.576720 O b'word:methanol'
-0.582128 O b'-1:lemma:\xe2\x88\x86'
-0.582128 O b'-1:word:\xe2\x88\x86'
-0.586115 O b'lemma:anaerobically'
-0.586115 O b'word:anaerobically'
-0.586317 pH b'isLower'
-0.602158 O b'lemma:30'
-0.602158 O b'word:30'
-0.604997 O b'+1:lemma:g/l'
-0.604997 O b'+1:word:g/L'
-0.607721 O b'+1:lemma:1m'
-0.607721 O b'+1:word:1M'
-0.633619 O b'-1:postag:IN'
-0.635683 O b'-1:lemma:30'
-0.635683 O b'-1:word:30'
-0.637769 O b'-1:lemma:ml'
-0.637769 O b'-1:word:ml'
-0.669793 Air b'postag:NN'
-0.682250 O b'+1:word:ChIP-exo'
-0.688742 O b'lemma:of'
-0.688742 O b'word:of'
-0.712089 Air b'+1:postag:JJ'
-0.712952 O b'-1:lemma:1'
-0.712952 O b'-1:word:1'
-0.726205 O b'lemma:2h'
-0.726205 O b'-1:lemma:additional'
-0.726205 O b'word:2h'
-0.726205 O b'-1:word:additional'
-0.727554 O b'-1:postag::'
-0.740726 O b'-1:lemma:nsrr'
-0.740726 O b'-1:word:NsrR'
-0.747068 Temp b'postag:NN'
-0.802318 O b'lemma:nitrogen'
-0.802318 O b'word:nitrogen'
-0.820825 Supp b'+1:postag:VBN'
-0.826345 O b'lemma:media'
-0.826345 O b'word:media'
-0.827286 O b'-1:lemma:2'
-0.827286 O b'-1:word:2'
-0.831918 O b'-1:lemma:IP'
-0.831918 O b'-1:word:IP'
-0.838607 O b'-1:lemma:fresh'
-0.838607 O b'-1:word:fresh'
-0.854504 O b'lemma:wt'
-0.854588 Supp b'-1:postag:NNP'
-0.874101 O b'postag:RB'
-0.937975 Agit b'isUpper'
-0.956352 O b'+1:lemma:in'
-0.956352 O b'+1:word:in'
-0.979766 O b'+1:lemma:1'
-0.979766 O b'+1:word:1'
-0.982287 Gtype b'isLower'
-0.999251 Technique b'isNumber'
-1.074766 O b'postag:VBP'
-1.110753 Gtype b'isUpper'
-1.116498 O b'+1:postag:VBG'
-1.181571 Supp b'+1:postag:-LRB-'
-1.192346 O b'-1:lemma:sample'
-1.216569 Supp b'+1:lemma:-lrb-'
-1.216569 Supp b'+1:word:-LRB-'
-1.249356 Technique b'isLower'
-1.267812 O b'+1:lemma:2'
-1.267812 O b'+1:word:2'
-1.275807 O b'lemma:rifampicin'
-1.275807 O b'word:rifampicin'
-1.320279 Gtype b'isNumber'
-1.328236 Gversion b'isLower'
-1.369132 O b'-1:lemma:vol'
-1.369132 O b'-1:word:vol'
-1.376089 Supp b'+1:lemma:,'
-1.376089 Supp b'+1:postag:,'
-1.376089 Supp b'+1:word:,'
-1.376957 Phase b'postag:JJ'
-1.401753 OD b'+1:postag:NN'
-1.485274 Anti b'postag:NNP'
-1.787019 Supp b'postag:JJ'
-1.891720 Temp b'+1:postag:IN'
-2.179250 O b'-1:lemma:_'
-2.179250 O b'-1:word:_'
-2.181717 Phase b'-1:postag:JJ'
-2.190768 O b'-1:postag:VBG'
-2.226803 O b'-1:lemma::'
-2.226803 O b'-1:word::'
********** TRAINING AND TESTING REPORT **********
Training file: training-data-set-70.txt
best params:{'c1': 0.04937325798482469, 'c2': 0.021347060592283952}
best CV score:0.8727912197138052
model size: 0.16M
Flat F1: 0.7834470810208732
precision recall f1-score support
OD 0.857 0.818 0.837 22
pH 1.000 1.000 1.000 8
Technique 1.000 0.913 0.955 23
Med 1.000 0.943 0.971 53
Temp 1.000 0.621 0.766 29
Vess 1.000 1.000 1.000 1
Agit 0.000 0.000 0.000 0
Phase 0.882 1.000 0.938 15
Air 0.556 0.362 0.439 69
Anti 1.000 1.000 1.000 11
Strain 0.000 0.000 0.000 1
Gtype 0.885 0.812 0.847 85
Substrain 0.000 0.000 0.000 0
Supp 0.735 0.806 0.769 134
Gversion 0.000 0.000 0.000 0
avg / total 0.818 0.763 0.783 451
Top likely transitions:
Temp -> Temp 5.697885
Med -> Med 5.203676
Supp -> Supp 5.023623
Anti -> Anti 4.919423
Agit -> Agit 4.878209
OD -> OD 4.490452
Gtype -> Gtype 4.342655
Phase -> Phase 4.265502
Gversion -> Gversion 4.228866
O -> O 3.927755
Air -> Air 3.904248
Technique -> Technique 3.337823
pH -> pH 3.047130
Substrain -> Gtype 0.928380
O -> Technique 0.767492
O -> Supp 0.345238
O -> Gtype 0.335711
Technique -> Air 0.302367
Air -> O 0.238748
Gtype -> Supp 0.106918
Temp -> O 0.099174
O -> Anti 0.028918
O -> Temp 0.024359
Gtype -> Air 0.000872
O -> Strain -0.006968
OD -> Med -0.016869
Vess -> O -0.025460
Air -> Agit -0.030216
Technique -> Supp -0.054732
Gtype -> Temp -0.066953
Air -> Technique -0.074730
Phase -> Gtype -0.080334
O -> Phase -0.082037
Anti -> Supp -0.094817
O -> Agit -0.122548
Anti -> Gtype -0.136175
Agit -> Air -0.139559
Phase -> Technique -0.151160
Gtype -> Gversion -0.170913
Supp -> Phase -0.196758
Supp -> O -0.208933
Agit -> O -0.223185
Supp -> Technique -0.223482
Phase -> OD -0.285779
Supp -> Gversion -0.304736
Technique -> OD -0.319741
Supp -> Anti -0.336780
Med -> O -0.359992
Temp -> Med -0.365995
Supp -> Air -0.395049
Top unlikely transitions:
Vess -> O -0.025460
Air -> Agit -0.030216
Technique -> Supp -0.054732
Gtype -> Temp -0.066953
Air -> Technique -0.074730
Phase -> Gtype -0.080334
O -> Phase -0.082037
Anti -> Supp -0.094817
O -> Agit -0.122548
Anti -> Gtype -0.136175
Agit -> Air -0.139559
Phase -> Technique -0.151160
Gtype -> Gversion -0.170913
Supp -> Phase -0.196758
Supp -> O -0.208933
Agit -> O -0.223185
Supp -> Technique -0.223482
Phase -> OD -0.285779
Supp -> Gversion -0.304736
Technique -> OD -0.319741
Supp -> Anti -0.336780
Med -> O -0.359992
Temp -> Med -0.365995
Supp -> Air -0.395049
Anti -> Temp -0.433807
Air -> Med -0.443745
Technique -> Gtype -0.461980
Anti -> O -0.477920
Phase -> Med -0.496773
Phase -> Supp -0.504111
Supp -> Gtype -0.542703
Air -> Supp -0.549017
OD -> Air -0.590463
Gtype -> Technique -0.617626
OD -> O -0.633869
Gversion -> O -0.665626
Technique -> pH -0.667545
Phase -> O -0.674680
Air -> Phase -0.701860
Air -> Temp -0.825208
Gtype -> Anti -0.861670
O -> Med -0.887587
O -> OD -1.001120
Technique -> O -1.008992
Gtype -> Med -1.115268
Supp -> Med -1.155087
Gtype -> O -1.446717
O -> Air -1.553604
Substrain -> O -1.560176
Med -> Supp -1.895924
Top positive:
4.079246 Gtype b'lemma[:1]:\xce\xb4'
3.875929 Technique b'lemma[:2]:Ch'
3.769086 O b'lemma[:2]:re'
3.421852 Air b'word:Aerobic'
3.297057 O b'word:Cra'
3.280691 Air b'lemma:anaerobic'
3.279696 O b'lemma:_'
3.279696 O b'lemma[:1]:_'
3.279696 O b'word:_'
3.054016 O b'-1:word:tag'
2.950170 O b'-1:lemma:tag'
2.923311 Supp b'-1:word:Cra'
2.917103 O b'lemma:-'
2.917103 O b'word:-'
2.673438 O b'lemma:2'
2.673438 O b'word:2'
2.601916 Anti b'lemma[:2]:an'
2.548804 Air b'word:Anaerobic'
2.540901 O b'lemma:with'
2.540901 O b'word:with'
2.527977 Supp b'+1:lemma:\xc2\xb5m'
2.527977 Supp b'+1:word:\xc2\xb5M'
2.513465 Gtype b'lemma[:2]:pk'
2.467865 Air b'lemma[:2]:ae'
2.465142 Supp b'-1:lemma:vol'
2.465142 Supp b'-1:word:vol'
2.430821 O b'word:A'
2.414676 O b'+1:lemma:pq'
2.414676 O b'+1:word:PQ'
2.381594 Supp b'lemma:arginine'
2.365113 Strain b'+1:lemma:substr'
2.365113 Strain b'+1:word:substr'
2.358430 O b'lemma:3'
2.358430 O b'word:3'
2.281060 O b'+1:postag:RB'
2.274077 Gtype b'lemma:wt'
2.274077 Gtype b'lemma[:2]:wt'
2.271330 Gtype b'lemma[:1]:w'
2.262507 Gtype b'lemma[:2]:de'
2.230752 O b'-1:word:Aerobic'
2.223212 Gtype b'lemma[:2]:ar'
2.204027 Gtype b'word:WT'
2.203708 Supp b'lemma:pq'
2.203708 Supp b'lemma[:2]:pq'
2.203708 Supp b'word:PQ'
2.195032 Technique b'lemma[:2]:rn'
2.177901 Air b'lemma[:2]:an'
2.152688 O b'lemma[:2]:ge'
2.152187 O b'lemma:1'
2.152187 O b'word:1'
2.147937 Phase b'lemma:stationary'
2.147937 Phase b'word:stationary'
2.139876 Phase b'lemma:mid-log'
2.139876 Phase b'word:mid-log'
2.082317 O b'lemma:Custom'
2.082317 O b'lemma[:2]:Cu'
2.082317 O b'word:Custom'
2.081417 Technique b'lemma[:1]:C'
2.059825 O b'lemma:b'
2.059825 O b'word:B'
2.051640 Technique b'word:ChIPSeq'
2.044050 Supp b'lemma:Iron'
2.044050 Supp b'lemma[:2]:Ir'
2.044050 Supp b'word:Iron'
2.044050 Supp b'+1:word:Deficient'
2.034480 O b'-1:lemma:ChIP-exo'
2.021616 Technique b'lemma[:2]:ch'
2.004928 Substrain b'lemma[:2]:mg'
1.992585 Gtype b'lemma[:1]:f'
1.988583 Gtype b'lemma[:2]:cr'
1.983400 O b'+1:postag:NNP'
1.976657 O b'lemma[:1]:h'
1.975191 O b'-1:lemma:lb'
1.975191 O b'-1:word:LB'
1.973175 Supp b'lemma[:1]:I'
1.969412 O b'lemma:.'
1.969412 O b'postag:.'
1.969412 O b'postag[:1]:.'
1.969412 O b'word:.'
1.954569 Phase b'lemma[:2]:ex'
1.946311 O b'lemma:delta'
1.946311 O b'word:delta'
1.942336 Supp b'postag:VBP'
1.917047 Technique b'word:ChIP-Seq'
1.916217 Supp b'-1:lemma:Cra'
1.899702 Supp b'lemma:rifampicin'
1.899702 Supp b'word:rifampicin'
1.898554 Supp b'-1:lemma:with'
1.898554 Supp b'-1:word:with'
1.892222 Air b'lemma[:1]:a'
1.892185 Supp b'lemma[:2]:ri'
1.887353 Technique b'lemma:chipseq'
1.852638 O b'-1:lemma:0.3'
1.852638 O b'-1:word:0.3'
1.839355 pH b'lemma[:2]:ph'
1.838181 Gtype b'hGreek'
1.837034 O b'-1:lemma:glucose'
1.837034 O b'-1:word:glucose'
1.821428 Gtype b'lemma:arca8myc'
1.821428 Gtype b'word:ArcA8myc'
1.821045 Med b'lemma:MOPS'
1.821045 Med b'lemma[:1]:M'
1.821045 Med b'lemma[:2]:MO'
1.821045 Med b'word:MOPS'
1.813601 Technique b'-1:lemma:chip-exo'
1.811851 Gversion b'word:ChIP-Seq'
1.810109 O b'lemma:ompr'
1.810109 O b'word:OmpR'
1.807989 Supp b'-1:lemma:+'
1.807989 Supp b'-1:word:+'
1.805457 OD b'lemma[:1]:o'
1.789186 Gtype b'lemma[:1]:t'
1.787157 Supp b'lemma[:2]:gl'
1.784556 Gtype b'symb'
1.769736 Gversion b'lemma:chip-seq'
1.769144 O b'lemma[:2]:om'
1.756478 Temp b'+1:lemma:in'
1.756478 Temp b'+1:word:in'
1.750825 O b'lemma:rpob'
1.750825 O b'word:RpoB'
1.744454 Gversion b'-1:lemma:nc'
1.744454 Gversion b'-1:word:NC'
1.743983 Technique b'lemma:ChIP-exo'
1.738988 Gtype b'-1:lemma:\xe2\x88\x86'
1.738988 Gtype b'-1:word:\xe2\x88\x86'
1.737321 Supp b'lemma[:2]:30'
1.719994 Gversion b'lemma[:2]:00'
1.691622 Med b'+1:lemma:0.4'
1.691622 Med b'+1:word:0.4'
1.690183 Supp b'lemma:acetate'
1.690183 Supp b'word:acetate'
1.680098 Gtype b'lemma[:1]:W'
1.669697 Phase b'lemma[:1]:e'
1.654569 O b'-1:lemma:into'
1.654569 O b'-1:word:into'
1.646155 Agit b'+1:lemma:rpm'
1.646155 Agit b'+1:word:rpm'
1.639372 Supp b'lemma[:2]:ni'
1.637838 Supp b'-1:postag:CC'
1.633069 Med b'-1:lemma:ml'
1.633069 Med b'-1:word:ml'
1.626970 Gtype b'-1:postag:VBG'
1.625020 O b'+1:word:ChIP-Seq'
1.592935 Air b'lemma[:1]:A'
1.581837 Supp b'lemma[:2]:ac'
1.581166 Gversion b'lemma:asm584v2'
1.581166 Gversion b'word:ASM584v2'
1.576758 Gversion b'lemma[:2]:as'
1.566133 Air b'lemma:Aerobic'
1.566133 Air b'lemma[:2]:Ae'
1.556942 Air b'+1:postag:IN'
1.540738 Air b'postag:RB'
1.540738 Air b'postag[:1]:R'
1.540738 Air b'postag[:2]:RB'
1.537191 O b'-1:lemma:phase'
1.537191 O b'-1:word:phase'
1.528090 Air b'-1:lemma:-'
1.528090 Air b'-1:word:-'
1.527198 Gtype b'lemma:type'
1.527198 Gtype b'lemma[:2]:ty'
1.527198 Gtype b'word:type'
1.520984 O b'lemma[:1]:C'
1.511967 Air b'-1:postag::'
1.501971 O b'postag::'
1.501971 O b'postag[:1]::'
1.500818 Gtype b'lemma[:2]:wi'
1.494237 O b'+1:word:were'
1.488912 O b'postag:CC'
1.488912 O b'postag[:2]:CC'
1.488215 Agit b'lemma:rpm'
1.488215 Agit b'word:rpm'
1.486647 Supp b'+1:lemma:1'
1.486647 Supp b'+1:word:1'
1.483688 Temp b'-1:lemma:\xcf\x8332'
1.483688 Temp b'-1:word:\xcf\x8332'
1.483025 Air b'word:anaerobic'
1.481168 O b'lemma:a'
1.480233 Gversion b'lemma:nc'
1.480233 Gversion b'lemma[:2]:nc'
1.480233 Gversion b'word:NC'
1.465788 O b'-1:lemma:anaerobic'
1.464120 O b'+1:lemma:od600'
1.464120 O b'+1:word:OD600'
1.459126 O b'+1:postag:VBN'
1.449825 Phase b'lemma:exponential'
1.449825 Phase b'word:exponential'
1.443892 O b'-1:lemma:0.3-0.35'
1.443892 O b'-1:word:0.3-0.35'
1.429105 Technique b'symb'
1.428375 O b'lemma[:2]:ga'
1.423820 Technique b'word:ChIP-exo'
1.422037 O b'lemma:chip'
1.420614 O b'isNumber'
1.417035 pH b'+1:postag:CD'
1.414508 Phase b'lemma[:2]:st'
1.410028 O b'isLower'
1.399604 Med b'+1:postag:CD'
1.398736 Med b'+1:lemma:2.0'
1.398736 Med b'+1:word:2.0'
1.374106 O b'-1:lemma:wt'
Top negative:
-0.314411 Supp b'postag[:1]:N'
-0.314411 Supp b'postag[:2]:NN'
-0.316666 OD b'+1:lemma:0.4'
-0.316666 OD b'+1:word:0.4'
-0.319400 O b'+1:word:for'
-0.319616 Substrain b'isLower'
-0.329221 O b'+1:lemma:supplement'
-0.329221 O b'+1:word:supplemented'
-0.334710 Gtype b'+1:lemma:-rrb-'
-0.334710 Gtype b'+1:word:-RRB-'
-0.335324 Supp b'+1:lemma:nacl'
-0.335324 Supp b'+1:word:NaCl'
-0.338195 Phase b'lemma[:2]:pa'
-0.343360 Med b'+1:postag:NNS'
-0.343534 O b'+1:postag:IN'
-0.344316 Temp b'postag:NN'
-0.345297 O b'lemma:wt'
-0.345297 O b'lemma[:2]:wt'
-0.355887 Air b'isLower'
-0.356867 O b'-1:lemma:from'
-0.358831 O b'lemma:c'
-0.359856 O b'lemma:37'
-0.359856 O b'lemma[:2]:37'
-0.359856 O b'word:37'
-0.360624 O b'-1:lemma:final'
-0.360624 O b'-1:word:final'
-0.361530 Med b'-1:postag:NN'
-0.362367 Technique b'-1:postag::'
-0.362612 O b'lemma:of'
-0.362612 O b'lemma[:2]:of'
-0.362612 O b'word:of'
-0.368278 O b'-1:word:from'
-0.374267 O b'-1:lemma:30'
-0.374267 O b'-1:word:30'
-0.376045 O b'lemma:glucose'
-0.376045 O b'word:glucose'
-0.379443 Gtype b'lemma[:2]:rp'
-0.382310 Gtype b'lemma[:1]:s'
-0.383855 Supp b'-1:lemma:-lrb-'
-0.383855 Supp b'-1:word:-LRB-'
-0.385029 O b'-1:lemma:delta'
-0.385029 O b'-1:word:delta'
-0.390702 O b'-1:lemma:dissolve'
-0.390702 O b'+1:lemma:methanol'
-0.390702 O b'-1:word:dissolved'
-0.390702 O b'+1:word:methanol'
-0.395103 O b'-1:lemma:nsrr'
-0.395103 O b'-1:word:NsrR'
-0.399202 Supp b'lemma[:1]:a'
-0.403898 O b'-1:lemma:1'
-0.403898 O b'-1:word:1'
-0.404014 Temp b'hGreek'
-0.406161 O b'-1:lemma:\xe2\x88\x86'
-0.406161 O b'-1:word:\xe2\x88\x86'
-0.417535 O b'-1:postag:IN'
-0.418404 Supp b'-1:postag:-LRB-'
-0.428053 O b'-1:lemma:ph'
-0.428053 O b'-1:word:pH'
-0.432185 Supp b'postag:JJ'
-0.432252 O b'lemma[:1]:L'
-0.435862 Air b'-1:lemma:or'
-0.435862 Air b'-1:word:or'
-0.437167 Supp b'postag[:1]:J'
-0.437167 Supp b'postag[:2]:JJ'
-0.439673 Vess b'hUpper'
-0.439673 Vess b'hLower'
-0.443285 O b'lemma[:2]:gl'
-0.444141 Temp b'isLower'
-0.449813 Supp b'lemma[:2]:an'
-0.455745 O b'-1:lemma:37'
-0.455745 O b'-1:word:37'
-0.465238 O b'lemma[:2]:ni'
-0.469627 Supp b'lemma[:1]:s'
-0.472157 Air b'+1:postag:JJ'
-0.477980 O b'lemma:2h'
-0.477980 O b'-1:lemma:additional'
-0.477980 O b'lemma[:2]:2h'
-0.477980 O b'word:2h'
-0.477980 O b'-1:word:additional'
-0.493105 Agit b'symb'
-0.493489 O b'lemma:methanol'
-0.493489 O b'word:methanol'
-0.495166 O b'lemma:mid-log'
-0.495166 O b'word:mid-log'
-0.497445 Air b'-1:postag:JJ'
-0.499724 O b'+1:lemma:g/l'
-0.499724 O b'+1:word:g/L'
-0.501348 O b'+1:lemma:mm'
-0.501348 O b'+1:word:mM'
-0.507557 O b'lemma[:2]:me'
-0.508454 Gtype b'postag[:1]:V'
-0.508454 Gtype b'postag[:2]:VB'
-0.508460 Gtype b'postag::'
-0.508460 Gtype b'postag[:1]::'
-0.529784 O b'+1:lemma:at'
-0.529784 O b'+1:word:at'
-0.537465 O b'lemma[:1]:0'
-0.542594 Supp b'-1:lemma:.'
-0.542594 Supp b'-1:postag:.'
-0.542594 Supp b'-1:word:.'
-0.546062 O b'lemma[:1]:4'
-0.548259 O b'-1:lemma:rpob'
-0.548259 O b'-1:word:RpoB'
-0.552281 O b'-1:lemma:IP'
-0.552281 O b'-1:word:IP'
-0.553960 Phase b'postag:JJ'
-0.562367 O b'lemma:0.3'
-0.562367 O b'word:0.3'
-0.573524 Med b'-1:postag:CD'
-0.574613 Technique b'postag:NN'
-0.578197 O b'-1:lemma:chip-exo'
-0.579399 O b'+1:lemma:1m'
-0.579399 O b'+1:word:1M'
-0.584655 O b'word:ChIP-exo'
-0.585801 O b'lemma:soxs-8myc'
-0.585801 O b'word:soxS-8myc'
-0.599558 O b'lemma[:2]:ar'
-0.600604 O b'word:cells'
-0.606899 O b'+1:lemma:until'
-0.606899 O b'+1:word:until'
-0.607019 Air b'symb'
-0.640639 pH b'isNumber'
-0.656724 O b'+1:word:ChIP-exo'
-0.664247 O b'+1:lemma:2.0'
-0.664247 O b'+1:word:2.0'
-0.664520 Med b'+1:postag:IN'
-0.664576 O b'-1:lemma:co2'
-0.664576 O b'-1:word:CO2'
-0.665370 O b'+1:lemma:rep2'
-0.665370 O b'+1:word:rep2'
-0.671212 O b'lemma[:2]:mg'
-0.724471 Agit b'hUpper'
-0.724471 Agit b'hLower'
-0.739501 O b'lemma[:1]:I'
-0.755281 O b'lemma[:2]:30'
-0.762653 pH b'isLower'
-0.780673 Air b'postag:NN'
-0.793295 Supp b'symb'
-0.826875 O b'-1:postag::'
-0.853583 O b'lemma[:2]:ri'
-0.880894 O b'-1:lemma:until'
-0.880894 O b'-1:word:until'
-0.884934 Supp b'+1:postag:VBN'
-0.893108 O b'lemma[:1]:k'
-0.902922 O b'lemma[:2]:ae'
-0.926126 O b'lemma:rifampicin'
-0.926126 O b'word:rifampicin'
-0.929148 Med b'-1:postag:IN'
-0.936796 Anti b'postag:NNP'
-0.952559 Supp b'-1:postag:NNP'
-0.962974 Med b'symb'
-0.981592 Technique b'isNumber'
-1.019994 O b'-1:lemma:2'
-1.019994 O b'-1:word:2'
-1.023240 O b'+1:lemma:1'
-1.023240 O b'+1:word:1'
-1.023642 O b'-1:lemma:ml'
-1.023642 O b'-1:word:ml'
-1.035572 Phase b'hUpper'
-1.035572 Phase b'hLower'
-1.036376 O b'lemma[:1]:n'
-1.044060 Gtype b'lemma[:1]:c'
-1.051632 Phase b'postag[:1]:J'
-1.051632 Phase b'postag[:2]:JJ'
-1.074102 Gtype b'lemma[:1]:a'
-1.077239 O b'+1:lemma:+'
-1.077239 O b'+1:word:+'
-1.084820 O b'+1:lemma:in'
-1.084820 O b'+1:word:in'
-1.107631 Gtype b'isNumber'
-1.117617 Supp b'+1:postag:-LRB-'
-1.129026 Technique b'isLower'
-1.149735 Supp b'hGreek'
-1.156144 Supp b'+1:lemma:-lrb-'
-1.156144 Supp b'+1:word:-LRB-'
-1.229045 Gtype b'lemma[:1]:r'
-1.231933 O b'+1:postag:VBG'
-1.262559 Gversion b'isLower'
-1.277633 O b'-1:lemma:fresh'
-1.277633 O b'-1:word:fresh'
-1.381763 OD b'+1:postag:NN'
-1.472372 O b'+1:lemma:2'
-1.472372 O b'+1:word:2'
-1.571807 Gtype b'isUpper'
-1.677845 Supp b'lemma[:1]:c'
-1.687951 O b'-1:lemma:vol'
-1.687951 O b'-1:word:vol'
-1.695736 Supp b'+1:lemma:,'
-1.695736 Supp b'+1:postag:,'
-1.695736 Supp b'+1:word:,'
-1.723752 O b'-1:lemma:sample'
-1.785262 Phase b'-1:postag:JJ'
-1.895238 O b'postag:VBP'
-1.908071 O b'-1:postag:VBG'
-2.171310 O b'-1:lemma:_'
-2.171310 O b'-1:word:_'
-2.491839 O b'lemma[:2]:fl'
-2.929187 O b'-1:lemma::'
-2.929187 O b'-1:word::'
-2.957042 Temp b'+1:postag:IN'
********** TRAINING AND TESTING REPORT **********
Training file: training-data-set-70.txt
best params:{'c1': 0.14521637026904505, 'c2': 0.02160263268998293}
best CV score:0.8705560896194018
model size: 0.08M
Flat F1: 0.7637642434421422
precision recall f1-score support
OD 1.000 0.818 0.900 22
pH 1.000 1.000 1.000 8
Technique 0.952 0.870 0.909 23
Med 1.000 0.925 0.961 53
Temp 0.923 0.828 0.873 29
Vess 1.000 1.000 1.000 1
Agit 0.000 0.000 0.000 0
Phase 0.875 0.933 0.903 15
Air 0.545 0.348 0.425 69
Anti 1.000 1.000 1.000 11
Strain 0.000 0.000 0.000 1
Gtype 0.857 0.847 0.852 85
Substrain 0.000 0.000 0.000 0
Supp 0.609 0.791 0.688 134
Gversion 0.000 0.000 0.000 0
avg / total 0.774 0.769 0.764 451
Top likely transitions:
Temp -> Temp 5.770024
Agit -> Agit 5.591940
Med -> Med 5.238745
OD -> OD 5.208128
Supp -> Supp 5.009566
Anti -> Anti 4.435661
Phase -> Phase 4.266553
Air -> Air 4.183832
Gversion -> Gversion 4.181832
Gtype -> Gtype 4.159817
O -> O 4.029919
Technique -> Technique 3.397241
pH -> pH 2.835139
Substrain -> Gtype 1.746378
Gtype -> Supp 1.354961
Air -> O 1.123193
O -> Technique 0.956245
O -> Supp 0.860304
Technique -> Air 0.720188
Gtype -> Air 0.705752
Supp -> O 0.675087
Med -> O 0.607714
O -> Gtype 0.602507
Temp -> O 0.586988
Gtype -> pH 0.132889
O -> Phase 0.103874
O -> Anti 0.059111
O -> Med 0.043976
O -> Temp 0.037475
Phase -> O 0.007325
OD -> O 0.001147
Temp -> Med -0.004820
O -> OD -0.007911
Air -> Agit -0.018394
Technique -> O -0.037749
Air -> Supp -0.052877
Supp -> Gtype -0.118891
Gtype -> Technique -0.146943
Air -> Gtype -0.162470
OD -> Air -0.407752
Supp -> Med -0.519051
Gtype -> Med -0.520149
Gtype -> O -0.569447
O -> Air -0.679735
Phase -> OD -0.846881
Substrain -> O -1.190575
Technique -> Gtype -1.334054
Med -> Supp -1.751616
Top unlikely transitions:
Temp -> Temp 5.770024
Agit -> Agit 5.591940
Med -> Med 5.238745
OD -> OD 5.208128
Supp -> Supp 5.009566
Anti -> Anti 4.435661
Phase -> Phase 4.266553
Air -> Air 4.183832
Gversion -> Gversion 4.181832
Gtype -> Gtype 4.159817
O -> O 4.029919
Technique -> Technique 3.397241
pH -> pH 2.835139
Substrain -> Gtype 1.746378
Gtype -> Supp 1.354961
Air -> O 1.123193
O -> Technique 0.956245
O -> Supp 0.860304
Technique -> Air 0.720188
Gtype -> Air 0.705752
Supp -> O 0.675087
Med -> O 0.607714
O -> Gtype 0.602507
Temp -> O 0.586988
Gtype -> pH 0.132889
O -> Phase 0.103874
O -> Anti 0.059111
O -> Med 0.043976
O -> Temp 0.037475
Phase -> O 0.007325
OD -> O 0.001147
Temp -> Med -0.004820
O -> OD -0.007911
Air -> Agit -0.018394
Technique -> O -0.037749
Air -> Supp -0.052877
Supp -> Gtype -0.118891
Gtype -> Technique -0.146943
Air -> Gtype -0.162470
OD -> Air -0.407752
Supp -> Med -0.519051
Gtype -> Med -0.520149
Gtype -> O -0.569447
O -> Air -0.679735
Phase -> OD -0.846881
Substrain -> O -1.190575
Technique -> Gtype -1.334054
Med -> Supp -1.751616
Top positive:
6.295982 O b'lemma:2'
6.088107 O b'lemma:1'
5.499374 Phase b'lemma:mid-log'
5.468207 Anti b'-2:lemma:antibody'
5.389740 O b'-2:lemma:_'
5.310900 O b'lemma:_'
5.124562 Air b'lemma:anaerobic'
5.038711 Gtype b'lemma:wt'
4.975908 Supp b'lemma:pq'
4.858798 O b'lemma:3'
4.706721 Technique b'lemma:chipseq'
4.694852 Air b'lemma:Aerobic'
4.682998 Gtype b'lemma:type'
4.617298 O b'postag:IN'
4.544190 O b'-2:lemma:flagtag'
4.540916 Technique b'lemma:ChIP-exo'
4.378201 Gtype b'lemma:\xce\xb4cra'
4.141678 Air b'lemma:aerobic'
4.102798 Gtype b'lemma:\xe2\x88\x86'
4.083658 Gtype b'+1:lemma:type'
4.039276 Med b'lemma:MOPS'
3.938693 Technique b'lemma:rna-seq'
3.794686 O b'lemma:rpob'
3.792227 O b'-1:lemma:ChIP-exo'
3.784855 O b'lemma:-'
3.774100 O b'lemma:b'
3.764975 Supp b'lemma:acetate'
3.691796 Gtype b'-2:lemma:genotype/variation'
3.672526 Supp b'lemma:Iron'
3.672526 Supp b'-2:lemma:Anaerobic'
3.617838 Technique b'lemma:chip-seq'
3.604759 Supp b'lemma:no3'
3.568114 Supp b'lemma:nacl'
3.558608 Phase b'-2:lemma:phase'
3.526017 O b'lemma:.'
3.526017 O b'postag:.'
3.449878 Gtype b'-1:lemma:\xe2\x88\x86'
3.446438 Supp b'lemma:glucose'
3.403359 Med b'lemma:lb'
3.365642 Gtype b'lemma:wild-type'
3.350130 Air b'postag:RB'
3.342253 O b'+2:lemma:\xc2\xb0c'
3.335770 Supp b'lemma:nh4cl'
3.326472 O b'lemma:Cra'
3.318880 Gtype b'lemma:\xce\xb4fur'
3.295001 O b'postag:CC'
3.262632 O b'postag:VBN'
3.206029 O b'lemma:a'
3.195357 Gtype b'-2:lemma:genotype'
3.193335 Technique b'+2:lemma:ph5'
3.187683 O b'postag::'
3.127353 Gtype b'lemma:flag-tag'
3.127353 Gtype b'-1:lemma:c-terminal'
3.121057 Supp b'lemma:nitrate'
3.112065 Gtype b'lemma:\xce\xb4soxr'
3.087827 Substrain b'lemma:mg1655'
3.047270 Supp b'+2:lemma:iptg'
3.025373 Supp b'-1:lemma:Cra'
2.966835 Air b'-1:lemma:ChIP-Seq'
2.963423 Supp b'-1:lemma:with'
2.920292 Gtype b'lemma:dfnr'
2.878192 O b'lemma:with'
2.811966 O b'+1:lemma:arca-8myc'
2.806521 O b'-2:lemma:medium'
2.792197 Gtype b'-2:lemma:affyexp'
2.777524 Gversion b'lemma:chip-seq'
2.764298 Technique b'lemma:rnaseq'
2.751592 O b'-1:lemma:tag'
2.744091 Air b'-2:lemma:IP'
2.741300 Supp b'+1:lemma:1'
2.735516 O b'lemma:CEL'
2.729762 O b'lemma:rep2'
2.702452 Med b'lemma:m63'
2.699796 O b'lemma:harbor'
2.669950 Supp b'lemma:arginine'
2.661545 O b'-1:lemma:0.3'
2.602799 Gtype b'+1:lemma:with'
2.599935 O b'-1:lemma:\xc2\xb0c'
2.587031 Technique b'lemma:ChIP-Seq'
2.568848 Gtype b'-2:lemma:delta'
2.543114 Gtype b'lemma:nsrr'
2.539452 Substrain b'-2:lemma:substr'
2.534370 O b'postag:DT'
2.513411 Gtype b'lemma:pk4854'
2.512728 O b'lemma:chip'
2.505650 O b'+1:postag:RB'
2.493029 Temp b'-2:lemma:\xcf\x8332'
2.454541 Anti b'+1:lemma:antibody'
2.447213 Anti b'+2:lemma:antibody'
2.436325 Gtype b'-1:lemma:rpob'
2.430821 pH b'lemma:ph5'
2.430821 pH b'+1:lemma:.5'
2.411045 O b'postag:VBG'
2.347885 Supp b'-2:lemma:media'
2.345716 pH b'+1:postag:CD'
2.342949 Technique b'-2:lemma:Fur'
2.340323 O b'-1:lemma:glucose'
2.326562 O b'-1:lemma:lb'
2.318033 Gtype b'+1:lemma:ph5'
2.318033 Gtype b'+2:lemma:.5'
2.312502 Temp b'lemma:43'
2.307908 Supp b'lemma:Leu'
2.307908 Supp b'-2:lemma:Lrp'
2.283291 O b'-1:lemma:media'
2.278223 Gtype b'lemma:\xce\xb4oxyr'
2.277778 Air b'-1:lemma:-'
2.269087 Substrain b'+1:lemma:phtpg'
2.262780 Supp b'-2:lemma:agent'
2.259842 Temp b'-1:lemma:43'
2.259677 O b'lemma:ompr'
2.241905 Supp b'lemma:Adenine'
2.238893 Gtype b'lemma:\xce\xb4ompr'
2.238698 Supp b'+1:lemma:\xc2\xb5m'
2.212658 Med b'+2:lemma:b2'
2.198189 O b'-2:lemma:myc'
2.163443 Phase b'lemma:exponential'
2.163443 Phase b'lemma:stationary'
2.159726 O b'+1:postag:NNP'
2.156490 Phase b'-1:lemma:mid-log'
2.151371 Gversion b'lemma:.2'
2.151371 Gversion b'-1:lemma:u00096'
2.140308 O b'-1:lemma:type'
2.138372 O b'lemma:ml'
2.128755 Med b'+2:postag:CC'
2.123732 Technique b'-1:lemma:chip-exo'
2.118518 Technique b'-1:lemma:IP'
2.117337 Strain b'+1:lemma:substr'
2.117337 Strain b'-2:lemma:str'
2.099392 Agit b'lemma:rpm'
2.096807 Supp b'+1:lemma:2'
2.096794 Strain b'lemma:k-12'
2.091407 O b'+2:lemma:cra'
2.063564 O b'lemma:\xcf\x8332'
2.050789 Supp b'lemma:rifampicin'
2.049024 Gversion b'lemma:nc'
2.033164 O b'+1:lemma:pq'
2.013358 Temp b'-1:lemma:\xcf\x8332'
1.966874 Temp b'+1:lemma:\xc2\xb0c'
1.966521 O b'-1:lemma:anaerobic'
1.957027 O b'lemma:culture'
1.952247 O b'-2:lemma:min'
1.929194 O b'+1:lemma:condition'
1.923035 Supp b'-1:postag:CC'
1.911344 Temp b'lemma:\xc2\xb0c'
1.902035 Gversion b'+2:lemma:000913'
1.892926 Technique b'-1:lemma:input'
1.877608 Supp b'lemma:fructose'
1.876135 O b'+1:lemma:250'
1.868225 O b'lemma:Custom'
1.867543 Air b'lemma:anaeroibc'
1.865515 O b'+2:postag:JJ'
1.861583 Supp b'-1:lemma:+'
1.856696 O b'lemma:s'
1.856578 Supp b'+1:lemma:_'
1.851722 Gversion b'lemma:u00096'
1.851722 Gversion b'+1:lemma:.2'
1.838273 Phase b'lemma:phase'
1.825249 Med b'lemma:broth'
1.825249 Med b'-1:lemma:L'
1.817848 O b'+1:lemma:coli'
1.805545 Med b'+1:lemma:0.4'
1.803317 O b'-1:lemma:into'
1.801603 Agit b'+1:lemma:rpm'
1.799565 Gtype b'-1:postag:VBG'
1.796090 Supp b'+2:lemma:rifampicin'
1.784603 O b'+1:lemma:chip-seq'
1.777806 Med b'lemma:media'
1.769076 O b'lemma:trpr'
1.768678 Gtype b'lemma:ptac'
1.760760 Gtype b'+1:lemma:flagtag'
1.757550 Gversion b'-2:lemma:nc'
1.745784 O b'lemma:soxs'
1.745784 O b'lemma:soxr'
1.739107 O b'-2:lemma:~'
1.736949 Med b'+2:lemma:b1'
1.736832 OD b'+1:lemma:of'
1.735882 O b'postag:NNS'
1.709175 O b'+2:lemma:70'
1.708616 O b'postag:VBD'
1.705680 O b'-1:lemma:aerobically'
1.700488 O b'lemma:argr'
1.696537 Temp b'-1:lemma:sample'
1.694740 O b'+1:lemma:acetate'
1.690209 O b'lemma:affyexp'
1.689269 Med b'lemma:minimal'
1.687607 Agit b'+2:lemma:at'
1.683096 O b'lemma:purr'
1.680453 Gversion b'-2:lemma:build'
1.674723 O b'+2:lemma:fructose'
1.673798 Vess b'lemma:flask'
1.673798 Vess b'-1:lemma:warm'
1.673798 Vess b'-2:lemma:pre'
1.673798 Vess b'+2:lemma:43'
1.671743 O b'-1:lemma:Aerobic'
1.670303 O b'+2:lemma:polyclonal'
1.644434 O b'+1:lemma:wt'
1.641217 Gtype b'+2:lemma:glucose'
1.620167 O b'lemma:genotype/variation'
1.613899 Air b'lemma:aerobically'
1.603327 Temp b'-1:lemma:37'
Top negative:
-0.098715 O b'-1:lemma:g/l'
-0.098977 O b'+2:lemma:0.2'
-0.099326 O b'lemma:m63'
-0.099882 O b'-1:lemma:rifampicin'
-0.106371 O b'-1:lemma:37'
-0.107922 Med b'-1:postag:CD'
-0.110064 O b'-2:lemma:-lrb-'
-0.117142 O b'+1:lemma:of'
-0.123918 Supp b'+2:lemma:.'
-0.123918 Supp b'+2:postag:.'
-0.131710 O b'+1:lemma:culture'
-0.132661 O b'+1:postag:-LRB-'
-0.136103 O b'+1:lemma:95'
-0.137008 Technique b'-1:postag::'
-0.138812 O b'-1:lemma:final'
-0.138847 O b'lemma:7.6'
-0.138847 O b'+1:lemma:;'
-0.139546 Supp b'+1:lemma:-rrb-'
-0.140317 Supp b'+2:lemma:dpd'
-0.142690 O b'postag:RB'
-0.143532 O b'lemma:;'
-0.143532 O b'-1:lemma:7.6'
-0.144217 O b'+2:lemma:reference'
-0.145656 Temp b'-2:postag:NN'
-0.146454 O b'lemma:anaerobic'
-0.147525 Air b'-1:postag:JJ'
-0.149095 O b'-1:lemma:0.2'
-0.151176 O b'-1:lemma:contain'
-0.151476 Air b'+2:postag:IN'
-0.152942 O b'+1:lemma:fecl2'
-0.154443 Med b'postag:CD'
-0.158960 Supp b'-1:lemma:-lrb-'
-0.169851 Med b'-1:postag:NN'
-0.176135 OD b'postag:NN'
-0.178701 Supp b'-1:postag:-LRB-'
-0.180805 O b'lemma:methanol'
-0.180805 O b'-2:lemma:dissolve'
-0.182601 Supp b'+2:lemma:glucose'
-0.185072 Supp b'postag:CC'
-0.189120 O b'+2:lemma:-rrb-'
-0.192885 O b'+1:lemma:dissolve'
-0.199989 O b'-1:lemma:1m'
-0.199989 O b'+2:lemma:7.6'
-0.201669 O b'-2:lemma:nh4cl'
-0.201904 O b'-2:lemma:the'
-0.202079 O b'-1:lemma:of'
-0.202589 Supp b'+1:lemma:acetate'
-0.204371 Supp b'+1:lemma:rifampicin'
-0.207950 Gtype b'-2:postag:CD'
-0.215109 O b'-1:lemma:iptg'
-0.217533 O b'lemma:nitrogen'
-0.221912 O b'lemma:37'
-0.229060 O b'-1:lemma:n2'
-0.236563 Air b'-2:postag:CC'
-0.241693 O b'-2:lemma:IP'
-0.248054 O b'-2:lemma:aerobically'
-0.248583 Med b'+1:postag:NN'
-0.249693 Phase b'+1:postag:NN'
-0.251280 O b'+2:lemma:tag'
-0.260336 O b'+1:lemma:10'
-0.266374 O b'-1:lemma:until'
-0.269589 O b'lemma:ph'
-0.275513 O b'-1:lemma:dfnr'
-0.277525 Supp b'+1:postag:VBN'
-0.277770 O b'-1:lemma:fresh'
-0.287656 O b'+2:lemma:250'
-0.295216 O b'lemma:nh4cl'
-0.296463 O b'-2:lemma:mm'
-0.296923 O b'lemma:\xe2\x88\x86'
-0.301390 O b'-1:postag:IN'
-0.303644 O b'+1:postag:IN'
-0.305259 O b'lemma:fecl2'
-0.310628 Gtype b'-1:postag:NN'
-0.311296 O b'lemma:k-12'
-0.316958 O b'+1:lemma:%'
-0.317571 O b'+2:lemma:.'
-0.317571 O b'+2:postag:.'
-0.317988 Phase b'-2:postag:NN'
-0.321822 O b'lemma:minimal'
-0.322289 O b'-1:lemma:minimal'
-0.322560 O b'-2:lemma:genome'
-0.332944 O b'-2:postag::'
-0.333295 O b'lemma:dissolve'
-0.333295 O b'+2:lemma:methanol'
-0.338620 O b'-2:lemma:anaerobically'
-0.345174 O b'-1:postag::'
-0.363808 Gtype b'-2:lemma:\xe2\x88\x86'
-0.382684 O b'-2:lemma:rpob'
-0.387944 Supp b'+1:postag:-RRB-'
-0.394192 O b'-2:lemma:fresh'
-0.395680 O b'+1:lemma:minimal'
-0.396198 O b'-2:lemma:phase'
-0.398092 O b'-2:lemma:dpd'
-0.402236 Supp b'+2:lemma:-rrb-'
-0.410787 O b'+1:lemma:1m'
-0.410787 O b'-2:lemma:vol'
-0.419351 O b'-1:lemma:cra'
-0.439916 Med b'+1:postag:IN'
-0.447763 Supp b'+2:postag:-RRB-'
-0.451257 O b'-2:lemma:until'
-0.454676 Supp b'-2:lemma:treat'
-0.461514 Supp b'+1:postag:NNS'
-0.473439 O b'+2:lemma:at'
-0.489525 Supp b'-2:postag:NNS'
-0.490480 O b'-2:lemma:a'
-0.491484 Supp b'+1:lemma:,'
-0.491484 Supp b'+1:postag:,'
-0.497187 O b'lemma:aerobically'
-0.503626 Supp b'-1:postag:NNP'
-0.509023 Supp b'lemma:10'
-0.516721 O b'+1:lemma:+'
-0.521588 O b'lemma:nitrate'
-0.522828 O b'-2:lemma::'
-0.525160 O b'+1:lemma:g/l'
-0.528490 O b'+1:lemma:supplement'
-0.536875 Anti b'+2:postag:JJ'
-0.541770 O b'-1:lemma:ml'
-0.549347 O b'+1:lemma:mm'
-0.554922 O b'-1:lemma:rpob'
-0.555832 O b'-1:lemma:dissolve'
-0.555832 O b'+1:lemma:methanol'
-0.556682 O b'+1:postag:VBG'
-0.560238 O b'-1:lemma:co2'
-0.588187 O b'+2:lemma:a'
-0.589107 Anti b'+1:lemma:anti-fur'
-0.592147 Med b'-2:postag:VBN'
-0.594320 O b'-2:lemma:2'
-0.596090 O b'+2:lemma:add'
-0.599346 O b'-2:lemma:supplement'
-0.602638 pH b'postag:NN'
-0.603217 O b'-2:lemma:glucose'
-0.605916 O b'lemma:mid-log'
-0.609002 O b'+2:lemma:10'
-0.610140 O b'-1:lemma:mm'
-0.614692 Supp b'+1:lemma:-lrb-'
-0.622267 Med b'+2:postag:VBN'
-0.631151 O b'-1:lemma:vol'
-0.631151 O b'-2:lemma:1/100'
-0.631151 O b'+2:lemma:1m'
-0.635308 O b'-2:lemma:media'
-0.643298 Supp b'+1:postag:-LRB-'
-0.667302 O b'-1:lemma:grow'
-0.675604 O b'lemma:anaerobically'
-0.678285 O b'postag:VBP'
-0.712921 O b'-2:postag:DT'
-0.743302 O b'-2:postag:RB'
-0.797815 O b'-1:lemma:2'
-0.803294 Supp b'-2:postag:JJ'
-0.804094 O b'lemma:wt'
-0.827192 O b'+2:lemma:+'
-0.827898 O b'+1:lemma:until'
-0.831996 O b'+2:lemma:mid-log'
-0.837044 O b'lemma:2h'
-0.837044 O b'-1:lemma:additional'
-0.853094 O b'lemma:of'
-0.881861 O b'lemma:aerobic'
-0.919135 O b'-1:lemma:30'
-0.952995 Air b'postag:NN'
-0.973697 O b'+2:postag:-RRB-'
-0.983024 Air b'+1:postag:JJ'
-1.013906 O b'+2:lemma:fnr'
-1.021974 O b'-2:postag:SYM'
-1.037571 O b'+2:lemma:then'
-1.048639 Med b'-2:lemma:grow'
-1.054249 O b'lemma:media'
-1.064365 Temp b'postag:NN'
-1.088097 O b'-1:lemma:nsrr'
-1.141026 Phase b'-1:postag:JJ'
-1.155260 O b'+2:lemma:b'
-1.165858 O b'-2:lemma:0.3'
-1.175657 O b'-1:lemma:ph'
-1.186408 O b'lemma:\xce\xb4fur'
-1.229030 O b'+1:lemma:in'
-1.249314 O b'+1:lemma:at'
-1.257039 O b'lemma:rifampicin'
-1.261781 Gtype b'+2:lemma:cra'
-1.267291 O b'+1:lemma:2.0'
-1.293131 O b'-2:lemma:rifampicin'
-1.297426 O b'-1:lemma:sample'
-1.317920 Phase b'postag:JJ'
-1.321832 O b'-1:lemma:1'
-1.346398 OD b'+2:lemma:aerobically'
-1.353123 Supp b'+2:lemma:2'
-1.353839 OD b'+1:postag:NN'
-1.379126 Anti b'+2:lemma:polyclonal'
-1.433804 O b'+2:lemma:rifampicin'
-1.437034 Supp b'+2:postag:CD'
-1.439161 Supp b'+2:lemma:1'
-1.489609 Supp b'+2:lemma:fructose'
-1.512780 O b'lemma:30'
-1.553364 Supp b'postag:JJ'
-1.601724 O b'-1:postag:VBG'
-1.789724 O b'-1:lemma:IP'
-1.863826 Gtype b'postag:VBG'
-2.000634 Anti b'postag:NNP'
-2.029905 O b'lemma:0.3'
-2.144948 O b'+1:lemma:1'
-2.735063 O b'+1:lemma:2'
-3.945319 O b'-1:lemma::'
-4.420879 O b'-1:lemma:_'
********** TRAINING AND TESTING REPORT **********
Training file: training-data-set-70.txt
best params:{'c1': 0.16617627893415826, 'c2': 0.016246283722594547}
best CV score:0.8677299702871124
model size: 0.09M
Flat F1: 0.8100263815531699
precision recall f1-score support
OD 1.000 0.818 0.900 22
pH 1.000 1.000 1.000 8
Technique 1.000 0.913 0.955 23
Med 1.000 0.943 0.971 53
Temp 0.923 0.828 0.873 29
Vess 1.000 1.000 1.000 1
Agit 0.000 0.000 0.000 0
Phase 0.882 1.000 0.938 15
Air 0.565 0.377 0.452 69
Anti 1.000 1.000 1.000 11
Strain 0.000 0.000 0.000 1
Gtype 0.889 0.847 0.867 85
Substrain 0.000 0.000 0.000 0
Supp 0.811 0.799 0.805 134
Gversion 0.000 0.000 0.000 0
avg / total 0.845 0.783 0.810 451
Top likely transitions:
Agit -> Agit 6.716949
Temp -> Temp 6.219197
OD -> OD 5.656758
Med -> Med 5.510885
Supp -> Supp 5.091598
Anti -> Anti 4.770668
Gversion -> Gversion 4.530576
Phase -> Phase 4.343162
Air -> Air 4.176421
Gtype -> Gtype 3.872949
O -> O 3.576273
Technique -> Technique 3.464421
pH -> pH 3.115258
O -> Technique 0.821082
Substrain -> Gtype 0.704368
O -> Supp 0.589038
Air -> O 0.511427
Gtype -> Supp 0.510592
Gtype -> Air 0.474170
Temp -> O 0.189513
O -> Gtype 0.176860
Supp -> O 0.145242
Technique -> Air 0.118103
Med -> O 0.093609
O -> Anti 0.090099
Phase -> O 0.002296
O -> Temp 0.002073
O -> Phase 0.000043
O -> Med -0.015745
Gtype -> Med -0.042610
Supp -> Med -0.045443
O -> OD -0.113818
Air -> Supp -0.115743
Technique -> pH -0.124518
OD -> Air -0.194892
Gversion -> O -0.196158
Gtype -> Technique -0.579025
Technique -> O -0.829124
O -> Air -0.902346
Gtype -> O -0.925779
Med -> Supp -1.463243
Substrain -> O -1.475265
Top unlikely transitions:
Agit -> Agit 6.716949
Temp -> Temp 6.219197
OD -> OD 5.656758
Med -> Med 5.510885
Supp -> Supp 5.091598
Anti -> Anti 4.770668
Gversion -> Gversion 4.530576
Phase -> Phase 4.343162
Air -> Air 4.176421
Gtype -> Gtype 3.872949
O -> O 3.576273
Technique -> Technique 3.464421
pH -> pH 3.115258
O -> Technique 0.821082
Substrain -> Gtype 0.704368
O -> Supp 0.589038
Air -> O 0.511427
Gtype -> Supp 0.510592
Gtype -> Air 0.474170
Temp -> O 0.189513
O -> Gtype 0.176860
Supp -> O 0.145242
Technique -> Air 0.118103
Med -> O 0.093609
O -> Anti 0.090099
Phase -> O 0.002296
O -> Temp 0.002073
O -> Phase 0.000043
O -> Med -0.015745
Gtype -> Med -0.042610
Supp -> Med -0.045443
O -> OD -0.113818
Air -> Supp -0.115743
Technique -> pH -0.124518
OD -> Air -0.194892
Gversion -> O -0.196158
Gtype -> Technique -0.579025
Technique -> O -0.829124
O -> Air -0.902346
Gtype -> O -0.925779
Med -> Supp -1.463243
Substrain -> O -1.475265
Top positive:
6.433973 Technique b'lemma[:2]:Ch'
6.360815 O b'lemma:2'
5.662452 Anti b'-2:lemma:antibody'
5.635583 O b'lemma:1'
5.375548 O b'-2:lemma:_'
5.334062 O b'lemma:3'
4.328011 Gtype b'lemma[:1]:\xce\xb4'
4.274267 O b'lemma[:2]:re'
4.227305 Phase b'lemma:mid-log'
4.196090 Air b'lemma[:2]:ae'
3.698810 Gtype b'-1:lemma:\xe2\x88\x86'
3.689545 Substrain b'lemma[:2]:mg'
3.405547 Air b'lemma:anaerobic'
3.375450 Technique b'lemma:chipseq'
3.355381 O b'-2:lemma:medium'
3.263554 O b'lemma:with'
3.228684 O b'lemma:b'
3.212301 Technique b'lemma[:2]:rn'
3.141015 Supp b'lemma:acetate'
3.091356 Phase b'-2:lemma:phase'
3.034572 Gtype b'lemma:type'
3.034572 Gtype b'lemma[:2]:ty'
3.019205 O b'-1:lemma:tag'
2.976419 Air b'lemma[:2]:an'
2.958977 O b'+2:lemma:\xc2\xb0c'
2.937672 O b'lemma[:2]:ge'
2.921253 O b'lemma[:1]:h'
2.903272 Gtype b'-2:lemma:genotype/variation'
2.893999 Supp b'lemma:arginine'
2.891850 O b'lemma:-'
2.870735 Supp b'-1:lemma:with'
2.863673 Gtype b'+1:lemma:type'
2.803093 Supp b'lemma:pq'
2.803093 Supp b'lemma[:2]:pq'
2.800209 O b'lemma:a'
2.752078 Med b'+2:postag:CC'
2.708393 Gtype b'lemma[:2]:pk'
2.703014 Supp b'-1:lemma:Cra'
2.629891 O b'lemma:_'
2.629891 O b'lemma[:1]:_'
2.596183 Supp b'lemma:Iron'
2.596183 Supp b'lemma[:2]:Ir'
2.596183 Supp b'-2:lemma:Anaerobic'
2.569027 O b'+2:lemma:70'
2.544018 Supp b'+2:lemma:iptg'
2.496912 OD b'lemma[:1]:o'
2.467329 Supp b'+1:lemma:\xc2\xb5m'
2.456900 O b'lemma:.'
2.456900 O b'postag:.'
2.456900 O b'postag[:1]:.'
2.453094 O b'+1:lemma:pq'
2.442390 O b'-1:lemma:ChIP-exo'
2.433484 Technique b'lemma[:2]:ch'
2.399879 Anti b'+2:lemma:antibody'
2.363621 Air b'lemma:Aerobic'
2.363621 Air b'lemma[:2]:Ae'
2.358981 Gtype b'-2:lemma:delta'
2.342573 O b'-2:lemma:myc'
2.331321 Supp b'lemma:no3'
2.313202 Supp b'lemma:rifampicin'
2.312753 Anti b'+1:lemma:antibody'
2.297741 Supp b'lemma[:2]:ri'
2.281621 Air b'-2:lemma:IP'
2.278366 Med b'lemma:MOPS'
2.278366 Med b'lemma[:1]:M'
2.278366 Med b'lemma[:2]:MO'
2.270655 OD b'lemma[:2]:od'
2.263438 Gversion b'lemma:chip-seq'
2.231180 O b'-1:lemma:glucose'
2.206950 Gtype b'lemma:wt'
2.206950 Gtype b'lemma[:2]:wt'
2.187730 Supp b'lemma[:2]:gl'
2.186178 pH b'+1:postag:CD'
2.183027 Technique b'-1:lemma:input'
2.179683 O b'lemma[:1]:-'
2.172484 Air b'-1:lemma:ChIP-Seq'
2.160518 Phase b'lemma:stationary'
2.150044 Phase b'lemma[:2]:ex'
2.145423 O b'postag:IN'
2.145423 O b'postag[:1]:I'
2.145423 O b'postag[:2]:IN'
2.135783 O b'-1:lemma:lb'
2.115167 O b'-1:lemma:anaerobic'
2.106797 O b'lemma:rpob'
2.094643 O b'+2:lemma:cra'
2.046836 Gtype b'-2:postag:DT'
2.035645 Gtype b'lemma:nsrr'
2.035645 Gtype b'lemma[:2]:ns'
2.034890 Technique b'lemma[:1]:C'
2.033641 O b'+1:postag:RB'
2.013660 Supp b'+1:lemma:1'
2.011345 Supp b'lemma:fructose'
2.002182 Supp b'-2:lemma:media'
1.998148 Gtype b'hGreek'
1.992371 Temp b'-1:lemma:\xcf\x8332'
1.980657 O b'postag::'
1.980657 O b'postag[:1]::'
1.979866 Supp b'-1:lemma:final'
1.974647 Med b'+2:lemma:b2'
1.964961 Temp b'-1:lemma:sample'
1.959365 Strain b'+1:lemma:substr'
1.959365 Strain b'-2:lemma:str'
1.931532 Temp b'-2:lemma:\xcf\x8332'
1.926204 pH b'lemma[:2]:ph'
1.924918 Supp b'-2:lemma:agent'
1.911949 Supp b'lemma[:1]:n'
1.895649 Supp b'+2:lemma:rifampicin'
1.895108 Temp b'+2:postag:DT'
1.894074 Air b'-1:lemma:-'
1.892359 Gtype b'lemma:\xe2\x88\x86'
1.892359 Gtype b'lemma[:1]:\xe2\x88\x86'
1.886561 O b'-1:lemma:media'
1.878120 Gtype b'lemma[:1]:w'
1.849654 Air b'-1:lemma:co2'
1.819415 Supp b'-1:lemma:+'
1.819234 Med b'lemma:L'
1.819234 Med b'+1:lemma:broth'
1.811008 Anti b'lemma[:2]:an'
1.807476 O b'-1:lemma:0.3'
1.795731 O b'-1:postag:NNS'
1.794762 O b'postag:CC'
1.794762 O b'postag[:2]:CC'
1.793009 Gtype b'-1:postag:VBG'
1.774172 O b'lemma[:1]:C'
1.769427 Temp b'lemma[:1]:3'
1.750065 Med b'lemma:broth'
1.750065 Med b'-1:lemma:L'
1.750065 Med b'lemma[:2]:br'
1.731860 Air b'lemma[:1]:A'
1.729863 Supp b'-1:postag:CC'
1.711219 Gtype b'+1:lemma:with'
1.710940 O b'+1:postag:NNP'
1.706914 O b'+1:lemma:coli'
1.705143 Phase b'lemma[:1]:e'
1.701557 O b'lemma:chip'
1.688302 Gtype b'lemma[:2]:ar'
1.687398 Gtype b'-1:lemma:_'
1.679707 Supp b'-2:lemma:induce'
1.674594 O b'+1:lemma:250'
1.667872 O b'-2:lemma:ChIP-Seq'
1.653279 Gtype b'+1:lemma::'
1.627724 O b'lemma:A'
1.622722 Supp b'lemma[:2]:ac'
1.617166 O b'postag:VBN'
1.614130 Substrain b'lemma[:1]:m'
1.610355 Gtype b'-2:lemma:genotype'
1.606189 O b'+2:postag:JJ'
1.605995 O b'lemma:0.4'
1.600992 Substrain b'+1:lemma:phtpg'
1.591094 O b'-1:lemma:Aerobic'
1.589843 Supp b'-1:lemma:_'
1.579672 Air b'+1:postag:IN'
1.566124 Temp b'lemma:43'
1.566124 Temp b'lemma[:2]:43'
1.563526 O b'-1:lemma:type'
1.558846 Supp b'lemma[:2]:30'
1.552158 O b'-1:lemma:aerobically'
1.551882 Gversion b'lemma:nc'
1.551882 Gversion b'lemma[:2]:nc'
1.536623 Supp b'+1:lemma:_'
1.536256 O b'-1:lemma:\xc2\xb0c'
1.524621 Air b'lemma[:1]:a'
1.511586 Temp b'-1:lemma:43'
1.506306 O b'lemma[:1]:b'
1.506103 Gtype b'lemma[:2]:wi'
1.498210 Supp b'+1:lemma:2'
1.491309 Gtype b'+1:lemma:flagtag'
1.490390 Med b'+1:lemma:0.4'
1.485550 O b'+2:lemma:fructose'
1.479904 Supp b'lemma:Leu'
1.479904 Supp b'lemma[:2]:Le'
1.479904 Supp b'-2:lemma:Lrp'
1.475818 Supp b'lemma:glucose'
1.472393 Gtype b'lemma[:2]:fl'
1.471237 Gtype b'-1:lemma:rpob'
1.464668 Med b'lemma:lb'
1.464668 Med b'lemma[:2]:lb'
1.462458 O b'+1:lemma:mid-log'
1.460719 Med b'+2:lemma:b1'
1.459376 O b'lemma[:2]:ha'
1.450388 Supp b'lemma[:1]:I'
1.442333 Gtype b'-1:lemma:vector'
1.442007 Med b'+1:lemma:g/l'
1.435558 Supp b'lemma[:2]:ni'
1.434864 O b'lemma:ml'
1.434864 O b'lemma[:2]:ml'
1.431207 Technique b'symb'
1.424801 O b'lemma[:1]:r'
1.421918 Supp b'+2:lemma:mid-log'
1.413708 O b'lemma[:1]:s'
1.383477 O b'-2:postag:FW'
1.383257 O b'-1:lemma:into'
1.382919 O b'lemma[:2]:ga'
1.354991 O b'lemma[:1]:c'
1.346926 Med b'+1:lemma:minimal'
1.336028 Gtype b'lemma[:1]:f'
1.335008 O b'+1:postag:VBN'
1.329569 pH b'lemma:ph5'
1.329569 pH b'+1:lemma:.5'
1.326705 Gtype b'-2:lemma:affyexp'
Top negative:
-0.155596 Supp b'-1:lemma:10'
-0.157470 O b'+2:lemma:ph'
-0.158631 Supp b'lemma[:1]:s'
-0.159053 O b'-1:lemma:from'
-0.160270 Supp b'-1:lemma:-lrb-'
-0.160524 Gtype b'+2:postag:CD'
-0.161112 Air b'postag[:1]:N'
-0.161112 Air b'postag[:2]:NN'
-0.161768 pH b'postag:NN'
-0.161818 OD b'symb'
-0.167771 Supp b'-2:lemma:treat'
-0.176292 O b'lemma:co2'
-0.177197 O b'-1:lemma:g/l'
-0.181198 O b'-1:lemma:1m'
-0.181198 O b'+2:lemma:7.6'
-0.186508 O b'-2:lemma:genome'
-0.193058 Supp b'+2:lemma:-rrb-'
-0.194306 Gtype b'lemma[:1]:h'
-0.194450 Supp b'-1:postag:-LRB-'
-0.195692 Anti b'symb'
-0.197135 O b'-2:lemma:of'
-0.201423 O b'-2:lemma:-lrb-'
-0.201718 O b'-1:lemma:0.2'
-0.207096 Temp b'postag[:1]:N'
-0.207096 Temp b'postag[:2]:NN'
-0.208637 Temp b'-2:postag:NN'
-0.210816 Technique b'-2:postag:NN'
-0.212139 O b'lemma:in'
-0.215720 Supp b'+2:postag:-RRB-'
-0.227268 O b'+2:lemma:-rrb-'
-0.229192 O b'lemma:anaerobic'
-0.229556 O b'+1:lemma:.'
-0.229556 O b'+1:postag:.'
-0.229949 Vess b'hUpper'
-0.229949 Vess b'hLower'
-0.232026 O b'lemma[:1]:n'
-0.239837 O b'lemma:30'
-0.244559 O b'lemma:anaerobically'
-0.245601 Phase b'+1:postag:NN'
-0.249673 O b'-2:lemma:aerobically'
-0.251037 O b'-2:lemma:a'
-0.251989 O b'lemma[:1]:d'
-0.252202 Supp b'lemma[:1]:a'
-0.253068 O b'-2:postag:-LRB-'
-0.258564 O b'+1:lemma:mm'
-0.260363 O b'-1:lemma:fresh'
-0.261060 O b'-2:lemma:anaerobically'
-0.262340 Air b'-1:lemma:or'
-0.263528 pH b'postag[:1]:N'
-0.263528 pH b'postag[:2]:NN'
-0.265074 O b'-2:postag::'
-0.265217 O b'-2:lemma:IP'
-0.269304 Med b'+1:postag:NN'
-0.285401 O b'-2:lemma:rpob'
-0.295781 Supp b'lemma[:2]:an'
-0.296045 O b'lemma[:2]:ar'
-0.296575 O b'-2:lemma:fresh'
-0.301913 O b'lemma:media'
-0.307515 O b'lemma[:2]:gl'
-0.308163 O b'-1:postag:IN'
-0.311903 O b'-1:lemma:mm'
-0.312159 O b'-2:lemma:nh4cl'
-0.322974 Anti b'+2:postag:JJ'
-0.325164 Supp b'-2:postag:JJ'
-0.328654 Supp b'-1:postag:NNP'
-0.332535 O b'-1:lemma:of'
-0.341258 O b'lemma:methanol'
-0.341258 O b'-2:lemma:dissolve'
-0.346421 Supp b'+2:lemma:glucose'
-0.353210 Technique b'-1:postag::'
-0.357081 O b'+1:lemma:supplement'
-0.362827 O b'lemma:2h'
-0.362827 O b'-1:lemma:additional'
-0.362827 O b'lemma[:2]:2h'
-0.373258 O b'-1:lemma:final'
-0.373355 O b'+2:lemma:at'
-0.376466 Supp b'+1:postag:NNS'
-0.379434 O b'-1:lemma:IP'
-0.379949 O b'lemma[:1]:0'
-0.380927 O b'-2:lemma:phase'
-0.384075 O b'lemma:glucose'
-0.384929 Med b'-1:postag:NN'
-0.387171 O b'lemma[:1]:k'
-0.388425 O b'lemma:\xe2\x88\x86'
-0.388425 O b'lemma[:1]:\xe2\x88\x86'
-0.390156 Temp b'postag:NN'
-0.390965 O b'+1:lemma:g/l'
-0.410576 Med b'-1:postag:CD'
-0.411791 O b'+1:lemma:1m'
-0.411791 O b'-2:lemma:vol'
-0.418447 Gtype b'+1:lemma:-rrb-'
-0.418498 O b'-1:lemma:rpob'
-0.422355 O b'-2:lemma:until'
-0.434852 Technique b'postag:NN'
-0.435566 Supp b'postag:JJ'
-0.439076 Supp b'postag[:1]:J'
-0.439076 Supp b'postag[:2]:JJ'
-0.442452 O b'-1:lemma:30'
-0.455337 Med b'+2:postag:VBN'
-0.455366 Air b'postag:NN'
-0.461874 O b'+2:lemma:fnr'
-0.467110 O b'-2:lemma:supplement'
-0.468087 O b'-1:lemma:dissolve'
-0.468087 O b'+1:lemma:methanol'
-0.486893 Gtype b'-2:lemma:\xe2\x88\x86'
-0.493653 O b'lemma[:1]:L'
-0.497715 O b'+2:lemma:10'
-0.498822 O b'lemma:of'
-0.498822 O b'lemma[:2]:of'
-0.507436 O b'lemma[:2]:0.'
-0.510139 Med b'+1:postag:IN'
-0.513295 O b'+2:lemma:tag'
-0.514818 O b'+1:lemma:+'
-0.532828 O b'-1:lemma:\xe2\x88\x86'
-0.536622 O b'+2:lemma:mid-log'
-0.538561 Supp b'+1:lemma:-lrb-'
-0.543996 O b'+2:lemma:.'
-0.543996 O b'+2:postag:.'
-0.557797 Supp b'+1:postag:-LRB-'
-0.568152 O b'+2:lemma:a'
-0.585265 O b'-2:lemma:pahse'
-0.590510 O b'-2:lemma:glucose'
-0.590875 O b'+1:lemma:until'
-0.605869 O b'-1:lemma:nsrr'
-0.609882 O b'-2:lemma:dpd'
-0.616188 Phase b'-1:postag:JJ'
-0.618132 Air b'-1:postag:JJ'
-0.624517 O b'-2:postag:SYM'
-0.626955 Gtype b'lemma[:1]:c'
-0.627961 Anti b'+2:lemma:polyclonal'
-0.635434 Med b'symb'
-0.635949 Phase b'postag[:1]:J'
-0.635949 Phase b'postag[:2]:JJ'
-0.636258 OD b'hUpper'
-0.636258 OD b'hLower'
-0.636750 O b'lemma[:2]:ri'
-0.647062 O b'+1:postag:IN'
-0.654730 O b'-2:lemma:2'
-0.655657 Phase b'postag:JJ'
-0.655964 Supp b'symb'
-0.660356 O b'+2:lemma:+'
-0.661231 O b'+1:lemma:2.0'
-0.662145 O b'-2:lemma:media'
-0.666872 O b'+2:lemma:b'
-0.675694 Anti b'+1:lemma:anti-fur'
-0.688911 O b'-1:lemma:co2'
-0.690102 Gtype b'postag[:1]:V'
-0.690102 Gtype b'postag[:2]:VB'
-0.694375 Gtype b'lemma[:1]:r'
-0.706144 O b'lemma[:2]:me'
-0.718038 O b'lemma:mid-log'
-0.724228 O b'lemma:rifampicin'
-0.735529 O b'-2:lemma::'
-0.743194 Med b'-2:postag:VBN'
-0.786989 Supp b'+1:lemma:,'
-0.786989 Supp b'+1:postag:,'
-0.789461 O b'-2:postag:RB'
-0.815790 Agit b'hUpper'
-0.815790 Agit b'hLower'
-0.824637 O b'+2:postag:-RRB-'
-0.828411 O b'-1:lemma:cra'
-0.834312 O b'+1:lemma:at'
-0.864833 O b'-1:lemma:vol'
-0.864833 O b'-2:lemma:1/100'
-0.864833 O b'+2:lemma:1m'
-0.896137 O b'-2:postag:DT'
-0.903408 Phase b'hUpper'
-0.903408 Phase b'hLower'
-0.954592 Gtype b'lemma[:1]:a'
-0.958219 Gtype b'+2:lemma:cra'
-0.959188 O b'-1:lemma:ph'
-1.006021 O b'-2:lemma:0.3'
-1.010964 O b'lemma[:2]:ae'
-1.026041 Med b'-2:lemma:grow'
-1.037767 O b'-1:postag::'
-1.062839 O b'-1:lemma:sample'
-1.105188 Supp b'lemma[:1]:c'
-1.107601 OD b'+1:postag:NN'
-1.109127 Agit b'symb'
-1.130040 Supp b'+2:postag:CD'
-1.149221 O b'lemma:0.3'
-1.151210 O b'-1:lemma:1'
-1.172944 O b'lemma[:2]:30'
-1.211332 O b'+2:lemma:then'
-1.263528 Supp b'+2:lemma:fructose'
-1.346921 O b'+2:lemma:rifampicin'
-1.394447 O b'postag:VBP'
-1.459260 Supp b'+2:lemma:1'
-1.504864 Anti b'postag:NNP'
-1.616873 O b'-1:lemma:2'
-1.627873 Supp b'+2:lemma:2'
-1.790611 O b'-1:postag:VBG'
-1.925758 O b'-2:lemma:rifampicin'
-2.014829 O b'+1:lemma:in'
-2.028945 OD b'+2:lemma:aerobically'
-2.287808 O b'+1:lemma:1'
-2.798978 O b'+1:lemma:2'
-2.902688 O b'lemma[:2]:fl'
-3.487913 O b'-1:lemma::'
-4.880856 O b'-1:lemma:_'
********** TRAINING AND TESTING REPORT **********
Training file: training-data-set-70.txt
best params:{'c1': 0.07838162718183349, 'c2': 0.05887606797757319}
best CV score:0.87030448615518
model size: 0.16M
Flat F1: 0.7823412170507693
precision recall f1-score support
OD 1.000 0.818 0.900 22
pH 1.000 1.000 1.000 8
Technique 1.000 0.913 0.955 23
Med 1.000 0.925 0.961 53
Temp 0.917 0.759 0.830 29
Vess 1.000 1.000 1.000 1
Agit 0.000 0.000 0.000 0
Phase 0.875 0.933 0.903 15
Air 0.556 0.362 0.439 69
Anti 1.000 1.000 1.000 11
Strain 0.000 0.000 0.000 1
Gtype 0.849 0.859 0.854 85
Substrain 0.000 0.000 0.000 0
Supp 0.686 0.813 0.744 134
Gversion 0.000 0.000 0.000 0
avg / total 0.798 0.778 0.782 451
Top likely transitions:
Temp -> Temp 4.911417
OD -> OD 4.541853
Med -> Med 4.405893
Supp -> Supp 4.305413
Agit -> Agit 4.272648
Anti -> Anti 3.837258
O -> O 3.787660
Gtype -> Gtype 3.752687
Gversion -> Gversion 3.628661
Air -> Air 3.521236
Phase -> Phase 3.320009
Technique -> Technique 2.966375
pH -> pH 2.362986
Substrain -> Gtype 1.121355
O -> Technique 0.896285
Gtype -> Supp 0.860650
O -> Supp 0.689540
O -> Gtype 0.595827
Gtype -> Air 0.530941
Technique -> Air 0.501834
Air -> O 0.490782
Gtype -> pH 0.392258
Temp -> O 0.195983
Med -> O 0.164949
Supp -> O 0.068384
O -> Temp 0.032340
O -> pH 0.007653
O -> Strain -0.003076
Gversion -> Gtype -0.013131
Gversion -> Air -0.016755
Supp -> Anti -0.028664
Vess -> O -0.036195
Agit -> Supp -0.042183
Gtype -> Phase -0.047540
Supp -> Technique -0.060640
Supp -> pH -0.062714
O -> Phase -0.068478
Technique -> OD -0.069783
Supp -> OD -0.073453
O -> Agit -0.079631
Phase -> Air -0.079705
Anti -> Med -0.089699
Anti -> Gtype -0.095119
Gtype -> Gversion -0.100232
Temp -> Air -0.116744
OD -> O -0.129801
Phase -> Med -0.135688
Air -> Agit -0.138019
Phase -> O -0.139580
pH -> Supp -0.141124
Top unlikely transitions:
Supp -> Technique -0.060640
Supp -> pH -0.062714
O -> Phase -0.068478
Technique -> OD -0.069783
Supp -> OD -0.073453
O -> Agit -0.079631
Phase -> Air -0.079705
Anti -> Med -0.089699
Anti -> Gtype -0.095119
Gtype -> Gversion -0.100232
Temp -> Air -0.116744
OD -> O -0.129801
Phase -> Med -0.135688
Air -> Agit -0.138019
Phase -> O -0.139580
pH -> Supp -0.141124
Air -> Temp -0.146478
OD -> Supp -0.177929
Gversion -> O -0.195235
Med -> Air -0.218392
Technique -> Supp -0.218928
O -> OD -0.220859
Anti -> O -0.227867
Anti -> Supp -0.241653
Temp -> Med -0.265069
Air -> Gtype -0.270959
Gversion -> Supp -0.271472
Phase -> OD -0.278093
O -> Med -0.292957
Supp -> Phase -0.310262
Agit -> O -0.315551
Technique -> O -0.317964
Agit -> Air -0.327189
Air -> Phase -0.332685
Supp -> Gtype -0.386391
Gtype -> Technique -0.394676
Supp -> Air -0.403362
Gtype -> Anti -0.428993
Phase -> Supp -0.440098
Air -> Supp -0.449326
Technique -> pH -0.462944
Air -> Med -0.469469
OD -> Air -0.509283
Gtype -> Med -0.621751
Supp -> Med -0.659958
Gtype -> O -0.671881
O -> Air -0.712510
Technique -> Gtype -1.006943
Substrain -> O -1.077790
Med -> Supp -1.486790
Top positive:
4.124249 Anti b'-2:lemma:antibody'
3.641687 Air b'word:Aerobic'
3.497811 O b'-2:lemma:_'
3.258366 Air b'lemma:anaerobic'
3.145552 O b'lemma:_'
3.145552 O b'word:_'
3.024671 Gtype b'-2:lemma:genotype/variation'
2.928771 Air b'postag:RB'
2.783339 Technique b'word:ChIP-Seq'
2.697083 O b'lemma:2'
2.697083 O b'word:2'
2.670728 Phase b'-2:lemma:phase'
2.637125 O b'postag::'
2.618748 O b'word:Cra'
2.610104 Technique b'word:ChIP-exo'
2.571728 O b'lemma:1'
2.571728 O b'word:1'
2.511177 O b'postag:IN'
2.472685 Gtype b'-2:lemma:genotype'
2.396386 Supp b'lemma:pq'
2.396386 Supp b'word:PQ'
2.337247 O b'lemma:.'
2.337247 O b'postag:.'
2.337247 O b'word:.'
2.332296 Technique b'lemma:rna-seq'
2.320371 Gtype b'word:WT'
2.309222 Gtype b'lemma:wt'
2.274976 O b'-2:lemma:flagtag'
2.273949 Technique b'word:ChIPSeq'
2.258997 Technique b'lemma:ChIP-exo'
2.199264 Air b'word:Anaerobic'
2.142635 O b'-1:lemma:ChIP-exo'
2.096906 Supp b'lemma:nh4cl'
2.072818 Phase b'lemma:mid-log'
2.072818 Phase b'word:mid-log'
2.053563 Gtype b'lemma:wild-type'
2.044128 O b'lemma:rpob'
2.044128 O b'word:RpoB'
2.020270 O b'postag:VBN'
2.020107 O b'lemma:3'
2.020107 O b'word:3'
2.011865 Supp b'-1:word:Cra'
1.983566 Gtype b'lemma:type'
1.983566 Gtype b'word:type'
1.975796 O b'lemma:-'
1.975796 O b'word:-'
1.959537 O b'-2:lemma:medium'
1.955236 Gtype b'-2:lemma:affyexp'
1.942868 Gtype b'lemma:\xce\xb4cra'
1.939375 Gtype b'word:\xce\x94cra'
1.938173 Technique b'lemma:chipseq'
1.890561 Supp b'lemma:Iron'
1.890561 Supp b'word:Iron'
1.890561 Supp b'+1:word:Deficient'
1.890561 Supp b'-2:lemma:Anaerobic'
1.874088 Supp b'lemma:acetate'
1.874088 Supp b'word:acetate'
1.819674 pH b'+1:postag:CD'
1.788257 O b'lemma:b'
1.788257 O b'word:B'
1.782368 O b'word:A'
1.774988 O b'+2:lemma:\xc2\xb0c'
1.768402 O b'+2:postag:JJ'
1.762269 Gtype b'lemma:\xe2\x88\x86'
1.762269 Gtype b'word:\xe2\x88\x86'
1.746520 Med b'lemma:MOPS'
1.746520 Med b'word:MOPS'
1.723641 Supp b'+2:lemma:iptg'
1.722034 Air b'lemma:Aerobic'
1.705697 O b'-1:word:Aerobic'
1.703316 Anti b'+2:lemma:antibody'
1.702891 O b'postag:CC'
1.699990 Supp b'-1:lemma:with'
1.699990 Supp b'-1:word:with'
1.688533 Supp b'lemma:glucose'
1.688533 Supp b'word:glucose'
1.674465 Supp b'lemma:no3'
1.674465 Supp b'word:NO3'
1.666479 Supp b'lemma:nacl'
1.666479 Supp b'word:NaCl'
1.641938 Supp b'lemma:arginine'
1.638237 Substrain b'lemma:mg1655'
1.638237 Substrain b'word:MG1655'
1.637725 Supp b'+1:lemma:\xc2\xb5m'
1.637725 Supp b'+1:word:\xc2\xb5M'
1.636903 Med b'isUpper'
1.611838 O b'+1:postag:NNP'
1.611343 O b'lemma:a'
1.607795 Technique b'+2:lemma:ph5'
1.602510 Air b'word:anaerobic'
1.599293 O b'+1:postag:RB'
1.590988 Gtype b'-1:lemma:\xe2\x88\x86'
1.590988 Gtype b'-1:word:\xe2\x88\x86'
1.589631 Gtype b'-2:postag:DT'
1.583352 Med b'+2:postag:CC'
1.579250 O b'isLower'
1.578231 Gtype b'+2:lemma:glucose'
1.552685 Gtype b'+1:lemma:type'
1.552685 Gtype b'+1:word:type'
1.528892 Substrain b'-2:lemma:substr'
1.515887 Supp b'lemma:nitrate'
1.515887 Supp b'word:nitrate'
1.503178 O b'-1:lemma:tag'
1.473485 Gtype b'lemma:\xce\xb4soxr'
1.473485 Gtype b'word:\xce\x94soxR'
1.456677 O b'-1:word:tag'
1.455346 Gtype b'lemma:flag-tag'
1.455346 Gtype b'-1:lemma:c-terminal'
1.455346 Gtype b'word:Flag-tag'
1.455346 Gtype b'-1:word:C-terminal'
1.451462 Technique b'word:RNA-Seq'
1.434331 Supp b'-1:lemma:Cra'
1.432730 O b'+1:word:ChIP-Seq'
1.426572 O b'-1:lemma:anaerobic'
1.425108 Gtype b'lemma:\xce\xb4fur'
1.425108 Gtype b'word:\xce\x94fur'
1.425025 Technique b'lemma:rnaseq'
1.425025 Technique b'word:RNASeq'
1.416012 O b'postag:VBG'
1.415750 Gversion b'-2:lemma:nc'
1.408974 Gversion b'lemma:chip-seq'
1.379843 O b'+1:lemma:arca-8myc'
1.379843 O b'+1:word:ArcA-8myc'
1.374672 O b'lemma:with'
1.374672 O b'word:with'
1.374577 O b'lemma:Cra'
1.373182 Gversion b'word:ChIP-Seq'
1.372454 Gtype b'-2:lemma:delta'
1.369100 O b'+2:lemma:cra'
1.367463 O b'-1:lemma:0.3'
1.367463 O b'-1:word:0.3'
1.366576 O b'-2:lemma:myc'
1.365144 O b'lemma:ompr'
1.365144 O b'word:OmpR'
1.358116 Supp b'-1:postag:CC'
1.351356 Gtype b'postag:JJ'
1.340661 Strain b'+1:lemma:substr'
1.340661 Strain b'+1:word:substr'
1.340661 Strain b'-2:lemma:str'
1.337333 Gtype b'+1:lemma:with'
1.337333 Gtype b'+1:word:with'
1.331237 Supp b'-2:lemma:agent'
1.319017 Air b'+1:postag:IN'
1.313717 O b'lemma:harbor'
1.313717 O b'word:harboring'
1.311281 Technique b'-1:lemma:chip-exo'
1.310088 Air b'lemma:aerobic'
1.301908 Temp b'isNumber'
1.296057 O b'lemma:chip'
1.294021 Med b'+2:lemma:b2'
1.290231 O b'+1:lemma:pq'
1.290231 O b'+1:word:PQ'
1.288968 Supp b'-2:lemma:media'
1.285093 O b'-1:lemma:lb'
1.285093 O b'-1:word:LB'
1.284396 O b'-1:lemma:glucose'
1.284396 O b'-1:word:glucose'
1.281083 O b'-1:lemma:media'
1.281083 O b'-1:word:media'
1.280357 Med b'lemma:lb'
1.280357 Med b'word:LB'
1.277491 O b'+2:lemma:70'
1.276083 Supp b'lemma:Leu'
1.276083 Supp b'word:Leu'
1.276083 Supp b'-2:lemma:Lrp'
1.266271 Supp b'-2:lemma:induce'
1.263127 Phase b'lemma:exponential'
1.263127 Phase b'word:exponential'
1.263127 Phase b'lemma:stationary'
1.263127 Phase b'word:stationary'
1.250254 Air b'-1:lemma:ChIP-Seq'
1.250254 Air b'-1:word:ChIP-Seq'
1.245392 Air b'-2:lemma:IP'
1.237417 OD b'postag:CD'
1.227048 Supp b'-1:lemma:+'
1.227048 Supp b'-1:word:+'
1.225001 Gtype b'-1:postag:VBG'
1.222768 O b'lemma:Custom'
1.222768 O b'word:Custom'
1.222690 Strain b'lemma:k-12'
1.222690 Strain b'word:K-12'
1.211762 Gversion b'-2:lemma:build'
1.207468 Supp b'+1:lemma:1'
1.207468 Supp b'+1:word:1'
1.200932 Air b'-1:lemma:-'
1.200932 Air b'-1:word:-'
1.200071 Gversion b'lemma:nc'
1.200071 Gversion b'word:NC'
1.192535 O b'+2:lemma:fructose'
1.191869 Gtype b'postag:NN'
1.189235 pH b'lemma:ph5'
1.189235 pH b'+1:lemma:.5'
1.189235 pH b'word:pH5'
1.189235 pH b'+1:word:.5'
1.180856 O b'lemma:at'
1.174453 Supp b'lemma:rifampicin'
1.174453 Supp b'word:rifampicin'
1.166521 O b'lemma:argr'
1.166521 O b'word:ArgR'
1.162681 OD b'-1:postag:IN'
Top negative:
-0.311652 O b'+1:word:supplemented'
-0.312640 O b'-2:lemma:control'
-0.323532 O b'lemma:minimal'
-0.323532 O b'word:minimal'
-0.323844 O b'-1:lemma:ph'
-0.323844 O b'-1:word:pH'
-0.324956 O b'lemma:37'
-0.324956 O b'word:37'
-0.328081 O b'-1:lemma:cra'
-0.328436 O b'lemma:methanol'
-0.328436 O b'word:methanol'
-0.328436 O b'-2:lemma:dissolve'
-0.332352 O b'-1:lemma:37'
-0.332352 O b'-1:word:37'
-0.337835 Anti b'+1:lemma:anti-fur'
-0.337835 Anti b'+1:word:anti-Fur'
-0.338497 O b'+1:lemma:1m'
-0.338497 O b'+1:word:1M'
-0.338497 O b'-2:lemma:vol'
-0.340286 O b'+2:lemma:add'
-0.348253 Supp b'-1:lemma:10'
-0.348253 Supp b'-1:word:10'
-0.355580 O b'-2:lemma:10'
-0.356547 O b'-2:lemma:minimal'
-0.359288 O b'-1:lemma:ml'
-0.359288 O b'-1:word:ml'
-0.361424 O b'-1:lemma:dissolve'
-0.361424 O b'+1:lemma:methanol'
-0.361424 O b'-1:word:dissolved'
-0.361424 O b'+1:word:methanol'
-0.367262 Substrain b'isLower'
-0.367703 Supp b'postag:CC'
-0.367998 O b'-1:lemma:co2'
-0.367998 O b'-1:word:CO2'
-0.370364 O b'+2:lemma:-rrb-'
-0.374065 O b'-2:lemma:genome'
-0.375208 Supp b'-2:lemma:.'
-0.375208 Supp b'-2:postag:.'
-0.378770 O b'+2:lemma:at'
-0.385912 O b'-1:lemma:fresh'
-0.385912 O b'-1:word:fresh'
-0.389093 Strain b'isLower'
-0.389489 Temp b'-2:postag:NN'
-0.390296 O b'lemma:aerobic'
-0.391758 O b'-2:lemma:aerobically'
-0.393460 O b'-1:lemma:2'
-0.393460 O b'-1:word:2'
-0.397723 O b'+1:lemma:mm'
-0.397723 O b'+1:word:mM'
-0.398729 O b'-1:lemma:rpob'
-0.398729 O b'-1:word:RpoB'
-0.401643 O b'-1:lemma:mm'
-0.401643 O b'-1:word:mM'
-0.402690 Supp b'-2:lemma:grow'
-0.403521 O b'-2:lemma:pahse'
-0.404104 O b'-1:lemma:grow'
-0.411213 O b'lemma:nitrogen'
-0.411213 O b'word:nitrogen'
-0.411373 O b'+1:lemma:+'
-0.411373 O b'+1:word:+'
-0.415137 Gtype b'-2:postag:CD'
-0.424176 O b'+1:word:ChIP-exo'
-0.437712 O b'-2:postag:SYM'
-0.445165 Supp b'+1:lemma:,'
-0.445165 Supp b'+1:postag:,'
-0.445165 Supp b'+1:word:,'
-0.453707 O b'lemma:fructose'
-0.453707 O b'word:fructose'
-0.457985 O b'-2:lemma:nh4cl'
-0.460272 O b'lemma:anaerobically'
-0.460272 O b'word:anaerobically'
-0.460899 O b'+2:lemma:reference'
-0.461152 O b'lemma:2h'
-0.461152 O b'-1:lemma:additional'
-0.461152 O b'word:2h'
-0.461152 O b'-1:word:additional'
-0.465651 O b'-2:lemma:rpob'
-0.467210 O b'lemma:anaerobic'
-0.482841 Supp b'+1:lemma:-lrb-'
-0.482841 Supp b'+1:word:-LRB-'
-0.487123 O b'-1:lemma:\xe2\x88\x86'
-0.487123 O b'-1:word:\xe2\x88\x86'
-0.490214 O b'lemma:aerobically'
-0.490214 O b'word:aerobically'
-0.493650 O b'lemma:of'
-0.493650 O b'word:of'
-0.498710 O b'+2:lemma:mid-log'
-0.499349 O b'-1:lemma:30'
-0.499349 O b'-1:word:30'
-0.499545 O b'+2:lemma:fnr'
-0.506778 Supp b'+1:postag:-LRB-'
-0.511585 O b'word:ChIP-exo'
-0.513406 O b'-1:lemma:chip-exo'
-0.517504 Supp b'+2:lemma:glucose'
-0.518174 O b'+1:lemma:g/l'
-0.518174 O b'+1:word:g/L'
-0.520916 O b'-2:lemma:anaerobically'
-0.530292 O b'+2:postag:-RRB-'
-0.531458 Med b'-1:postag:NN'
-0.532473 O b'lemma:nh4cl'
-0.535293 O b'-2:lemma:IP'
-0.538050 Gtype b'-2:lemma:\xe2\x88\x86'
-0.542753 pH b'isUpper'
-0.559582 O b'lemma:glucose'
-0.559582 O b'word:glucose'
-0.563071 O b'-2:lemma:dpd'
-0.568245 O b'+1:lemma:until'
-0.568245 O b'+1:word:until'
-0.569455 Supp b'-2:postag:NNS'
-0.569843 Air b'+1:postag:JJ'
-0.573996 O b'lemma:\xce\xb4fur'
-0.573996 O b'word:\xce\x94fur'
-0.574100 O b'+1:postag:IN'
-0.583141 O b'+1:lemma:in'
-0.583141 O b'+1:word:in'
-0.584211 O b'lemma:30'
-0.584211 O b'word:30'
-0.587080 Agit b'isUpper'
-0.591540 Supp b'-1:postag:NNP'
-0.595806 O b'-1:postag::'
-0.599170 O b'-2:lemma:phase'
-0.601235 O b'+2:lemma:b'
-0.602048 O b'+2:lemma:then'
-0.608335 O b'-1:lemma:1'
-0.608335 O b'-1:word:1'
-0.608384 Anti b'isUpper'
-0.617575 O b'-2:lemma:until'
-0.617630 O b'lemma:mid-log'
-0.617630 O b'word:mid-log'
-0.621155 O b'-2:lemma:fresh'
-0.626295 O b'-2:lemma:media'
-0.635986 Phase b'isUpper'
-0.639041 O b'+2:lemma:250'
-0.644310 Supp b'-2:postag:JJ'
-0.653651 O b'+1:lemma:at'
-0.653651 O b'+1:word:at'
-0.658042 Supp b'-2:lemma:treat'
-0.664028 Med b'+2:postag:VBN'
-0.670366 O b'-1:lemma:nsrr'
-0.670366 O b'-1:word:NsrR'
-0.670591 O b'-2:postag:DT'
-0.674143 O b'-1:lemma:vol'
-0.674143 O b'-1:word:vol'
-0.674143 O b'-2:lemma:1/100'
-0.674143 O b'+2:lemma:1m'
-0.698342 Med b'-2:postag:VBN'
-0.715097 O b'+1:lemma:2.0'
-0.715097 O b'+1:word:2.0'
-0.721531 pH b'isLower'
-0.742327 O b'-1:lemma:sample'
-0.752266 O b'lemma:nitrate'
-0.752266 O b'word:nitrate'
-0.767047 O b'-2:lemma::'
-0.787116 O b'lemma:rifampicin'
-0.787116 O b'word:rifampicin'
-0.792673 O b'+2:lemma:rifampicin'
-0.797812 O b'+1:postag:VBG'
-0.816000 O b'lemma:wt'
-0.816563 O b'-1:lemma:IP'
-0.816563 O b'-1:word:IP'
-0.834409 O b'lemma:0.3'
-0.834409 O b'word:0.3'
-0.837034 Supp b'+2:postag:CD'
-0.838689 O b'postag:VBP'
-0.840231 Technique b'isNumber'
-0.846346 Air b'postag:NN'
-0.850091 Gtype b'postag:VBG'
-0.854432 O b'-2:postag:RB'
-0.883296 Gtype b'isLower'
-0.888784 O b'lemma:media'
-0.888784 O b'word:media'
-0.895754 Gversion b'isLower'
-0.904522 Temp b'postag:NN'
-0.924495 O b'+2:lemma:+'
-0.935152 Gtype b'+2:lemma:cra'
-0.943613 Gtype b'isNumber'
-0.975059 O b'postag:RB'
-1.021612 Med b'-2:lemma:grow'
-1.024421 Anti b'+2:lemma:polyclonal'
-1.031484 O b'-2:lemma:rifampicin'
-1.059929 O b'+1:lemma:1'
-1.059929 O b'+1:word:1'
-1.084068 Supp b'+2:lemma:fructose'
-1.123084 Technique b'isLower'
-1.132053 O b'-2:lemma:0.3'
-1.168749 Supp b'+2:lemma:1'
-1.183772 OD b'+1:postag:NN'
-1.263288 OD b'+2:lemma:aerobically'
-1.270174 Supp b'+2:lemma:2'
-1.329893 Anti b'postag:NNP'
-1.366300 O b'+1:lemma:2'
-1.366300 O b'+1:word:2'
-1.389405 Phase b'-1:postag:JJ'
-1.607302 Supp b'postag:JJ'
-1.676382 O b'-1:postag:VBG'
-1.773723 O b'-1:lemma::'
-1.773723 O b'-1:word::'
-1.857508 Phase b'postag:JJ'
-1.945074 O b'-1:lemma:_'
-1.945074 O b'-1:word:_'
********** TRAINING AND TESTING REPORT **********
Training file: training-data-set-70.txt
best params:{'c1': 0.04223966754804299, 'c2': 0.014836666503726496}
best CV score:0.879077327666518
model size: 0.17M
Flat F1: 0.7889011067174645
precision recall f1-score support
OD 1.000 0.818 0.900 22
pH 1.000 1.000 1.000 8
Technique 1.000 0.913 0.955 23
Med 1.000 0.962 0.981 53
Temp 0.923 0.828 0.873 29
Vess 1.000 1.000 1.000 1
Agit 0.000 0.000 0.000 0
Phase 0.882 1.000 0.938 15
Air 0.556 0.362 0.439 69
Anti 1.000 1.000 1.000 11
Strain 0.000 0.000 0.000 1
Gtype 0.864 0.824 0.843 85
Substrain 0.000 0.000 0.000 0
Supp 0.716 0.791 0.752 134
Gversion 0.000 0.000 0.000 0
avg / total 0.811 0.776 0.789 451
Top likely transitions:
Temp -> Temp 6.132401
Agit -> Agit 5.867978
Med -> Med 5.493382
Anti -> Anti 4.899201
Gversion -> Gversion 4.720775
OD -> OD 4.580439
Gtype -> Gtype 4.270714
Supp -> Supp 4.146870
O -> O 4.001305
Phase -> Phase 3.777096
Air -> Air 3.280795
Technique -> Technique 3.042386
pH -> pH 2.638890
Substrain -> Gtype 0.639428
O -> Technique 0.597359
O -> Gtype 0.301586
Med -> O 0.113620
Gtype -> Air 0.032970
Air -> O 0.023112
Temp -> O 0.007528
O -> Temp 0.000729
Agit -> O -0.000016
Supp -> Technique -0.000031
O -> Strain -0.002771
Phase -> Air -0.018013
Technique -> OD -0.019361
Supp -> OD -0.031281
pH -> Supp -0.038004
Air -> Gtype -0.060170
OD -> Supp -0.078565
Anti -> Gtype -0.100971
Gtype -> Phase -0.118162
Gtype -> Anti -0.120765
Air -> Agit -0.147345
Air -> Temp -0.149520
Anti -> Air -0.164013
Gversion -> Supp -0.166044
Anti -> O -0.170392
Gtype -> OD -0.173948
Anti -> Supp -0.206647
Air -> Technique -0.220913
Technique -> Gtype -0.276956
Supp -> Phase -0.297344
Air -> Phase -0.306210
O -> Supp -0.348583
O -> Med -0.368131
Phase -> OD -0.382985
O -> Phase -0.399249
OD -> Air -0.479222
Phase -> Supp -0.503166
Top unlikely transitions:
Agit -> O -0.000016
Supp -> Technique -0.000031
O -> Strain -0.002771
Phase -> Air -0.018013
Technique -> OD -0.019361
Supp -> OD -0.031281
pH -> Supp -0.038004
Air -> Gtype -0.060170
OD -> Supp -0.078565
Anti -> Gtype -0.100971
Gtype -> Phase -0.118162
Gtype -> Anti -0.120765
Air -> Agit -0.147345
Air -> Temp -0.149520
Anti -> Air -0.164013
Gversion -> Supp -0.166044
Anti -> O -0.170392
Gtype -> OD -0.173948
Anti -> Supp -0.206647
Air -> Technique -0.220913
Technique -> Gtype -0.276956
Supp -> Phase -0.297344
Air -> Phase -0.306210
O -> Supp -0.348583
O -> Med -0.368131
Phase -> OD -0.382985
O -> Phase -0.399249
OD -> Air -0.479222
Phase -> Supp -0.503166
Supp -> Air -0.513142
Phase -> O -0.522452
Air -> Med -0.532689
Gtype -> Med -0.539650
Technique -> Supp -0.572989
Gtype -> Technique -0.595348
Temp -> Med -0.595579
Supp -> Gtype -0.613937
Agit -> Air -0.625302
OD -> O -0.641071
Gversion -> O -0.641193
Technique -> pH -0.782400
Supp -> O -0.786822
Supp -> Med -0.932343
Air -> Supp -0.964410
Technique -> O -1.106191
O -> OD -1.374945
Gtype -> O -1.475612
Substrain -> O -1.620808
Med -> Supp -1.753990
O -> Air -1.843623
Top positive:
5.458764 Anti b'-2:lemma:antibody'
4.034933 O b'-2:lemma:_'
3.995360 Technique b'lemma[:2]:Ch'
3.737588 O b'lemma[:2]:re'
3.686472 Gtype b'lemma[:1]:\xce\xb4'
3.552793 Phase b'-2:lemma:phase'
3.469041 Air b'word:Aerobic'
3.439462 O b'lemma:2'
3.439462 O b'word:2'
3.243564 Air b'lemma:anaerobic'
3.182295 Supp b'-1:word:Cra'
3.047566 Gtype b'-2:lemma:genotype/variation'
3.006963 O b'-2:lemma:medium'
2.984615 O b'lemma:1'
2.984615 O b'word:1'
2.738121 O b'lemma:-'
2.738121 O b'word:-'
2.718196 O b'+1:postag:RB'
2.700586 O b'word:Cra'
2.653876 Supp b'+2:lemma:iptg'
2.620648 O b'lemma:with'
2.620648 O b'word:with'
2.599865 O b'lemma:_'
2.599865 O b'lemma[:1]:_'
2.599865 O b'word:_'
2.575314 Med b'+2:postag:CC'
2.571913 Supp b'lemma:arginine'
2.568347 O b'word:A'
2.546240 Gtype b'lemma[:2]:pk'
2.527855 O b'lemma:3'
2.527855 O b'word:3'
2.379121 O b'-1:lemma:ChIP-exo'
2.355972 O b'lemma[:2]:ge'
2.355054 Technique b'lemma[:2]:rn'
2.341062 Supp b'lemma:pq'
2.341062 Supp b'lemma[:2]:pq'
2.341062 Supp b'word:PQ'
2.297430 O b'+2:lemma:cra'
2.280338 O b'+2:lemma:\xc2\xb0c'
2.251142 O b'+2:lemma:70'
2.245883 Gtype b'word:WT'
2.241264 Air b'word:Anaerobic'
2.237739 Supp b'+2:lemma:for'
2.234179 Anti b'+2:lemma:antibody'
2.153589 O b'-1:lemma:tag'
2.117142 O b'lemma[:1]:h'
2.088934 Air b'lemma[:2]:ae'
2.076795 Technique b'word:ChIPSeq'
2.068935 Gtype b'hGreek'
2.047132 O b'-2:lemma:mid-log'
2.041924 O b'lemma:.'
2.041924 O b'postag:.'
2.041924 O b'postag[:1]:.'
2.041924 O b'word:.'
2.032495 Gtype b'-1:lemma:\xe2\x88\x86'
2.032495 Gtype b'-1:word:\xe2\x88\x86'
2.016938 Gtype b'lemma[:1]:w'
2.000225 O b'+2:lemma:fructose'
1.992930 Supp b'-1:lemma:with'
1.992930 Supp b'-1:word:with'
1.991763 Gtype b'-2:lemma:delta'
1.990954 Substrain b'lemma[:2]:mg'
1.983708 Phase b'+2:lemma:o.d.'
1.977886 Supp b'-1:lemma:Cra'
1.960916 Phase b'lemma:mid-log'
1.960916 Phase b'word:mid-log'
1.958437 Phase b'lemma[:2]:ex'
1.945077 Supp b'-1:postag:CC'
1.944452 Substrain b'lemma[:1]:m'
1.940793 Gtype b'lemma:type'
1.940793 Gtype b'lemma[:2]:ty'
1.940793 Gtype b'word:type'
1.931978 Technique b'lemma:chipseq'
1.906924 O b'+1:postag:NNP'
1.904357 O b'lemma:b'
1.904357 O b'word:B'
1.900770 O b'-1:word:tag'
1.894287 Technique b'lemma:ChIP-exo'
1.866276 Technique b'lemma[:1]:C'
1.856571 O b'-1:word:Aerobic'
1.849985 Supp b'lemma:Iron'
1.849985 Supp b'lemma[:2]:Ir'
1.849985 Supp b'word:Iron'
1.849985 Supp b'+1:word:Deficient'
1.849985 Supp b'-2:lemma:Anaerobic'
1.843831 Supp b'lemma:acetate'
1.843831 Supp b'word:acetate'
1.843349 Air b'lemma[:1]:A'
1.840981 Supp b'-2:lemma:media'
1.831812 Technique b'lemma[:2]:ch'
1.820538 Gtype b'-2:postag:DT'
1.819950 Supp b'-1:lemma:+'
1.819950 Supp b'-1:word:+'
1.813584 Air b'lemma[:1]:a'
1.788160 Gversion b'word:ChIP-Seq'
1.786490 Gtype b'-2:lemma:genotype'
1.771090 Gtype b'+1:lemma:type'
1.771090 Gtype b'+1:word:type'
1.765859 Gtype b'-2:lemma:affyexp'
1.729836 Gversion b'lemma:chip-seq'
1.715972 Air b'lemma[:2]:an'
1.702637 O b'-2:lemma:myc'
1.701777 O b'-2:lemma:flagtag'
1.685559 Supp b'-2:lemma:agent'
1.676248 Supp b'lemma[:1]:n'
1.665235 Technique b'symb'
1.650041 Technique b'-1:lemma:chip-exo'
1.645472 O b'-1:lemma:anaerobic'
1.628175 Gtype b'lemma[:2]:ar'
1.620209 Supp b'postag:VBP'
1.612275 O b'isLower'
1.611690 O b'+1:lemma:pq'
1.611690 O b'+1:word:PQ'
1.609561 Supp b'lemma[:1]:I'
1.608561 O b'-1:lemma:media'
1.608561 O b'-1:word:media'
1.597175 Med b'isUpper'
1.593363 Gtype b'lemma:wt'
1.593363 Gtype b'lemma[:2]:wt'
1.593216 Med b'+2:lemma:b2'
1.592123 Supp b'-2:lemma:induce'
1.590184 Air b'lemma:Aerobic'
1.590184 Air b'lemma[:2]:Ae'
1.571628 Phase b'lemma:stationary'
1.571628 Phase b'word:stationary'
1.555960 Gtype b'lemma[:1]:f'
1.552005 Med b'lemma:MOPS'
1.552005 Med b'lemma[:1]:M'
1.552005 Med b'lemma[:2]:MO'
1.552005 Med b'word:MOPS'
1.545220 Technique b'word:ChIP-Seq'
1.544051 Supp b'lemma[:2]:gl'
1.540691 O b'-1:lemma:0.3'
1.540691 O b'-1:word:0.3'
1.540357 Supp b'-2:lemma:argr'
1.535458 Gtype b'symb'
1.531560 O b'-2:lemma:fructose'
1.530754 O b'-1:lemma:lb'
1.530754 O b'-1:word:LB'
1.528067 Supp b'-1:lemma:final'
1.528067 Supp b'-1:word:final'
1.513494 Phase b'-2:lemma:until'
1.493341 Supp b'lemma:fructose'
1.493341 Supp b'word:fructose'
1.491583 Technique b'-1:lemma:input'
1.491583 Technique b'-1:word:Input'
1.487376 Supp b'lemma[:2]:ac'
1.472895 pH b'+1:postag:CD'
1.458550 O b'-1:lemma:glucose'
1.458550 O b'-1:word:glucose'
1.450710 Gtype b'-1:postag:VBG'
1.450118 Air b'-1:postag::'
1.449701 Supp b'+1:lemma:\xc2\xb5m'
1.449701 Supp b'+1:word:\xc2\xb5M'
1.430882 O b'+1:lemma:mid-log'
1.430882 O b'+1:word:mid-log'
1.420013 Strain b'+1:lemma:substr'
1.420013 Strain b'+1:word:substr'
1.420013 Strain b'-2:lemma:str'
1.408799 O b'lemma:Custom'
1.408799 O b'lemma[:2]:Cu'
1.408799 O b'word:Custom'
1.401545 O b'lemma:chip'
1.400074 O b'lemma[:1]:C'
1.398389 Gtype b'lemma[:2]:wi'
1.394023 O b'lemma:rpob'
1.394023 O b'word:RpoB'
1.389762 Supp b'-2:lemma:supplement'
1.388021 Supp b'lemma[:2]:ni'
1.387197 Phase b'lemma[:1]:e'
1.380149 Air b'postag:RB'
1.380149 Air b'postag[:1]:R'
1.380149 Air b'postag[:2]:RB'
1.378405 O b'lemma:a'
1.377531 O b'+1:word:ChIP-Seq'
1.375054 Technique b'+2:lemma:ph5'
1.371660 Anti b'+1:lemma:antibody'
1.371660 Anti b'+1:word:antibody'
1.355865 O b'postag:VBN'
1.352947 OD b'lemma:0.3'
1.352947 OD b'word:0.3'
1.351689 Phase b'+1:lemma:phase'
1.351689 Phase b'+1:word:phase'
1.350446 pH b'lemma[:2]:ph'
1.341756 O b'isNumber'
1.330917 Supp b'+1:lemma:1'
1.330917 Supp b'+1:word:1'
1.327538 Gversion b'-2:lemma:build'
1.323732 O b'lemma:ompr'
1.323732 O b'word:OmpR'
1.322881 Air b'+1:postag:IN'
1.322130 Temp b'-1:lemma:43'
1.322130 Temp b'-1:word:43'
1.298940 O b'-1:lemma:aerobically'
1.298940 O b'-1:word:aerobically'
1.297300 OD b'lemma[:1]:o'
1.295797 Med b'+1:lemma:0.4'
1.295797 Med b'+1:word:0.4'
1.282176 O b'+2:postag:JJ'
1.271164 O b'+2:lemma:polyclonal'
Top negative:
-0.367697 Supp b'-1:postag:-LRB-'
-0.371915 O b'-2:lemma:fresh'
-0.376710 O b'+2:lemma:fnr'
-0.377206 O b'lemma:2h'
-0.377206 O b'-1:lemma:additional'
-0.377206 O b'lemma[:2]:2h'
-0.377206 O b'word:2h'
-0.377206 O b'-1:word:additional'
-0.379729 O b'-2:postag:-LRB-'
-0.380849 Air b'-1:postag:JJ'
-0.381928 Strain b'isLower'
-0.382657 Med b'-1:postag:IN'
-0.383403 O b'-2:lemma:at'
-0.384281 Technique b'postag:NN'
-0.389220 O b'-1:lemma:until'
-0.389220 O b'-1:word:until'
-0.393047 O b'lemma[:1]:4'
-0.393058 O b'+2:lemma:follow'
-0.393999 O b'-1:lemma:control'
-0.393999 O b'-1:word:control'
-0.395395 O b'lemma[:2]:0.'
-0.396201 O b'-2:lemma:minimal'
-0.397167 O b'lemma:37'
-0.397167 O b'lemma[:2]:37'
-0.397167 O b'word:37'
-0.397928 Gtype b'lemma[:1]:g'
-0.404533 O b'lemma:glucose'
-0.404533 O b'word:glucose'
-0.408273 O b'lemma:30'
-0.408273 O b'word:30'
-0.409885 O b'-2:lemma:supplement'
-0.411648 O b'+1:lemma:mm'
-0.411648 O b'+1:word:mM'
-0.412683 O b'+2:lemma:at'
-0.413401 O b'-2:lemma:of'
-0.419317 O b'-2:postag:RB'
-0.420417 Temp b'isLower'
-0.424626 O b'+1:lemma:phase'
-0.424626 O b'+1:word:phase'
-0.426973 Supp b'lemma[:1]:s'
-0.427251 O b'lemma:wt'
-0.427251 O b'lemma[:2]:wt'
-0.434566 O b'+2:lemma:mid-log'
-0.434970 O b'lemma[:1]:0'
-0.440663 O b'-1:lemma:37'
-0.440663 O b'-1:word:37'
-0.441568 Temp b'-2:postag:NN'
-0.443930 O b'lemma[:1]:\xce\xb4'
-0.458877 O b'-1:lemma:mm'
-0.458877 O b'-1:word:mM'
-0.463660 Supp b'+1:postag:NNS'
-0.463834 O b'lemma:media'
-0.463834 O b'word:media'
-0.464229 O b'-2:lemma:a'
-0.482105 O b'-1:lemma:\xe2\x88\x86'
-0.482105 O b'-1:word:\xe2\x88\x86'
-0.485832 O b'+1:word:ChIP-exo'
-0.487845 Supp b'hGreek'
-0.488218 O b'-2:lemma:nh4cl'
-0.497873 Supp b'+2:postag:NNP'
-0.503699 O b'-2:lemma:genome'
-0.505194 O b'+2:lemma:b'
-0.506273 O b'-1:lemma:IP'
-0.506273 O b'-1:word:IP'
-0.506359 O b'-1:lemma:nsrr'
-0.506359 O b'-1:word:NsrR'
-0.506668 O b'lemma[:1]:L'
-0.507329 O b'+1:postag:VBG'
-0.511337 O b'+1:lemma:2.0'
-0.511337 O b'+1:word:2.0'
-0.514855 Gtype b'lemma[:1]:h'
-0.521122 Med b'-1:postag:NN'
-0.525468 O b'+1:lemma:until'
-0.525468 O b'+1:word:until'
-0.529295 Med b'postag[:1]:C'
-0.531677 O b'+1:lemma:+'
-0.531677 O b'+1:word:+'
-0.535215 O b'lemma[:2]:ar'
-0.537907 O b'lemma[:2]:gl'
-0.550679 Air b'isLower'
-0.551345 O b'-1:lemma:ml'
-0.551345 O b'-1:word:ml'
-0.555528 Supp b'+1:lemma:,'
-0.555528 Supp b'+1:postag:,'
-0.555528 Supp b'+1:word:,'
-0.562680 Gtype b'lemma[:1]:s'
-0.564487 O b'lemma[:1]:p'
-0.574109 Agit b'symb'
-0.582388 O b'-2:postag:DT'
-0.587178 O b'-2:lemma:rpob'
-0.589072 O b'+2:lemma:.'
-0.589072 O b'+2:postag:.'
-0.595658 O b'+1:lemma:g/l'
-0.595658 O b'+1:word:g/L'
-0.596954 O b'-2:postag:SYM'
-0.599195 O b'-1:lemma:rpob'
-0.599195 O b'-1:word:RpoB'
-0.604186 O b'+2:lemma:250'
-0.608815 Supp b'+1:lemma:-lrb-'
-0.608815 Supp b'+1:word:-LRB-'
-0.609845 Anti b'isUpper'
-0.610591 Technique b'isNumber'
-0.617650 O b'-1:lemma:co2'
-0.617650 O b'-1:word:CO2'
-0.622948 Supp b'+1:postag:-LRB-'
-0.641001 Agit b'hUpper'
-0.641001 Agit b'hLower'
-0.645491 O b'+1:postag:IN'
-0.647159 Gtype b'postag[:1]:V'
-0.647159 Gtype b'postag[:2]:VB'
-0.659968 O b'lemma[:2]:ri'
-0.661117 Anti b'+2:postag:JJ'
-0.661585 O b'-1:lemma:1'
-0.661585 O b'-1:word:1'
-0.666026 O b'lemma:mid-log'
-0.666026 O b'word:mid-log'
-0.667310 O b'-2:lemma:pahse'
-0.680963 Anti b'+2:lemma:polyclonal'
-0.685918 Supp b'-1:postag:NNP'
-0.698925 Supp b'-2:postag:JJ'
-0.704054 O b'lemma[:1]:d'
-0.707980 Gtype b'-2:lemma:\xe2\x88\x86'
-0.710771 O b'+2:postag:-RRB-'
-0.713877 O b'+1:lemma:at'
-0.713877 O b'+1:word:at'
-0.722406 O b'-2:lemma::'
-0.722643 O b'lemma:rifampicin'
-0.722643 O b'word:rifampicin'
-0.728674 Med b'-2:postag:VBN'
-0.732685 Supp b'-2:lemma:treat'
-0.742976 Anti b'+1:lemma:anti-fur'
-0.742976 Anti b'+1:word:anti-Fur'
-0.744610 O b'lemma[:1]:k'
-0.745118 O b'-2:lemma:aerobically'
-0.745353 O b'lemma:anaerobic'
-0.747402 O b'+1:lemma:in'
-0.747402 O b'+1:word:in'
-0.757568 O b'+2:lemma:tag'
-0.766874 O b'lemma[:1]:I'
-0.768100 O b'-1:postag::'
-0.770179 O b'-1:lemma:2'
-0.770179 O b'-1:word:2'
-0.779152 Gtype b'isNumber'
-0.782788 Technique b'isLower'
-0.789896 Phase b'hUpper'
-0.789896 Phase b'hLower'
-0.799453 O b'lemma[:1]:n'
-0.803431 pH b'isLower'
-0.809089 O b'-2:postag::'
-0.822848 O b'-2:lemma:dpd'
-0.835295 Supp b'lemma[:2]:an'
-0.862215 O b'-2:lemma:phase'
-0.867939 Gtype b'lemma[:1]:c'
-0.884275 Supp b'+2:lemma:glucose'
-0.904915 Gtype b'lemma[:1]:a'
-0.913465 O b'-1:lemma:vol'
-0.913465 O b'-1:word:vol'
-0.913465 O b'-2:lemma:1/100'
-0.913465 O b'+2:lemma:1m'
-0.930521 Supp b'+2:postag:CD'
-0.948465 O b'-2:lemma:rifampicin'
-0.960959 O b'-2:lemma:until'
-0.973929 O b'+2:lemma:+'
-0.989863 OD b'+1:postag:NN'
-0.993812 Med b'symb'
-1.066464 Gtype b'+2:lemma:cra'
-1.076108 Gtype b'isUpper'
-1.103829 O b'lemma[:2]:ae'
-1.145933 Supp b'-2:lemma:grow'
-1.165009 O b'+2:lemma:then'
-1.184152 Phase b'postag[:1]:J'
-1.184152 Phase b'postag[:2]:JJ'
-1.216149 Gtype b'lemma[:1]:r'
-1.241717 O b'+2:lemma:rifampicin'
-1.245965 Gversion b'isLower'
-1.248268 O b'lemma[:2]:30'
-1.250676 O b'+1:lemma:1'
-1.250676 O b'+1:word:1'
-1.261488 Phase b'postag:JJ'
-1.293329 Anti b'postag:NNP'
-1.311043 Supp b'lemma[:1]:c'
-1.361957 O b'-1:lemma:sample'
-1.486502 Phase b'-1:postag:JJ'
-1.506572 Supp b'+2:lemma:1'
-1.518267 Supp b'+2:lemma:2'
-1.561744 O b'-2:lemma:0.3'
-1.581023 O b'-2:lemma:media'
-1.615538 Med b'-2:lemma:grow'
-1.662192 O b'+1:lemma:2'
-1.662192 O b'+1:word:2'
-1.765435 Med b'+2:postag:VBN'
-1.789575 Supp b'+2:lemma:fructose'
-1.949737 O b'postag:VBP'
-2.303519 O b'-1:postag:VBG'
-2.349341 O b'lemma[:2]:fl'
-2.351661 OD b'+2:lemma:aerobically'
-2.480527 O b'-1:lemma::'
-2.480527 O b'-1:word::'
-2.674521 O b'-1:lemma:_'
-2.674521 O b'-1:word:_'