Estefani Gaytan Nunez

up

python3 training_validation_v9.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70_v4.txt --testFile test-data-set-30_v4.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --Gridname Run1 --version _v1
python3 training_validation_v9.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70_v4.txt --testFile test-data-set-30_v4.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --Gridname Run2 --version _v1 --S1
python3 training_validation_v9.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70_v4.txt --testFile test-data-set-30_v4.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --Gridname Run3 --version _v1 --S2
python3 training_validation_v9.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70_v4.txt --testFile test-data-set-30_v4.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --Gridname Run4 --version _v1 --S1 --S2
python3 training_validation_v9.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70_v4.txt --testFile test-data-set-30_v4.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --Gridname Run5 --version _v1 --S3
python3 training_validation_v9.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70_v4.txt --testFile test-data-set-30_v4.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --Gridname Run6 --version _v1 --S1 --S3
python3 training_validation_v9.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70_v4.txt --testFile test-data-set-30_v4.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --Gridname Run7 --version _v1 --S2 --S3
python3 training_validation_v9.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70_v4.txt --testFile test-data-set-30_v4.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --Gridname Run8 --version _v1 --S1 --S2 --S3
......@@ -123,6 +123,7 @@ def word2features(sent, i, S1, S2):
'-1:postag': postag1,
})
if i < len(sent) - 1:
listElem = sent[i + 1].split('|')
lemma1 = listElem[1]
......@@ -135,6 +136,35 @@ def word2features(sent, i, S1, S2):
'+1:postag': postag1,
})
'''
#================== S6 ======================#
if i > 1:
listElem = sent[i - 2].split('|')
lemma1 = listElem[1]
postag1 = listElem[2]
features.update({
#LemaG posterior
'-2:lemma': lemma1,
#PostG posterior
'-2:postag': postag1,
})
if len(sent) - 2:
listElem = sent[i + 2].split('|')
lemma1 = listElem[1]
postag1 = listElem[2]
features.update({
#LemaG posterior
'+2:lemma': lemma1,
#PostG posterior
'+2:postag': postag1,
})
'''
#====================== S1 ======================#
if S1:
print("S1")
......
# -*- coding: UTF-8 -*-
import os
from itertools import chain
from optparse import OptionParser
from time import time
from collections import Counter
import re
import nltk
import sklearn
import scipy.stats
import sys
from sklearn.externals import joblib
from sklearn.metrics import make_scorer
from sklearn.cross_validation import cross_val_score
from sklearn.grid_search import RandomizedSearchCV
import sklearn_crfsuite
from sklearn_crfsuite import scorers
from sklearn_crfsuite import metrics
from nltk.corpus import stopwords
# Objective
# Training and evaluation of CRFs with sklearn-crfsuite.
#
# Input parameters
# --inputPath=PATH Path of training and test data set
# --trainingFile File with training data set
# --testFile File with test data set
# --outputPath=PATH Output path to place output files
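# --Gridname Name of the run (prefix of the report file name)
# --version Version suffix appended to the report and model file names
# --S1 Add inner word features (optional flag)
# --S2 Add complete word features (optional flag)
# --S3 Add extended context features (optional flag)
# --excludeStopWords Drop tokens whose lemma is an English stop word (optional flag)
# --excludeSymbols Drop tokens whose lemma is a punctuation mark (optional flag)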
# Output
# 1) Best model
# 2) Report
# Examples
# python training_validation_v9.py
# --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets
# --trainingFile training-data-set-70.txt
# --testFile test-data-set-30.txt
# --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/
# --version _v1
# python3 training_validation_v9.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70.txt --testFile test-data-set-30.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --Gridname Run1 --version _v1 --S1 --S2 --S3
##################################################################
# FEATURES #
##################################################################
#================== COMPLETE WORD FEATURES ======================#
# Upper- and lower-case Greek letters (final sigma included); built once so
# every call is a constant-time membership test.
_GREEK_LETTERS = frozenset([
    'Α', 'Β', 'Γ', 'Δ', 'Ε', 'Ζ', 'Η', 'Θ', 'Ι', 'Κ', 'Λ', 'Μ',
    'Ν', 'Ξ', 'Ο', 'Π', 'Ρ', 'Σ', 'Τ', 'Υ', 'Φ', 'Χ', 'Ψ', 'Ω',
    'α', 'β', 'γ', 'δ', 'ε', 'ζ', 'η', 'θ', 'ι', 'κ', 'λ', 'μ', 'ν',
    'ξ', 'ο', 'π', 'ρ', 'ς', 'σ', 'τ', 'υ', 'φ', 'χ', 'ψ', 'ω',
])


def isGreek(word):
    """Return True when *word* is exactly one Greek letter.

    Only the plain upper- and lower-case letters listed in
    ``_GREEK_LETTERS`` count; longer strings and the empty string
    return False.
    """
    return word in _GREEK_LETTERS
#================ INNER OF THE WORD FEATURES ====================#
def hNumber(word):
    """Return True when *word* contains at least one digit character."""
    return any(ch.isdigit() for ch in word)
def symb(word):
    """Return True when *word* contains at least one symbol character.

    A symbol is any character that is neither a digit nor a letter.
    ``str.isalpha`` is already True for Greek letters, so they need no
    separate check. The previous counting approach added Greek letters
    twice, which hid symbols in words such as "α-".
    """
    return any(not (ch.isdigit() or ch.isalpha()) for ch in word)
def hUpper(word):
    """Return True when *word* contains at least one upper-case letter."""
    return any(ch.isupper() for ch in word)
def hLower(word):
    """Return True when *word* contains at least one lower-case letter."""
    return any(ch.islower() for ch in word)
def hGreek(word):
    """Return True when any single character of *word* is a Greek letter."""
    return any(isGreek(ch) for ch in word)
#================================================================#
def word2features(sent, i, S1, S2, S3):
    """Build the CRF feature dictionary for the token at position *i*.

    Every element of *sent* is a pipe-delimited annotation string whose
    fields 0, 1 and 2 are read as word, lemma and POS tag.

    Args:
        sent: list of annotated tokens forming one sentence.
        i: index of the token to describe.
        S1: when true, add inner-word features (character classes and
            lemma/POS-tag prefixes).
        S2: when true, add complete-word features (the word itself, its
            case/Greek/number status and the neighbouring words).
        S3: when true, add lemma and POS tag of the tokens two positions
            before and after.

    Returns:
        dict mapping feature names to values. The general features
        (lemma and POS tag of the token and of its direct neighbours)
        are always present.

    Raises:
        IndexError: if a token that is read has fewer than three fields.
    """
    def annotations(index):
        # (word, lemma, postag) of the token at *index*
        fields = sent[index].split('|')
        return fields[0], fields[1], fields[2]

    word, lemma, postag = annotations(i)
    has_prev = i > 0
    has_next = i < len(sent) - 1

    #=========================== G =============================#
    ## General features: always emitted
    features = {
        'lemma': lemma,
        'postag': postag,
    }
    if has_prev:
        word0, lemma0, postag0 = annotations(i - 1)
        features['-1:lemma'] = lemma0
        features['-1:postag'] = postag0
    if has_next:
        word2, lemma2, postag2 = annotations(i + 1)
        features['+1:lemma'] = lemma2
        features['+1:postag'] = postag2

    #=========================== S1 =============================#
    ## Inner word features
    # NOTE: these used to be written as `features[key]: value`, which
    # Python parses as an annotation and never stores, so the S1/S2/S3
    # flags had no effect. They are real assignments now.
    if S1:
        features['hUpper'] = hUpper(word)
        features['hLower'] = hLower(word)
        features['hGreek'] = hGreek(word)
        features['symb'] = symb(word)
        # first character of lemma and POS tag
        features['lemma[:1]'] = lemma[:1]
        features['postag[:1]'] = postag[:1]
        # first two characters of lemma and POS tag
        features['lemma[:2]'] = lemma[:2]
        features['postag[:2]'] = postag[:2]

    #=========================== S2 =============================#
    ## Complete word features
    if S2:
        features['word'] = word
        features['isUpper'] = word.isupper()
        features['isLower'] = word.islower()
        features['isGreek'] = isGreek(word)
        features['isNumber'] = word.isdigit()
        if has_prev:
            features['-1:word'] = word0
        if has_next:
            features['+1:word'] = word2

    #=========================== S3 =============================#
    ## Extended context features
    if S3:
        # at least two tokens precede the current one
        if i > 1:
            _, lemma01, post01 = annotations(i - 2)
            features['-2:lemma'] = lemma01
            features['-2:post'] = post01
        # at least two tokens follow the current one
        if i < len(sent) - 2:
            _, lemma02, post02 = annotations(i + 2)
            features['+2:lemma'] = lemma02
            features['+2:post'] = post02
    return features
def sent2features(sent, S1, S2, S3):
    """Return one feature dictionary per token of *sent*, in token order.

    The S1, S2 and S3 flags are passed unchanged to ``word2features``.
    """
    feature_rows = []
    for position, _ in enumerate(sent):
        feature_rows.append(word2features(sent, position, S1, S2, S3))
    return feature_rows
def sent2labels(sent):
    """Return the label of every token in *sent*.

    The label is the fourth pipe-delimited field (index 3) of each token.
    """
    labels = []
    for annotated_token in sent:
        fields = annotated_token.split('|')
        labels.append(fields[3])
    return labels
def sent2tokens(sent):
    """Return the surface word of every token in *sent*.

    Tokens in this file are pipe-delimited strings, so the word is the
    first field. The previous implementation unpacked each element as a
    3-tuple, which fails on those strings. Tuple or list elements are
    still accepted, and their first item is returned.
    """
    return [
        elem.split('|')[0] if isinstance(elem, str) else elem[0]
        for elem in sent
    ]
def print_transitions(trans_features, f):
    """Write one formatted line per transition to the writable object *f*.

    *trans_features* yields ``((label_from, label_to), weight)`` pairs.
    """
    row_template = "{:6} -> {:7} {:0.6f}\n"
    for transition, weight in trans_features:
        origin, destination = transition
        f.write(row_template.format(origin, destination, weight))
def print_state_features(state_features, f):
    """Write one formatted line per state feature to the text stream *f*.

    *state_features* yields ``((attribute, label), weight)`` pairs.

    The attribute is written as text. Encoding it to bytes first (a
    Python 2 habit) made Python 3 print the bytes repr, e.g.
    ``b'lemma:\\xce\\xb4cra'``, instead of the readable attribute.
    Non-ASCII attributes require *f* to use an encoding that can
    represent them.
    """
    for (attr, label), weight in state_features:
        f.write("{:0.6f} {:8} {}\n".format(weight, label, attr))
__author__ = 'egaytan'
##################################################################
# MAIN PROGRAM #
##################################################################
def _read_sentences(path, excluded_lemmas):
    """Read a corpus file into a list of sentences (lists of raw tokens).

    Each line of the file is one sentence of whitespace-separated,
    pipe-delimited tokens. A token is dropped when its lemma (second
    field) is in *excluded_lemmas*. Tokens are only parsed when that
    collection is non-empty. Lines without tokens give empty sentences.
    """
    sentences = []
    with open(path, "r", encoding="utf-8") as iFile:
        for line in iFile:
            listLine = []
            for token in line.strip('\n').split():
                if excluded_lemmas and token.split('|')[1] in excluded_lemmas:
                    continue
                listLine.append(token)
            sentences.append(listLine)
    return sentences


if __name__ == "__main__":
    ## Defining parameters
    parser = OptionParser()
    parser.add_option("--inputPath", dest="inputPath", help="Path of training data set", metavar="PATH")
    parser.add_option("--outputPath", dest="outputPath", help="Output path to place output files", metavar="PATH")
    parser.add_option("--trainingFile", dest="trainingFile", help="File with training data set", metavar="FILE")
    parser.add_option("--testFile", dest="testFile", help="File with test data set", metavar="FILE")
    parser.add_option("--Gridname", dest="Gridname", help="Report number run", metavar="FILE")
    parser.add_option("--version", dest="version", help="Report file", metavar="FILE")
    parser.add_option("--S1", dest="S1", help="Future Type", action="store_true", default=False)
    parser.add_option("--S2", dest="S2", help="Future Type", action="store_true", default=False)
    parser.add_option("--S3", dest="S3", help="Future Type", action="store_true", default=False)
    parser.add_option("--excludeStopWords", dest="excludeStopWords", help="Exclude stop words", action="store_true", default=False)
    parser.add_option("--excludeSymbols", dest="excludeSymbols", help="Exclude punctuation marks", action="store_true", default=False)
    (options, args) = parser.parse_args()
    if args:
        # parser.error() prints the usage message and exits by itself.
        parser.error("Unexpected positional arguments: " + " ".join(args))
    # Fail with a usage error rather than a TypeError further down.
    for required in ("inputPath", "outputPath", "trainingFile", "testFile"):
        if getattr(options, required) is None:
            parser.error("Missing required option --" + required)

    print('-------------------------------- PARAMETERS --------------------------------')
    print("Path of training data set: " + options.inputPath)
    print("File with training data set: " + str(options.trainingFile))
    print("Path of test data set: " + options.inputPath)
    print("File with test data set: " + str(options.testFile))
    print("Exclude stop words: " + str(options.excludeStopWords))
    print("Levels: " + str(options.S1) + " " + str(options.S2) + " " + str(options.S3))
    print("Report file: " + str(options.version))
    symbols = ['.', ',', ':', ';', '?', '!', '\'', '"', '<', '>', '(', ')', '-', '_', '/', '\\', '¿', '¡', '+', '{',
               '}', '[', ']', '*', '%', '$', '#', '&', '°', '`', '...']
    print("Exclude symbols: " + str(options.excludeSymbols))

    print('-------------------------------- PROCESSING --------------------------------')
    print('Reading corpus...')
    t0 = time()
    # Lemmas to drop while reading; the stop-word corpus is loaded only
    # when it is actually requested.
    excluded_lemmas = set()
    if options.excludeStopWords:
        excluded_lemmas.update(stopwords.words('english'))
    if options.excludeSymbols:
        excluded_lemmas.update(symbols)

    sentencesTrainingData = _read_sentences(
        os.path.join(options.inputPath, options.trainingFile), excluded_lemmas)
    print(" Sentences training data: " + str(len(sentencesTrainingData)))
    sentencesTestData = _read_sentences(
        os.path.join(options.inputPath, options.testFile), excluded_lemmas)
    print(" Sentences test data: " + str(len(sentencesTestData)))
    print("Reading corpus done in: %fs" % (time() - t0))

    # Show the features of the first token of each data set as a sanity
    # check; skipped when the first sentence is missing or empty.
    for sentences in (sentencesTrainingData, sentencesTestData):
        if sentences and sentences[0]:
            print(sent2features(sentences[0], options.S1, options.S2, options.S3)[0])

    t0 = time()
    X_train = [sent2features(s, options.S1, options.S2, options.S3) for s in sentencesTrainingData]
    y_train = [sent2labels(s) for s in sentencesTrainingData]
    X_test = [sent2features(s, options.S1, options.S2, options.S3) for s in sentencesTestData]
    y_test = [sent2labels(s) for s in sentencesTestData]

    # Hyperparameter optimization: c1 and c2 are sampled by the random
    # search below instead of being fixed here.
    crf = sklearn_crfsuite.CRF(
        algorithm='lbfgs',
        max_iterations=100,
        all_possible_transitions=True
    )
    params_space = {
        'c1': scipy.stats.expon(scale=0.5),
        'c2': scipy.stats.expon(scale=0.05),
    }

    # Labels that are scored; 'O' is left out on purpose.
    labels = ['Gtype', 'Gversion', 'Med', 'Phase', 'Strain', 'Substrain', 'Supp', 'Technique', 'Temp', 'OD', 'Anti', 'Agit', 'Air', 'Vess', 'pH']

    # use the same metric for evaluation
    f1_scorer = make_scorer(metrics.flat_f1_score,
                            average='weighted', labels=labels)

    # search
    rs = RandomizedSearchCV(crf, params_space,
                            cv=10,
                            verbose=3,
                            n_jobs=-1,
                            n_iter=20,
                            scoring=f1_scorer)
    rs.fit(X_train, y_train)

    # Make sure the output folders exist before writing into them.
    reportsPath = os.path.join(options.outputPath, "reports")
    modelsPath = os.path.join(options.outputPath, "models")
    os.makedirs(reportsPath, exist_ok=True)
    os.makedirs(modelsPath, exist_ok=True)

    nameReport = str(options.Gridname) + str(options.version) + '.txt'
    reportFile = os.path.join(reportsPath, "report_" + nameReport)
    with open(reportFile, mode="w", encoding="utf-8") as oFile:
        oFile.write("********** TRAINING AND TESTING REPORT **********\n")
        oFile.write("Training file: " + options.trainingFile + '\n')
        oFile.write('\n')
        oFile.write('best params:' + str(rs.best_params_) + '\n')
        oFile.write('best CV score:' + str(rs.best_score_) + '\n')
        oFile.write('model size: {:0.2f}M\n'.format(rs.best_estimator_.size_ / 1000000))
    print("Training done in: %fs" % (time() - t0))
    t0 = time()

    # Update best crf
    crf = rs.best_estimator_

    # Saving model
    print(" Saving training model...")
    t1 = time()
    # S3 is part of the name so runs that differ only in S3 do not
    # overwrite each other's model.
    nameModel = ('model_S1_' + str(options.S1) + '_S2_' + str(options.S2)
                 + '_S3_' + str(options.S3) + str(options.version) + '.mod')
    joblib.dump(crf, os.path.join(modelsPath, nameModel))
    print(" Saving training model done in: %fs" % (time() - t1))

    # Evaluation against test data
    y_pred = crf.predict(X_test)
    print("*********************************")
    print("Prediction done in: %fs" % (time() - t0))

    with open(reportFile, mode="a", encoding="utf-8") as oFile:
        oFile.write('\n')
        oFile.write("Flat F1: " + str(metrics.flat_f1_score(y_test, y_pred, average='weighted', labels=labels)))
        oFile.write('\n')
        sorted_labels = sorted(
            labels,
            key=lambda name: (name[1:], name[0])
        )
        oFile.write(metrics.flat_classification_report(
            y_test, y_pred, labels=sorted_labels, digits=3
        ))
        oFile.write('\n')

        oFile.write("\nTop likely transitions:\n")
        print_transitions(Counter(crf.transition_features_).most_common(50), oFile)
        oFile.write('\n')

        oFile.write("\nTop unlikely transitions:\n")
        print_transitions(Counter(crf.transition_features_).most_common()[-50:], oFile)
        oFile.write('\n')

        oFile.write("\nTop positive:\n")
        print_state_features(Counter(crf.state_features_).most_common(200), oFile)
        oFile.write('\n')

        oFile.write("\nTop negative:\n")
        print_state_features(Counter(crf.state_features_).most_common()[-200:], oFile)
        oFile.write('\n')
No preview for this file type
********** TRAINING AND TESTING REPORT **********
Training file: training-data-set-70_v4.txt
best params:{'c1': 0.025778789677348212, 'c2': 0.0018714562014074947}
best CV score:0.8085092726655764
model size: 0.08M
Flat F1: 0.8137583902797803
precision recall f1-score support
OD 1.000 0.405 0.577 37
pH 1.000 1.000 1.000 12
Technique 0.880 1.000 0.936 22
Med 0.852 0.912 0.881 57
Temp 0.818 1.000 0.900 18
Vess 0.000 0.000 0.000 0
Agit 0.000 0.000 0.000 0
Phase 1.000 0.947 0.973 19
Air 0.920 0.742 0.821 62
Anti 1.000 0.444 0.615 9
Strain 1.000 1.000 1.000 1
Gtype 0.905 0.811 0.856 106
Substrain 0.000 0.000 0.000 1
Supp 0.883 0.669 0.762 136
Gversion 0.000 0.000 0.000 0
avg / total 0.904 0.760 0.814 480
Top likely transitions:
OD -> OD 6.458139
Agit -> Agit 6.258931
Anti -> Anti 6.237003
Temp -> Temp 6.159227
Med -> Med 5.670034
O -> O 5.195209
Air -> Air 5.178796
Gversion -> Gversion 5.056321
Gtype -> Gtype 4.897516
Phase -> Phase 4.866866
Technique -> Technique 4.086604
Supp -> Supp 3.969422
pH -> pH 2.272089
Substrain -> Gtype 1.512606
O -> Supp 1.281866
Gtype -> Supp 1.254695
Technique -> Air 1.119472
O -> Technique 1.098084
O -> Anti 0.739732
O -> Gtype 0.595581
O -> Gversion 0.361965
O -> Temp 0.359101
Gtype -> pH 0.252154
O -> Strain 0.203795
Air -> O 0.106227
Gtype -> Air 0.052216
O -> pH 0.007039
Gversion -> Supp -0.000001
pH -> Supp -0.000232
Strain -> O -0.001882
Phase -> Supp -0.055890
Phase -> Air -0.056319
OD -> Phase -0.082524
O -> Phase -0.121225
O -> OD -0.123240
Gtype -> Phase -0.163289
Temp -> O -0.187600
Med -> O -0.241653
Gtype -> Gversion -0.242298
pH -> O -0.266321
OD -> Supp -0.290317
Air -> Gtype -0.354319
Phase -> Temp -0.371875
Anti -> O -0.373128
Phase -> pH -0.374040
Phase -> Gtype -0.388060
Technique -> Gtype -0.398116
Gversion -> O -0.453455
Air -> Phase -0.500367
Supp -> Phase -0.539395
Top unlikely transitions:
Med -> O -0.241653
Gtype -> Gversion -0.242298
pH -> O -0.266321
OD -> Supp -0.290317
Air -> Gtype -0.354319
Phase -> Temp -0.371875
Anti -> O -0.373128
Phase -> pH -0.374040
Phase -> Gtype -0.388060
Technique -> Gtype -0.398116
Gversion -> O -0.453455
Air -> Phase -0.500367
Supp -> Phase -0.539395
O -> Med -0.558012
Technique -> O -0.563680
Technique -> pH -0.572119
OD -> Air -0.575190
Phase -> O -0.591227
Med -> Anti -0.624886
Anti -> OD -0.642709
O -> Air -0.644641
Phase -> Technique -0.707027
Supp -> pH -0.714103
Gtype -> O -0.767223
Supp -> Gversion -0.770430
Supp -> Anti -0.833703
Supp -> Technique -0.866289
Gtype -> Med -0.879350
Supp -> OD -0.892817
Agit -> O -0.961537
Phase -> Med -0.969283
Temp -> Med -1.069687
Air -> Temp -1.156985
Air -> Supp -1.269357
OD -> O -1.314886
Supp -> Air -1.325800
Technique -> OD -1.524590
OD -> Med -1.583386
Supp -> Gtype -1.630169
Gtype -> OD -1.657535
Supp -> O -1.680041
Supp -> Temp -1.709825
Air -> Med -1.823169
OD -> Gtype -1.852919
Gtype -> Anti -1.909177
Substrain -> O -1.923623
Air -> OD -2.860427
Med -> Supp -2.964145
Supp -> Med -3.290588
Phase -> OD -4.227426
Top positive:
11.662855 Supp b'lemma:Iron'
10.511953 O b'lemma:_'
10.230310 Phase b'lemma:stationary'
9.183606 Air b'lemma:anaerobic'
9.126291 O b'lemma:1'
8.974301 Med b'lemma:MOPS'
8.702900 O b'lemma:rpob'
8.676761 Phase b'lemma:mid-log'
8.568886 Air b'lemma:aerobic'
8.230748 Technique b'lemma:chipseq'
8.195390 Technique b'lemma:ChIP-exo'
8.058605 O b'-1:lemma:tag'
7.748813 Supp b'lemma:nitrate'
7.383352 Med b'lemma:LB'
7.258212 O b'lemma:2'
7.185114 Gversion b'lemma:asm584v2'
7.171158 O b'lemma:3'
7.148092 Strain b'lemma:k-12'
7.115910 Supp b'lemma:pq'
7.026371 O b'lemma:for'
7.014323 Gtype b'lemma:arca8myc'
7.008078 Supp b'+1:lemma:\xc2\xb5m'
6.977598 Gtype b'lemma:\xce\xb4cra'
6.880186 Gtype b'lemma:flag-tag'
6.880186 Gtype b'-1:lemma:c-terminal'
6.658138 Gtype b'lemma:delta-arca'
6.634178 O b'lemma:rep3'
6.604885 O b'lemma:rep1'
6.417248 O b'lemma:rep2'
6.415638 O b'lemma:Custom'
6.361925 Gtype b'lemma:fnr8myc'
6.326103 Supp b'lemma:acetate'
6.318547 O b'-1:lemma:ChIP-exo'
6.305986 OD b'+1:lemma:stationary'
6.292137 Air b'lemma:Aerobic'
6.239583 Supp b'-1:lemma:Cra'
6.162599 Air b'-1:lemma:ChIP-Seq'
6.138632 O b'-1:lemma:0.3-0.35'
6.069303 O b'lemma:b'
6.025530 Substrain b'lemma:mg1655'
6.018017 OD b'lemma:od450'
5.968029 Anti b'lemma:none'
5.951239 Technique b'lemma:rna-seq'
5.870411 Gtype b'lemma:delta-fnr'
5.859331 Supp b'lemma:rifampicin'
5.853962 O b'lemma:with'
5.787035 Supp b'lemma:Fe'
5.696867 O b'lemma:Cra'
5.666336 Gtype b'lemma:wt'
5.634727 O b'lemma:\xcf\x8332'
5.556439 Anti b'lemma:seqa'
5.495699 Technique b'lemma:rnaseq'
5.475249 O b'lemma:-'
5.461891 O b'postag::'
5.444655 Gtype b'-1:lemma:\xe2\x88\x86'
5.439532 Supp b'lemma:nh4cl'
5.347691 Med b'+1:lemma:0.4'
5.306864 Gtype b'+1:lemma:type'
5.249286 Supp b'lemma:arginine'
5.228272 Anti b'lemma:anti-myc'
5.189623 Gtype b'lemma:\xce\xb4fur'
5.112523 Med b'lemma:lb'
5.068530 O b'lemma:chip'
5.060776 O b'lemma:CEL'
5.009251 O b'lemma:a'
4.960032 Temp b'-1:lemma:\xcf\x8332'
4.958666 O b'-1:lemma:Aerobic'
4.897653 Supp b'-1:lemma:+'
4.868444 O b'lemma:.'
4.868444 O b'postag:.'
4.830243 Supp b'lemma:glucose'
4.820752 Supp b'lemma:no3'
4.796496 Supp b'lemma:dpd'
4.791818 O b'+1:postag:RB'
4.789121 O b'-1:lemma:anaerobic'
4.777256 Temp b'-1:lemma:37'
4.758659 Technique b'-1:lemma:IP'
4.751186 Gtype b'lemma:wild-type'
4.743291 Supp b'+1:lemma:1'
4.741868 Vess b'lemma:flask'
4.741868 Vess b'-1:lemma:warm'
4.683816 O b'-1:lemma:glucose'
4.643413 Anti b'lemma:anti-rpos'
4.633816 Technique b'lemma:chip-seq'
4.629451 O b'lemma:oxyr'
4.567353 Gtype b'lemma:\xe2\x88\x86'
4.557296 Supp b'lemma:fructose'
4.556286 O b'lemma:affyexp'
4.553249 OD b'lemma:od600'
4.531615 Gtype b'lemma:type'
4.489891 O b'-1:lemma:0.3'
4.480766 O b'lemma:2-3'
4.479384 Gversion b'lemma:nc'
4.467099 Technique b'+1:lemma:chip-exo'
4.434426 Gtype b'lemma:\xce\xb4soxs'
4.430899 O b'-1:lemma:dpd'
4.412502 Strain b'+1:lemma:substr'
4.408579 O b'lemma:or'
4.394512 O b'+1:lemma:o.d.'
4.374031 O b'+1:lemma:sparging'
4.312161 Phase b'+1:lemma:for'
4.261060 O b'lemma:chip-arca'
4.215222 Air b'lemma:anerobically'
4.197581 Technique b'+1:lemma:rna-seq'
4.178924 Supp b'+1:lemma:_'
4.178128 Gversion b'lemma:chip-seq'
4.162694 Technique b'lemma:ChIP-Seq'
4.136869 Supp b'+1:lemma:hour'
4.135692 O b'+1:lemma:od600'
4.120055 O b'postag:IN'
4.088450 Gtype b'lemma:nsrr'
4.087944 Med b'-1:lemma:ml'
4.053773 Gtype b'+1:lemma:with'
4.022074 Gtype b'lemma:\xce\xb4ompr'
4.019457 Gtype b'lemma:WT'
3.994335 Gversion b'-1:lemma:nc'
3.986008 pH b'lemma:ph5'
3.986008 pH b'+1:lemma:.5'
3.963426 O b'lemma:ompr'
3.957893 Med b'+1:lemma:2.0'
3.957602 O b'-1:lemma:lb'
3.947299 Med b'+1:lemma:contain'
3.938942 Gtype b'lemma:pk4854'
3.936645 Supp b'lemma:Leu'
3.914299 Supp b'+1:lemma:2'
3.906482 O b'lemma:Fur'
3.904505 Gversion b'lemma:u00096'
3.904505 Gversion b'+1:lemma:.2'
3.885731 Technique b'-1:lemma:input'
3.877441 O b'-1:lemma:\xc2\xb0c'
3.864432 Gversion b'lemma:.2'
3.864432 Gversion b'-1:lemma:u00096'
3.854046 Substrain b'+1:lemma:phtpg'
3.853262 O b'lemma:s'
3.836382 O b'-1:lemma:type'
3.827763 O b'lemma:soxs'
3.827763 O b'lemma:soxr'
3.789271 Technique b'-1:lemma:chip-exo'
3.787785 Gtype b'lemma:deltaseqa'
3.787785 Gtype b'-1:lemma:old'
3.767682 O b'+1:lemma:43'
3.736088 Air b'lemma:Anaerobic'
3.723469 O b'lemma:argr'
3.707152 Med b'lemma:L'
3.707152 Med b'+1:lemma:broth'
3.693116 O b'lemma:purr'
3.692833 Gtype b'lemma:\xce\xb4oxyr'
3.687510 Gtype b'-1:lemma:ptac'
3.668468 Gtype b'lemma:\xce\xb4soxr'
3.661904 Supp b'lemma:Adenine'
3.646336 O b'+1:lemma:chip-seq'
3.633392 O b'lemma:Lrp'
3.623734 Supp b'lemma:nacl'
3.589169 O b'lemma:chip-fnr'
3.581632 Med b'lemma:glucose'
3.560733 Supp b'lemma:iptg'
3.552695 O b'lemma:delta'
3.509922 O b'postag:VBN'
3.445521 Med b'lemma:m63'
3.434419 Temp b'lemma:37'
3.388210 Temp b'-1:lemma:43'
3.386139 Gtype b'+1:lemma:knock-out'
3.379573 Temp b'-1:lemma:sample'
3.363114 Anti b'+1:lemma:antibody'
3.344587 Gtype b'+1:lemma:flagtag'
3.343995 Gtype b'lemma:dfnr'
3.336085 Med b'+1:lemma:minimal'
3.329171 Gtype b'-1:lemma:rpob'
3.326850 Supp b'+1:lemma:Deficient'
3.310593 Phase b'-1:lemma:until'
3.304072 O b'lemma:supplement'
3.302560 O b'+1:lemma:37'
3.285761 Gtype b'lemma:ptac'
3.282886 OD b'lemma:0.3'
3.280884 Gversion b'lemma:000913'
3.277170 Temp b'lemma:\xc2\xb0c'
3.267178 pH b'lemma:.5'
3.267178 pH b'-1:lemma:ph5'
3.266678 Air b'lemma:anaerobically'
3.258256 Med b'-1:lemma:LB'
3.245906 O b'lemma:at'
3.227428 Temp b'lemma:43'
3.174057 O b'postag:DT'
3.124069 O b'-1:lemma:000913'
3.122174 O b'-1:lemma:min'
3.116791 Air b'postag:RB'
3.107732 O b'lemma:pt7'
3.040595 Gtype b'lemma:\xce\xb4gadw'
3.006824 O b'-1:lemma:stpa'
2.984377 OD b'-1:lemma:~'
2.941221 Supp b'-1:lemma:\xc2\xb5m'
2.928646 Gversion b'lemma:_'
2.927017 Gtype b'lemma:soxs-8myc'
2.927017 Gtype b'lemma:soxr-8myc'
2.916978 O b'+1:lemma:mid-log'
2.899360 OD b'lemma:0.35'
2.886577 Supp b'+1:lemma:iptg'
2.876433 Supp b'lemma:leucine'
2.857437 Gtype b'lemma:purr-8myc'
2.854621 O b'postag:SYM'
Top negative:
-0.011735 OD b'+1:lemma:0.4'
-0.012143 O b'+1:lemma:0.4'
-0.012737 OD b'+1:postag:DT'
-0.013567 Gtype b'+1:postag:IN'
-0.014658 Supp b'-1:postag:NN'
-0.015388 O b'+1:lemma:co2'
-0.019498 O b'+1:lemma:25'
-0.024384 O b'-1:lemma:mm'
-0.026473 OD b'+1:lemma:~'
-0.029376 Supp b'-1:lemma:and'
-0.030278 O b'lemma:\xc2\xb0c'
-0.033870 Gtype b'-1:lemma:mg1655'
-0.035964 O b'+1:lemma:rep1'
-0.036870 Anti b'-1:postag:NN'
-0.037674 Med b'-1:lemma:m63'
-0.041457 O b'-1:lemma:for'
-0.042315 O b'-1:lemma:in'
-0.043253 O b'+1:lemma:5'
-0.054536 O b'+1:lemma:antibody'
-0.056675 OD b'+1:postag:CD'
-0.057484 O b'-1:postag:VBN'
-0.057779 O b'postag:RB'
-0.061765 O b'-1:lemma:at'
-0.063699 O b'-1:lemma:of'
-0.073053 Phase b'+1:postag:NN'
-0.081144 O b'-1:lemma:o2'
-0.083185 O b'+1:lemma:-lcb-'
-0.085303 O b'lemma:o2'
-0.090545 Gversion b'-1:postag:NN'
-0.090810 Gtype b'lemma:fnr'
-0.094072 O b'-1:postag:IN'
-0.095560 O b'+1:lemma:300'
-0.095835 O b'+1:lemma:culture'
-0.099835 Gtype b'-1:postag:NN'
-0.104011 Gtype b'-1:postag:DT'
-0.104295 O b'-1:lemma:-lrb-'
-0.104392 O b'+1:postag:CD'
-0.106605 O b'lemma:e.'
-0.114926 O b'lemma:aerobically'
-0.123054 Supp b'-1:lemma:-'
-0.124613 O b'-1:postag:-LRB-'
-0.130629 O b'lemma:lb'
-0.135809 O b'-1:lemma:delta'
-0.137484 O b'+1:lemma:delta'
-0.145435 O b'-1:lemma:30'
-0.147772 Phase b'lemma:pahse'
-0.151829 O b'-1:lemma:e.'
-0.152470 Phase b'+1:lemma:pahse'
-0.153511 O b'+1:lemma:arginine'
-0.156235 Temp b'-1:postag:IN'
-0.165736 O b'lemma:n2'
-0.167320 O b'lemma:m63'
-0.171593 O b'lemma:mg1655'
-0.171854 O b'lemma:mg/ml'
-0.171854 O b'-1:lemma:150'
-0.172097 O b'+1:lemma:grow'
-0.174184 O b'+1:lemma:-rrb-'
-0.176692 O b'lemma:medium'
-0.190407 O b'-1:lemma:n2'
-0.193709 Technique b'-1:lemma::'
-0.197195 Supp b'+1:lemma:rifampicin'
-0.199110 Supp b'+1:lemma:acetate'
-0.211904 Supp b'lemma:and'
-0.212434 O b'-1:lemma:fresh'
-0.215208 Med b'+1:postag:CC'
-0.223718 Supp b'+1:postag:VBN'
-0.225529 Gtype b'+1:lemma:\xe2\x88\x86'
-0.227506 O b'-1:lemma:iptg'
-0.228252 O b'lemma:grow'
-0.244665 O b'+1:postag:-RRB-'
-0.248414 Phase b'-1:lemma:at'
-0.252297 O b'lemma:co2'
-0.262234 O b'+1:lemma:until'
-0.264339 Med b'+1:postag:NNS'
-0.271018 Supp b'lemma:2'
-0.271420 O b'+1:lemma:mm'
-0.274442 Air b'-1:lemma:or'
-0.275533 Air b'+1:postag:NNP'
-0.282985 O b'lemma:k-12'
-0.283867 pH b'postag:NN'
-0.289565 O b'+1:postag:IN'
-0.292519 Gtype b'postag:CD'
-0.299546 Supp b'-1:postag:VBG'
-0.302599 Supp b'-1:postag:NNP'
-0.304446 OD b'+1:postag:NN'
-0.304579 O b'-1:lemma:0.1'
-0.308132 O b'lemma:30'
-0.315401 O b'+1:postag:VBG'
-0.328419 O b'lemma:\xe2\x88\x86'
-0.334410 O b'+1:lemma:phase'
-0.339050 Med b'postag:CD'
-0.339573 O b'lemma:cell'
-0.349122 Gversion b'+1:postag:NN'
-0.350590 O b'+1:lemma:cell'
-0.353274 O b'-1:lemma:from'
-0.356173 O b'-1:lemma:minimal'
-0.365952 Technique b'-1:postag::'
-0.377481 O b'lemma:m9'
-0.387016 O b'-1:lemma:um'
-0.387016 O b'+1:lemma:paraquat'
-0.396762 O b'lemma:minimal'
-0.399415 Med b'+1:postag:IN'
-0.400174 O b'-1:lemma:.'
-0.400174 O b'-1:postag:.'
-0.405899 Gtype b'+1:lemma:fnr'
-0.420549 Supp b'+1:postag:IN'
-0.426457 Temp b'postag:NN'
-0.443672 Med b'+1:postag:NN'
-0.454874 O b'-1:lemma:with'
-0.455975 O b'-1:lemma:od600'
-0.460725 O b'lemma:od600'
-0.476927 O b'lemma:purify'
-0.513975 O b'lemma:glucose'
-0.514369 O b'+1:lemma:shake'
-0.516980 O b'lemma:anaerobic'
-0.543673 O b'+1:lemma:dissolve'
-0.575481 OD b'+1:postag:CC'
-0.575983 O b'+1:lemma:%'
-0.578458 O b'+1:lemma:\xc2\xb0c'
-0.579234 O b'+1:lemma:.'
-0.579234 O b'+1:postag:.'
-0.590831 O b'-1:lemma:until'
-0.593646 O b'+1:lemma:minimal'
-0.598050 O b'-1:lemma:rifampicin'
-0.599013 Anti b'+1:postag:JJ'
-0.600816 O b'-1:lemma:grow'
-0.609501 O b'+1:postag:NNS'
-0.616694 O b'lemma:mid-log'
-0.622505 OD b'lemma:-lrb-'
-0.681173 Temp b'postag:JJ'
-0.682190 O b'lemma:\xce\xb4fur'
-0.683721 Phase b'-1:postag:NN'
-0.699842 O b'-1:lemma:cra'
-0.699863 O b'+1:lemma:or'
-0.706770 OD b'postag:-LRB-'
-0.726480 Anti b'+1:lemma:anti-fur'
-0.739377 O b'+1:lemma:0.3'
-0.759701 O b'lemma:0.2'
-0.767286 O b'-1:lemma:mid-log'
-0.769781 O b'lemma:dissolve'
-0.773573 O b'+1:lemma:_'
-0.796461 OD b'postag:JJ'
-0.804019 O b'+1:lemma:fecl2'
-0.817322 O b'+1:lemma:c'
-0.823112 O b'-1:lemma:0.2'
-0.856156 O b'lemma:37'
-0.874673 O b'-1:lemma:ml'
-0.897793 O b'lemma:dpd'
-0.928954 O b'lemma:0.3'
-0.935642 O b'lemma:media'
-0.953224 Supp b'postag:CC'
-0.962174 O b'lemma:150'
-0.962174 O b'+1:lemma:mg/ml'
-0.991546 Temp b'+1:lemma:to'
-0.991546 Temp b'+1:postag:TO'
-0.996229 O b'-1:lemma:co2'
-1.000642 O b'lemma:of'
-1.005860 O b'-1:postag::'
-1.099013 O b'lemma:20'
-1.117230 O b'-1:lemma:~'
-1.139604 Air b'postag:NN'
-1.144681 OD b'+1:lemma:and'
-1.201437 O b'-1:lemma:dissolve'
-1.201437 O b'+1:lemma:methanol'
-1.236667 O b'-1:lemma:sample'
-1.244866 O b'lemma:wt'
-1.252945 O b'+1:lemma:g/l'
-1.255778 O b'-1:lemma:37'
-1.264177 Med b'-1:postag:IN'
-1.288204 Supp b'-1:lemma:%'
-1.302552 O b'lemma:0.1'
-1.371688 O b'lemma:anaerobically'
-1.410598 O b'lemma:phase'
-1.489551 O b'+1:lemma:+'
-1.506032 O b'+1:lemma:supplement'
-1.532398 O b'lemma:2h'
-1.532398 O b'-1:lemma:additional'
-1.534894 Air b'+1:postag:JJ'
-1.548281 O b'+1:lemma:in'
-1.565843 Anti b'postag:NNP'
-1.638452 O b'postag:VBP'
-1.657980 O b'lemma:rifampicin'
-1.822232 O b'+1:lemma:at'
-1.836234 O b'-1:postag:VBG'
-1.854980 O b'-1:lemma:IP'
-1.925222 O b'-1:lemma:2'
-1.936575 O b'lemma:fecl2'
-1.945977 Air b'-1:postag:JJ'
-2.044512 O b'-1:lemma:nsrr'
-2.267485 Phase b'-1:postag:JJ'
-2.297463 O b'-1:lemma:rpob'
-2.299343 O b'+1:lemma:hour'
-2.321612 O b'lemma:methanol'
-2.467134 O b'-1:lemma:ompr'
-2.690673 Supp b'postag:JJ'
-3.133082 O b'+1:lemma:2'
-3.547425 Phase b'postag:JJ'
-3.603436 O b'+1:lemma:1'
-4.365017 O b'-1:lemma:_'
-5.016691 O b'-1:lemma::'
********** TRAINING AND TESTING REPORT **********
Training file: training-data-set-70_v4.txt
best params:{'c1': 0.20645497959678813, 'c2': 0.017572644849766363}
best CV score:0.7977309817293199
model size: 0.06M
Flat F1: 0.784234402165812
precision recall f1-score support
OD 1.000 0.405 0.577 37
pH 1.000 1.000 1.000 12
Technique 0.952 0.909 0.930 22
Med 0.800 0.842 0.821 57
Temp 0.818 1.000 0.900 18
Vess 0.000 0.000 0.000 0
Agit 0.000 0.000 0.000 0
Phase 1.000 0.895 0.944 19
Air 0.807 0.742 0.773 62
Anti 1.000 0.444 0.615 9
Strain 1.000 1.000 1.000 1
Gtype 0.866 0.792 0.828 106
Substrain 0.000 0.000 0.000 1
Supp 0.818 0.662 0.732 136
Gversion 0.000 0.000 0.000 0
avg / total 0.859 0.740 0.784 480
Top likely transitions:
Agit -> Agit 6.425591
OD -> OD 5.826527
Temp -> Temp 5.548786
Med -> Med 5.385181
Anti -> Anti 5.274493
Air -> Air 5.273813
Phase -> Phase 4.728944
Gtype -> Gtype 4.437920
Gversion -> Gversion 4.349816
O -> O 4.268725
Supp -> Supp 3.971396
Technique -> Technique 3.834387
pH -> pH 2.157022
O -> Supp 1.775419
Gtype -> Supp 1.744617
Substrain -> Gtype 1.701513
Air -> O 1.650192
O -> Gtype 1.270693
O -> Technique 1.253973
Technique -> Air 0.977999
O -> Anti 0.884095
O -> Temp 0.762865
Med -> O 0.608226
OD -> Phase 0.415201
Temp -> O 0.355964
Gtype -> Air 0.349440
O -> Gversion 0.343072
Gtype -> pH 0.331974
O -> Med 0.236453
Phase -> O 0.133978
Supp -> O 0.132584
O -> Strain 0.015869
O -> Phase 0.000848
O -> OD -0.004725
Technique -> OD -0.009324
Technique -> O -0.010626
Technique -> pH -0.028457
Supp -> Air -0.068848
OD -> Air -0.084459
Supp -> OD -0.119773
OD -> Med -0.182306
Gtype -> Med -0.204270
Technique -> Gtype -0.212005
Gtype -> O -0.327673
Gtype -> Anti -0.356226
O -> Air -0.606839
OD -> O -0.668006
Agit -> O -0.720674
Substrain -> O -0.778994
Supp -> Med -0.779854
Top unlikely transitions:
Med -> Med 5.385181
Anti -> Anti 5.274493
Air -> Air 5.273813
Phase -> Phase 4.728944
Gtype -> Gtype 4.437920
Gversion -> Gversion 4.349816
O -> O 4.268725
Supp -> Supp 3.971396
Technique -> Technique 3.834387
pH -> pH 2.157022
O -> Supp 1.775419
Gtype -> Supp 1.744617
Substrain -> Gtype 1.701513
Air -> O 1.650192
O -> Gtype 1.270693
O -> Technique 1.253973
Technique -> Air 0.977999
O -> Anti 0.884095
O -> Temp 0.762865
Med -> O 0.608226
OD -> Phase 0.415201
Temp -> O 0.355964
Gtype -> Air 0.349440
O -> Gversion 0.343072
Gtype -> pH 0.331974
O -> Med 0.236453
Phase -> O 0.133978
Supp -> O 0.132584
O -> Strain 0.015869
O -> Phase 0.000848
O -> OD -0.004725
Technique -> OD -0.009324
Technique -> O -0.010626
Technique -> pH -0.028457
Supp -> Air -0.068848
OD -> Air -0.084459
Supp -> OD -0.119773
OD -> Med -0.182306
Gtype -> Med -0.204270
Technique -> Gtype -0.212005
Gtype -> O -0.327673
Gtype -> Anti -0.356226
O -> Air -0.606839
OD -> O -0.668006
Agit -> O -0.720674
Substrain -> O -0.778994
Supp -> Med -0.779854
Gtype -> OD -1.018491
Med -> Supp -1.466475
Phase -> OD -1.648011
Top positive:
7.908476 O b'lemma:_'
7.519798 Supp b'lemma:Iron'
6.893122 Air b'lemma:anaerobic'
6.757207 O b'lemma:1'
6.526738 Air b'lemma:aerobic'
6.246963 Phase b'lemma:stationary'
6.216490 Strain b'lemma:k-12'
6.159501 Technique b'lemma:ChIP-exo'
5.949001 Supp b'lemma:nitrate'
5.557355 O b'lemma:rpob'
5.556016 Phase b'lemma:mid-log'
5.526361 O b'lemma:2'
5.249738 Technique b'lemma:chipseq'
5.235064 Air b'-1:lemma:ChIP-Seq'
5.174037 O b'lemma:3'
5.050373 O b'postag:IN'
4.985897 Supp b'lemma:pq'
4.921604 Substrain b'lemma:mg1655'
4.872348 Gtype b'lemma:\xce\xb4cra'
4.847236 Gversion b'lemma:asm584v2'
4.762705 O b'-1:lemma:tag'
4.726906 OD b'lemma:od600'
4.660975 Gtype b'lemma:type'
4.628321 O b'lemma:Custom'
4.553702 O b'lemma:rep1'
4.515219 Med b'lemma:MOPS'
4.507185 Gversion b'lemma:nc'
4.464398 Air b'lemma:Aerobic'
4.453084 Med b'lemma:LB'
4.447532 Supp b'+1:lemma:\xc2\xb5m'
4.437438 O b'-1:lemma:ChIP-exo'
4.404385 O b'lemma:rep2'
4.373164 Supp b'lemma:nh4cl'
4.365179 Gtype b'lemma:flag-tag'
4.365179 Gtype b'-1:lemma:c-terminal'
4.354053 O b'postag::'
4.297412 O b'lemma:\xcf\x8332'
4.284115 Gtype b'+1:lemma:type'
4.278996 O b'lemma:b'
4.258389 Gtype b'-1:lemma:\xe2\x88\x86'
4.241615 Gtype b'lemma:wt'
4.228158 Med b'lemma:lb'
4.218565 Gtype b'lemma:arca8myc'
4.218302 Technique b'lemma:ChIP-Seq'
4.180599 Supp b'lemma:glucose'
4.160405 O b'lemma:rep3'
4.081991 O b'lemma:Cra'
3.932540 O b'lemma:a'
3.844330 Technique b'lemma:rna-seq'
3.814781 Gtype b'lemma:delta-arca'
3.626994 Gtype b'lemma:fnr8myc'
3.606800 Supp b'lemma:acetate'
3.573033 Supp b'lemma:Fe'
3.567339 Technique b'lemma:chip-seq'
3.563430 Supp b'lemma:rifampicin'
3.511453 OD b'lemma:od450'
3.511078 O b'lemma:.'
3.511078 O b'postag:.'
3.438725 Temp b'-1:lemma:sample'
3.393434 Supp b'lemma:dpd'
3.386786 Supp b'lemma:no3'
3.357689 Anti b'lemma:none'
3.339507 Med b'+1:lemma:0.4'
3.299536 Gtype b'lemma:\xe2\x88\x86'
3.288791 O b'+1:lemma:od600'
3.273758 Technique b'lemma:rnaseq'
3.258770 Gtype b'lemma:delta-fnr'
3.254592 O b'postag:VBN'
3.228840 Supp b'lemma:fructose'
3.219194 Gtype b'lemma:\xce\xb4fur'
3.208266 O b'lemma:CEL'
3.206942 Gtype b'lemma:nsrr'
3.177189 O b'-1:lemma:Aerobic'
3.129133 Technique b'-1:lemma:IP'
3.128048 O b'-1:lemma:0.3-0.35'
3.112441 O b'-1:lemma:type'
3.098709 Gversion b'-1:lemma:nc'
3.051729 Vess b'lemma:flask'
3.051729 Vess b'-1:lemma:warm'
3.008997 Supp b'+1:lemma:1'
2.987213 pH b'lemma:ph5'
2.987213 pH b'+1:lemma:.5'
2.977412 Anti b'lemma:seqa'
2.955347 Gtype b'lemma:wild-type'
2.951444 O b'lemma:-'
2.937212 O b'+1:postag:RB'
2.916028 Supp b'lemma:arginine'
2.840642 O b'-1:lemma:glucose'
2.820501 Gtype b'+1:lemma:ph5'
2.791333 Anti b'+1:lemma:antibody'
2.774879 Gversion b'lemma:chip-seq'
2.771457 Anti b'lemma:anti-myc'
2.765907 O b'lemma:chip'
2.735612 Gtype b'+1:lemma:flagtag'
2.722421 Med b'lemma:m63'
2.721104 O b'lemma:or'
2.699589 Temp b'lemma:\xc2\xb0c'
2.681423 O b'lemma:with'
2.669444 Med b'+1:lemma:minimal'
2.666834 Gversion b'lemma:u00096'
2.666834 Gversion b'+1:lemma:.2'
2.660974 Gtype b'+1:lemma:pq'
2.654887 OD b'+1:lemma:stationary'
2.644825 Gtype b'+1:lemma:with'
2.637258 Supp b'+1:lemma:2'
2.578615 Gtype b'lemma:\xce\xb4ompr'
2.569922 O b'-1:lemma:anaerobic'
2.559477 Temp b'+1:lemma:\xc2\xb0c'
2.558177 Supp b'-1:lemma:Cra'
2.555473 Gversion b'lemma:.2'
2.555473 Gversion b'-1:lemma:u00096'
2.519787 O b'postag:CC'
2.515611 Gtype b'-1:lemma:ptac'
2.474668 Technique b'-1:lemma:chip-exo'
2.465084 Gversion b'lemma:000913'
2.459269 O b'+1:lemma:pq'
2.454369 Strain b'+1:lemma:substr'
2.453079 O b'postag:DT'
2.447446 Temp b'-1:lemma:\xcf\x8332'
2.434523 Technique b'+1:lemma:chip-exo'
2.415981 Supp b'lemma:nacl'
2.400381 O b'+1:postag:NNP'
2.395375 O b'lemma:s'
2.378114 pH b'lemma:.5'
2.378114 pH b'-1:lemma:ph5'
2.360956 Med b'+1:lemma:2.0'
2.360197 Temp b'-1:lemma:43'
2.354343 O b'-1:lemma:lb'
2.333302 Gtype b'-1:lemma:rpob'
2.331044 Supp b'+1:lemma:_'
2.265756 O b'-1:lemma:l1'
2.256561 Supp b'-1:lemma:\xc2\xb5m'
2.253390 Air b'-1:lemma:-'
2.244110 O b'lemma:oxyr'
2.231965 Air b'lemma:anaerobically'
2.211436 Anti b'lemma:anti-rpos'
2.204750 O b'-1:lemma:stpa'
2.194353 OD b'-1:lemma:~'
2.189113 Gtype b'lemma:\xce\xb4soxs'
2.183419 Air b'postag:RB'
2.170778 Supp b'-1:lemma:+'
2.151725 Temp b'lemma:43'
2.127054 Temp b'lemma:37'
2.124907 Supp b'lemma:20'
2.115142 O b'+1:lemma:o.d.'
2.111143 Supp b'lemma:Leu'
2.107663 O b'lemma:Lrp'
2.107472 Temp b'-1:lemma:37'
2.094728 Gtype b'lemma:ptac'
2.073944 O b'+1:lemma:chip-seq'
2.062388 Supp b'lemma:iptg'
2.042062 O b'lemma:affyexp'
2.040790 Med b'lemma:media'
2.031837 O b'lemma:culture'
2.024586 O b'postag:VBG'
2.010374 Vess b'-1:postag:VBN'
2.009201 Gtype b'lemma:pk4854'
2.002974 Med b'lemma:glucose'
1.998019 Gtype b'+1:lemma:_'
1.980119 Supp b'+1:lemma:hour'
1.975173 O b'lemma:chip-arca'
1.959423 Supp b'-1:lemma:with'
1.922869 Technique b'-1:lemma:input'
1.921526 O b'lemma:ompr'
1.910950 Med b'lemma:broth'
1.910950 Med b'-1:lemma:L'
1.910014 Gtype b'lemma:deltaseqa'
1.910014 Gtype b'-1:lemma:old'
1.905010 O b'-1:lemma:0.3'
1.901042 OD b'lemma:0.3'
1.900964 Med b'lemma:L'
1.900964 Med b'+1:lemma:broth'
1.899018 Supp b'+1:lemma:Deficient'
1.893912 Air b'lemma:Anaerobic'
1.893451 Med b'-1:lemma:ml'
1.870173 O b'lemma:Fur'
1.848155 pH b'+1:postag:CD'
1.823256 O b'+1:lemma:coli'
1.818674 Gtype b'-1:lemma:nsrr'
1.808290 Substrain b'+1:lemma:phtpg'
1.805456 Phase b'-1:lemma:mid-log'
1.797810 Med b'-1:lemma:glucose'
1.796653 Supp b'lemma:methanol'
1.792416 O b'-1:lemma:\xc2\xb0c'
1.781537 Supp b'lemma:Adenine'
1.779649 Air b'-1:lemma:grow'
1.777565 Technique b'+1:lemma:rna-seq'
1.776039 OD b'lemma:phase'
1.775438 O b'lemma:condition'
1.763711 Air b'-1:lemma:phase'
1.762988 Gversion b'lemma:_'
1.759448 O b'-1:lemma:dpd'
1.736984 O b'lemma:genotype/variation'
1.716841 O b'lemma:argr'
1.709815 O b'postag:VBD'
1.687639 Air b'-1:lemma:co2'
1.686138 Gversion b'postag:CD'
1.676377 Phase b'-1:lemma:until'
1.672693 Gtype b'lemma:WT'
1.671406 Phase b'+1:lemma:for'
Top negative:
0.015104 Vess b'+1:postag:IN'
0.014949 OD b'-1:lemma:to'
0.014949 OD b'-1:postag:TO'
0.013481 O b'+1:lemma:nitrate'
0.012911 O b'-1:lemma:mm'
0.012853 O b'-1:lemma:m63'
0.012845 Phase b'+1:lemma:aerobically'
0.012531 Phase b'+1:postag:RB'
0.010531 OD b'+1:lemma:coli'
0.010197 Air b'postag:-LRB-'
0.007967 O b'+1:lemma:mm'
0.007668 Air b'-1:postag:-LRB-'
0.007530 O b'+1:lemma:wt'
0.007412 O b'+1:lemma:for'
0.006443 Med b'-1:lemma:g/l'
0.005977 Temp b'-1:postag:NN'
0.005028 OD b'-1:lemma:growth'
0.004138 Technique b'-1:postag:NN'
0.003008 O b'lemma:fresh'
0.002709 O b'+1:postag:SYM'
0.002549 Supp b'+1:lemma:deficient'
0.002275 Technique b'+1:postag:-RRB-'
0.002219 O b'+1:lemma:_'
0.001749 O b'lemma:dpd'
0.001500 Temp b'postag:VB'
0.001375 Gtype b'lemma:cra-8myc-tagged'
0.001375 Gtype b'lemma:fur-8myc'
0.000712 O b'-1:postag:VBZ'
0.000676 O b'+1:lemma:minute'
0.000589 Temp b'+1:postag:NNS'
0.000468 Supp b'lemma:um'
0.000451 Gtype b'lemma:Combined'
0.000451 Gtype b'+1:lemma:input'
0.000346 O b'postag:VBZ'
0.000331 Supp b'-1:lemma:250'
0.000251 O b'lemma:-80'
0.000219 Temp b'-1:postag:NNS'
0.000204 O b'+1:lemma:total'
0.000185 Air b'+1:postag::'
0.000154 O b'-1:lemma:to'
0.000154 O b'-1:postag:TO'
0.000148 Gversion b'-1:postag::'
0.000078 O b'+1:lemma:dpd'
0.000049 Gtype b'lemma:inducible'
0.000049 Gtype b'-1:lemma:carrying'
0.000049 Gtype b'+1:lemma:ptrc'
0.000030 OD b'+1:lemma:0.35'
0.000027 Anti b'+1:lemma:tag'
0.000010 OD b'lemma:mg1655'
0.000009 OD b'-1:lemma:k-12'
-0.000003 Gtype b'+1:postag:NNS'
-0.000145 O b'lemma:m63'
-0.000217 O b'-1:lemma:um'
-0.000217 O b'+1:lemma:paraquat'
-0.000321 Temp b'-1:lemma:\xc2\xb0c'
-0.000324 Agit b'postag:NN'
-0.000658 Gtype b'+1:lemma:\xe2\x88\x86'
-0.000729 Air b'-1:lemma:,'
-0.000729 Air b'-1:postag:,'
-0.001373 Gtype b'postag:CD'
-0.001602 Supp b'+1:lemma:-rrb-'
-0.001692 Supp b'+1:postag:-RRB-'
-0.002077 O b'lemma:lb'
-0.002511 O b'-1:lemma:,'
-0.002511 O b'-1:postag:,'
-0.003565 OD b'+1:postag:-LRB-'
-0.004827 O b'-1:lemma:g/l'
-0.007036 Air b'+1:lemma:25'
-0.007105 O b'+1:lemma:rep1'
-0.007327 Supp b'lemma:mm'
-0.008864 O b'+1:lemma:c'
-0.009788 Air b'-1:lemma:aerobically'
-0.011509 O b'-1:postag:VBN'
-0.011813 O b'+1:lemma:shake'
-0.014323 Air b'lemma:25'
-0.017276 Air b'+1:lemma:-lrb-'
-0.017627 O b'+1:lemma:aerobically'
-0.018161 O b'lemma:e.'
-0.020084 Med b'-1:postag:CD'
-0.021369 O b'lemma:glucose'
-0.023787 Gtype b'-1:postag:NN'
-0.034266 OD b'postag:JJ'
-0.035836 Med b'postag:CD'
-0.043749 O b'-1:lemma:o2'
-0.049597 Gtype b'-1:postag:DT'
-0.050879 O b'+1:lemma:300'
-0.051292 Phase b'-1:postag:JJ'
-0.052811 Phase b'-1:postag:NN'
-0.057812 O b'-1:lemma:rifampicin'
-0.064533 O b'-1:lemma:e.'
-0.067797 O b'+1:lemma:until'
-0.076803 O b'lemma:150'
-0.076803 O b'+1:lemma:mg/ml'
-0.080714 O b'+1:lemma:5'
-0.090840 Gversion b'+1:postag:NN'
-0.099773 OD b'+1:postag:CD'
-0.103778 O b'-1:lemma:-lrb-'
-0.104888 Anti b'+1:postag:JJ'
-0.112142 Technique b'-1:lemma::'
-0.119753 Air b'postag:CC'
-0.127293 O b'-1:lemma:0.1'
-0.127394 O b'+1:lemma:.'
-0.127394 O b'+1:postag:.'
-0.129048 Air b'-1:lemma:or'
-0.129819 O b'-1:lemma:from'
-0.138472 O b'-1:postag:IN'
-0.146711 Air b'-1:postag:CC'
-0.147481 O b'+1:lemma:-rrb-'
-0.155126 O b'+1:lemma:antibody'
-0.155172 O b'lemma:co2'
-0.168963 Gtype b'-1:postag:CD'
-0.169956 O b'+1:postag:NNS'
-0.174119 Supp b'postag:CC'
-0.179294 O b'-1:lemma:1'
-0.191426 O b'+1:lemma:arginine'
-0.196031 O b'-1:lemma:cra'
-0.199976 O b'lemma:anaerobic'
-0.204578 O b'lemma:30'
-0.224096 Med b'-1:postag:NN'
-0.230003 Phase b'+1:postag:NN'
-0.236956 O b'+1:lemma:%'
-0.241481 O b'-1:lemma:30'
-0.264234 Air b'postag:CD'
-0.270046 O b'lemma:\xce\xb4fur'
-0.280726 O b'-1:lemma:ml'
-0.288044 O b'-1:lemma:of'
-0.297076 Med b'+1:postag:NN'
-0.299805 Med b'+1:postag:IN'
-0.302610 Supp b'+1:lemma:rifampicin'
-0.306596 O b'-1:lemma:~'
-0.307778 O b'-1:postag:-LRB-'
-0.310654 O b'lemma:phase'
-0.311969 pH b'postag:NN'
-0.315543 O b'+1:lemma:0.3'
-0.318646 O b'postag:VBP'
-0.335513 OD b'+1:postag:NN'
-0.365484 Temp b'postag:JJ'
-0.402954 O b'lemma:dissolve'
-0.424513 O b'lemma:20'
-0.429534 O b'+1:postag:IN'
-0.437134 O b'-1:lemma:grow'
-0.457631 Phase b'-1:lemma:at'
-0.461721 Temp b'+1:lemma:to'
-0.461721 Temp b'+1:postag:TO'
-0.486817 Supp b'-1:lemma:%'
-0.495015 O b'lemma:0.3'
-0.505567 Temp b'postag:NN'
-0.513730 O b'lemma:od600'
-0.526067 O b'+1:postag:-RRB-'
-0.537886 O b'+1:postag:VBG'
-0.541924 O b'-1:lemma:od600'
-0.549617 O b'-1:lemma:rpob'
-0.561323 O b'-1:lemma:0.2'
-0.568252 OD b'lemma:-lrb-'
-0.568961 Technique b'-1:postag::'
-0.587228 O b'-1:lemma:sample'
-0.604535 O b'+1:lemma:fecl2'
-0.605395 O b'-1:lemma:37'
-0.633419 O b'lemma:anaerobically'
-0.640315 Air b'-1:postag:JJ'
-0.646518 O b'lemma:\xe2\x88\x86'
-0.700705 O b'lemma:fecl2'
-0.713765 O b'-1:postag::'
-0.750660 O b'lemma:media'
-0.798765 O b'+1:lemma:supplement'
-0.840866 O b'-1:postag:VBG'
-0.867716 O b'lemma:0.2'
-0.905677 O b'+1:lemma:cell'
-0.915154 O b'lemma:2h'
-0.915154 O b'-1:lemma:additional'
-0.930586 Supp b'+1:lemma:acetate'
-0.930611 O b'lemma:37'
-0.940854 O b'+1:lemma:g/l'
-0.942447 O b'-1:lemma:co2'
-0.944499 O b'lemma:of'
-0.951108 O b'-1:lemma:dissolve'
-0.951108 O b'+1:lemma:methanol'
-0.962496 O b'+1:lemma:hour'
-0.976720 Phase b'postag:JJ'
-0.983487 O b'lemma:0.1'
-1.035888 O b'-1:lemma:ompr'
-1.064088 O b'+1:lemma:at'
-1.115638 OD b'postag:-LRB-'
-1.121251 O b'lemma:wt'
-1.195033 O b'lemma:mid-log'
-1.218852 O b'+1:lemma:+'
-1.237063 O b'-1:lemma:IP'
-1.311143 O b'-1:lemma:nsrr'
-1.325930 Anti b'postag:NNP'
-1.428005 O b'lemma:rifampicin'
-1.465722 Air b'+1:postag:JJ'
-1.474845 Supp b'postag:JJ'
-1.503128 O b'lemma:methanol'
-1.517666 O b'+1:lemma:in'
-1.685015 Air b'postag:NN'
-2.153686 O b'+1:lemma:2'
-2.261742 O b'-1:lemma:2'
-2.412156 O b'+1:lemma:1'
-4.073767 O b'-1:lemma::'
-4.265465 O b'-1:lemma:_'
********** TRAINING AND TESTING REPORT **********
Training file: training-data-set-70_v4.txt
best params:{'c1': 0.09081091923387723, 'c2': 0.00038627878700387623}
best CV score:0.7938633983009262
model size: 0.07M
Flat F1: 0.786870840829875
precision recall f1-score support
OD 0.789 0.405 0.536 37
pH 1.000 1.000 1.000 12
Technique 0.952 0.909 0.930 22
Med 0.776 0.912 0.839 57
Temp 0.818 1.000 0.900 18
Vess 0.000 0.000 0.000 0
Agit 0.000 0.000 0.000 0
Phase 1.000 0.947 0.973 19
Air 0.754 0.742 0.748 62
Anti 0.571 0.444 0.500 9
Strain 1.000 1.000 1.000 1
Gtype 0.860 0.811 0.835 106
Substrain 0.000 0.000 0.000 1
Supp 0.819 0.699 0.754 136
Gversion 0.000 0.000 0.000 0
avg / total 0.824 0.765 0.787 480
Top likely transitions:
Agit -> Agit 7.136396
OD -> OD 6.491625
Temp -> Temp 6.283086
Anti -> Anti 6.213232
Med -> Med 5.995903
Phase -> Phase 5.618998
Gtype -> Gtype 5.237438
Air -> Air 5.151876
Gversion -> Gversion 4.933604
O -> O 4.847296
Technique -> Technique 4.762002
Supp -> Supp 4.623411
Gtype -> Supp 1.707940
pH -> pH 1.691677
O -> Technique 1.635878
O -> Supp 1.559728
O -> Gtype 1.022656
Substrain -> Gtype 0.995104
O -> Anti 0.583807
O -> Temp 0.467612
O -> Gversion 0.453034
Technique -> Air 0.359052
Air -> O 0.273813
O -> Med 0.235768
Temp -> O 0.115990
Gtype -> Air 0.085028
Med -> O 0.063901
Phase -> Air -0.013623
Strain -> O -0.015319
Air -> Phase -0.018991
O -> OD -0.096097
Supp -> Air -0.152980
Supp -> Gtype -0.172882
O -> Phase -0.230569
Gtype -> Anti -0.419137
Technique -> O -0.465846
Air -> Supp -0.504152
OD -> Air -0.526665
Gtype -> O -0.592689
Supp -> O -0.617787
Technique -> OD -0.675268
Phase -> O -0.805307
OD -> Med -0.848210
O -> Air -0.948079
Supp -> Med -0.972720
Agit -> O -1.032994
OD -> O -1.111875
Gtype -> OD -1.121470
Air -> OD -1.294245
Substrain -> O -1.829982
Top unlikely transitions:
Temp -> Temp 6.283086
Anti -> Anti 6.213232
Med -> Med 5.995903
Phase -> Phase 5.618998
Gtype -> Gtype 5.237438
Air -> Air 5.151876
Gversion -> Gversion 4.933604
O -> O 4.847296
Technique -> Technique 4.762002
Supp -> Supp 4.623411
Gtype -> Supp 1.707940
pH -> pH 1.691677
O -> Technique 1.635878
O -> Supp 1.559728
O -> Gtype 1.022656
Substrain -> Gtype 0.995104
O -> Anti 0.583807
O -> Temp 0.467612
O -> Gversion 0.453034
Technique -> Air 0.359052
Air -> O 0.273813
O -> Med 0.235768
Temp -> O 0.115990
Gtype -> Air 0.085028
Med -> O 0.063901
Phase -> Air -0.013623
Strain -> O -0.015319
Air -> Phase -0.018991
O -> OD -0.096097
Supp -> Air -0.152980
Supp -> Gtype -0.172882
O -> Phase -0.230569
Gtype -> Anti -0.419137
Technique -> O -0.465846
Air -> Supp -0.504152
OD -> Air -0.526665
Gtype -> O -0.592689
Supp -> O -0.617787
Technique -> OD -0.675268
Phase -> O -0.805307
OD -> Med -0.848210
O -> Air -0.948079
Supp -> Med -0.972720
Agit -> O -1.032994
OD -> O -1.111875
Gtype -> OD -1.121470
Air -> OD -1.294245
Substrain -> O -1.829982
Phase -> OD -2.186140
Med -> Supp -2.693786
Top positive:
12.023128 Supp b'lemma:Iron'
11.905802 Phase b'lemma:stationary'
10.567265 O b'lemma:_'
10.151152 Air b'-1:lemma:ChIP-Seq'
10.090491 Air b'lemma:anaerobic'
9.857898 Supp b'lemma:nitrate'
9.629833 Air b'lemma:aerobic'
9.577578 Technique b'lemma:ChIP-exo'
9.036653 Phase b'lemma:mid-log'
8.721963 Technique b'lemma:chipseq'
8.698132 Med b'lemma:MOPS'
8.668936 O b'lemma:1'
8.103505 O b'-1:lemma:ChIP-exo'
7.864122 O b'lemma:rpob'
7.451473 Strain b'lemma:k-12'
7.338543 O b'lemma:\xcf\x8332'
7.198959 Air b'lemma:Aerobic'
7.109572 Gtype b'lemma:flag-tag'
7.109572 Gtype b'-1:lemma:c-terminal'
7.044978 Supp b'lemma:pq'
6.899921 O b'lemma:2'
6.897717 Gtype b'lemma:\xce\xb4cra'
6.862453 O b'lemma:Custom'
6.848216 O b'-1:lemma:tag'
6.753546 Gversion b'lemma:asm584v2'
6.751054 O b'lemma:b'
6.726115 Technique b'lemma:ChIP-Seq'
6.340232 O b'lemma:a'
6.324324 Med b'lemma:LB'
6.242158 O b'lemma:3'
6.225748 O b'lemma:rep1'
6.183760 Gversion b'-1:lemma:nc'
6.139494 O b'lemma:for'
6.136397 O b'lemma:rep2'
6.112373 O b'-1:lemma:Aerobic'
6.052426 Gtype b'lemma:delta-arca'
6.021691 Supp b'+1:lemma:\xc2\xb5m'
6.014827 O b'lemma:rep3'
5.998278 Gtype b'lemma:arca8myc'
5.864622 Gtype b'lemma:fnr8myc'
5.838163 Supp b'lemma:Fe'
5.756687 Substrain b'lemma:mg1655'
5.628610 O b'lemma:Cra'
5.609519 Gtype b'+1:lemma:type'
5.519720 Gtype b'lemma:type'
5.487434 OD b'+1:lemma:stationary'
5.465596 Gversion b'lemma:nc'
5.326426 Gtype b'lemma:delta-fnr'
5.265842 Anti b'lemma:none'
5.236408 Supp b'lemma:nh4cl'
5.195640 O b'postag::'
5.136998 Med b'lemma:lb'
5.129499 OD b'lemma:od450'
5.106518 Technique b'lemma:rna-seq'
5.084990 O b'-1:lemma:anaerobic'
4.986922 Gtype b'lemma:wt'
4.943309 Supp b'lemma:acetate'
4.879656 Gtype b'lemma:\xce\xb4fur'
4.846650 Technique b'lemma:chip-seq'
4.825495 O b'-1:lemma:0.3-0.35'
4.812352 Gtype b'-1:lemma:\xe2\x88\x86'
4.769607 Supp b'lemma:glucose'
4.759235 O b'-1:lemma:glucose'
4.740317 Supp b'lemma:arginine'
4.738816 Supp b'lemma:rifampicin'
4.701038 Gversion b'lemma:chip-seq'
4.693852 Gtype b'lemma:nsrr'
4.666819 O b'lemma:chip'
4.666777 Air b'lemma:anerobically'
4.665470 O b'lemma:or'
4.635435 Temp b'-1:lemma:sample'
4.605663 O b'lemma:.'
4.605663 O b'postag:.'
4.574656 OD b'lemma:od600'
4.562061 O b'postag:IN'
4.550601 Anti b'lemma:anti-myc'
4.537492 Gtype b'+1:lemma:ph5'
4.523129 O b'-1:lemma:type'
4.511181 Strain b'+1:lemma:substr'
4.493105 O b'+1:lemma:od600'
4.453065 Vess b'lemma:flask'
4.453065 Vess b'-1:lemma:warm'
4.444663 Supp b'lemma:no3'
4.439485 Technique b'+1:lemma:chip-exo'
4.416181 Gtype b'+1:lemma:flagtag'
4.415325 Supp b'-1:lemma:Cra'
4.396509 Supp b'-1:lemma:+'
4.383866 Gtype b'-1:lemma:ptac'
4.365928 O b'+1:lemma:sparging'
4.354534 O b'+1:postag:RB'
4.348783 Med b'+1:lemma:0.4'
4.341814 O b'-1:lemma:0.3'
4.336456 O b'+1:lemma:chip-seq'
4.269719 O b'lemma:with'
4.219193 Anti b'lemma:seqa'
4.194527 Supp b'+1:lemma:hour'
4.154345 Gtype b'lemma:\xe2\x88\x86'
4.105460 O b'lemma:-'
4.045385 Temp b'-1:lemma:\xcf\x8332'
4.041775 Supp b'lemma:dpd'
4.018422 O b'lemma:oxyr'
4.006587 Med b'+1:lemma:minimal'
3.994386 Gtype b'-1:lemma:rpob'
3.943397 pH b'lemma:ph5'
3.943397 pH b'+1:lemma:.5'
3.925919 Supp b'lemma:fructose'
3.913579 Supp b'+1:lemma:1'
3.908385 O b'+1:postag:NNP'
3.803339 O b'+1:lemma:o.d.'
3.758844 Technique b'lemma:rnaseq'
3.730482 Technique b'+1:lemma:rna-seq'
3.692271 Technique b'-1:lemma:IP'
3.684645 O b'lemma:ompr'
3.679500 O b'-1:lemma:lb'
3.642465 Anti b'+1:lemma:antibody'
3.624491 O b'lemma:s'
3.612874 O b'lemma:2-3'
3.606456 Gtype b'+1:lemma:pq'
3.584554 O b'lemma:soxs'
3.584554 O b'lemma:soxr'
3.545385 O b'lemma:CEL'
3.527179 Gversion b'lemma:u00096'
3.527179 Gversion b'+1:lemma:.2'
3.525538 Technique b'-1:lemma:chip-exo'
3.525111 Anti b'lemma:anti-rpos'
3.517186 O b'postag:VBN'
3.489661 Supp b'lemma:Leu'
3.480284 Supp b'lemma:iptg'
3.469423 Temp b'-1:lemma:43'
3.467975 Gtype b'lemma:pk4854'
3.443002 OD b'-1:lemma:about'
3.435842 Gtype b'lemma:\xce\xb4ompr'
3.435548 Med b'+1:lemma:contain'
3.393209 Med b'lemma:m63'
3.390757 Gversion b'lemma:.2'
3.390757 Gversion b'-1:lemma:u00096'
3.365623 Gtype b'lemma:wild-type'
3.365137 Supp b'+1:lemma:_'
3.285213 O b'-1:lemma:\xc2\xb0c'
3.270674 Air b'lemma:anaerobically'
3.263042 Gversion b'lemma:000913'
3.243898 Gtype b'lemma:\xce\xb4soxs'
3.241988 O b'lemma:at'
3.241694 Supp b'lemma:Adenine'
3.235960 Supp b'+1:lemma:2'
3.220154 Substrain b'+1:lemma:phtpg'
3.168232 O b'lemma:chip-arca'
3.124706 Med b'-1:lemma:ml'
3.119275 Gtype b'lemma:deltaseqa'
3.119275 Gtype b'-1:lemma:old'
3.105855 Phase b'-1:lemma:until'
3.104880 Supp b'lemma:nacl'
3.063586 Phase b'+1:lemma:for'
3.062511 O b'lemma:argr'
3.057692 Temp b'-1:lemma:37'
3.030255 O b'lemma:purr'
3.006731 O b'-1:lemma:media'
2.937639 O b'+1:lemma:pq'
2.930516 Supp b'lemma:methanol'
2.918452 Temp b'+1:lemma:\xc2\xb0c'
2.915173 Gtype b'lemma:WT'
2.909714 OD b'-1:lemma:~'
2.900692 Med b'lemma:L'
2.900692 Med b'+1:lemma:broth'
2.899989 Gtype b'lemma:ptac'
2.898928 O b'+1:lemma:rifampicin'
2.878667 pH b'lemma:.5'
2.878667 pH b'-1:lemma:ph5'
2.877293 O b'postag:DT'
2.875342 O b'+1:lemma:mid-log'
2.845729 Gtype b'+1:lemma:with'
2.833330 Temp b'lemma:\xc2\xb0c'
2.821959 Supp b'-1:lemma:\xc2\xb5m'
2.778935 O b'-1:lemma:dpd'
2.716093 Supp b'+1:lemma:iptg'
2.714614 Supp b'+1:lemma:Deficient'
2.696864 Gtype b'-1:lemma:phtpg'
2.687105 Air b'+1:lemma:at'
2.676418 O b'lemma:affyexp'
2.676025 Gtype b'lemma:\xce\xb4soxr'
2.663692 Gtype b'lemma:\xce\xb4oxyr'
2.658854 Med b'+1:lemma:2.0'
2.651224 Gtype b'lemma:dfnr'
2.649576 O b'-1:lemma:l1'
2.620669 Air b'postag:RB'
2.577561 O b'lemma:Lrp'
2.570022 Med b'+1:lemma:g/l'
2.524794 Technique b'-1:lemma:input'
2.523784 O b'-1:lemma:min'
2.517414 O b'+1:postag:VBP'
2.471571 O b'+1:lemma:acetate'
2.464652 O b'postag:VBG'
2.464091 Air b'-1:lemma:phase'
2.457715 OD b'lemma:0.3'
2.425241 Gtype b'lemma:soxs-8myc'
2.425241 Gtype b'lemma:soxr-8myc'
2.406446 O b'+1:lemma:0.2'
2.380330 Temp b'lemma:37'
2.356535 Temp b'lemma:43'
2.340361 Supp b'lemma:leucine'
Top negative:
0.000037 OD b'+1:lemma:\xce\xb4soxr'
0.000031 Technique b'+1:lemma:Anaerobic'
0.000028 OD b'-1:lemma:phase'
0.000026 Agit b'-1:postag:VBG'
0.000015 Agit b'+1:postag:IN'
0.000015 O b'lemma:input'
0.000014 OD b'+1:postag:RB'
0.000012 O b'-1:lemma:Fur'
0.000010 Supp b'+1:lemma:feso4'
0.000010 O b'+1:lemma:ChIP-Seq'
0.000005 O b'-1:lemma:250'
0.000004 OD b'lemma::'
0.000004 Agit b'postag:VBG'
0.000003 OD b'postag::'
0.000001 Temp b'-1:postag:NNS'
-0.000008 O b'lemma:mg1655'
-0.000009 O b'-1:lemma:sample'
-0.000026 O b'-1:lemma:e.'
-0.000069 O b'-1:lemma:1'
-0.000210 Gtype b'-1:postag:NNP'
-0.000267 O b'+1:lemma:co2'
-0.000316 Gtype b'+1:lemma:2'
-0.000469 O b'+1:lemma:rep1'
-0.001404 O b'-1:lemma:at'
-0.002564 Med b'+1:postag:CC'
-0.002647 OD b'lemma:~'
-0.002825 O b'lemma:grow'
-0.003590 O b'lemma:anaerobic'
-0.004607 O b'+1:lemma:dissolve'
-0.005282 Supp b'+1:lemma:of'
-0.008060 OD b'+1:postag:-LRB-'
-0.008731 OD b'+1:postag:-RRB-'
-0.009571 O b'-1:lemma:30'
-0.010095 O b'lemma:m63'
-0.010149 Phase b'lemma:pahse'
-0.010429 Gtype b'+1:lemma:-'
-0.012754 Supp b'lemma:mm'
-0.013827 OD b'+1:lemma:~'
-0.014292 O b'+1:lemma:grow'
-0.015852 OD b'+1:postag:CD'
-0.016820 O b'lemma:25'
-0.017788 OD b'+1:lemma:-rrb-'
-0.019574 O b'+1:lemma:~'
-0.025256 O b'+1:lemma:300'
-0.026121 O b'+1:lemma:0.4'
-0.028318 O b'lemma:o2'
-0.029040 Anti b'+1:lemma:anti-fur'
-0.030350 OD b'+1:lemma:0.4'
-0.030931 Med b'+1:postag:NNS'
-0.033897 O b'-1:lemma:mm'
-0.035604 O b'+1:lemma:o2'
-0.035916 O b'-1:lemma:the'
-0.035990 O b'+1:lemma:arginine'
-0.036415 Air b'-1:lemma:anaerobically'
-0.040153 Air b'-1:postag:CC'
-0.041154 O b'+1:lemma:antibody'
-0.041832 Anti b'-1:postag:NN'
-0.045053 OD b'postag:IN'
-0.047572 Gtype b'+1:postag:IN'
-0.053112 Supp b'+1:lemma:dpd'
-0.053329 O b'-1:lemma:minimal'
-0.060576 O b'-1:postag:IN'
-0.065925 O b'lemma:medium'
-0.066353 O b'+1:lemma:25'
-0.076464 Med b'-1:postag:NN'
-0.078443 Supp b'-1:postag:NN'
-0.078865 Strain b'+1:postag:NN'
-0.091232 Air b'postag:CD'
-0.091763 O b'lemma:30'
-0.095656 O b'lemma:lb'
-0.098575 O b'+1:lemma:shake'
-0.100589 Anti b'+1:postag:JJ'
-0.109250 Supp b'+1:postag:IN'
-0.113108 Agit b'postag:NN'
-0.113815 O b'-1:lemma:o2'
-0.117553 O b'+1:lemma:5'
-0.121789 Supp b'lemma:and'
-0.122865 Gtype b'-1:postag:NN'
-0.129126 O b'lemma:glucose'
-0.130690 O b'-1:lemma:of'
-0.132633 O b'+1:lemma:phase'
-0.134059 OD b'+1:postag:CC'
-0.141756 Air b'-1:lemma:95'
-0.143614 O b'-1:lemma:-lrb-'
-0.149012 O b'+1:lemma:\xc2\xb0c'
-0.155150 O b'+1:postag:NNS'
-0.160903 O b'+1:lemma:-rrb-'
-0.161363 O b'lemma:n2'
-0.162536 O b'postag:VBP'
-0.168298 O b'-1:lemma:n2'
-0.171249 O b'-1:postag:VBN'
-0.176512 O b'lemma:aerobically'
-0.186672 O b'lemma:co2'
-0.186766 O b'+1:lemma:_'
-0.196943 Strain b'postag:NN'
-0.201827 O b'+1:lemma:mm'
-0.206537 O b'-1:postag:-LRB-'
-0.214121 Supp b'postag:CC'
-0.215072 O b'lemma:e.'
-0.235225 O b'-1:lemma:0.1'
-0.261548 O b'lemma:\xce\xb4fur'
-0.264532 O b'-1:lemma:rifampicin'
-0.272491 O b'-1:lemma:0.2'
-0.284244 O b'-1:lemma:from'
-0.285360 O b'-1:lemma:until'
-0.286318 Med b'postag:CD'
-0.287177 Air b'-1:lemma:or'
-0.304066 O b'lemma:150'
-0.304066 O b'+1:lemma:mg/ml'
-0.304896 O b'+1:lemma:or'
-0.309102 O b'+1:postag:-RRB-'
-0.331291 Supp b'-1:postag:NNP'
-0.339237 Med b'+1:postag:IN'
-0.352184 O b'postag:RB'
-0.361577 Gtype b'postag:CD'
-0.367320 O b'-1:lemma:ml'
-0.377900 O b'lemma:dissolve'
-0.378236 O b'+1:lemma:.'
-0.378236 O b'+1:postag:.'
-0.378886 O b'+1:lemma:until'
-0.380197 O b'lemma:od600'
-0.380556 O b'-1:lemma:od600'
-0.388687 O b'lemma:phase'
-0.393310 O b'+1:lemma:0.3'
-0.399102 O b'+1:postag:IN'
-0.399168 Temp b'postag:JJ'
-0.401235 O b'-1:lemma:~'
-0.410916 O b'-1:lemma:rpob'
-0.431448 OD b'lemma:-lrb-'
-0.434432 Technique b'-1:lemma::'
-0.444259 Supp b'+1:postag:VBN'
-0.459196 Technique b'-1:postag::'
-0.459291 OD b'+1:lemma:and'
-0.470209 O b'lemma:\xe2\x88\x86'
-0.486291 O b'+1:lemma:%'
-0.518507 O b'-1:lemma:IP'
-0.519707 Phase b'-1:lemma:at'
-0.526678 O b'lemma:dpd'
-0.534234 Air b'-1:postag:JJ'
-0.540044 pH b'postag:NN'
-0.542011 O b'+1:lemma:c'
-0.553114 O b'-1:lemma:grow'
-0.571198 O b'lemma:0.3'
-0.573213 Med b'+1:postag:NN'
-0.592187 Temp b'postag:NN'
-0.596509 O b'-1:lemma:mid-log'
-0.616998 O b'lemma:media'
-0.625245 O b'+1:lemma:cell'
-0.625847 O b'-1:lemma:cra'
-0.668364 Gversion b'+1:postag:NN'
-0.670411 O b'+1:lemma:+'
-0.670829 O b'lemma:anaerobically'
-0.693082 Supp b'+1:lemma:rifampicin'
-0.727079 O b'-1:lemma:dissolve'
-0.727079 O b'+1:lemma:methanol'
-0.738408 O b'+1:lemma:fecl2'
-0.792874 O b'lemma:20'
-0.848889 OD b'+1:postag:NN'
-0.848997 Supp b'+1:lemma:acetate'
-0.861900 Med b'-1:postag:IN'
-0.865385 O b'lemma:0.2'
-0.892622 Temp b'+1:lemma:to'
-0.892622 Temp b'+1:postag:TO'
-0.937071 O b'+1:lemma:supplement'
-0.938360 O b'-1:lemma:co2'
-0.960239 O b'lemma:2h'
-0.960239 O b'-1:lemma:additional'
-0.973815 O b'lemma:of'
-1.034828 O b'+1:postag:VBG'
-1.035428 O b'+1:lemma:g/l'
-1.050289 O b'lemma:mid-log'
-1.059894 O b'-1:postag:VBG'
-1.066538 O b'lemma:0.1'
-1.076597 O b'-1:postag::'
-1.106329 Phase b'+1:postag:NN'
-1.148398 Phase b'-1:postag:NN'
-1.172180 Air b'+1:postag:JJ'
-1.201626 O b'lemma:37'
-1.206293 OD b'postag:-LRB-'
-1.252513 O b'-1:lemma:37'
-1.399946 O b'+1:lemma:at'
-1.439837 O b'lemma:wt'
-1.474885 O b'-1:lemma:nsrr'
-1.521797 O b'lemma:fecl2'
-1.544569 Anti b'postag:NNP'
-1.548585 O b'+1:lemma:hour'
-1.566877 Supp b'-1:lemma:%'
-1.597712 O b'+1:lemma:in'
-1.629113 O b'-1:lemma:ompr'
-1.773656 Air b'postag:NN'
-1.898147 Phase b'-1:postag:JJ'
-2.222432 O b'lemma:rifampicin'
-2.229376 O b'-1:lemma:2'
-2.493849 O b'+1:lemma:1'
-2.529260 O b'lemma:methanol'
-2.707675 O b'+1:lemma:2'
-2.830767 Supp b'postag:JJ'
-2.988306 Phase b'postag:JJ'
-4.548731 O b'-1:lemma::'
-4.577007 O b'-1:lemma:_'
********** TRAINING AND TESTING REPORT **********
Training file: training-data-set-70_v4.txt
best params:{'c1': 0.24365115413317517, 'c2': 0.017603116029939036}
best CV score:0.797058241278945
model size: 0.06M
Flat F1: 0.7778441635784862
precision recall f1-score support
OD 1.000 0.405 0.577 37
pH 1.000 1.000 1.000 12
Technique 0.952 0.909 0.930 22
Med 0.800 0.842 0.821 57
Temp 0.818 1.000 0.900 18
Vess 0.000 0.000 0.000 0
Agit 0.000 0.000 0.000 0
Phase 1.000 0.895 0.944 19
Air 0.780 0.742 0.760 62
Anti 0.571 0.444 0.500 9
Strain 1.000 1.000 1.000 1
Gtype 0.863 0.774 0.816 106
Substrain 0.000 0.000 0.000 1
Supp 0.818 0.662 0.732 136
Gversion 0.000 0.000 0.000 0
avg / total 0.847 0.735 0.778 480
Top likely transitions:
OD -> OD 6.460512
Agit -> Agit 6.341871
Temp -> Temp 5.624129
Med -> Med 5.402282
Anti -> Anti 5.214940
Air -> Air 4.858980
Phase -> Phase 4.544403
Gtype -> Gtype 4.483666
Supp -> Supp 4.372876
Gversion -> Gversion 4.358204
O -> O 4.356881
Technique -> Technique 3.828561
O -> Supp 2.070080
Gtype -> Supp 1.938994
pH -> pH 1.828748
Air -> O 1.699192
O -> Technique 1.307357
Substrain -> Gtype 1.254891
O -> Gtype 1.237193
O -> Temp 0.952632
Med -> O 0.819857
OD -> Phase 0.738941
Technique -> Air 0.735321
O -> Anti 0.520896
Supp -> O 0.493996
Temp -> O 0.435644
O -> Gversion 0.368943
Phase -> O 0.362864
Gtype -> pH 0.340162
O -> Med 0.294591
Gtype -> Air 0.108303
O -> Strain 0.008465
Gtype -> O -0.094763
Gtype -> Anti -0.194733
OD -> O -0.218649
Technique -> pH -0.317427
Agit -> O -0.543505
Supp -> Med -0.600692
Gtype -> OD -0.628749
O -> Air -0.839621
Substrain -> O -0.911959
Med -> Supp -1.122521
Phase -> OD -1.131519
Top unlikely transitions:
OD -> OD 6.460512
Agit -> Agit 6.341871
Temp -> Temp 5.624129
Med -> Med 5.402282
Anti -> Anti 5.214940
Air -> Air 4.858980
Phase -> Phase 4.544403
Gtype -> Gtype 4.483666
Supp -> Supp 4.372876
Gversion -> Gversion 4.358204
O -> O 4.356881
Technique -> Technique 3.828561
O -> Supp 2.070080
Gtype -> Supp 1.938994
pH -> pH 1.828748
Air -> O 1.699192
O -> Technique 1.307357
Substrain -> Gtype 1.254891
O -> Gtype 1.237193
O -> Temp 0.952632
Med -> O 0.819857
OD -> Phase 0.738941
Technique -> Air 0.735321
O -> Anti 0.520896
Supp -> O 0.493996
Temp -> O 0.435644
O -> Gversion 0.368943
Phase -> O 0.362864
Gtype -> pH 0.340162
O -> Med 0.294591
Gtype -> Air 0.108303
O -> Strain 0.008465
Gtype -> O -0.094763
Gtype -> Anti -0.194733
OD -> O -0.218649
Technique -> pH -0.317427
Agit -> O -0.543505
Supp -> Med -0.600692
Gtype -> OD -0.628749
O -> Air -0.839621
Substrain -> O -0.911959
Med -> Supp -1.122521
Phase -> OD -1.131519
Top positive:
7.945473 O b'lemma:_'
7.412165 Supp b'lemma:Iron'
6.926609 Air b'lemma:anaerobic'
6.922674 O b'lemma:1'
6.590205 Air b'lemma:aerobic'
6.223986 Strain b'lemma:k-12'
6.187011 Technique b'lemma:ChIP-exo'
6.130654 Phase b'lemma:stationary'
5.864907 Supp b'lemma:nitrate'
5.653173 O b'lemma:2'
5.610923 O b'lemma:rpob'
5.610415 Air b'-1:lemma:ChIP-Seq'
5.397732 Phase b'lemma:mid-log'
5.255269 O b'lemma:3'
5.206599 Technique b'lemma:chipseq'
4.960561 Substrain b'lemma:mg1655'
4.914901 O b'postag:IN'
4.746312 Supp b'lemma:pq'
4.579937 Gtype b'lemma:\xce\xb4cra'
4.520187 Gtype b'lemma:type'
4.511014 OD b'lemma:od600'
4.469574 Gtype b'lemma:flag-tag'
4.469574 Gtype b'-1:lemma:c-terminal'
4.462810 O b'lemma:rep1'
4.392739 O b'postag::'
4.375670 Gversion b'lemma:asm584v2'
4.369737 O b'lemma:Custom'
4.356443 O b'lemma:b'
4.303923 Technique b'lemma:ChIP-Seq'
4.302917 Med b'lemma:MOPS'
4.296205 Supp b'lemma:nh4cl'
4.281883 Gtype b'-1:lemma:\xe2\x88\x86'
4.279879 Gtype b'+1:lemma:type'
4.270631 O b'lemma:rep2'
4.239368 Air b'lemma:Aerobic'
4.132080 Gversion b'lemma:nc'
4.117043 Med b'lemma:lb'
4.108014 Supp b'+1:lemma:\xc2\xb5m'
4.076646 O b'lemma:rep3'
4.043095 Gtype b'lemma:arca8myc'
4.041495 Supp b'lemma:glucose'
4.033775 O b'lemma:a'
4.027421 O b'-1:lemma:tag'
4.027011 Med b'lemma:LB'
4.007935 O b'lemma:Cra'
3.910498 Gtype b'lemma:wt'
3.905564 O b'-1:lemma:ChIP-exo'
3.870678 O b'lemma:\xcf\x8332'
3.727782 Supp b'lemma:acetate'
3.697104 Gtype b'lemma:delta-arca'
3.647724 Technique b'lemma:rna-seq'
3.633897 Supp b'lemma:Fe'
3.575150 Gtype b'lemma:fnr8myc'
3.536159 OD b'lemma:od450'
3.488627 Supp b'lemma:rifampicin'
3.426221 O b'lemma:.'
3.426221 O b'postag:.'
3.421248 Technique b'lemma:chip-seq'
3.389912 Air b'postag:RB'
3.367471 Med b'+1:lemma:0.4'
3.358386 Gtype b'lemma:\xe2\x88\x86'
3.341657 Supp b'lemma:dpd'
3.311375 Supp b'lemma:no3'
3.301666 Gtype b'lemma:nsrr'
3.263607 OD b'+1:lemma:stationary'
3.256783 O b'-1:lemma:Aerobic'
3.217842 Anti b'lemma:none'
3.181572 Gversion b'-1:lemma:nc'
3.100116 Technique b'lemma:rnaseq'
3.072172 Gtype b'lemma:\xce\xb4fur'
3.058843 O b'postag:VBN'
3.028102 Anti b'+1:lemma:antibody'
3.018055 Supp b'lemma:arginine'
3.012119 Supp b'+1:lemma:1'
3.009503 Gtype b'lemma:delta-fnr'
2.984566 O b'lemma:CEL'
2.976222 O b'+1:lemma:od600'
2.972362 Supp b'lemma:fructose'
2.971825 Vess b'lemma:flask'
2.971825 Vess b'-1:lemma:warm'
2.946262 Technique b'-1:lemma:IP'
2.917769 Temp b'-1:lemma:sample'
2.914128 Med b'+1:lemma:minimal'
2.847635 Temp b'lemma:\xc2\xb0c'
2.845956 O b'-1:lemma:type'
2.837114 pH b'lemma:ph5'
2.837114 pH b'+1:lemma:.5'
2.822859 Supp b'+1:lemma:2'
2.819530 O b'postag:CC'
2.802486 Gtype b'+1:lemma:with'
2.796286 Anti b'lemma:seqa'
2.757551 Gtype b'+1:lemma:pq'
2.752041 O b'lemma:chip'
2.720379 O b'lemma:-'
2.697742 Gversion b'lemma:u00096'
2.697742 Gversion b'+1:lemma:.2'
2.686468 Technique b'+1:lemma:chip-exo'
2.652892 O b'-1:lemma:0.3-0.35'
2.634884 O b'-1:lemma:glucose'
2.605565 Gversion b'lemma:chip-seq'
2.589331 Strain b'+1:lemma:substr'
2.586612 O b'+1:postag:RB'
2.568676 O b'postag:DT'
2.560900 Temp b'-1:lemma:\xcf\x8332'
2.543878 O b'lemma:with'
2.514778 Med b'lemma:m63'
2.497949 Anti b'lemma:anti-myc'
2.469343 pH b'lemma:.5'
2.469343 pH b'-1:lemma:ph5'
2.464961 Gversion b'lemma:000913'
2.461605 Gtype b'-1:lemma:rpob'
2.454623 O b'lemma:s'
2.438821 O b'+1:postag:NNP'
2.438655 Gversion b'lemma:.2'
2.438655 Gversion b'-1:lemma:u00096'
2.427395 Technique b'-1:lemma:chip-exo'
2.415443 Anti b'lemma:anti-rpos'
2.411875 Temp b'-1:lemma:43'
2.410922 Gtype b'-1:lemma:ptac'
2.405223 Gtype b'lemma:\xce\xb4ompr'
2.401826 O b'-1:lemma:anaerobic'
2.390761 O b'lemma:or'
2.388354 O b'-1:lemma:lb'
2.381109 Gtype b'+1:lemma:flagtag'
2.347230 Med b'+1:lemma:2.0'
2.324166 Med b'lemma:media'
2.310748 O b'-1:lemma:0.3'
2.278494 Supp b'-1:lemma:Cra'
2.263509 O b'+1:lemma:pq'
2.238545 Supp b'+1:lemma:_'
2.229756 O b'-1:lemma:l1'
2.226389 Air b'-1:lemma:co2'
2.200525 Gtype b'+1:lemma:aerobic'
2.173774 Supp b'lemma:nacl'
2.171575 Substrain b'+1:lemma:phtpg'
2.149631 Supp b'lemma:20'
2.120754 Gtype b'lemma:wild-type'
2.119820 Supp b'-1:lemma:\xc2\xb5m'
2.113883 O b'-1:lemma:\xc2\xb0c'
2.105763 Supp b'-1:lemma:+'
2.101125 O b'lemma:oxyr'
2.092553 Gtype b'lemma:\xce\xb4soxs'
2.090291 Air b'-1:lemma:-'
2.081430 Supp b'lemma:Leu'
2.062775 O b'postag:VBG'
2.055064 O b'-1:lemma:stpa'
2.046935 O b'lemma:condition'
2.038964 Temp b'lemma:43'
2.033948 Supp b'lemma:iptg'
2.023886 pH b'+1:postag:CD'
2.022782 Gtype b'+1:lemma:ph5'
2.013337 Temp b'-1:lemma:37'
1.955826 Med b'lemma:L'
1.955826 Med b'+1:lemma:broth'
1.954859 Temp b'lemma:37'
1.927649 O b'+1:lemma:chip-seq'
1.919892 Supp b'-1:lemma:with'
1.915338 O b'lemma:affyexp'
1.911245 Gtype b'lemma:pk4854'
1.909261 Phase b'-1:lemma:mid-log'
1.895692 Temp b'+1:lemma:\xc2\xb0c'
1.886863 O b'lemma:genotype/variation'
1.872821 OD b'-1:lemma:~'
1.872507 OD b'lemma:0.3'
1.858566 Med b'+1:lemma:supplement'
1.848644 Gtype b'lemma:ptac'
1.844359 O b'lemma:Lrp'
1.841048 Med b'+1:lemma:+'
1.831366 O b'lemma:culture'
1.830571 Gtype b'-1:postag:VBG'
1.825208 O b'lemma:-rrb-'
1.822383 Gtype b'lemma:deltaseqa'
1.822383 Gtype b'-1:lemma:old'
1.820507 Gtype b'-1:lemma:nsrr'
1.819979 Gtype b'+1:lemma:_'
1.784801 Med b'+1:lemma:g/l'
1.781733 Technique b'+1:lemma:rna-seq'
1.778548 Supp b'+1:lemma:Deficient'
1.771039 Gversion b'postag:CD'
1.755561 O b'lemma:chip-arca'
1.740739 Supp b'lemma:methanol'
1.728483 O b'lemma:ompr'
1.717002 Med b'-1:lemma:ml'
1.696842 OD b'lemma:phase'
1.693821 Supp b'lemma:Adenine'
1.677174 Air b'-1:postag::'
1.675361 Phase b'-1:lemma:until'
1.672716 Air b'-1:postag:CD'
1.672711 Med b'lemma:broth'
1.672711 Med b'-1:lemma:L'
1.672565 Technique b'-1:lemma:input'
1.666211 Technique b'-1:lemma:_'
1.663572 Air b'lemma:Anaerobic'
1.657183 Gtype b'-1:lemma:_'
1.650921 O b'lemma:Fur'
1.650554 Gtype b'-1:postag:VBP'
1.647569 Med b'-1:lemma:glucose'
1.646454 Gtype b'lemma:WT'
1.642228 Air b'lemma:anaerobically'
1.638505 Supp b'+1:lemma:hour'
Top negative:
0.003606 Phase b'-1:postag:TO'
0.003173 O b'+1:lemma:minute'
0.002853 OD b'postag:CC'
0.002630 Technique b'+1:lemma:-rrb-'
0.002520 Technique b'+1:postag:-RRB-'
0.002467 Med b'lemma:m9'
0.002337 Air b'+1:postag:-LRB-'
0.002264 Technique b'+1:lemma:Anaerobic'
0.002098 Phase b'postag:RB'
0.002086 Gtype b'+1:lemma:rna'
0.002053 O b'lemma:~'
0.001666 OD b'-1:postag:JJ'
0.001084 Gtype b'+1:lemma:chip'
0.001034 Air b'+1:lemma:-lrb-'
0.000728 OD b'-1:lemma:to'
0.000728 OD b'-1:postag:TO'
0.000711 O b'-1:lemma:at'
0.000709 O b'+1:lemma:_'
0.000560 Temp b'+1:postag:VB'
0.000555 Gtype b'lemma:lack'
0.000388 OD b'postag:VBD'
0.000335 Air b'-1:postag:RB'
0.000334 OD b'-1:postag:VBN'
0.000307 Air b'-1:lemma:95'
0.000306 Anti b'-1:lemma:Custom'
0.000275 O b'-1:postag:VBZ'
0.000204 Air b'-1:lemma:-lrb-'
0.000203 Air b'-1:postag:-LRB-'
0.000156 Temp b'postag:VB'
0.000139 Supp b'+1:lemma:dissolve'
0.000138 Gtype b'lemma:ptrc'
0.000138 Gtype b'-1:lemma:inducible'
0.000085 Med b'-1:lemma:complete'
0.000051 OD b'+1:lemma:0.35'
0.000048 Temp b'+1:lemma:-rrb-'
0.000043 Air b'-1:lemma:anaerobically'
0.000039 OD b'+1:postag:VBN'
0.000034 Temp b'+1:postag:-RRB-'
0.000030 Supp b'-1:postag:VBN'
0.000028 OD b'lemma:mg1655'
0.000023 OD b'+1:lemma:mg1655'
0.000022 OD b'lemma:k-12'
0.000019 Temp b'lemma:-lrb-'
0.000017 Temp b'postag:-LRB-'
0.000016 Air b'lemma:-rrb-'
0.000015 Air b'postag:-RRB-'
0.000007 Gtype b'lemma:inducible'
0.000007 Gtype b'-1:lemma:carrying'
0.000007 Gtype b'+1:lemma:ptrc'
0.000003 O b'lemma:4'
0.000001 O b'postag:VBZ'
0.000000 Med b'+1:lemma:,'
0.000000 Med b'+1:postag:,'
-0.000019 O b'-1:lemma:g/l'
-0.000078 O b'-1:lemma:iptg'
-0.000082 O b'+1:lemma:until'
-0.000090 Supp b'+1:postag:CD'
-0.000137 O b'+1:lemma:phase'
-0.000630 O b'+1:lemma:grow'
-0.000719 Air b'lemma:,'
-0.000719 Air b'postag:,'
-0.000881 Supp b'-1:postag:NNP'
-0.001151 O b'+1:lemma:95'
-0.001284 O b'lemma:glucose'
-0.001441 O b'lemma:anaerobic'
-0.002285 Supp b'-1:postag:NN'
-0.003211 Air b'-1:lemma:,'
-0.003211 Air b'-1:postag:,'
-0.003285 O b'+1:lemma:rep1'
-0.003344 O b'-1:lemma:n2'
-0.004884 Supp b'+1:postag:IN'
-0.005691 O b'lemma:grow'
-0.008542 O b'lemma:\xce\xb4fur'
-0.008810 O b'lemma:30'
-0.008856 O b'lemma:\xe2\x88\x86'
-0.016851 Gversion b'+1:postag:NN'
-0.017577 Agit b'postag:NN'
-0.019899 OD b'postag:JJ'
-0.021053 Supp b'lemma:and'
-0.035082 Med b'-1:postag:IN'
-0.037650 O b'+1:lemma:shake'
-0.040816 Technique b'-1:lemma::'
-0.042661 Air b'-1:postag:VBN'
-0.047406 O b'lemma:e.'
-0.050232 O b'+1:lemma:300'
-0.057684 Supp b'postag:CD'
-0.060532 Med b'+1:postag:IN'
-0.071550 O b'+1:lemma:5'
-0.074571 Supp b'+1:lemma:-rrb-'
-0.077631 O b'-1:postag:VBN'
-0.083084 OD b'+1:postag:CD'
-0.083227 Air b'postag:CD'
-0.086748 Supp b'+1:postag:-RRB-'
-0.087729 O b'-1:lemma:e.'
-0.089412 O b'-1:lemma:um'
-0.089412 O b'+1:lemma:paraquat'
-0.095031 O b'-1:lemma:,'
-0.095031 O b'-1:postag:,'
-0.097514 Anti b'+1:postag:JJ'
-0.097686 O b'-1:lemma:the'
-0.115298 OD b'+1:postag:-LRB-'
-0.115560 Phase b'-1:postag:NN'
-0.117367 Phase b'-1:postag:JJ'
-0.118533 Supp b'postag:CC'
-0.118866 Med b'-1:postag:NN'
-0.119788 O b'+1:postag:NNS'
-0.128143 O b'lemma:150'
-0.128143 O b'+1:lemma:mg/ml'
-0.128440 Gtype b'postag:CD'
-0.128454 O b'-1:lemma:rifampicin'
-0.130932 Phase b'-1:lemma:at'
-0.144251 Air b'-1:postag:CC'
-0.152959 O b'+1:lemma:c'
-0.153793 O b'+1:lemma:arginine'
-0.155635 O b'+1:lemma:-rrb-'
-0.155871 Phase b'+1:postag:NN'
-0.169334 O b'-1:lemma:1'
-0.178678 O b'-1:lemma:-lrb-'
-0.185252 O b'-1:lemma:cra'
-0.188794 Supp b'+1:postag:VBN'
-0.189587 Supp b'-1:lemma:%'
-0.197975 O b'+1:lemma:.'
-0.197975 O b'+1:postag:.'
-0.206342 O b'-1:postag:IN'
-0.218582 O b'-1:lemma:of'
-0.228664 O b'-1:lemma:0.2'
-0.243004 O b'lemma:phase'
-0.245652 Temp b'postag:JJ'
-0.248718 O b'-1:lemma:30'
-0.258083 Supp b'+1:lemma:rifampicin'
-0.265675 O b'-1:lemma:from'
-0.270538 Anti b'+1:lemma:anti-fur'
-0.275341 Med b'+1:postag:NN'
-0.286886 O b'lemma:20'
-0.287653 Med b'postag:CD'
-0.299457 pH b'postag:NN'
-0.299809 O b'+1:postag:IN'
-0.304068 O b'-1:postag:-LRB-'
-0.308718 O b'+1:lemma:%'
-0.337061 O b'lemma:media'
-0.343972 O b'+1:postag:-RRB-'
-0.375037 O b'-1:lemma:~'
-0.378855 O b'+1:postag:VBG'
-0.390573 O b'+1:lemma:0.3'
-0.442424 Temp b'+1:lemma:to'
-0.442424 Temp b'+1:postag:TO'
-0.442791 O b'lemma:dissolve'
-0.457125 Air b'-1:postag:JJ'
-0.473796 O b'-1:lemma:ml'
-0.487814 O b'-1:lemma:grow'
-0.504521 O b'-1:lemma:od600'
-0.510322 O b'lemma:anaerobically'
-0.574504 O b'lemma:0.3'
-0.603912 O b'+1:lemma:hour'
-0.607283 O b'lemma:od600'
-0.671305 O b'+1:lemma:+'
-0.673609 O b'lemma:fecl2'
-0.687238 O b'-1:lemma:sample'
-0.693028 OD b'+1:postag:NN'
-0.699278 Technique b'-1:postag::'
-0.701848 O b'+1:lemma:fecl2'
-0.704105 Phase b'postag:JJ'
-0.721360 O b'-1:lemma:37'
-0.751804 O b'lemma:2h'
-0.751804 O b'-1:lemma:additional'
-0.765268 O b'+1:lemma:cell'
-0.797811 O b'-1:postag:VBG'
-0.812938 OD b'lemma:-lrb-'
-0.817055 O b'lemma:37'
-0.849161 O b'lemma:0.1'
-0.857948 Temp b'postag:NN'
-0.858109 Supp b'+1:lemma:acetate'
-0.893857 O b'-1:lemma:rpob'
-0.908853 O b'lemma:0.2'
-0.910535 O b'lemma:of'
-0.917031 O b'+1:lemma:g/l'
-0.927240 O b'-1:lemma:dissolve'
-0.927240 O b'+1:lemma:methanol'
-0.952327 O b'-1:postag::'
-1.012901 O b'lemma:mid-log'
-1.041464 O b'-1:lemma:ompr'
-1.052741 O b'lemma:wt'
-1.080730 Anti b'postag:NNP'
-1.207147 O b'+1:lemma:supplement'
-1.210841 O b'lemma:methanol'
-1.241992 Air b'+1:postag:JJ'
-1.248085 O b'lemma:rifampicin'
-1.259918 O b'+1:lemma:at'
-1.297301 O b'-1:lemma:IP'
-1.445280 O b'-1:lemma:nsrr'
-1.572311 O b'+1:lemma:in'
-1.575804 O b'-1:lemma:co2'
-1.596289 OD b'postag:-LRB-'
-1.598275 Supp b'postag:JJ'
-1.805996 Air b'postag:NN'
-1.906139 O b'+1:lemma:2'
-2.059834 O b'+1:lemma:1'
-2.164534 O b'-1:lemma:2'
-3.758611 O b'-1:lemma::'
-3.808109 O b'-1:lemma:_'
********** TRAINING AND TESTING REPORT **********
Training file: training-data-set-70_v4.txt
best params:{'c1': 0.04724343003249949, 'c2': 0.01243677446159028}
best CV score:0.7976445146405444
model size: 0.08M
Flat F1: 0.7934791324630633
precision recall f1-score support
OD 1.000 0.405 0.577 37
pH 1.000 1.000 1.000 12
Technique 0.952 0.909 0.930 22
Med 0.891 0.860 0.875 57
Temp 0.818 1.000 0.900 18
Vess 0.000 0.000 0.000 0
Agit 0.000 0.000 0.000 0
Phase 1.000 0.947 0.973 19
Air 0.742 0.742 0.742 62
Anti 1.000 0.444 0.615 9
Strain 1.000 1.000 1.000 1
Gtype 0.862 0.764 0.810 106
Substrain 0.000 0.000 0.000 1
Supp 0.869 0.684 0.765 136
Gversion 0.000 0.000 0.000 0
avg / total 0.875 0.744 0.793 480
Top likely transitions:
Agit -> Agit 6.389375
Anti -> Anti 5.797544
OD -> OD 5.755859
Temp -> Temp 5.474910
Med -> Med 5.305285
Gversion -> Gversion 4.804763
O -> O 4.666202
Air -> Air 4.625279
Gtype -> Gtype 4.391065
Phase -> Phase 4.207878
Supp -> Supp 3.772231
Technique -> Technique 3.487681
pH -> pH 2.578623
O -> Supp 1.341155
Gtype -> Supp 1.313090
Substrain -> Gtype 1.142286
O -> Technique 1.070411
O -> Gtype 1.049693
O -> Anti 0.871683
Technique -> Air 0.727345
Gtype -> pH 0.559755
O -> Temp 0.443406
Air -> O 0.413184
Gtype -> Air 0.316166
O -> Gversion 0.270633
O -> Strain 0.150083
Med -> O 0.029148
O -> pH 0.001669
OD -> Temp -0.004690
pH -> O -0.027664
Anti -> O -0.053386
Air -> Gtype -0.064282
Gversion -> O -0.068988
OD -> Technique -0.076420
Phase -> Air -0.090168
O -> Med -0.097953
Phase -> O -0.101900
O -> Phase -0.121729
Anti -> Gtype -0.149802
Air -> Phase -0.152600
Anti -> Supp -0.166950
O -> OD -0.195576
Gversion -> Air -0.198819
Med -> Air -0.247104
Technique -> Supp -0.254151
Supp -> pH -0.311839
Supp -> Anti -0.312452
Anti -> OD -0.340468
Supp -> Phase -0.344456
Gtype -> Technique -0.348541
Top unlikely transitions:
Phase -> Air -0.090168
O -> Med -0.097953
Phase -> O -0.101900
O -> Phase -0.121729
Anti -> Gtype -0.149802
Air -> Phase -0.152600
Anti -> Supp -0.166950
O -> OD -0.195576
Gversion -> Air -0.198819
Med -> Air -0.247104
Technique -> Supp -0.254151
Supp -> pH -0.311839
Supp -> Anti -0.312452
Anti -> OD -0.340468
Supp -> Phase -0.344456
Gtype -> Technique -0.348541
Phase -> Technique -0.371932
Gtype -> Gversion -0.440310
Air -> Temp -0.442388
Gtype -> Phase -0.446343
Supp -> Temp -0.482112
OD -> Supp -0.494819
Supp -> O -0.518055
Technique -> O -0.541070
Gtype -> O -0.554034
Supp -> Technique -0.694840
Supp -> OD -0.711794
Supp -> Gversion -0.733427
OD -> Air -0.787405
OD -> Gtype -0.827938
Supp -> Gtype -0.848914
Gtype -> Anti -0.850239
Temp -> Med -0.927802
Gtype -> Med -0.940529
OD -> Med -0.954731
Air -> Med -1.010714
OD -> O -1.020865
O -> Air -1.023312
Supp -> Air -1.036972
Air -> Supp -1.040140
Technique -> pH -1.071992
Agit -> O -1.092416
Technique -> OD -1.136546
Technique -> Gtype -1.152303
Air -> OD -1.203336
Gtype -> OD -1.470509
Substrain -> O -1.705609
Supp -> Med -1.924115
Med -> Supp -2.263703
Phase -> OD -2.559981
Top positive:
8.778667 O b'lemma:_'
7.894046 Phase b'lemma:stationary'
7.688886 Supp b'lemma:Iron'
7.227846 O b'lemma:1'
6.518255 Air b'lemma:anaerobic'
6.510310 Air b'lemma:aerobic'
6.504238 Phase b'lemma:mid-log'
6.494190 Technique b'lemma:ChIP-exo'
6.481618 O b'lemma:rpob'
6.445611 Strain b'lemma:k-12'
6.104157 O b'lemma:2'
6.023116 Technique b'lemma:chipseq'
5.944249 Air b'-1:lemma:ChIP-Seq'
5.823850 Supp b'lemma:pq'
5.723339 O b'lemma:3'
5.687179 O b'-1:lemma:tag'
5.451451 Gversion b'lemma:asm584v2'
5.250448 Substrain b'lemma:mg1655'
5.231044 O b'lemma:rep1'
5.209409 Gtype b'lemma:\xce\xb4cra'
5.200206 Supp b'lemma:nitrate'
5.178620 O b'lemma:rep2'
5.178433 O b'lemma:Cra'
5.173563 Med b'lemma:MOPS'
5.068820 Gtype b'lemma:flag-tag'
5.068820 Gtype b'-1:lemma:c-terminal'
5.040185 Supp b'+1:lemma:\xc2\xb5m'
5.017969 O b'lemma:rep3'
4.971785 Air b'lemma:Aerobic'
4.954714 Supp b'lemma:glucose'
4.889061 Gtype b'lemma:arca8myc'
4.884882 O b'postag:IN'
4.832496 Supp b'lemma:nh4cl'
4.697100 O b'-1:lemma:ChIP-exo'
4.685588 Gtype b'lemma:delta-arca'
4.678766 Med b'lemma:LB'
4.646146 O b'lemma:b'
4.589396 Gtype b'lemma:wt'
4.555367 O b'lemma:\xcf\x8332'
4.535944 Gtype b'-1:lemma:\xe2\x88\x86'
4.520189 Gtype b'lemma:fnr8myc'
4.461759 Med b'lemma:lb'
4.436556 Technique b'lemma:rna-seq'
4.355492 O b'postag::'
4.344693 O b'lemma:a'
4.294247 O b'-1:lemma:Aerobic'
4.275618 Supp b'lemma:Fe'
4.228638 Anti b'lemma:none'
4.228158 O b'-1:lemma:0.3-0.35'
4.206556 Supp b'lemma:acetate'
4.187817 Supp b'lemma:no3'
4.123083 Technique b'lemma:chip-seq'
4.115374 Technique b'lemma:rnaseq'
4.092073 O b'lemma:Custom'
4.090797 Gtype b'lemma:delta-fnr'
4.086877 Anti b'lemma:seqa'
4.043157 Supp b'-1:lemma:Cra'
4.011760 O b'lemma:CEL'
4.000976 Supp b'lemma:rifampicin'
3.995000 OD b'lemma:od450'
3.983638 Gversion b'lemma:nc'
3.942031 O b'+1:postag:RB'
3.937436 O b'lemma:.'
3.937436 O b'postag:.'
3.911694 O b'+1:lemma:od600'
3.747230 Gversion b'lemma:chip-seq'
3.735836 Gtype b'lemma:type'
3.708583 O b'-1:lemma:anaerobic'
3.691098 Gtype b'+1:lemma:type'
3.680809 Supp b'lemma:dpd'
3.646664 Med b'+1:lemma:0.4'
3.593949 O b'-1:lemma:glucose'
3.588513 Gtype b'lemma:\xe2\x88\x86'
3.552980 Technique b'lemma:ChIP-Seq'
3.547388 OD b'lemma:od600'
3.533518 Gtype b'+1:lemma:with'
3.504439 O b'+1:lemma:o.d.'
3.492405 O b'lemma:-'
3.484369 Gtype b'lemma:\xce\xb4fur'
3.480535 Supp b'+1:lemma:1'
3.463027 Supp b'lemma:arginine'
3.457950 O b'-1:lemma:lb'
3.432558 Gtype b'lemma:nsrr'
3.411785 Vess b'lemma:flask'
3.411785 Vess b'-1:lemma:warm'
3.392532 O b'lemma:with'
3.384479 pH b'lemma:ph5'
3.384479 pH b'+1:lemma:.5'
3.360996 Air b'postag:RB'
3.348572 O b'lemma:for'
3.346364 Gtype b'lemma:wild-type'
3.342085 Strain b'+1:lemma:substr'
3.336804 O b'lemma:oxyr'
3.311067 Supp b'lemma:fructose'
3.305851 Anti b'lemma:anti-myc'
3.281892 Supp b'+1:lemma:Deficient'
3.280890 Med b'lemma:m63'
3.275989 Gtype b'lemma:\xce\xb4soxs'
3.263146 O b'lemma:or'
3.240421 O b'postag:VBN'
3.225118 O b'lemma:s'
3.218060 Supp b'+1:lemma:2'
3.170999 O b'lemma:ompr'
3.157791 O b'-1:lemma:type'
3.150274 Gversion b'-1:lemma:nc'
3.144459 O b'lemma:affyexp'
3.098092 Temp b'-1:lemma:37'
3.074698 Anti b'+1:lemma:antibody'
3.074328 OD b'+1:lemma:stationary'
3.057680 Gtype b'lemma:WT'
3.055594 Technique b'-1:lemma:IP'
3.047772 Supp b'-1:lemma:+'
3.030792 Supp b'lemma:nacl'
3.021146 Gversion b'lemma:u00096'
3.021146 Gversion b'+1:lemma:.2'
3.016112 Air b'lemma:Anaerobic'
3.015642 O b'lemma:chip-arca'
2.992114 Gtype b'lemma:\xce\xb4ompr'
2.991588 Substrain b'+1:lemma:phtpg'
2.962136 Gversion b'lemma:.2'
2.962136 Gversion b'-1:lemma:u00096'
2.943798 Supp b'+1:lemma:_'
2.929777 O b'+1:lemma:anti-fur'
2.906800 Temp b'-1:lemma:43'
2.894662 Gversion b'lemma:000913'
2.885668 Gtype b'-1:lemma:ptac'
2.873518 O b'lemma:2-3'
2.848786 Air b'lemma:anaerobically'
2.846499 Med b'+1:lemma:2.0'
2.846396 Supp b'lemma:Leu'
2.832090 O b'lemma:argr'
2.824032 Gtype b'+1:lemma:flagtag'
2.803595 Air b'lemma:anerobically'
2.790970 Temp b'lemma:43'
2.789734 O b'lemma:soxs'
2.789734 O b'lemma:soxr'
2.761406 Anti b'lemma:anti-rpos'
2.759003 Gtype b'lemma:deltaseqa'
2.759003 Gtype b'-1:lemma:old'
2.758592 O b'+1:lemma:chip-seq'
2.753991 O b'lemma:purr'
2.739456 O b'lemma:Lrp'
2.739128 Gtype b'lemma:pk4854'
2.724879 Temp b'-1:lemma:sample'
2.710711 O b'-1:lemma:stpa'
2.707614 O b'-1:lemma:dpd'
2.693567 O b'lemma:chip'
2.680179 Temp b'lemma:\xc2\xb0c'
2.675708 O b'lemma:at'
2.674777 Supp b'lemma:Adenine'
2.654457 Med b'-1:lemma:ml'
2.621739 Supp b'+1:lemma:hour'
2.617235 Technique b'-1:lemma:chip-exo'
2.613832 Med b'+1:lemma:minimal'
2.609168 O b'lemma:Fur'
2.603191 Gtype b'lemma:ptac'
2.602754 Temp b'-1:lemma:\xcf\x8332'
2.600527 Technique b'-1:lemma:input'
2.586051 Gtype b'-1:lemma:rpob'
2.582507 Supp b'lemma:iptg'
2.575439 pH b'lemma:.5'
2.575439 pH b'-1:lemma:ph5'
2.567178 Phase b'+1:lemma:for'
2.561161 Temp b'lemma:37'
2.554310 Phase b'-1:lemma:mid-log'
2.520491 Gtype b'lemma:\xce\xb4oxyr'
2.510164 O b'postag:SYM'
2.501887 Gtype b'lemma:\xce\xb4soxr'
2.469798 O b'-1:lemma:l1'
2.453386 Phase b'-1:lemma:until'
2.441656 O b'+1:lemma:43'
2.410885 O b'+1:lemma:mid-log'
2.403011 Gtype b'+1:lemma:ph5'
2.401711 Med b'lemma:glucose'
2.400503 Med b'lemma:L'
2.400503 Med b'+1:lemma:broth'
2.386245 O b'+1:lemma:pq'
2.371627 O b'postag:VBG'
2.358742 O b'lemma:chip-fnr'
2.353099 Gtype b'lemma:dfnr'
2.338808 Supp b'+1:lemma:respiratory'
2.338302 O b'+1:lemma:sparging'
2.329852 Gtype b'+1:lemma:aerobic'
2.319775 O b'postag:DT'
2.307514 Supp b'lemma:0.2'
2.298843 Med b'+1:lemma:+'
2.298674 Supp b'lemma:methanol'
2.262217 Gversion b'lemma:_'
2.247831 Gtype b'+1:lemma:pq'
2.246181 O b'lemma:genotype/variation'
2.244933 Air b'-1:lemma:-'
2.243583 Gtype b'-1:lemma:nsrr'
2.241509 Technique b'+1:lemma:chip-exo'
2.238931 Supp b'+1:lemma:and'
2.229747 OD b'lemma:0.3'
2.227356 O b'lemma:pt7'
2.218343 O b'-1:lemma:min'
2.209534 OD b'-1:lemma:~'
2.191059 O b'postag:CC'
2.183306 O b'-1:lemma:media'
Top negative:
-0.007960 O b'+1:lemma:25'
-0.011560 Gtype b'-1:lemma:,'
-0.011560 Gtype b'-1:postag:,'
-0.012403 Air b'-1:lemma:anaerobically'
-0.013219 O b'lemma:pahse'
-0.013507 Gversion b'-1:postag:NN'
-0.013699 O b'+1:lemma:medium'
-0.014516 Med b'-1:postag:CD'
-0.014698 Gtype b'-1:lemma:mg1655'
-0.016645 OD b'+1:lemma:~'
-0.017116 Supp b'+1:lemma:glucose'
-0.018290 OD b'+1:lemma:0.4'
-0.019265 O b'lemma:25'
-0.020700 Med b'+1:postag:NNS'
-0.020972 O b'+1:lemma:0.4'
-0.022100 OD b'+1:postag:-LRB-'
-0.022143 O b'+1:lemma:strain'
-0.024088 Supp b'-1:postag:VBG'
-0.038792 O b'+1:postag:CD'
-0.039859 O b'-1:postag:IN'
-0.040290 O b'+1:lemma:k-12'
-0.041101 O b'lemma:nacl'
-0.042472 Gtype b'+1:lemma:cra'
-0.052362 O b'+1:lemma:grow'
-0.054063 Anti b'-1:postag:NN'
-0.057006 O b'-1:lemma:20'
-0.057134 O b'-1:lemma:mm'
-0.062775 Phase b'-1:postag:NN'
-0.064327 O b'-1:postag:VBN'
-0.064682 O b'postag:RB'
-0.066584 Supp b'-1:postag:NN'
-0.069958 Gtype b'postag:CD'
-0.071415 O b'+1:lemma:95'
-0.073208 Phase b'lemma:pahse'
-0.078903 Gtype b'-1:postag:NN'
-0.080786 O b'+1:lemma:5'
-0.083451 Temp b'-1:lemma:\xc2\xb0c'
-0.084429 O b'+1:lemma:antibody'
-0.093835 O b'-1:lemma:fresh'
-0.095847 Supp b'+1:postag:IN'
-0.103631 OD b'postag:JJ'
-0.105270 O b'+1:lemma:o2'
-0.106875 O b'+1:lemma:mm'
-0.117705 O b'+1:lemma:dissolve'
-0.119607 Med b'+1:postag:NN'
-0.123002 OD b'+1:postag:CD'
-0.127620 O b'-1:lemma:g/l'
-0.133716 Gtype b'-1:postag:CD'
-0.140619 O b'-1:lemma:-lrb-'
-0.143859 O b'lemma:o2'
-0.144789 O b'-1:lemma:o2'
-0.146704 O b'-1:lemma:30'
-0.147636 Temp b'postag:JJ'
-0.152183 Gtype b'+1:lemma:\xe2\x88\x86'
-0.153487 Med b'+1:postag:CC'
-0.156696 O b'lemma:mg1655'
-0.156753 O b'lemma:e.'
-0.159541 Technique b'-1:lemma::'
-0.162599 O b'-1:lemma:minimal'
-0.167611 O b'-1:lemma:with'
-0.176978 O b'-1:postag:-LRB-'
-0.179005 O b'lemma:m63'
-0.180170 O b'lemma:medium'
-0.183738 O b'+1:lemma:Aerobic'
-0.190885 O b'lemma:n2'
-0.191225 Supp b'-1:postag:NNP'
-0.198786 O b'-1:lemma:n2'
-0.199226 Supp b'-1:lemma:and'
-0.199384 O b'lemma:\xc2\xb0c'
-0.202752 O b'-1:lemma:of'
-0.208526 O b'+1:lemma:-rrb-'
-0.210040 O b'-1:lemma:e.'
-0.219231 O b'+1:lemma:shake'
-0.219460 Gtype b'-1:postag:DT'
-0.237900 O b'-1:lemma:0.1'
-0.241643 O b'lemma:lb'
-0.242224 Supp b'lemma:and'
-0.245820 O b'lemma:co2'
-0.252281 Temp b'-1:postag:IN'
-0.261850 OD b'+1:postag:NN'
-0.266916 Supp b'+1:postag:VBN'
-0.267689 O b'+1:lemma:until'
-0.271480 O b'-1:lemma:\xe2\x88\x86'
-0.277039 O b'+1:lemma:minimal'
-0.277788 O b'lemma:grow'
-0.285871 O b'+1:postag:IN'
-0.296129 Med b'postag:-LRB-'
-0.302011 O b'-1:lemma:um'
-0.302011 O b'+1:lemma:paraquat'
-0.304578 O b'+1:postag:-RRB-'
-0.307866 Med b'lemma:-lrb-'
-0.309056 O b'+1:lemma:.'
-0.309056 O b'+1:postag:.'
-0.312126 Air b'-1:lemma:or'
-0.317647 O b'lemma:minimal'
-0.319138 Gtype b'-1:postag:SYM'
-0.326202 O b'-1:lemma:1'
-0.328780 Phase b'-1:lemma:at'
-0.346851 O b'+1:lemma:300'
-0.351977 Med b'-1:postag:NN'
-0.355707 O b'lemma:30'
-0.365992 Gversion b'+1:postag:NN'
-0.367337 O b'-1:lemma:grow'
-0.387748 O b'lemma:k-12'
-0.389573 O b'+1:lemma:cell'
-0.397777 O b'-1:lemma:from'
-0.400558 O b'-1:lemma:mid-log'
-0.403379 O b'+1:lemma:phase'
-0.433545 Supp b'+1:lemma:rifampicin'
-0.436853 O b'+1:lemma:or'
-0.441744 Med b'postag:CD'
-0.446245 Med b'+1:postag:IN'
-0.451884 Gtype b'lemma:delta'
-0.460983 Anti b'+1:postag:JJ'
-0.469979 O b'+1:lemma:_'
-0.472319 O b'-1:lemma:rifampicin'
-0.497593 O b'-1:lemma:cra'
-0.503197 O b'-1:lemma:od600'
-0.504727 Supp b'postag:CC'
-0.508290 pH b'postag:NN'
-0.516046 O b'+1:lemma:%'
-0.518365 Technique b'-1:postag::'
-0.523304 OD b'+1:lemma:and'
-0.525519 OD b'+1:postag:CC'
-0.558490 O b'+1:postag:NNS'
-0.586906 O b'-1:lemma:co2'
-0.603821 O b'lemma:150'
-0.603821 O b'+1:lemma:mg/ml'
-0.605931 O b'+1:lemma:arginine'
-0.606479 O b'lemma:phase'
-0.611575 O b'lemma:od600'
-0.651162 O b'+1:lemma:rep1'
-0.653345 O b'lemma:dissolve'
-0.665212 Med b'-1:postag:IN'
-0.669800 OD b'lemma:-lrb-'
-0.672263 O b'-1:lemma:0.2'
-0.687604 O b'lemma:\xe2\x88\x86'
-0.707413 Temp b'+1:lemma:to'
-0.707413 Temp b'+1:postag:TO'
-0.709054 Temp b'postag:NN'
-0.717292 O b'lemma:mid-log'
-0.727885 O b'-1:lemma:until'
-0.733043 O b'lemma:media'
-0.733111 O b'+1:lemma:c'
-0.747011 OD b'postag:-LRB-'
-0.750366 O b'+1:lemma:0.3'
-0.761257 Anti b'+1:lemma:anti-fur'
-0.768890 Phase b'-1:postag:JJ'
-0.791918 O b'+1:lemma:\xc2\xb0c'
-0.792189 O b'lemma:anaerobically'
-0.829072 O b'lemma:0.2'
-0.870965 O b'lemma:0.3'
-0.872689 O b'lemma:purify'
-0.898705 O b'-1:postag:VBG'
-0.916416 O b'+1:lemma:fecl2'
-0.929519 O b'-1:lemma:~'
-0.964077 Air b'postag:NN'
-0.999189 O b'lemma:20'
-1.006423 O b'lemma:fecl2'
-1.015503 O b'lemma:dpd'
-1.039211 O b'lemma:anaerobic'
-1.067901 O b'-1:lemma:dissolve'
-1.067901 O b'+1:lemma:methanol'
-1.079142 Supp b'+1:lemma:acetate'
-1.086390 O b'lemma:37'
-1.090519 O b'lemma:\xce\xb4fur'
-1.110265 O b'-1:postag::'
-1.136252 O b'-1:lemma:ml'
-1.138020 O b'lemma:2h'
-1.138020 O b'-1:lemma:additional'
-1.184350 O b'lemma:of'
-1.210896 Supp b'-1:lemma:%'
-1.216753 O b'-1:lemma:37'
-1.220049 O b'+1:lemma:g/l'
-1.245093 O b'-1:lemma:sample'
-1.280180 Air b'-1:postag:JJ'
-1.282705 O b'lemma:0.1'
-1.293206 O b'+1:lemma:supplement'
-1.340548 Air b'+1:postag:JJ'
-1.397536 O b'+1:lemma:at'
-1.470536 O b'+1:lemma:in'
-1.532979 O b'+1:postag:VBG'
-1.548239 O b'-1:lemma:rpob'
-1.647386 O b'-1:lemma:ompr'
-1.665792 O b'postag:VBP'
-1.714606 O b'lemma:wt'
-1.723995 O b'-1:lemma:nsrr'
-1.770674 Supp b'postag:JJ'
-1.852907 O b'lemma:rifampicin'
-1.859721 Anti b'postag:NNP'
-1.869393 O b'+1:lemma:+'
-1.873494 O b'-1:lemma:IP'
-2.065639 O b'lemma:methanol'
-2.192016 O b'-1:lemma:2'
-2.353663 O b'+1:lemma:2'
-2.354781 Phase b'postag:JJ'
-2.458047 O b'+1:lemma:hour'
-3.007939 O b'+1:lemma:1'
-3.831961 O b'-1:lemma:_'
-4.149098 O b'-1:lemma::'
********** TRAINING AND TESTING REPORT **********
Training file: training-data-set-70_v4.txt
best params:{'c1': 0.01750591736573677, 'c2': 0.02307723566043045}
best CV score:0.7965019925648547
model size: 0.10M
Flat F1: 0.7968324300802168
precision recall f1-score support
OD 1.000 0.405 0.577 37
pH 1.000 1.000 1.000 12
Technique 0.952 0.909 0.930 22
Med 0.897 0.912 0.904 57
Temp 0.818 1.000 0.900 18
Vess 0.000 0.000 0.000 0
Agit 0.000 0.000 0.000 0
Phase 1.000 0.947 0.973 19
Air 0.754 0.742 0.748 62
Anti 1.000 0.667 0.800 9
Strain 1.000 1.000 1.000 1
Gtype 0.862 0.764 0.810 106
Substrain 0.000 0.000 0.000 1
Supp 0.865 0.662 0.750 136
Gversion 0.000 0.000 0.000 0
avg / total 0.876 0.748 0.797 480
Top likely transitions:
OD -> OD 5.786764
Agit -> Agit 5.337876
Anti -> Anti 5.174874
Temp -> Temp 5.088685
Med -> Med 5.002944
Air -> Air 4.877267
O -> O 4.715817
Gversion -> Gversion 4.473906
Phase -> Phase 4.357763
Gtype -> Gtype 4.242273
Supp -> Supp 4.009897
Technique -> Technique 3.021233
pH -> pH 2.548336
Substrain -> Gtype 1.971828
O -> Supp 1.641966
Gtype -> Supp 1.637091
Air -> O 1.145928
O -> Gtype 1.141727
Technique -> Air 1.068808
Gtype -> pH 0.990954
O -> Technique 0.980217
O -> Temp 0.771371
O -> Anti 0.605031
Gtype -> Air 0.434959
Med -> O 0.223888
O -> Strain 0.200811
O -> pH 0.175383
O -> Gversion 0.140227
OD -> Phase 0.052262
O -> Vess 0.045108
Temp -> O 0.034423
Phase -> O 0.018387
Strain -> O -0.000112
Anti -> pH -0.000851
Strain -> OD -0.002202
O -> Substrain -0.003970
Technique -> Phase -0.027678
Med -> pH -0.028490
Anti -> Phase -0.030227
Gtype -> Strain -0.031833
pH -> OD -0.041636
Air -> Anti -0.042531
OD -> pH -0.045135
pH -> Air -0.051415
Anti -> O -0.054882
Med -> OD -0.055592
O -> Med -0.064478
Med -> Temp -0.070415
Phase -> Air -0.071257
Supp -> Agit -0.072692
Top unlikely transitions:
O -> Agit -0.317813
Gversion -> Gtype -0.346160
Anti -> Gtype -0.379001
Gversion -> Supp -0.395995
Technique -> O -0.407315
Technique -> Gversion -0.411287
Air -> Gtype -0.414228
Gversion -> Technique -0.423101
Supp -> pH -0.449062
Air -> Temp -0.454318
OD -> Temp -0.462022
OD -> Technique -0.528407
Gtype -> Phase -0.537062
OD -> Anti -0.541206
Gversion -> Air -0.555674
Supp -> Temp -0.581411
Supp -> Phase -0.581776
Technique -> Supp -0.590399
Gtype -> Technique -0.620634
OD -> Supp -0.644073
Med -> Air -0.679744
Temp -> Med -0.687449
OD -> Med -0.717084
Anti -> OD -0.770834
Supp -> Anti -0.804922
Air -> Supp -0.806945
Gtype -> Gversion -0.811241
Supp -> OD -0.862585
Supp -> Gversion -0.867304
Phase -> Technique -0.873833
Agit -> O -0.910378
Air -> Med -0.936107
Gtype -> Med -0.971156
Supp -> Technique -0.979631
Supp -> Gtype -0.998503
OD -> Gtype -1.001151
Supp -> Air -1.002273
Gtype -> Anti -1.098941
OD -> Air -1.189877
O -> Air -1.194520
OD -> O -1.238899
Air -> OD -1.240827
Gtype -> OD -1.288759
Substrain -> O -1.351729
Technique -> pH -1.436208
Technique -> Gtype -1.565301
Supp -> Med -1.571460
Technique -> OD -1.655463
Med -> Supp -1.791934
Phase -> OD -2.395487
Top positive:
7.777505 O b'lemma:_'
6.605877 O b'lemma:1'
5.933184 Phase b'lemma:stationary'
5.921751 Air b'lemma:anaerobic'
5.663542 Supp b'lemma:Iron'
5.628841 O b'lemma:2'
5.570604 Strain b'lemma:k-12'
5.357728 Air b'lemma:aerobic'
5.308359 Technique b'lemma:chipseq'
5.243195 Technique b'lemma:ChIP-exo'
5.146328 O b'lemma:3'
5.113544 O b'lemma:rpob'
4.972436 Supp b'lemma:pq'
4.736752 O b'-1:lemma:tag'
4.683084 Gtype b'lemma:\xce\xb4cra'
4.682011 Phase b'lemma:mid-log'
4.676054 Gversion b'lemma:asm584v2'
4.661754 Supp b'lemma:nh4cl'
4.645082 Gtype b'lemma:flag-tag'
4.645082 Gtype b'-1:lemma:c-terminal'
4.640572 O b'lemma:rep1'
4.613649 O b'lemma:Cra'
4.546591 Gtype b'lemma:arca8myc'
4.537870 O b'postag:IN'
4.531129 Supp b'lemma:glucose'
4.528260 Air b'-1:lemma:ChIP-Seq'
4.460701 O b'lemma:rep2'
4.331423 Med b'lemma:MOPS'
4.320826 Gtype b'lemma:wt'
4.314650 Supp b'lemma:nitrate'
4.296372 O b'lemma:rep3'
4.285293 Substrain b'lemma:mg1655'
4.240354 Supp b'lemma:acetate'
4.111263 Med b'lemma:lb'
4.087796 Gtype b'lemma:delta-arca'
4.036491 O b'+1:postag:RB'
4.022073 Supp b'+1:lemma:\xc2\xb5m'
4.010145 O b'lemma:b'
4.010092 Supp b'lemma:no3'
3.996538 Med b'lemma:LB'
3.996393 Gtype b'-1:lemma:\xe2\x88\x86'
3.982442 Technique b'lemma:rna-seq'
3.957894 Air b'lemma:Aerobic'
3.935017 O b'lemma:a'
3.803123 OD b'lemma:od600'
3.801977 Anti b'lemma:none'
3.786775 Gtype b'lemma:fnr8myc'
3.769938 O b'lemma:.'
3.769938 O b'postag:.'
3.769927 Supp b'lemma:Fe'
3.764066 Technique b'lemma:chip-seq'
3.735366 O b'postag::'
3.685306 O b'lemma:CEL'
3.645917 O b'lemma:\xcf\x8332'
3.642265 O b'-1:lemma:Aerobic'
3.634484 Anti b'lemma:seqa'
3.544891 Technique b'lemma:rnaseq'
3.525311 Supp b'+1:lemma:Deficient'
3.521610 OD b'lemma:od450'
3.519361 O b'-1:lemma:ChIP-exo'
3.516252 O b'lemma:Custom'
3.464219 O b'postag:VBN'
3.463491 Supp b'+1:lemma:1'
3.450572 Supp b'lemma:rifampicin'
3.427503 Med b'+1:lemma:0.4'
3.379916 Gtype b'lemma:delta-fnr'
3.378105 Supp b'-1:lemma:Cra'
3.351155 Gtype b'lemma:\xe2\x88\x86'
3.343450 Gversion b'lemma:chip-seq'
3.314924 Supp b'lemma:fructose'
3.282214 Supp b'lemma:dpd'
3.262950 Gtype b'+1:lemma:with'
3.253726 O b'lemma:-'
3.214173 O b'+1:lemma:od600'
3.187644 O b'-1:lemma:0.3-0.35'
3.182055 Gtype b'lemma:type'
3.172972 Supp b'lemma:arginine'
3.120594 Gtype b'lemma:\xce\xb4fur'
3.117794 Gversion b'lemma:nc'
3.109528 O b'-1:lemma:glucose'
3.081979 O b'+1:lemma:anti-fur'
3.077641 Gtype b'+1:lemma:type'
3.064731 Supp b'+1:lemma:2'
3.053268 Vess b'lemma:flask'
3.053268 Vess b'-1:lemma:warm'
3.046425 O b'-1:lemma:anaerobic'
3.038756 pH b'lemma:ph5'
3.038756 pH b'+1:lemma:.5'
3.027884 Med b'lemma:m63'
3.018439 Gtype b'lemma:wild-type'
3.002251 O b'lemma:chip-arca'
3.002149 Anti b'lemma:anti-myc'
3.001018 Gtype b'lemma:nsrr'
3.000941 O b'lemma:affyexp'
3.000850 Air b'postag:RB'
2.987234 O b'-1:lemma:lb'
2.981372 Strain b'+1:lemma:substr'
2.977968 O b'lemma:with'
2.970913 O b'lemma:s'
2.954955 O b'lemma:oxyr'
2.919696 Technique b'lemma:ChIP-Seq'
2.911598 Gtype b'lemma:WT'
2.907151 Temp b'lemma:\xc2\xb0c'
2.882096 Gtype b'lemma:\xce\xb4soxs'
2.828303 O b'-1:lemma:stpa'
2.815555 O b'lemma:argr'
2.805344 Med b'+1:lemma:minimal'
2.776817 Gversion b'lemma:.2'
2.776817 Gversion b'-1:lemma:u00096'
2.765075 O b'lemma:at'
2.753642 Supp b'-1:lemma:+'
2.743827 Med b'+1:lemma:2.0'
2.731390 O b'lemma:ompr'
2.715846 O b'+1:lemma:pq'
2.713993 Gversion b'lemma:u00096'
2.713993 Gversion b'+1:lemma:.2'
2.703704 Air b'lemma:Anaerobic'
2.702255 Gtype b'lemma:\xce\xb4ompr'
2.686740 Supp b'+1:lemma:_'
2.677806 Gtype b'-1:lemma:ptac'
2.662879 Phase b'+1:lemma:for'
2.660845 O b'lemma:purr'
2.633400 Gversion b'-1:lemma:nc'
2.610705 Med b'-1:lemma:ml'
2.610364 Supp b'lemma:nacl'
2.598932 Technique b'-1:lemma:IP'
2.587524 O b'lemma:or'
2.572768 Supp b'lemma:Leu'
2.557048 Gversion b'lemma:000913'
2.535138 Gtype b'+1:lemma:flagtag'
2.529124 O b'postag:CC'
2.528151 Gtype b'+1:lemma:pq'
2.519454 OD b'+1:lemma:stationary'
2.517078 Gtype b'+1:lemma:aerobic'
2.508910 O b'lemma:Lrp'
2.508428 Gtype b'-1:lemma:rpob'
2.502749 Temp b'lemma:43'
2.501837 Air b'lemma:anaerobically'
2.500161 Gtype b'lemma:ptac'
2.489806 O b'+1:lemma:o.d.'
2.489002 O b'-1:lemma:type'
2.486890 Gtype b'lemma:pk4854'
2.485914 O b'lemma:for'
2.480050 O b'postag:SYM'
2.476780 Supp b'+1:lemma:hour'
2.458558 Air b'-1:lemma:-'
2.455765 Anti b'lemma:anti-rpos'
2.449385 Supp b'lemma:Adenine'
2.445239 O b'lemma:Fur'
2.444393 O b'lemma:soxs'
2.444393 O b'lemma:soxr'
2.434249 Temp b'-1:lemma:37'
2.428845 Gtype b'lemma:deltaseqa'
2.428845 Gtype b'-1:lemma:old'
2.414384 Anti b'+1:lemma:antibody'
2.405965 O b'lemma:2-3'
2.403476 Temp b'-1:lemma:43'
2.394168 Air b'lemma:anerobically'
2.391084 Substrain b'+1:lemma:phtpg'
2.390362 O b'+1:lemma:chip-seq'
2.364093 O b'lemma:chip'
2.344688 pH b'+1:postag:CD'
2.333775 Med b'postag:NNP'
2.308157 Supp b'lemma:iptg'
2.301961 O b'+1:lemma:43'
2.296886 Gtype b'lemma:\xce\xb4oxyr'
2.293633 O b'lemma:chip-fnr'
2.288275 Med b'+1:lemma:+'
2.287838 Technique b'-1:lemma:input'
2.284745 Gtype b'+1:lemma:ph5'
2.282924 O b'+1:lemma:sparging'
2.273731 Technique b'+1:lemma:chip-exo'
2.256890 Gtype b'lemma:\xce\xb4soxr'
2.248969 O b'lemma:pt7'
2.247412 Gtype b'lemma:dfnr'
2.234348 Technique b'-1:lemma:chip-exo'
2.225915 O b'+1:lemma:mid-log'
2.218353 Supp b'+1:lemma:deficient'
2.214682 Phase b'lemma:phase'
2.209324 Phase b'-1:lemma:until'
2.209232 Supp b'lemma:methanol'
2.187036 Technique b'postag:NNP'
2.159413 Temp b'-1:lemma:sample'
2.151326 Gtype b'-1:lemma:nsrr'
2.149028 Supp b'lemma:20'
2.143459 pH b'lemma:.5'
2.143459 pH b'-1:lemma:ph5'
2.130382 Supp b'-1:lemma:with'
2.124595 Phase b'-1:lemma:mid-log'
2.122806 Gversion b'+1:lemma:000913'
2.119158 Temp b'+1:lemma:\xc2\xb0c'
2.116827 O b'lemma:genotype/variation'
2.111971 Gtype b'-1:lemma:from'
2.099395 Gversion b'lemma:_'
2.093842 O b'postag:VBG'
2.087024 Gtype b'-1:lemma::'
2.084864 Med b'+1:lemma:-lrb-'
2.083399 Med b'lemma:broth'
2.083399 Med b'-1:lemma:L'
2.080517 Med b'lemma:L'
Top negative:
-0.116698 Gtype b'-1:postag:NNP'
-0.117399 Med b'-1:postag:NN'
-0.121920 Phase b'-1:postag:NN'
-0.126158 O b'-1:lemma:delta'
-0.126979 O b'lemma:medium'
-0.136307 O b'lemma:10'
-0.138994 O b'+1:lemma:2.0'
-0.140939 O b'-1:postag:IN'
-0.146352 OD b'+1:lemma:in'
-0.147540 O b'-1:lemma:iptg'
-0.148360 Gtype b'lemma:ompr'
-0.148713 Supp b'+1:lemma:glucose'
-0.150545 Gtype b'-1:lemma:,'
-0.150545 Gtype b'-1:postag:,'
-0.152555 O b'+1:lemma:mm'
-0.154518 O b'+1:lemma:5'
-0.161052 Air b'postag:CD'
-0.161490 O b'-1:lemma:purify'
-0.162601 O b'+1:lemma:strain'
-0.164231 O b'-1:lemma:with'
-0.165378 O b'-1:lemma:minimal'
-0.167055 Air b'-1:lemma:or'
-0.177695 O b'+1:lemma:0.4'
-0.179481 Med b'+1:postag:CC'
-0.192792 Gtype b'+1:postag:NNS'
-0.194783 O b'-1:lemma:-lrb-'
-0.199810 Gtype b'-1:lemma:mg1655'
-0.202202 O b'+1:lemma:antibody'
-0.204305 O b'+1:lemma:mg1655'
-0.205357 O b'+1:lemma:minimal'
-0.215743 O b'+1:lemma:.'
-0.215743 O b'+1:postag:.'
-0.224035 Supp b'+1:postag:IN'
-0.226319 Technique b'-1:lemma::'
-0.228443 Supp b'lemma:and'
-0.228851 O b'lemma:e.'
-0.233559 Gtype b'+1:lemma:cra'
-0.233930 O b'lemma:8'
-0.236052 Gtype b'+1:lemma:a'
-0.236429 O b'lemma:n2'
-0.240273 O b'-1:postag:-LRB-'
-0.243379 O b'-1:lemma:e.'
-0.246913 O b'-1:lemma:20'
-0.249761 O b'-1:postag:VBP'
-0.250445 O b'+1:lemma:from'
-0.253990 O b'lemma:co2'
-0.257147 O b'lemma:lb'
-0.262866 O b'-1:lemma:grow'
-0.263101 O b'-1:lemma:n2'
-0.266675 Med b'postag:CD'
-0.267433 Med b'+1:postag:NNS'
-0.271690 Med b'lemma:-lrb-'
-0.272421 Med b'postag:-LRB-'
-0.274354 Anti b'+1:postag:JJ'
-0.275817 O b'-1:lemma:fresh'
-0.276458 Gversion b'+1:postag:NN'
-0.284818 Gtype b'+1:lemma:b'
-0.291947 O b'+1:lemma:until'
-0.292980 O b'lemma:minimal'
-0.294264 O b'lemma:30'
-0.295523 O b'-1:lemma:od600'
-0.299205 O b'+1:lemma:b'
-0.302254 O b'lemma:aerobically'
-0.310314 O b'lemma:grow'
-0.331454 O b'-1:lemma:0.1'
-0.331977 O b'-1:postag:VBN'
-0.340682 O b'+1:lemma:delta'
-0.346089 O b'+1:lemma:-lcb-'
-0.347377 O b'-1:lemma:um'
-0.347377 O b'+1:lemma:paraquat'
-0.348080 O b'+1:postag:-RRB-'
-0.348644 Gtype b'postag:CD'
-0.350220 O b'-1:lemma:chip-exo'
-0.353135 O b'lemma:mg/ml'
-0.353135 O b'-1:lemma:150'
-0.368298 O b'-1:lemma:mm'
-0.368952 O b'+1:lemma:phase'
-0.383392 Phase b'-1:lemma:at'
-0.383608 Gtype b'lemma:_'
-0.386439 O b'+1:postag:IN'
-0.389536 O b'lemma:glucose'
-0.397045 Supp b'+1:postag:VBN'
-0.406129 O b'postag:RB'
-0.417245 O b'lemma:m63'
-0.417398 OD b'+1:postag:CC'
-0.419368 Temp b'postag:JJ'
-0.422874 O b'+1:lemma:or'
-0.427973 O b'+1:lemma:%'
-0.435206 O b'+1:lemma:shake'
-0.441406 OD b'+1:lemma:mid-log'
-0.442803 Med b'+1:postag:NN'
-0.449476 O b'lemma:nacl'
-0.461678 O b'+1:lemma:_'
-0.466634 O b'lemma:od600'
-0.469668 Technique b'-1:postag::'
-0.470685 O b'-1:lemma:from'
-0.478610 Supp b'postag:CC'
-0.495782 O b'+1:lemma:dissolve'
-0.501976 Supp b'-1:postag:VBG'
-0.521671 O b'-1:lemma:rifampicin'
-0.527652 O b'lemma:\xc2\xb0c'
-0.532074 O b'-1:lemma:g/l'
-0.535235 O b'-1:lemma:cra'
-0.535605 pH b'postag:NN'
-0.540053 O b'+1:lemma:cell'
-0.542400 O b'-1:lemma:mid-log'
-0.554731 O b'+1:lemma:Aerobic'
-0.557223 OD b'+1:postag:NN'
-0.577398 Temp b'-1:lemma:\xc2\xb0c'
-0.586593 O b'+1:lemma:rep1'
-0.589048 O b'-1:lemma:30'
-0.595835 Med b'-1:postag:IN'
-0.598295 O b'lemma:phase'
-0.599331 Supp b'-1:lemma:%'
-0.612222 O b'+1:lemma:\xc2\xb0c'
-0.624875 O b'-1:lemma:affinity'
-0.636676 OD b'lemma:-lrb-'
-0.643469 O b'+1:postag:NNS'
-0.649099 Temp b'postag:NN'
-0.649456 O b'lemma:of'
-0.658905 Gtype b'-1:postag:SYM'
-0.660226 OD b'+1:lemma:and'
-0.660293 O b'+1:lemma:c'
-0.672111 O b'lemma:mg1655'
-0.675080 Gtype b'lemma:delta'
-0.677816 O b'lemma:dissolve'
-0.678428 Supp b'+1:lemma:rifampicin'
-0.684724 O b'-1:lemma:co2'
-0.686280 Gtype b'-1:postag:DT'
-0.694863 O b'-1:lemma:o2'
-0.722306 Gtype b'+1:lemma:\xe2\x88\x86'
-0.732614 O b'lemma:150'
-0.732614 O b'+1:lemma:mg/ml'
-0.761599 OD b'postag:-LRB-'
-0.772588 Temp b'+1:lemma:to'
-0.772588 Temp b'+1:postag:TO'
-0.781297 O b'lemma:anaerobically'
-0.807083 O b'+1:lemma:arginine'
-0.819715 O b'-1:lemma:\xe2\x88\x86'
-0.823821 O b'+1:lemma:0.3'
-0.836002 O b'lemma:dpd'
-0.858798 O b'+1:lemma:fecl2'
-0.878330 Med b'+1:postag:IN'
-0.883801 O b'-1:lemma:until'
-0.890383 O b'lemma:purify'
-0.893680 Anti b'+1:lemma:anti-fur'
-0.905649 O b'lemma:anaerobic'
-0.919723 O b'-1:lemma:0.2'
-0.932910 O b'+1:lemma:300'
-0.948417 O b'-1:lemma:ml'
-0.951682 O b'-1:lemma:1'
-0.955382 Air b'-1:postag:JJ'
-0.960497 O b'lemma:\xe2\x88\x86'
-0.979778 O b'-1:lemma:dissolve'
-0.979778 O b'+1:lemma:methanol'
-0.985663 O b'lemma:fecl2'
-0.995333 O b'lemma:media'
-1.037234 Phase b'-1:postag:JJ'
-1.042219 O b'lemma:20'
-1.048118 O b'lemma:k-12'
-1.067143 Supp b'+1:lemma:acetate'
-1.079445 O b'lemma:2h'
-1.079445 O b'-1:lemma:additional'
-1.084169 O b'+1:lemma:supplement'
-1.104220 O b'lemma:0.1'
-1.125319 O b'lemma:mid-log'
-1.136189 O b'lemma:0.2'
-1.136814 O b'-1:lemma:~'
-1.142555 O b'lemma:0.3'
-1.163967 O b'+1:lemma:at'
-1.165833 O b'+1:lemma:in'
-1.233659 O b'+1:postag:VBG'
-1.234566 OD b'postag:JJ'
-1.251923 O b'-1:postag:VBG'
-1.261321 Phase b'postag:JJ'
-1.282079 O b'+1:lemma:g/l'
-1.318236 O b'lemma:\xce\xb4fur'
-1.394008 O b'-1:lemma:ompr'
-1.417347 O b'-1:lemma:sample'
-1.574030 O b'lemma:37'
-1.604365 Supp b'postag:JJ'
-1.624714 O b'postag:VBP'
-1.649157 O b'-1:postag::'
-1.654774 Air b'postag:NN'
-1.662186 O b'-1:lemma:rpob'
-1.692863 Air b'+1:postag:JJ'
-1.720211 Anti b'postag:NNP'
-1.810759 O b'lemma:wt'
-1.845010 O b'-1:lemma:37'
-1.856437 O b'+1:lemma:+'
-1.877902 O b'-1:lemma:IP'
-1.987331 O b'-1:lemma:2'
-2.039453 O b'lemma:methanol'
-2.059494 O b'-1:lemma:nsrr'
-2.091546 O b'+1:lemma:2'
-2.107679 O b'lemma:rifampicin'
-2.361875 O b'+1:lemma:hour'
-2.588783 O b'+1:lemma:1'
-3.600862 O b'-1:lemma:_'
-3.688282 O b'-1:lemma::'
********** TRAINING AND TESTING REPORT **********
Training file: training-data-set-70_v4.txt
best params:{'c1': 0.06622541546379261, 'c2': 0.005803516535443396}
best CV score:0.806003784483002
model size: 0.08M
Flat F1: 0.8138835387377452
precision recall f1-score support
OD 1.000 0.405 0.577 37
pH 1.000 1.000 1.000 12
Technique 0.952 0.909 0.930 22
Med 0.897 0.912 0.904 57
Temp 0.818 1.000 0.900 18
Vess 0.000 0.000 0.000 0
Agit 0.000 0.000 0.000 0
Phase 1.000 0.947 0.973 19
Air 0.939 0.742 0.829 62
Anti 0.571 0.444 0.500 9
Strain 1.000 1.000 1.000 1
Gtype 0.876 0.802 0.837 106
Substrain 0.000 0.000 0.000 1
Supp 0.886 0.684 0.772 136
Gversion 0.000 0.000 0.000 0
avg / total 0.901 0.758 0.814 480
Top likely transitions:
Agit -> Agit 6.855330
OD -> OD 6.403256
Temp -> Temp 5.890045
Anti -> Anti 5.766779
Med -> Med 5.604768
Air -> Air 4.967782
O -> O 4.848357
Gversion -> Gversion 4.809684
Gtype -> Gtype 4.697823
Phase -> Phase 4.308635
Supp -> Supp 4.041417
Technique -> Technique 3.913590
pH -> pH 2.570273
O -> Supp 1.615671
Substrain -> Gtype 1.499561
Gtype -> Supp 1.421628
O -> Technique 1.327277
O -> Gtype 1.086040
Air -> O 0.813429
Technique -> Air 0.731728
O -> Temp 0.588704
O -> Anti 0.562550
Gtype -> pH 0.228949
Med -> O 0.224698
O -> Gversion 0.210688
Temp -> O 0.172212
O -> Med 0.102170
Gtype -> Air 0.095373
OD -> Phase 0.013336
Anti -> OD -0.004256
O -> OD -0.004531
Gtype -> Gversion -0.009932
Phase -> O -0.017336
O -> Phase -0.028487
Phase -> Technique -0.066036
Med -> Air -0.087994
Gtype -> Phase -0.099971
Supp -> Gversion -0.157806
Anti -> O -0.169383
Supp -> Temp -0.179380
Phase -> Air -0.231554
Supp -> OD -0.255465
Supp -> Anti -0.267111
Technique -> O -0.296565
OD -> Med -0.297702
Gtype -> Med -0.322957
Technique -> pH -0.337491
Supp -> O -0.351378
OD -> Gtype -0.357504
Gtype -> O -0.378451
Top unlikely transitions:
O -> Gtype 1.086040
Air -> O 0.813429
Technique -> Air 0.731728
O -> Temp 0.588704
O -> Anti 0.562550
Gtype -> pH 0.228949
Med -> O 0.224698
O -> Gversion 0.210688
Temp -> O 0.172212
O -> Med 0.102170
Gtype -> Air 0.095373
OD -> Phase 0.013336
Anti -> OD -0.004256
O -> OD -0.004531
Gtype -> Gversion -0.009932
Phase -> O -0.017336
O -> Phase -0.028487
Phase -> Technique -0.066036
Med -> Air -0.087994
Gtype -> Phase -0.099971
Supp -> Gversion -0.157806
Anti -> O -0.169383
Supp -> Temp -0.179380
Phase -> Air -0.231554
Supp -> OD -0.255465
Supp -> Anti -0.267111
Technique -> O -0.296565
OD -> Med -0.297702
Gtype -> Med -0.322957
Technique -> pH -0.337491
Supp -> O -0.351378
OD -> Gtype -0.357504
Gtype -> O -0.378451
Supp -> Gtype -0.415119
Supp -> Technique -0.546983
Air -> Supp -0.580378
Air -> OD -0.591958
Technique -> Gtype -0.721938
OD -> Air -0.869884
Agit -> O -0.874071
Technique -> OD -0.901946
Gtype -> OD -0.968255
Supp -> Air -0.992187
OD -> O -1.076988
Gtype -> Anti -1.093167
O -> Air -1.094876
Supp -> Med -1.350916
Substrain -> O -1.483392
Phase -> OD -2.458856
Med -> Supp -2.624787
Top positive:
9.234903 Supp b'lemma:Iron'
8.941229 Phase b'lemma:stationary'
8.612848 O b'lemma:_'
8.414915 Air b'lemma:anaerobic'
8.110054 Air b'lemma:aerobic'
7.668526 O b'lemma:1'
7.617369 Supp b'lemma:nitrate'
7.396629 Technique b'lemma:ChIP-exo'
7.004694 O b'lemma:rpob'
6.951208 Strain b'lemma:k-12'
6.929222 Air b'-1:lemma:ChIP-Seq'
6.919087 Supp b'lemma:pq'
6.729357 Med b'lemma:MOPS'
6.630951 Technique b'lemma:chipseq'
6.338936 Gversion b'lemma:asm584v2'
6.275492 O b'-1:lemma:tag'
6.112382 Gtype b'lemma:\xce\xb4cra'
6.001427 O b'lemma:3'
5.997947 O b'lemma:2'
5.921853 O b'lemma:\xcf\x8332'
5.917190 Gtype b'lemma:flag-tag'
5.917190 Gtype b'-1:lemma:c-terminal'
5.881286 Substrain b'lemma:mg1655'
5.839915 Phase b'lemma:mid-log'
5.810496 Air b'lemma:Aerobic'
5.761122 O b'lemma:Custom'
5.570827 O b'lemma:rep1'
5.482824 Med b'lemma:LB'
5.430575 Supp b'+1:lemma:\xc2\xb5m'
5.428880 O b'lemma:rep2'
5.402169 O b'-1:lemma:ChIP-exo'
5.366372 O b'lemma:Cra'
5.339186 Supp b'lemma:nh4cl'
5.259341 Gtype b'lemma:arca8myc'
5.256666 Gtype b'lemma:type'
5.250724 O b'lemma:rep3'
5.250467 Med b'lemma:lb'
5.157840 Supp b'lemma:rifampicin'
5.124923 O b'lemma:b'
5.111909 Gtype b'lemma:delta-arca'
5.091556 OD b'+1:lemma:stationary'
5.048220 Supp b'lemma:glucose'
5.001772 O b'postag:IN'
4.994648 Gversion b'lemma:nc'
4.925252 Gtype b'lemma:fnr8myc'
4.905784 OD b'lemma:od450'
4.901181 Gtype b'+1:lemma:type'
4.837803 Technique b'lemma:rna-seq'
4.828361 Gtype b'lemma:wt'
4.778406 O b'lemma:a'
4.678498 Supp b'lemma:acetate'
4.667864 Anti b'lemma:none'
4.612775 Technique b'lemma:rnaseq'
4.597784 Supp b'-1:lemma:Cra'
4.596321 Technique b'lemma:ChIP-Seq'
4.580488 O b'-1:lemma:0.3-0.35'
4.559265 Gtype b'-1:lemma:\xe2\x88\x86'
4.543048 Supp b'lemma:no3'
4.494990 Supp b'lemma:Fe'
4.475916 O b'lemma:-'
4.465899 Anti b'lemma:seqa'
4.445546 Gtype b'lemma:delta-fnr'
4.372585 OD b'lemma:od600'
4.346955 Technique b'lemma:chip-seq'
4.301785 O b'lemma:or'
4.260295 Gtype b'lemma:\xce\xb4fur'
4.236090 O b'lemma:for'
4.225287 O b'lemma:.'
4.225287 O b'postag:.'
4.161720 Supp b'lemma:dpd'
4.122022 O b'+1:lemma:od600'
4.073219 Vess b'lemma:flask'
4.073219 Vess b'-1:lemma:warm'
4.068384 O b'-1:lemma:anaerobic'
4.063188 Supp b'lemma:arginine'
4.026011 O b'-1:lemma:Aerobic'
4.023826 O b'postag::'
4.014353 Med b'+1:lemma:0.4'
3.983642 O b'lemma:chip'
3.937745 O b'lemma:CEL'
3.912596 Supp b'+1:lemma:1'
3.911905 Anti b'lemma:anti-myc'
3.908162 O b'-1:lemma:glucose'
3.843811 Supp b'lemma:fructose'
3.821238 Gtype b'lemma:nsrr'
3.805915 Gversion b'-1:lemma:nc'
3.773446 Gversion b'lemma:chip-seq'
3.714736 Gtype b'lemma:\xce\xb4ompr'
3.700490 O b'-1:lemma:type'
3.624739 O b'+1:postag:RB'
3.602969 Strain b'+1:lemma:substr'
3.596045 Med b'+1:lemma:minimal'
3.588702 O b'-1:lemma:lb'
3.537935 O b'+1:lemma:o.d.'
3.533153 Gversion b'lemma:000913'
3.531194 Temp b'lemma:\xc2\xb0c'
3.520259 pH b'lemma:ph5'
3.520259 pH b'+1:lemma:.5'
3.511450 Gtype b'lemma:\xe2\x88\x86'
3.509713 Technique b'-1:lemma:IP'
3.496996 Med b'lemma:m63'
3.478233 Anti b'+1:lemma:antibody'
3.475022 Temp b'-1:lemma:sample'
3.455068 Supp b'-1:lemma:+'
3.449758 Anti b'lemma:anti-rpos'
3.439491 Supp b'+1:lemma:2'
3.415465 O b'lemma:oxyr'
3.385223 Gtype b'lemma:wild-type'
3.352842 O b'postag:VBN'
3.343983 Gtype b'+1:lemma:ph5'
3.343718 Air b'postag:RB'
3.343342 Supp b'lemma:nacl'
3.321016 Gtype b'-1:lemma:ptac'
3.317465 Gtype b'-1:lemma:rpob'
3.298959 O b'lemma:with'
3.236711 O b'lemma:s'
3.233948 Temp b'-1:lemma:\xcf\x8332'
3.232171 Supp b'+1:lemma:hour'
3.226682 O b'-1:lemma:0.3'
3.213829 Gversion b'lemma:u00096'
3.213829 Gversion b'+1:lemma:.2'
3.176657 O b'lemma:ompr'
3.173449 Gversion b'lemma:.2'
3.173449 Gversion b'-1:lemma:u00096'
3.148343 Gtype b'+1:lemma:with'
3.113186 O b'lemma:at'
3.099394 Gtype b'lemma:\xce\xb4soxs'
3.081492 Gtype b'lemma:pk4854'
3.081097 Supp b'+1:lemma:_'
3.076137 Air b'lemma:Anaerobic'
3.073681 O b'lemma:Lrp'
3.064586 Phase b'+1:lemma:for'
3.047746 O b'+1:lemma:chip-seq'
3.047210 Gtype b'lemma:ptac'
3.045585 Phase b'-1:lemma:until'
3.043072 O b'-1:lemma:dpd'
3.032395 Gtype b'lemma:WT'
3.021945 Technique b'+1:lemma:chip-exo'
3.010904 Supp b'lemma:iptg'
3.001563 Anti b'+1:lemma:polyclonal'
2.979219 Supp b'lemma:Leu'
2.976675 O b'lemma:chip-arca'
2.972333 Gtype b'lemma:deltaseqa'
2.972333 Gtype b'-1:lemma:old'
2.971020 O b'lemma:affyexp'
2.960652 Air b'lemma:anerobically'
2.938684 Supp b'+1:lemma:Deficient'
2.913605 Technique b'-1:lemma:chip-exo'
2.890017 O b'lemma:argr'
2.883731 O b'+1:lemma:sparging'
2.880763 Temp b'-1:lemma:43'
2.874818 Med b'-1:lemma:ml'
2.860187 pH b'lemma:.5'
2.860187 pH b'-1:lemma:ph5'
2.852362 Gtype b'+1:lemma:pq'
2.822626 O b'lemma:soxs'
2.822626 O b'lemma:soxr'
2.796727 Temp b'lemma:43'
2.764833 Med b'lemma:L'
2.764833 Med b'+1:lemma:broth'
2.762054 O b'+1:postag:NNP'
2.755377 O b'-1:lemma:stpa'
2.737365 Supp b'-1:lemma:\xc2\xb5m'
2.727294 Temp b'lemma:37'
2.708825 Med b'-1:lemma:LB'
2.705181 O b'-1:lemma:\xc2\xb0c'
2.692331 O b'+1:lemma:pq'
2.691434 Substrain b'+1:lemma:phtpg'
2.690410 O b'lemma:purr'
2.677844 Technique b'+1:lemma:rna-seq'
2.650316 Technique b'-1:lemma:input'
2.638625 Supp b'lemma:methanol'
2.634944 Med b'+1:lemma:contain'
2.625097 Supp b'lemma:Adenine'
2.623356 O b'-1:lemma:l1'
2.607859 O b'postag:DT'
2.598485 Med b'+1:lemma:2.0'
2.589920 Gtype b'-1:lemma:phtpg'
2.581102 Med b'lemma:glucose'
2.575292 Temp b'-1:lemma:37'
2.558206 Gtype b'lemma:\xce\xb4soxr'
2.550759 Gtype b'+1:lemma:flagtag'
2.550220 Gtype b'lemma:\xce\xb4oxyr'
2.549389 Gtype b'+1:lemma:aerobic'
2.517172 Supp b'lemma:leucine'
2.411127 O b'-1:lemma:media'
2.376812 O b'lemma:2-3'
2.368251 Med b'lemma:broth'
2.368251 Med b'-1:lemma:L'
2.353760 Gtype b'-1:lemma:_'
2.331107 O b'lemma:culture'
2.315361 Med b'postag:NNP'
2.299371 O b'lemma:Fur'
2.288936 Supp b'+1:lemma:iptg'
2.280717 Air b'lemma:anaerobically'
2.278786 Air b'-1:lemma:-'
2.266514 O b'lemma:genotype/variation'
2.259722 OD b'lemma:0.3'
2.256007 Technique b'postag:NNP'
2.224072 Air b'+1:lemma:at'
Top negative:
-0.002409 Med b'lemma:-lrb-'
-0.002765 O b'+1:lemma:minimal'
-0.002819 Med b'postag:-LRB-'
-0.002883 Supp b'-1:lemma:-'
-0.003857 Agit b'postag:NN'
-0.005100 Air b'-1:lemma:anaerobically'
-0.007289 O b'lemma:lb'
-0.010248 O b'lemma:mg/ml'
-0.010248 O b'-1:lemma:150'
-0.010774 Supp b'+1:postag:CD'
-0.011526 Air b'-1:lemma:95'
-0.011818 Air b'-1:postag:CC'
-0.012619 Supp b'lemma:mm'
-0.013393 O b'-1:lemma:1'
-0.013525 O b'+1:lemma:rep1'
-0.013581 Gtype b'-1:postag:SYM'
-0.013782 OD b'+1:lemma:-lrb-'
-0.015901 OD b'-1:postag:DT'
-0.015963 O b'-1:lemma:70'
-0.016924 OD b'+1:postag:-LRB-'
-0.019258 O b'lemma:m63'
-0.020349 Gversion b'-1:postag:NN'
-0.021440 OD b'+1:lemma:0.4'
-0.022594 Air b'-1:lemma:or'
-0.022941 OD b'-1:lemma:a'
-0.026771 O b'lemma:glucose'
-0.027346 O b'+1:lemma:25'
-0.028054 OD b'lemma:~'
-0.029216 Gtype b'+1:lemma:cra'
-0.030932 Gtype b'-1:postag:NN'
-0.031890 O b'+1:lemma:~'
-0.033995 O b'+1:lemma:95'
-0.035517 O b'-1:lemma:-lrb-'
-0.036049 O b'lemma:pahse'
-0.036135 O b'lemma:purify'
-0.036681 Gtype b'-1:postag:CD'
-0.038170 Med b'-1:lemma:m63'
-0.042355 O b'+1:lemma:antibody'
-0.042769 O b'+1:lemma:0.4'
-0.043812 O b'-1:lemma:25'
-0.050331 Phase b'-1:lemma:at'
-0.051370 O b'+1:lemma:5'
-0.052602 Phase b'+1:postag:NN'
-0.053544 OD b'+1:postag:CD'
-0.055596 O b'+1:lemma:o2'
-0.056365 O b'lemma:25'
-0.065474 O b'-1:lemma:o2'
-0.070061 Gtype b'-1:postag:DT'
-0.070234 O b'-1:lemma:the'
-0.072402 O b'+1:lemma:shake'
-0.079220 Strain b'+1:postag:NN'
-0.079451 O b'+1:lemma:grow'
-0.084137 Gtype b'lemma:_'
-0.085976 O b'+1:lemma:mm'
-0.093102 Med b'+1:postag:CC'
-0.095082 Air b'postag:CD'
-0.097194 O b'lemma:o2'
-0.101822 O b'-1:lemma:30'
-0.102523 O b'-1:postag:VBN'
-0.107653 O b'lemma:cell'
-0.112633 Supp b'-1:postag:NN'
-0.116970 O b'lemma:grow'
-0.120130 O b'lemma:30'
-0.121820 Supp b'+1:postag:IN'
-0.124237 Supp b'-1:postag:NNP'
-0.125482 O b'-1:lemma:mm'
-0.128019 O b'+1:lemma:300'
-0.129344 O b'lemma:\xc2\xb0c'
-0.144557 Med b'-1:postag:NN'
-0.144770 O b'-1:postag:IN'
-0.146162 O b'-1:lemma:of'
-0.149674 Gtype b'+1:lemma:\xe2\x88\x86'
-0.154228 OD b'postag:JJ'
-0.167420 O b'lemma:n2'
-0.171707 O b'lemma:co2'
-0.172528 O b'lemma:medium'
-0.179654 Med b'+1:postag:NNS'
-0.184135 O b'-1:lemma:n2'
-0.189770 O b'+1:lemma:-rrb-'
-0.192678 O b'lemma:aerobically'
-0.195762 O b'-1:lemma:with'
-0.196208 O b'-1:lemma:ml'
-0.204813 Gtype b'-1:lemma:mg1655'
-0.205462 O b'lemma:mg1655'
-0.206806 O b'-1:lemma:e.'
-0.207893 O b'lemma:minimal'
-0.224766 Temp b'postag:JJ'
-0.225885 O b'+1:lemma:.'
-0.225885 O b'+1:postag:.'
-0.227519 O b'lemma:e.'
-0.236166 O b'-1:lemma:rifampicin'
-0.247403 O b'-1:lemma:0.2'
-0.259717 Phase b'-1:postag:NN'
-0.263053 Supp b'lemma:and'
-0.270443 O b'-1:postag:-LRB-'
-0.283380 O b'-1:lemma:um'
-0.283380 O b'+1:lemma:paraquat'
-0.289228 Med b'postag:CD'
-0.290905 O b'-1:lemma:0.1'
-0.295102 OD b'+1:lemma:and'
-0.300641 O b'-1:lemma:until'
-0.306863 O b'+1:lemma:phase'
-0.322631 O b'lemma:phase'
-0.347838 O b'+1:lemma:until'
-0.351130 O b'+1:lemma:_'
-0.365836 O b'-1:lemma:\xe2\x88\x86'
-0.367298 Anti b'+1:postag:JJ'
-0.375233 Gtype b'postag:CD'
-0.377830 Supp b'postag:CC'
-0.381771 O b'postag:RB'
-0.385165 Technique b'-1:lemma::'
-0.387729 O b'+1:postag:IN'
-0.405861 O b'+1:postag:-RRB-'
-0.419130 O b'-1:lemma:from'
-0.425366 O b'-1:lemma:od600'
-0.432532 O b'-1:lemma:cra'
-0.441409 Temp b'postag:NN'
-0.447218 O b'+1:lemma:arginine'
-0.454986 O b'+1:lemma:%'
-0.469664 Med b'+1:postag:IN'
-0.481296 Technique b'-1:postag::'
-0.482628 Gversion b'+1:postag:NN'
-0.484056 O b'-1:lemma:grow'
-0.485046 pH b'postag:NN'
-0.490017 Med b'+1:postag:NN'
-0.496154 O b'lemma:od600'
-0.500947 Supp b'+1:postag:VBN'
-0.517928 O b'+1:lemma:cell'
-0.520851 O b'-1:lemma:sample'
-0.520873 OD b'+1:postag:CC'
-0.521577 O b'+1:lemma:\xc2\xb0c'
-0.525190 Anti b'+1:lemma:anti-fur'
-0.527700 O b'-1:lemma:mid-log'
-0.533028 O b'lemma:dissolve'
-0.550892 O b'lemma:150'
-0.550892 O b'+1:lemma:mg/ml'
-0.551731 Phase b'-1:postag:JJ'
-0.571169 O b'+1:lemma:0.3'
-0.600234 O b'postag:VBP'
-0.623446 O b'lemma:anaerobic'
-0.655760 Gtype b'lemma:delta'
-0.668609 Supp b'+1:lemma:rifampicin'
-0.673715 O b'lemma:\xce\xb4fur'
-0.724036 OD b'+1:postag:NN'
-0.742000 O b'+1:postag:NNS'
-0.743016 O b'+1:lemma:c'
-0.767018 O b'-1:lemma:~'
-0.767765 Air b'-1:postag:JJ'
-0.779655 O b'lemma:media'
-0.790170 O b'lemma:\xe2\x88\x86'
-0.816065 O b'lemma:fecl2'
-0.817183 O b'lemma:dpd'
-0.835270 O b'lemma:0.3'
-0.843288 OD b'lemma:-lrb-'
-0.853171 O b'+1:lemma:fecl2'
-0.889568 O b'lemma:0.2'
-0.905209 OD b'postag:-LRB-'
-0.911568 O b'lemma:anaerobically'
-0.915420 O b'lemma:of'
-0.950583 Temp b'+1:lemma:to'
-0.950583 Temp b'+1:postag:TO'
-0.951305 O b'lemma:20'
-0.951858 O b'-1:lemma:co2'
-1.045949 O b'lemma:0.1'
-1.056174 O b'+1:lemma:supplement'
-1.088532 Supp b'+1:lemma:acetate'
-1.092708 O b'+1:postag:VBG'
-1.103190 O b'-1:postag:VBG'
-1.119328 O b'lemma:2h'
-1.119328 O b'-1:lemma:additional'
-1.131219 Supp b'-1:lemma:%'
-1.142227 O b'-1:lemma:rpob'
-1.149700 O b'lemma:mid-log'
-1.157098 Med b'-1:postag:IN'
-1.158833 O b'-1:lemma:dissolve'
-1.158833 O b'+1:lemma:methanol'
-1.159433 O b'-1:postag::'
-1.197832 Air b'postag:NN'
-1.224684 O b'-1:lemma:37'
-1.250547 O b'+1:lemma:g/l'
-1.287510 O b'+1:lemma:at'
-1.343885 O b'+1:lemma:+'
-1.550316 Air b'+1:postag:JJ'
-1.550420 O b'+1:lemma:in'
-1.655179 O b'-1:lemma:IP'
-1.671646 Anti b'postag:NNP'
-1.676942 O b'lemma:37'
-1.709933 O b'lemma:rifampicin'
-1.740594 O b'-1:lemma:nsrr'
-1.813332 O b'lemma:wt'
-1.837591 Supp b'postag:JJ'
-1.875538 O b'-1:lemma:ompr'
-1.956251 O b'+1:lemma:hour'
-2.104216 O b'-1:lemma:2'
-2.173171 Phase b'postag:JJ'
-2.198016 O b'+1:lemma:2'
-2.302343 O b'lemma:methanol'
-2.686054 O b'+1:lemma:1'
-3.664514 O b'-1:lemma:_'
-4.711592 O b'-1:lemma::'
********** TRAINING AND TESTING REPORT **********
Training file: training-data-set-70_v4.txt
best params:{'c1': 0.15174828379035918, 'c2': 0.004631150546332649}
best CV score:0.8039437168420447
model size: 0.06M
Flat F1: 0.7844164550442063
precision recall f1-score support
OD 1.000 0.405 0.577 37
pH 1.000 1.000 1.000 12
Technique 0.952 0.909 0.930 22
Med 0.800 0.842 0.821 57
Temp 0.818 1.000 0.900 18
Vess 0.000 0.000 0.000 0
Agit 0.000 0.000 0.000 0
Phase 1.000 0.947 0.973 19
Air 0.807 0.742 0.773 62
Anti 0.571 0.444 0.500 9
Strain 1.000 1.000 1.000 1
Gtype 0.866 0.792 0.828 106
Substrain 0.000 0.000 0.000 1
Supp 0.807 0.676 0.736 136
Gversion 0.000 0.000 0.000 0
avg / total 0.848 0.746 0.784 480
Top likely transitions:
Agit -> Agit 6.791682
OD -> OD 6.218349
Med -> Med 5.630081
Temp -> Temp 5.606252
Anti -> Anti 5.592992
Air -> Air 5.496160
Gtype -> Gtype 5.032489
O -> O 4.908086
Phase -> Phase 4.730701
Gversion -> Gversion 4.476840
Technique -> Technique 4.434337
Supp -> Supp 4.084657
pH -> pH 1.968754
Substrain -> Gtype 1.897449
O -> Supp 1.725753
Air -> O 1.650897
O -> Gtype 1.589697
Gtype -> Supp 1.521503
O -> Technique 1.404884
Technique -> Air 0.905351
O -> Temp 0.873962
O -> Anti 0.719006
Med -> O 0.502394
O -> Gversion 0.498028
Temp -> O 0.179242
OD -> Phase 0.164045
O -> Phase 0.156883
Phase -> O 0.137263
Gtype -> Air 0.075321
Gtype -> pH 0.063986
O -> Med 0.051856
O -> Strain 0.030496
Supp -> O 0.016375
Anti -> O 0.009641
O -> OD -0.006635
OD -> Med -0.051166
Technique -> OD -0.093116
Supp -> OD -0.100162
Supp -> Air -0.273856
Gtype -> O -0.276598
Technique -> Gtype -0.372736
Gtype -> Anti -0.373755
Gtype -> Med -0.384161
OD -> O -0.433058
Agit -> O -0.645946
O -> Air -0.659946
Substrain -> O -0.912411
Gtype -> OD -0.984268
Supp -> Med -1.212185
Med -> Supp -1.967819
Top unlikely transitions:
OD -> OD 6.218349
Med -> Med 5.630081
Temp -> Temp 5.606252
Anti -> Anti 5.592992
Air -> Air 5.496160
Gtype -> Gtype 5.032489
O -> O 4.908086
Phase -> Phase 4.730701
Gversion -> Gversion 4.476840
Technique -> Technique 4.434337
Supp -> Supp 4.084657
pH -> pH 1.968754
Substrain -> Gtype 1.897449
O -> Supp 1.725753
Air -> O 1.650897
O -> Gtype 1.589697
Gtype -> Supp 1.521503
O -> Technique 1.404884
Technique -> Air 0.905351
O -> Temp 0.873962
O -> Anti 0.719006
Med -> O 0.502394
O -> Gversion 0.498028
Temp -> O 0.179242
OD -> Phase 0.164045
O -> Phase 0.156883
Phase -> O 0.137263
Gtype -> Air 0.075321
Gtype -> pH 0.063986
O -> Med 0.051856
O -> Strain 0.030496
Supp -> O 0.016375
Anti -> O 0.009641
O -> OD -0.006635
OD -> Med -0.051166
Technique -> OD -0.093116
Supp -> OD -0.100162
Supp -> Air -0.273856
Gtype -> O -0.276598
Technique -> Gtype -0.372736
Gtype -> Anti -0.373755
Gtype -> Med -0.384161
OD -> O -0.433058
Agit -> O -0.645946
O -> Air -0.659946
Substrain -> O -0.912411
Gtype -> OD -0.984268
Supp -> Med -1.212185
Med -> Supp -1.967819
Phase -> OD -2.190300
Top positive:
10.442312 Supp b'lemma:Iron'
9.237513 O b'lemma:_'
8.066306 Phase b'lemma:stationary'
8.024906 Air b'lemma:aerobic'
7.908903 O b'lemma:1'
7.889018 Supp b'lemma:nitrate'
7.674820 Air b'lemma:anaerobic'
7.633175 Technique b'lemma:ChIP-exo'
6.899479 Strain b'lemma:k-12'
6.830547 Med b'lemma:MOPS'
6.657016 O b'lemma:rpob'
6.360670 O b'lemma:2'
6.250166 Gtype b'lemma:type'
6.190584 Substrain b'lemma:mg1655'
6.146067 Air b'-1:lemma:ChIP-Seq'
6.024964 Gversion b'lemma:asm584v2'
5.999859 Technique b'lemma:chipseq'
5.989574 Phase b'lemma:mid-log'
5.930186 Supp b'lemma:pq'
5.914739 O b'lemma:3'
5.824268 O b'lemma:Custom'
5.823157 O b'-1:lemma:tag'
5.790768 O b'-1:lemma:ChIP-exo'
5.787399 Gtype b'lemma:\xce\xb4cra'
5.679226 Air b'lemma:Aerobic'
5.626571 OD b'lemma:od600'
5.547367 Technique b'lemma:ChIP-Seq'
5.529761 O b'lemma:rep2'
5.480516 Med b'lemma:LB'
5.426406 O b'lemma:rep1'
5.393841 Gversion b'lemma:nc'
5.303166 Gtype b'lemma:flag-tag'
5.303166 Gtype b'-1:lemma:c-terminal'
5.281475 Supp b'+1:lemma:\xc2\xb5m'
5.272063 O b'lemma:b'
5.200310 O b'lemma:rep3'
5.142193 OD b'lemma:od450'
5.096163 O b'lemma:\xcf\x8332'
4.990214 O b'lemma:Cra'
4.926222 O b'lemma:a'
4.919918 Gtype b'-1:lemma:\xe2\x88\x86'
4.861544 Supp b'lemma:nh4cl'
4.812167 Gtype b'+1:lemma:type'
4.803668 Gtype b'lemma:wt'
4.778995 Gtype b'lemma:delta-arca'
4.686613 Gtype b'lemma:arca8myc'
4.679240 O b'postag::'
4.678684 O b'postag:IN'
4.626493 OD b'+1:lemma:stationary'
4.555800 Med b'lemma:lb'
4.525977 Gversion b'-1:lemma:nc'
4.514336 Supp b'lemma:glucose'
4.505232 Supp b'lemma:Fe'
4.499806 Gtype b'lemma:fnr8myc'
4.365930 Technique b'lemma:rna-seq'
4.353053 Supp b'lemma:rifampicin'
4.322748 Gtype b'lemma:\xce\xb4fur'
4.244815 Anti b'lemma:none'
4.188590 O b'-1:lemma:Aerobic'
4.181353 Temp b'-1:lemma:sample'
4.061718 Supp b'lemma:dpd'
4.014721 Gtype b'lemma:delta-fnr'
3.991603 Technique b'lemma:rnaseq'
3.988938 O b'lemma:-'
3.968688 O b'-1:lemma:0.3-0.35'
3.942988 Technique b'lemma:chip-seq'
3.894918 Supp b'lemma:acetate'
3.894666 O b'lemma:or'
3.891632 O b'lemma:.'
3.891632 O b'postag:.'
3.833478 Supp b'lemma:arginine'
3.829349 Supp b'lemma:no3'
3.814064 Temp b'-1:lemma:\xcf\x8332'
3.734701 Gtype b'+1:lemma:ph5'
3.730485 Gtype b'lemma:\xce\xb4ompr'
3.716590 Anti b'lemma:seqa'
3.692742 Gtype b'lemma:wild-type'
3.691658 Vess b'lemma:flask'
3.691658 Vess b'-1:lemma:warm'
3.680323 O b'+1:lemma:od600'
3.566813 pH b'lemma:ph5'
3.566813 pH b'+1:lemma:.5'
3.560406 Gtype b'lemma:nsrr'
3.535215 O b'-1:lemma:type'
3.524717 O b'+1:postag:RB'
3.508650 Supp b'lemma:fructose'
3.483038 O b'lemma:chip'
3.461944 Supp b'-1:lemma:Cra'
3.457426 Strain b'+1:lemma:substr'
3.436475 Supp b'+1:lemma:1'
3.379628 Anti b'lemma:anti-myc'
3.377053 Anti b'+1:lemma:antibody'
3.370766 Technique b'-1:lemma:IP'
3.365056 Gtype b'lemma:\xe2\x88\x86'
3.358640 O b'-1:lemma:anaerobic'
3.348760 O b'lemma:CEL'
3.347149 Med b'+1:lemma:0.4'
3.346755 Anti b'lemma:anti-rpos'
3.291412 Gtype b'-1:lemma:ptac'
3.278055 Gversion b'lemma:chip-seq'
3.276131 O b'-1:lemma:0.3'
3.234361 O b'-1:lemma:glucose'
3.215739 O b'lemma:with'
3.173824 Technique b'+1:lemma:chip-exo'
3.151844 Med b'lemma:m63'
3.095334 O b'lemma:2-3'
3.095161 Gversion b'lemma:.2'
3.095161 Gversion b'-1:lemma:u00096'
3.091572 O b'postag:VBN'
3.064702 Gversion b'lemma:u00096'
3.064702 Gversion b'+1:lemma:.2'
3.050277 Temp b'-1:lemma:37'
3.030666 Gtype b'+1:lemma:with'
3.027065 Supp b'+1:lemma:hour'
3.014173 O b'+1:lemma:chip-seq'
3.004960 Gtype b'+1:lemma:flagtag'
2.985168 O b'lemma:for'
2.975989 Supp b'-1:lemma:+'
2.944441 O b'lemma:oxyr'
2.938101 Gtype b'-1:lemma:phtpg'
2.932689 Gtype b'-1:lemma:rpob'
2.920019 Gtype b'+1:lemma:pq'
2.881207 Supp b'lemma:nacl'
2.860299 O b'lemma:s'
2.845172 Supp b'+1:lemma:2'
2.832035 pH b'lemma:.5'
2.832035 pH b'-1:lemma:ph5'
2.826929 Supp b'lemma:Leu'
2.812269 O b'+1:postag:NNP'
2.781059 Air b'lemma:anaerobically'
2.778609 O b'-1:lemma:lb'
2.760280 O b'+1:lemma:o.d.'
2.735776 Temp b'lemma:\xc2\xb0c'
2.720218 O b'lemma:at'
2.711579 Technique b'-1:lemma:chip-exo'
2.699401 O b'lemma:ompr'
2.690406 Air b'-1:lemma:-'
2.684684 Gtype b'lemma:\xce\xb4soxs'
2.665215 Supp b'+1:lemma:_'
2.660630 Phase b'-1:lemma:until'
2.607235 Med b'lemma:L'
2.607235 Med b'+1:lemma:broth'
2.606476 Med b'+1:lemma:minimal'
2.603782 O b'+1:lemma:pq'
2.599075 Supp b'-1:lemma:\xc2\xb5m'
2.567370 O b'-1:lemma:\xc2\xb0c'
2.524994 Supp b'lemma:Adenine'
2.522705 Gtype b'lemma:deltaseqa'
2.522705 Gtype b'-1:lemma:old'
2.484836 Temp b'-1:lemma:43'
2.470532 Supp b'lemma:20'
2.469573 O b'lemma:culture'
2.450105 Technique b'-1:lemma:input'
2.449141 O b'postag:DT'
2.444936 Med b'+1:lemma:2.0'
2.421299 Med b'-1:lemma:ml'
2.419343 Temp b'lemma:43'
2.410866 O b'+1:lemma:mid-log'
2.405996 Supp b'lemma:iptg'
2.397598 Air b'postag:RB'
2.397388 Med b'+1:lemma:g/l'
2.360509 Gtype b'-1:lemma:nsrr'
2.349789 O b'lemma:condition'
2.348126 Gtype b'lemma:ptac'
2.347726 Gversion b'lemma:000913'
2.337057 O b'lemma:chip-arca'
2.336687 Gtype b'lemma:pk4854'
2.327964 Med b'lemma:media'
2.272835 O b'-1:lemma:stpa'
2.265750 O b'lemma:Lrp'
2.265406 O b'lemma:soxs'
2.265406 O b'lemma:soxr'
2.262228 O b'postag:VBG'
2.253322 O b'-1:lemma:l1'
2.237857 Air b'lemma:anerobically'
2.184062 O b'-1:lemma:media'
2.159204 Supp b'-1:lemma:with'
2.158856 O b'lemma:genotype/variation'
2.157878 O b'-1:lemma:dpd'
2.152540 Gtype b'lemma:dfnr'
2.148134 O b'lemma:affyexp'
2.137699 O b'postag:CC'
2.111648 Supp b'lemma:methanol'
2.108494 O b'+1:postag:VBP'
2.075540 OD b'lemma:phase'
2.075417 Gversion b'postag:CD'
2.068323 Supp b'lemma:0.2'
2.064252 Gtype b'-1:lemma:vector'
2.056563 OD b'lemma:0.3'
2.050748 OD b'-1:lemma:~'
2.050625 Gtype b'lemma:WT'
2.032571 Temp b'lemma:37'
2.018067 Gtype b'+1:lemma:_'
2.017840 O b'lemma:Fur'
2.011856 O b'lemma:argr'
2.006861 Technique b'+1:lemma:rna-seq'
1.998918 O b'lemma:purr'
1.996704 Phase b'+1:lemma:for'
1.989638 Med b'lemma:glucose'
1.943868 Phase b'-1:lemma:mid-log'
Top negative:
0.000312 OD b'-1:lemma:mid-log'
0.000096 OD b'-1:postag:VBN'
0.000087 Gtype b'-1:lemma:small'
0.000087 Med b'-1:lemma:g/l'
0.000084 Agit b'+1:postag:NN'
0.000056 O b'+1:lemma:ChIP-Seq'
0.000053 Supp b'-1:lemma:1g/l'
0.000051 O b'lemma:hour'
0.000051 Gtype b'lemma:cra'
0.000036 OD b'lemma:and'
0.000021 Temp b'lemma:-lrb-'
0.000021 Temp b'postag:-LRB-'
0.000019 O b'-1:lemma:l2'
0.000014 Technique b'+1:lemma:-rrb-'
0.000014 Technique b'+1:postag:-RRB-'
0.000007 Gtype b'lemma:small'
-0.000002 Gversion b'-1:postag:NN'
-0.000046 O b'-1:lemma:g/l'
-0.000070 Supp b'-1:lemma:and'
-0.000073 O b'-1:lemma:1'
-0.000129 Med b'postag:CD'
-0.000455 O b'-1:lemma:20'
-0.000511 OD b'+1:postag:-LRB-'
-0.000708 OD b'+1:postag:NNS'
-0.000735 Gtype b'postag:CD'
-0.000807 Gtype b'+1:postag:NNS'
-0.000867 O b'+1:postag:CD'
-0.000974 Gtype b'-1:postag:CD'
-0.001057 O b'+1:lemma:~'
-0.001061 O b'+1:lemma:fnr'
-0.001528 Air b'+1:lemma:70'
-0.001678 O b'-1:lemma:the'
-0.001682 O b'postag:VBP'
-0.002024 O b'+1:lemma:min'
-0.002325 Phase b'-1:postag:NN'
-0.002352 O b'lemma:n2'
-0.002519 O b'lemma:\xce\xb4fur'
-0.002662 Air b'lemma:,'
-0.002662 Air b'postag:,'
-0.005486 Gtype b'+1:lemma:2'
-0.006088 O b'+1:lemma:o2'
-0.006828 Air b'-1:lemma:and'
-0.008457 Temp b'-1:lemma:\xc2\xb0c'
-0.009847 O b'-1:lemma:,'
-0.009847 O b'-1:postag:,'
-0.010744 O b'+1:lemma:phase'
-0.011644 O b'lemma:cell'
-0.011902 Gtype b'-1:postag:NNP'
-0.013075 OD b'+1:lemma:and'
-0.013273 O b'-1:lemma:\xe2\x88\x86'
-0.013803 O b'lemma:e.'
-0.013807 O b'-1:lemma:sample'
-0.013821 O b'+1:lemma:mm'
-0.014710 O b'+1:lemma:rep1'
-0.015201 Air b'+1:lemma:-lrb-'
-0.015350 OD b'+1:postag:CD'
-0.017836 O b'+1:lemma:grow'
-0.019975 O b'lemma:anaerobic'
-0.024268 Air b'+1:postag:CD'
-0.025672 O b'lemma:medium'
-0.025888 O b'+1:lemma:95'
-0.026118 O b'lemma:k-12'
-0.027112 O b'+1:lemma:25'
-0.029447 Air b'postag:CC'
-0.030441 O b'lemma:m63'
-0.030598 OD b'+1:postag:CC'
-0.032367 O b'lemma:grow'
-0.035584 Air b'+1:postag:-LRB-'
-0.039003 O b'lemma:mg/ml'
-0.039003 O b'-1:lemma:150'
-0.041109 O b'-1:lemma:e.'
-0.043248 Supp b'-1:postag:NN'
-0.049539 Gversion b'+1:postag:NN'
-0.050421 O b'lemma:25'
-0.053998 Temp b'postag:JJ'
-0.060680 O b'-1:lemma:at'
-0.060890 Phase b'-1:postag:JJ'
-0.061474 O b'lemma:lb'
-0.064297 Med b'-1:postag:NN'
-0.065687 O b'-1:postag:VBN'
-0.067931 O b'-1:lemma:o2'
-0.068731 O b'+1:lemma:5'
-0.069126 O b'+1:lemma:shake'
-0.069173 O b'-1:lemma:25'
-0.071503 Air b'-1:postag:CC'
-0.075779 O b'+1:lemma:0.4'
-0.077365 Air b'-1:lemma:or'
-0.080349 O b'lemma:30'
-0.087136 O b'-1:lemma:mm'
-0.089014 pH b'postag:NN'
-0.089177 O b'lemma:o2'
-0.097371 Supp b'lemma:and'
-0.101711 Gtype b'-1:postag:DT'
-0.104337 O b'+1:lemma:antibody'
-0.113298 O b'-1:lemma:um'
-0.113298 O b'+1:lemma:paraquat'
-0.118293 Phase b'+1:postag:NN'
-0.119989 O b'-1:lemma:30'
-0.126122 Phase b'-1:lemma:at'
-0.135089 Supp b'postag:CC'
-0.137129 O b'-1:postag:IN'
-0.138660 O b'+1:lemma:arginine'
-0.139143 O b'+1:lemma:until'
-0.162860 O b'lemma:150'
-0.162860 O b'+1:lemma:mg/ml'
-0.164772 OD b'postag:JJ'
-0.171975 Temp b'postag:NN'
-0.183842 O b'lemma:phase'
-0.184042 O b'+1:lemma:-rrb-'
-0.185930 O b'-1:lemma:-lrb-'
-0.185933 O b'-1:lemma:rifampicin'
-0.195035 Med b'-1:postag:IN'
-0.204004 Anti b'+1:postag:JJ'
-0.210272 Gtype b'lemma:delta'
-0.217084 Supp b'+1:postag:VBN'
-0.218515 O b'lemma:co2'
-0.219042 O b'-1:lemma:of'
-0.220276 O b'+1:lemma:%'
-0.228465 Med b'+1:postag:NN'
-0.229909 O b'-1:lemma:n2'
-0.230847 OD b'+1:postag:NN'
-0.248221 O b'-1:lemma:ml'
-0.250359 Anti b'+1:lemma:anti-fur'
-0.279734 O b'+1:lemma:0.3'
-0.283648 O b'-1:postag:-LRB-'
-0.286892 O b'-1:lemma:mid-log'
-0.312152 Gtype b'-1:postag:NN'
-0.322232 O b'-1:lemma:0.1'
-0.331861 O b'+1:postag:-RRB-'
-0.336159 O b'-1:lemma:from'
-0.340286 Med b'+1:postag:IN'
-0.341633 O b'+1:postag:IN'
-0.355453 O b'-1:lemma:cra'
-0.366315 O b'+1:lemma:.'
-0.366315 O b'+1:postag:.'
-0.376920 O b'postag:RB'
-0.405053 O b'lemma:\xe2\x88\x86'
-0.424434 O b'+1:postag:NNS'
-0.430071 O b'lemma:aerobically'
-0.440727 Air b'postag:CD'
-0.446825 O b'lemma:anaerobically'
-0.447447 O b'-1:lemma:od600'
-0.465924 O b'+1:lemma:cell'
-0.468267 O b'lemma:od600'
-0.472878 O b'lemma:dissolve'
-0.502409 O b'lemma:20'
-0.508433 O b'-1:lemma:~'
-0.524904 Air b'-1:postag:JJ'
-0.543671 O b'+1:lemma:fecl2'
-0.554628 O b'-1:lemma:0.2'
-0.556138 Temp b'+1:lemma:to'
-0.556138 Temp b'+1:postag:TO'
-0.589220 Supp b'+1:lemma:rifampicin'
-0.610212 O b'lemma:0.3'
-0.622831 O b'lemma:media'
-0.657974 O b'-1:lemma:37'
-0.677014 Supp b'-1:lemma:%'
-0.678005 O b'+1:lemma:\xc2\xb0c'
-0.707832 O b'lemma:of'
-0.722898 O b'-1:lemma:rpob'
-0.723244 O b'+1:lemma:+'
-0.755790 OD b'lemma:-lrb-'
-0.789730 O b'lemma:0.2'
-0.814406 O b'-1:lemma:grow'
-0.909402 O b'-1:postag::'
-0.915529 O b'-1:postag:VBG'
-0.926818 Technique b'-1:postag::'
-0.933497 OD b'postag:-LRB-'
-0.980631 O b'lemma:fecl2'
-1.017727 O b'-1:lemma:dissolve'
-1.017727 O b'+1:lemma:methanol'
-1.020513 O b'lemma:2h'
-1.020513 O b'-1:lemma:additional'
-1.034839 O b'lemma:37'
-1.121637 O b'lemma:0.1'
-1.205775 O b'+1:lemma:supplement'
-1.230075 Supp b'+1:lemma:acetate'
-1.250120 O b'+1:postag:VBG'
-1.256564 O b'lemma:wt'
-1.276371 O b'+1:lemma:g/l'
-1.304341 O b'-1:lemma:IP'
-1.328183 O b'-1:lemma:co2'
-1.391087 O b'-1:lemma:ompr'
-1.452522 Anti b'postag:NNP'
-1.489210 O b'+1:lemma:at'
-1.603327 Air b'+1:postag:JJ'
-1.726036 O b'lemma:mid-log'
-1.737094 O b'lemma:rifampicin'
-1.758957 O b'-1:lemma:nsrr'
-1.762287 Supp b'postag:JJ'
-1.770399 Air b'postag:NN'
-1.810971 O b'+1:lemma:hour'
-2.081711 Phase b'postag:JJ'
-2.115674 O b'lemma:methanol'
-2.200974 O b'+1:lemma:in'
-2.263160 O b'+1:lemma:2'
-2.388655 O b'-1:lemma:2'
-2.590523 O b'+1:lemma:1'
-4.545984 O b'-1:lemma::'
-4.681648 O b'-1:lemma:_'
********** TRAINING AND TESTING REPORT **********
Training file: training-data-set-70_v4.txt
best params:{'c1': 0.2964346107181682, 'c2': 0.033092970074011845}
best CV score:0.7978231610325259
model size: 0.07M
Flat F1: 0.8182231777447608
precision recall f1-score support
OD 0.769 0.400 0.526 25
pH 1.000 1.000 1.000 12
Technique 1.000 0.909 0.952 22
Med 0.897 0.912 0.904 57
Temp 0.818 1.000 0.900 18
Vess 0.000 0.000 0.000 0
Agit 0.000 0.000 0.000 0
Phase 1.000 0.789 0.882 19
Air 0.763 0.763 0.763 59
Anti 0.875 0.778 0.824 9
Strain 1.000 1.000 1.000 1
Gtype 0.810 0.833 0.821 102
Substrain 0.000 0.000 0.000 1
Supp 0.879 0.740 0.803 127
Gversion 0.000 0.000 0.000 0
avg / total 0.854 0.794 0.818 452
Top likely transitions:
OD -> OD 6.341340
Agit -> Agit 5.796664
Med -> Med 5.137165
Temp -> Temp 4.995762
Anti -> Anti 4.990782
Gtype -> Gtype 4.240477
Gversion -> Gversion 4.225898
Supp -> Supp 4.204534
Air -> Air 3.999870
Phase -> Phase 3.766897
Technique -> Technique 3.408533
O -> O 3.070414
pH -> pH 2.382635
O -> Supp 1.706269
Gtype -> Supp 1.701317
Phase -> Supp 1.187792
Air -> Temp 1.062230
O -> Gtype 1.028035
O -> Technique 0.914562
Vess -> Temp 0.909296
Substrain -> Gtype 0.750526
Air -> Phase 0.620936
Med -> O 0.613845
Temp -> Agit 0.571012
Air -> Med 0.331698
O -> Anti 0.318145
O -> Gversion 0.070972
Med -> Supp 0.067887
Technique -> Air 0.035349
Air -> O 0.020752
Supp -> O 0.013814
Supp -> Technique -0.037404
Vess -> O -0.060438
Gtype -> Phase -0.068194
O -> Phase -0.091450
OD -> O -0.114114
Supp -> Gtype -0.122317
Gtype -> OD -0.173122
Anti -> O -0.230490
Gtype -> Med -0.240559
O -> Air -0.241130
O -> Temp -0.243070
Technique -> pH -0.267697
Phase -> OD -0.299766
Supp -> Med -0.355333
Gversion -> O -0.394835
OD -> Med -0.450024
Gtype -> Anti -0.546414
O -> Agit -0.704801
Agit -> O -0.801535
Top unlikely transitions:
Temp -> Temp 4.995762
Anti -> Anti 4.990782
Gtype -> Gtype 4.240477
Gversion -> Gversion 4.225898
Supp -> Supp 4.204534
Air -> Air 3.999870
Phase -> Phase 3.766897
Technique -> Technique 3.408533
O -> O 3.070414
pH -> pH 2.382635
O -> Supp 1.706269
Gtype -> Supp 1.701317
Phase -> Supp 1.187792
Air -> Temp 1.062230
O -> Gtype 1.028035
O -> Technique 0.914562
Vess -> Temp 0.909296
Substrain -> Gtype 0.750526
Air -> Phase 0.620936
Med -> O 0.613845
Temp -> Agit 0.571012
Air -> Med 0.331698
O -> Anti 0.318145
O -> Gversion 0.070972
Med -> Supp 0.067887
Technique -> Air 0.035349
Air -> O 0.020752
Supp -> O 0.013814
Supp -> Technique -0.037404
Vess -> O -0.060438
Gtype -> Phase -0.068194
O -> Phase -0.091450
OD -> O -0.114114
Supp -> Gtype -0.122317
Gtype -> OD -0.173122
Anti -> O -0.230490
Gtype -> Med -0.240559
O -> Air -0.241130
O -> Temp -0.243070
Technique -> pH -0.267697
Phase -> OD -0.299766
Supp -> Med -0.355333
Gversion -> O -0.394835
OD -> Med -0.450024
Gtype -> Anti -0.546414
O -> Agit -0.704801
Agit -> O -0.801535
Gtype -> O -0.877514
Substrain -> O -0.942951
Technique -> O -1.070076
Top positive:
5.560165 Technique b'lemma[:2]:Ch'
3.867275 O b'lemma[:2]:re'
3.624712 Air b'lemma:anaerobic'
3.330563 Phase b'lemma:mid-log'
3.251308 O b'-1:lemma:tag'
3.248997 O b'+1:lemma:m63'
3.228929 Gtype b'-1:lemma:\xe2\x88\x86'
3.210040 O b'lemma:1'
3.210040 O b'lemma[:2]:1'
3.159131 OD b'lemma[:2]:od'
3.135910 Gtype b'+1:lemma:flagtag'
3.047581 Supp b'+1:lemma:\xc2\xb5m'
2.949460 Med b'+1:lemma:0.4'
2.916942 Air b'lemma:aerobic'
2.901664 Gtype b'lemma[:1]:\xce\xb4'
2.895390 Gtype b'lemma:arca8myc'
2.891054 O b'+1:lemma:od600'
2.884994 Gtype b'lemma[:2]:cr'
2.868477 Phase b'lemma:stationary'
2.758780 Supp b'lemma:Iron'
2.758780 Supp b'lemma[:2]:Ir'
2.751464 Supp b'+1:lemma:1'
2.749032 Air b'lemma[:1]:a'
2.739001 Air b'-1:lemma:ChIP-Seq'
2.637207 Supp b'+1:lemma:2'
2.611400 Anti b'+1:lemma:antibody'
2.538029 Substrain b'lemma:mg1655'
2.532452 O b'-1:lemma:ChIP-exo'
2.530749 Gversion b'-1:lemma:nc'
2.522114 O b'lemma:rpob'
2.497323 O b'lemma:_'
2.497323 O b'lemma[:1]:_'
2.497323 O b'lemma[:2]:_'
2.467576 O b'lemma:2'
2.467576 O b'lemma[:2]:2'
2.451387 Technique b'lemma[:2]:rn'
2.450652 O b'lemma:c-terminal'
2.450652 O b'+1:lemma:flag-tag'
2.450652 O b'lemma[:2]:c-'
2.442475 Gtype b'lemma[:2]:de'
2.418091 O b'lemma:3'
2.418091 O b'lemma[:2]:3'
2.408360 OD b'+1:lemma:stationary'
2.385694 Temp b'-1:lemma:sample'
2.372903 Gtype b'-1:lemma:rpob'
2.354163 Gtype b'lemma:type'
2.354163 Gtype b'lemma[:2]:ty'
2.346327 O b'lemma[:2]:ge'
2.344928 Technique b'lemma:chipseq'
2.307523 Gtype b'lemma[:1]:w'
2.303430 Med b'lemma[:1]:L'
2.301957 Technique b'lemma[:1]:C'
2.289093 Supp b'lemma:arginine'
2.273052 O b'-1:lemma:Aerobic'
2.264869 Med b'+1:lemma:2.0'
2.254065 O b'-1:lemma:anaerobic'
2.249649 Gtype b'lemma[:1]:W'
2.162423 Anti b'lemma[:2]:an'
2.160242 Strain b'lemma:k-12'
2.160242 Strain b'lemma[:2]:k-'
2.144723 Supp b'lemma:acetate'
2.129184 Gtype b'lemma:flag-tag'
2.129184 Gtype b'-1:lemma:c-terminal'
2.111214 Air b'lemma[:1]:A'
2.098945 Gtype b'lemma:nsrr'
2.098945 Gtype b'lemma[:2]:ns'
2.095022 Supp b'-1:lemma:supplement'
2.082819 O b'-1:lemma:fructose'
2.076750 pH b'lemma:ph5'
2.076750 pH b'+1:lemma:.5'
2.072261 Temp b'-1:lemma:\xcf\x8332'
2.065755 O b'lemma:b'
2.065755 O b'lemma[:2]:b'
2.065419 Gtype b'+1:lemma:type'
2.051267 Air b'lemma[:2]:an'
2.036209 Technique b'lemma[:2]:ch'
2.031416 O b'+1:lemma:pq'
2.016412 Temp b'lemma[:1]:3'
2.004704 Supp b'+1:lemma:mid-log'
1.984448 O b'-1:lemma:0.3-0.35'
1.967808 Supp b'lemma[:2]:0.'
1.947363 Supp b'lemma:nitrate'
1.947363 Supp b'lemma[:2]:ni'
1.937420 Gversion b'lemma:chip-seq'
1.913900 Gversion b'lemma:nc'
1.913900 Gversion b'lemma[:2]:nc'
1.912878 O b'lemma:0.4'
1.907802 Strain b'+1:lemma:substr'
1.897924 Air b'lemma[:2]:ae'
1.871109 Phase b'-1:lemma:mid-log'
1.845125 Air b'lemma:Aerobic'
1.845125 Air b'lemma[:2]:Ae'
1.827855 Supp b'-1:lemma:Cra'
1.824332 Supp b'lemma:Fe'
1.824332 Supp b'lemma[:2]:Fe'
1.824175 Supp b'lemma:pq'
1.824175 Supp b'lemma[:2]:pq'
1.809404 Supp b'+1:lemma:_'
1.808766 OD b'lemma[:1]:o'
1.806302 Med b'isUpper'
1.798647 Gtype b'lemma[:2]:fl'
1.798534 Supp b'lemma[:2]:gl'
1.795820 O b'lemma:Custom'
1.795820 O b'lemma[:2]:Cu'
1.790374 Air b'+1:lemma:37'
1.790074 Gversion b'lemma[:2]:00'
1.785485 Supp b'lemma[:2]:fe'
1.780403 O b'+1:postag:RB'
1.756454 Supp b'lemma:iptg'
1.735997 Supp b'lemma[:1]:I'
1.733781 Gtype b'lemma:fnr8myc'
1.728269 Gversion b'lemma:asm584v2'
1.728269 Gversion b'lemma[:2]:as'
1.720876 Supp b'+1:lemma:hour'
1.715461 Temp b'-1:lemma:37'
1.694020 Substrain b'lemma[:2]:mg'
1.689985 Strain b'lemma[:1]:k'
1.673566 O b'postag::'
1.673566 O b'postag[:1]::'
1.673566 O b'postag[:2]::'
1.669711 Supp b'lemma[:1]:2'
1.661627 O b'+1:postag:VBD'
1.659863 Vess b'lemma:flask'
1.659863 Vess b'-1:lemma:warm'
1.656423 Gtype b'lemma[:2]:ar'
1.655250 Supp b'lemma[:1]:1'
1.654055 Med b'lemma[:1]:m'
1.646225 Med b'lemma:MOPS'
1.646225 Med b'lemma[:1]:M'
1.646225 Med b'lemma[:2]:MO'
1.643933 Vess b'lemma[:2]:fl'
1.636408 O b'-1:lemma:medium'
1.620415 O b'postag[:1]:V'
1.620415 O b'postag[:2]:VB'
1.611470 Supp b'lemma:fructose'
1.588047 Temp b'+1:lemma:\xc2\xb0c'
1.571785 Technique b'symb'
1.567665 Supp b'lemma:dpd'
1.567665 Supp b'lemma[:2]:dp'
1.561904 Gtype b'+1:lemma:_'
1.558404 O b'+1:lemma:grow'
1.556080 Technique b'-1:lemma:input'
1.531102 Gtype b'+1:lemma::'
1.516656 O b'isNumber'
1.513499 Technique b'+1:lemma:chip-exo'
1.484457 pH b'+1:postag:CD'
1.479000 Gtype b'lemma[:2]:wi'
1.463363 O b'+1:lemma:sparging'
1.455056 Med b'+1:lemma:supplement'
1.451942 Anti b'-1:lemma::'
1.445199 Gtype b'+1:postag::'
1.418029 Gtype b'-1:lemma:_'
1.414884 Gtype b'hGreek'
1.409332 Supp b'lemma[:2]:ac'
1.384832 Med b'lemma:lb'
1.384832 Med b'lemma[:2]:lb'
1.381767 Gtype b'lemma:wt'
1.381767 Gtype b'lemma[:2]:wt'
1.376804 O b'isLower'
1.370539 O b'-1:lemma:wt'
1.369037 Supp b'-1:lemma:media'
1.368164 O b'lemma:chip'
1.362593 Temp b'-1:lemma:43'
1.357457 O b'lemma:.'
1.357457 O b'postag:.'
1.357457 O b'postag[:1]:.'
1.357457 O b'postag[:2]:.'
1.357457 O b'lemma[:2]:.'
1.343506 Gtype b'-1:lemma:vector'
1.332982 Gtype b'lemma:pk4854'
1.332982 Gtype b'lemma[:2]:pk'
1.331836 O b'-1:lemma:antibody'
1.322840 Gtype b'lemma[:1]:f'
1.319451 Gtype b'-1:lemma:dna'
1.319338 Med b'+1:lemma:g/l'
1.314272 O b'lemma:\xcf\x8332'
1.314272 O b'lemma[:1]:\xcf\x83'
1.314272 O b'lemma[:2]:\xcf\x833'
1.313977 Technique b'-1:lemma:chip-exo'
1.307181 O b'-1:lemma:mg/ml'
1.305428 Supp b'-1:lemma:+'
1.296639 O b'lemma:purr'
1.293394 Supp b'-1:lemma:0.2'
1.292494 O b'-1:lemma:l1'
1.282743 Supp b'lemma[:2]:fr'
1.274232 Supp b'lemma:rifampicin'
1.268226 Air b'+1:lemma:L'
1.266974 Supp b'lemma:nh4cl'
1.266974 Supp b'lemma[:2]:nh'
1.264907 Supp b'lemma[:2]:ri'
1.262307 Supp b'-1:postag:CD'
1.260796 O b'+1:lemma:acetate'
1.259575 OD b'lemma[:2]:0.'
1.259142 Technique b'-1:lemma:_'
1.246498 Gversion b'postag:NN'
1.244033 Gtype b'symb'
1.240305 Supp b'lemma[:2]:ip'
1.236703 Temp b'lemma:43'
1.236703 Temp b'lemma[:2]:43'
1.228007 Gtype b'+1:lemma:control'
Top negative:
-0.001047 O b'+1:lemma:0.4'
-0.001061 Supp b'postag[:1]:C'
-0.001343 O b'lemma[:2]:co'
-0.001801 Phase b'+1:postag:NN'
-0.001816 Supp b'lemma[:1]:r'
-0.003273 Supp b'+1:lemma:-rrb-'
-0.004514 Agit b'isUpper'
-0.005651 Supp b'+1:postag:-RRB-'
-0.007968 O b'lemma[:2]:0.'
-0.008508 Air b'isUpper'
-0.008563 Supp b'-1:postag::'
-0.008695 Air b'postag:-RRB-'
-0.008695 Air b'postag[:2]:-R'
-0.008695 Air b'lemma[:2]:-r'
-0.009571 OD b'hUpper'
-0.009571 OD b'hLower'
-0.011349 Supp b'hUpper'
-0.011349 Supp b'hLower'
-0.011606 O b'-1:postag:JJ'
-0.011683 pH b'postag[:1]:N'
-0.011683 pH b'postag[:2]:NN'
-0.012286 Supp b'isNumber'
-0.014704 Air b'lemma:-rrb-'
-0.017854 O b'-1:lemma:control'
-0.021700 Med b'postag:NN'
-0.024044 O b'lemma:sample'
-0.026147 Supp b'postag:CD'
-0.026147 Supp b'postag[:2]:CD'
-0.028829 Air b'-1:postag:RB'
-0.029219 Gtype b'lemma[:1]:n'
-0.031150 Agit b'symb'
-0.031420 O b'-1:postag:VBN'
-0.031634 Air b'lemma[:1]:n'
-0.032172 O b'+1:lemma:rep1'
-0.032713 O b'+1:postag:JJ'
-0.033674 OD b'postag:CD'
-0.033674 OD b'postag[:2]:CD'
-0.033755 OD b'postag[:1]:N'
-0.033755 OD b'postag[:2]:NN'
-0.035292 Air b'+1:lemma:anaerobically'
-0.038458 Temp b'postag:NN'
-0.040319 Supp b'-1:lemma:%'
-0.041803 Temp b'hGreek'
-0.050279 Med b'-1:lemma:lb'
-0.052295 O b'-1:lemma:ml'
-0.053032 O b'+1:lemma:sample'
-0.061146 Gtype b'lemma[:1]:m'
-0.063531 Vess b'hUpper'
-0.063531 Vess b'hLower'
-0.067513 Supp b'isLower'
-0.080308 Temp b'postag[:1]:N'
-0.080308 Temp b'postag[:2]:NN'
-0.082384 Supp b'isUpper'
-0.086496 Supp b'+1:lemma:acetate'
-0.089008 O b'lemma[:2]:30'
-0.093208 O b'lemma[:2]:fe'
-0.093816 O b'lemma[:1]:m'
-0.096992 O b'lemma[:1]:d'
-0.099835 Gtype b'+1:lemma:-rrb-'
-0.103697 O b'lemma[:2]:od'
-0.106617 Anti b'symb'
-0.111217 Phase b'postag:JJ'
-0.111217 Phase b'postag[:1]:J'
-0.111217 Phase b'postag[:2]:JJ'
-0.115123 Gversion b'+1:postag:NN'
-0.120980 Air b'-1:postag:-RRB-'
-0.124184 Gtype b'lemma[:2]:rp'
-0.124385 Air b'-1:lemma:-rrb-'
-0.134523 O b'lemma:phase'
-0.137832 Air b'isLower'
-0.138018 O b'-1:postag:RB'
-0.142066 Anti b'+1:lemma:anti-fur'
-0.154765 O b'+1:lemma:hour'
-0.162942 O b'lemma[:1]:n'
-0.164561 O b'lemma:20'
-0.167133 OD b'postag[:1]:C'
-0.167374 O b'lemma:2h'
-0.167374 O b'-1:lemma:additional'
-0.167374 O b'lemma[:2]:2h'
-0.168348 OD b'isLower'
-0.175723 O b'-1:lemma:stir'
-0.176493 O b'+1:lemma:0.3'
-0.176731 O b'+1:lemma:culture'
-0.176925 O b'-1:lemma:dna'
-0.178939 O b'+1:lemma:delta'
-0.190332 Air b'symb'
-0.197832 Anti b'+1:postag:JJ'
-0.203484 Supp b'lemma[:2]:mi'
-0.206368 Air b'postag[:1]:N'
-0.206368 Air b'postag[:2]:NN'
-0.207030 Gtype b'-1:lemma:,'
-0.207030 Gtype b'-1:postag:,'
-0.209582 O b'-1:lemma:IP'
-0.213265 Gtype b'+1:lemma:\xe2\x88\x86'
-0.215249 Gtype b'-1:lemma:mg1655'
-0.217685 O b'lemma:fecl2'
-0.220159 O b'+1:lemma:-rrb-'
-0.220935 Gtype b'-1:postag:NN'
-0.227404 Technique b'postag:NN'
-0.230401 Technique b'-1:lemma::'
-0.232009 Air b'lemma:aerobically'
-0.236029 O b'+1:lemma:phase'
-0.238310 O b'lemma:wt'
-0.238310 O b'lemma[:2]:wt'
-0.243118 Temp b'-1:postag:NN'
-0.244383 O b'lemma[:2]:ph'
-0.246532 O b'-1:lemma:-lrb-'
-0.249897 O b'-1:lemma:37'
-0.254341 O b'lemma[:1]:L'
-0.257191 Gtype b'-1:postag:SYM'
-0.263503 O b'lemma[:2]:di'
-0.264603 O b'lemma[:1]:w'
-0.264928 O b'-1:lemma:grow'
-0.265388 O b'+1:lemma:antibody'
-0.273738 O b'lemma[:2]:ce'
-0.277706 O b'lemma[:1]:k'
-0.279240 O b'lemma[:1]:4'
-0.290259 Supp b'postag:JJ'
-0.290259 Supp b'postag[:1]:J'
-0.290259 Supp b'postag[:2]:JJ'
-0.292361 O b'-1:lemma:rna'
-0.304576 Technique b'isUpper'
-0.307541 Technique b'isLower'
-0.310027 O b'lemma[:2]:me'
-0.339328 O b'-1:lemma:ompr'
-0.339987 O b'-1:postag:-LRB-'
-0.345773 O b'lemma:150'
-0.345773 O b'-1:lemma:concentration'
-0.345773 O b'+1:lemma:mg/ml'
-0.345773 O b'lemma[:2]:15'
-0.347446 Med b'-1:postag:NN'
-0.348906 Supp b'lemma:2'
-0.348906 Supp b'lemma[:2]:2'
-0.351429 Air b'+1:lemma:-lrb-'
-0.352484 Air b'+1:postag:-LRB-'
-0.353556 O b'-1:lemma:cra'
-0.356329 Supp b'+1:lemma:dpd'
-0.369302 Agit b'hUpper'
-0.369302 Agit b'hLower'
-0.381952 Technique b'-1:postag::'
-0.384746 O b'-1:lemma:rifampicin'
-0.399014 O b'lemma:cell'
-0.402687 O b'+1:lemma:dissolve'
-0.409688 Gtype b'isNumber'
-0.436191 O b'lemma[:2]:gl'
-0.440734 O b'+1:lemma:cell'
-0.442622 O b'+1:postag:NNS'
-0.452887 O b'lemma[:1]:p'
-0.455834 O b'+1:lemma:.'
-0.455834 O b'+1:postag:.'
-0.459545 O b'lemma[:2]:mg'
-0.481182 Air b'-1:postag:JJ'
-0.482835 OD b'+1:postag:NN'
-0.494269 Air b'postag:NN'
-0.509176 O b'+1:postag:-RRB-'
-0.510299 Anti b'postag:NNP'
-0.510777 Air b'+1:postag:RB'
-0.521353 Gtype b'lemma[:1]:g'
-0.528030 O b'lemma:dissolve'
-0.528030 O b'+1:lemma:methanol'
-0.544300 Med b'-1:postag:VBN'
-0.544878 O b'lemma:mid-log'
-0.558725 Med b'+1:postag:NN'
-0.565193 O b'lemma[:1]:0'
-0.574181 OD b'+1:postag:CD'
-0.594752 Supp b'-1:lemma:glucose'
-0.616524 O b'-1:lemma:sample'
-0.632529 Phase b'hUpper'
-0.632529 Phase b'hLower'
-0.635261 Gtype b'lemma:delta'
-0.674300 O b'lemma:methanol'
-0.674300 O b'-1:lemma:dissolve'
-0.705737 Med b'-1:postag:CD'
-0.728901 O b'+1:lemma:supplement'
-0.740907 O b'lemma[:2]:ri'
-0.792014 Phase b'-1:postag:NN'
-0.804108 O b'lemma[:1]:A'
-0.910559 OD b'isNumber'
-0.914108 O b'+1:lemma:g/l'
-0.932717 O b'lemma:rifampicin'
-0.954851 O b'-1:lemma:supplement'
-0.966998 Gtype b'lemma[:1]:a'
-0.969405 Supp b'lemma[:1]:c'
-1.019136 O b'lemma[:1]:\xce\xb4'
-1.110821 Med b'symb'
-1.170611 Gversion b'isLower'
-1.220694 Gtype b'lemma[:1]:c'
-1.221441 Gtype b'isUpper'
-1.282188 O b'+1:postag:VBG'
-1.331537 OD b'postag[:1]:-'
-1.377314 Supp b'symb'
-1.414732 Supp b'hGreek'
-1.450462 OD b'lemma[:1]:-'
-1.661998 O b'-1:lemma:2'
-1.785964 O b'+1:lemma:2'
-1.952351 O b'-1:postag::'
-1.971208 O b'+1:lemma:1'
-2.697878 O b'-1:lemma::'
-2.771750 O b'-1:postag:VBG'
-4.091719 O b'-1:lemma:_'