Estefani Gaytan Nunez

upload

...@@ -7,38 +7,36 @@ from pandas import DataFrame as DF ...@@ -7,38 +7,36 @@ from pandas import DataFrame as DF
7 import matplotlib.pyplot as plt 7 import matplotlib.pyplot as plt
8 8
9 # Objective 9 # Objective
10 -# Drawn figures of grid reports 10 +# Draw figures of grid reports
11 # 11 #
12 # Input parameters 12 # Input parameters
13 -# --inputPath=PATH Path of inputfiles 13 +# --inputPath Path of input files
14 -# --outputPath=PATH Path to place output figures 14 +# --outputPath Path to place output figures
15 -# --figureName single run specific name figure, multifigure first part of name 15 +# --figureName single run specific name figure, multifigure first part of name
16 -# --inputFile Use it for a single report 16 +# --table boolean, save score-table
17 -# --version CRF-script version of reports
18 # 17 #
19 # Output 18 # Output
20 # training and test data set 19 # training and test data set
21 # 20 #
22 # Examples 21 # Examples
23 # python figures-reports.py 22 # python figures-reports.py
24 -# --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/reports/ 23 +# --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/reports/nov13
25 -# --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/figures/ 24 +# --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/figures/nov13
26 # --figureName FiguresGrid 25 # --figureName FiguresGrid
27 -# --inputFile report_Run1_v11.txt 26 +# --table
28 -# -version v11
29 27
30 -# python figures-reports.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/reports/ --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/figures/ --figureName FiguresGrid_v1 --inputFile report_Run1_v11.txt ..version v11 28 +
29 +
30 +# python figures-reports.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/reports/nov13 --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/figures/nov13 --figureName FiguresGrid --table
31 __author__ = 'egaytan' 31 __author__ = 'egaytan'
32 32
33 #################################################################################### 33 ####################################################################################
34 # FUNCTIONS # 34 # FUNCTIONS #
35 #################################################################################### 35 ####################################################################################
36 -def Filter(rfile, options,v): 36 +
37 - if options[0]=='all': 37 +def savescreen(output, dic, path):
38 - if rfile[0:6]=='report' and rfile[-7:-4]==v: return(True) 38 + if output:
39 - elif rfile in options: 39 + DF.from_dict(dic).to_csv(path+'.csv', sep = "\t", index = True)
40 - return(True)
41 - return(False)
42 40
43 #################################################################################### 41 ####################################################################################
44 # MAIN PROGRAM # 42 # MAIN PROGRAM #
...@@ -50,8 +48,7 @@ if __name__ == '__main__': ...@@ -50,8 +48,7 @@ if __name__ == '__main__':
50 parser.add_option('--inputPath', dest='inputPath', help='Path of output from CoreNLP', metavar='PATH') 48 parser.add_option('--inputPath', dest='inputPath', help='Path of output from CoreNLP', metavar='PATH')
51 parser.add_option('--outputPath', dest='outputPath', help='Path to place output figures', metavar='PATH') 49 parser.add_option('--outputPath', dest='outputPath', help='Path to place output figures', metavar='PATH')
52 parser.add_option('--figureName', dest='figureName', help='Specific or first part of figurename', metavar='FILE') 50 parser.add_option('--figureName', dest='figureName', help='Specific or first part of figurename', metavar='FILE')
53 - parser.add_option('--version', dest='version', help='script version', metavar='FILE') 51 + parser.add_option('--table', dest='table', help='save score-table', action='store_true', default=False)
54 - parser.add_option('--inputFile', dest='inputFile', help='Use it for a specific report files', metavar='FILE', default='all,')
55 52
56 (options, args) = parser.parse_args() 53 (options, args) = parser.parse_args()
57 if len(args) > 0: 54 if len(args) > 0:
...@@ -61,56 +58,40 @@ if __name__ == '__main__': ...@@ -61,56 +58,40 @@ if __name__ == '__main__':
61 print('-------------------------------- PARAMETERS --------------------------------') 58 print('-------------------------------- PARAMETERS --------------------------------')
62 print('Path of output from CoreNLP: ' + str(options.inputPath)) 59 print('Path of output from CoreNLP: ' + str(options.inputPath))
63 print('Path to place output figures: ' + str(options.outputPath)) 60 print('Path to place output figures: ' + str(options.outputPath))
64 - print('Specific or first part of figurename: ' + str(options.figureName)) 61 + print('Figurename: ' + str(options.figureName))
65 - print('CRF-script version: ' + str(options.version))
66 -
67 print('-------------------------------- PROCESSING --------------------------------') 62 print('-------------------------------- PROCESSING --------------------------------')
63 + reportFileList = [ rfile for rfile in os.listdir(options.inputPath) if rfile[0:7] == "report_"]
64 + print(','.join(reportFileList))
68 65
69 - rawInputRepotsList = str(options.inputFile).split(',') 66 + for inputFile in reportFileList:
70 - reportFileList = [ rfile for rfile in os.listdir(options.inputPath) if Filter(rfile, rawInputRepotsList, str(options.version)) ] 67 + scores = df(dict)
71 - scores = df(dict) 68 + for report in reportFileList:
72 - #CV={} 69 + with open(os.path.join(options.inputPath, report), 'r') as File:
73 - print('Report files: ' + str(options.inputFile )) 70 + string = File.read()
74 - print('\n'.join(reportFileList)) 71 + scores[report[7:11]]['CV']=re.findall('best\sCV\sscore\:(\d+\.\d+)', string)[0]
75 - print('----------------------------------- NOTE -----------------------------------') 72 + summaryScores = re.findall('avg\s\/\stotal\s+(\d+\.\d+)\s+(\d+\.\d+)\s+(\d+\.\d+)', string)[0]
76 - print('\n-------- All chosen report files should be in inputPath given---------------\n') 73 + scores[report[7:11]]['precision']=summaryScores[0]
74 + scores[report[7:11]]['recall']=summaryScores[1]
75 + scores[report[7:11]]['f1-score']=summaryScores[2]
77 76
78 - print('------------------------------- SAVING DATA --------------------------------\n')
79 - for report in reportFileList:
80 - with open(os.path.join(options.inputPath, report), 'r') as File:
81 - string = File.read()
82 - scores[report[7:11]]['CV']=re.findall('best\sCV\sscore\:(\d+\.\d+)', string)[0]
83 - summaryScores = re.findall('avg\s\/\stotal\s+(\d+\.\d+)\s+(\d+\.\d+)\s+(\d+\.\d+)', string)[0]
84 - scores[report[7:11]]['precision']=summaryScores[0]
85 - scores[report[7:11]]['recall']=summaryScores[1]
86 - scores[report[7:11]]['f1-score']=summaryScores[2]
87 -
88 print(DF(scores).T) 77 print(DF(scores).T)
89 - print('------------------------------- SAVING TABLE --------------------------------\n') 78 + scoresTable = DF(scores).T
90 - with open(os.path.join(options.inputPath, str(options.figureName) ), 'w') as File: 79 + print('------------------------------- SAVING DATA --------------------------------')
91 - 80 + print('Saving score-table: ' + str(options.table))
92 - scoresTable = DF(scores).T 81 + imageName = os.path.join(options.outputPath, options.figureName)
93 - 82 + savescreen(options.table, scores, imageName)
94 - imageName=os.path.join(options.outputPath, options.figureName) 83 + fig = plt.figure()
95 - ylab = "score", 84 + fig.set_figheight(13)
96 - fig = plt.figure() 85 + fig.set_figwidth(20)
97 - plt.grid(False) 86 + plt.ylim(0.7, 1.1)
98 - plt.rcParams.update({'font.size': 15}) 87 + plt.xlabel("Runs")
99 - fig.set_figheight(13) 88 + plt.ylabel("score")
100 - fig.set_figwidth(20) 89 + plt.rcParams.update()
101 - plt.xlabel("Runs") 90 + plt.grid()
102 - plt.ylabel("score") 91 + plt.plot(scoresTable['precision'],'o--', label='precision', linewidth=3, markersize=15)
103 - plt.xticks(range(8),scoresTable["CV"].index) 92 + plt.plot(scoresTable['f1-score'], 'o--', label='F1', linewidth=3, markersize=15)
104 - plt.plot(scoresTable['CV'], "--", color="red", label="CV") 93 + plt.plot(scoresTable['recall'], 'o--', label='recall' , linewidth=3, markersize=15)
105 - plt.plot(scoresTable['precision'], color="blue", label="precision") 94 + plt.plot(scoresTable['CV'], 'o--', label='CV' , linewidth=3, markersize=15)
106 - plt.plot(scoresTable['f1-score'], color="orange", label="F1") 95 + plt.legend(loc='lower right')
107 - plt.plot(scoresTable['recall'], color="g", label="recall") 96 + plt.xticks(range(8),['run1', 'run2', 'run3', 'run4', 'run5', 'run6', 'run7', 'run8'])
108 - plt.legend(loc='lower right') 97 + fig.savefig(imageName, bbox_inches='tight', pad_inches = 0.5)
109 - plt.tight_layout()
110 - fig.savefig(imageName, pad_inches=0.5)
111 -
112 -
113 -
114 -
115 -
116 -
......
1 +# Based on http://www.sthda.com/english/wiki/ggplot2-line-plot-quick-start-guide-r-software-and-data-visualization
2 +
3 +library(ggplot2)
4 +#library(ggpubr)
5 +#library(cowplot)
6 +
7 +######### BEST MODELS ##########
8 +
9 +# Run1
10 +# Todas las condiciones
11 +dfa <- data.frame(Measure=rep(c("Precision", "Recall", "F1-score"), each=15),
12 + Strategy=rep(c(
13 + "Agit",
14 + "Gversion",
15 + "Substrain",
16 + "Vess",
17 + "OD",
18 + "Anti",
19 + "Supp",
20 + "Air",
21 + "Gtype",
22 + "Med",
23 + "Temp",
24 + "Technique",
25 + "Phase",
26 + "pH",
27 + "Strain"
28 + ),3),
29 + Score=c(
30 + 0,
31 + 0,
32 + 0,
33 + 0,
34 + 1,
35 + 1,
36 + 0.883,
37 + 0.92,
38 + 0.905,
39 + 0.852,
40 + 0.818,
41 + 0.88,
42 + 1,
43 + 1,
44 + 1,
45 + 0,
46 + 0,
47 + 0,
48 + 0,
49 + 0.405,
50 + 0.444,
51 + 0.669,
52 + 0.742,
53 + 0.811,
54 + 0.912,
55 + 1,
56 + 1,
57 + 0.947,
58 + 1,
59 + 1,
60 + 0,
61 + 0,
62 + 0,
63 + 0,
64 + 0.577,
65 + 0.615,
66 + 0.762,
67 + 0.821,
68 + 0.856,
69 + 0.881,
70 + 0.9,
71 + 0.936,
72 + 0.973,
73 + 1,
74 + 1
75 + ))
76 +
77 +# Solo condiciones con F1-score > 0
78 +# Run 1
79 +df <- data.frame(Measure=rep(c("Precision", "Recall", "F1-score"), each=11),
80 + Strategy=rep(c(
81 + "OD",
82 + "Anti",
83 + "Supp",
84 + "Air",
85 + "Gtype",
86 + "Med",
87 + "Temp",
88 + "Technique",
89 + "Phase",
90 + "pH",
91 + "Strain"
92 + ),3),
93 + Score=c(
94 + 1,
95 + 1,
96 + 0.883,
97 + 0.92,
98 + 0.905,
99 + 0.852,
100 + 0.818,
101 + 0.88,
102 + 1,
103 + 1,
104 + 1,
105 + 0.405,
106 + 0.444,
107 + 0.669,
108 + 0.742,
109 + 0.811,
110 + 0.912,
111 + 1,
112 + 1,
113 + 0.947,
114 + 1,
115 + 1,
116 + 0.577,
117 + 0.615,
118 + 0.762,
119 + 0.821,
120 + 0.856,
121 + 0.881,
122 + 0.9,
123 + 0.936,
124 + 0.973,
125 + 1,
126 + 1
127 + ))
128 +
129 +head(df)
130 +
131 +pa<-ggplot(df, aes(x=Strategy, y=Score, group=Measure)) +
132 + geom_line(aes(color=Measure))+
133 + geom_point(aes(color=Measure))+
134 + scale_color_manual(values=c("#999999", "#E69F00", "#56B4E9"))+
135 + #scale_color_manual(values=c("#e6194b", "#3cb44b", "#0082c8"))+
136 + #geom_text(aes(label = Score))+
137 + labs(title="Scores by condition (Best model, Run1)",x="Condition", y = "Score")+
138 + theme(
139 + legend.position="top",
140 + # Centrar título: plot.title = element_text(hjust = 0.5),
141 + axis.line = element_line(colour = "gray"),
142 + panel.background = element_blank(),
143 + panel.grid.major = element_blank(),
144 + panel.grid.minor = element_blank(),
145 + panel.border = element_blank()
146 + )
147 +pa
148 +
149 +ggsave(".png")
150 +
151 +# Solo condiciones con F1-score > 0
152 +# Run 7
153 +df <- data.frame(Measure=rep(c("Precision", "Recall", "F1-score"), each=11),
154 + Strategy=rep(c(
155 + "Anti",
156 + "OD",
157 + "Supp",
158 + "Air",
159 + "Gtype",
160 + "Temp",
161 + "Med",
162 + "Technique",
163 + "Phase",
164 + "pH",
165 + "Strain"
166 + ),3),
167 + Score=c(
168 + 0.571,
169 + 1,
170 + 0.886,
171 + 0.939,
172 + 0.876,
173 + 0.818,
174 + 0.897,
175 + 0.952,
176 + 1,
177 + 1,
178 + 1,
179 + 0.444,
180 + 0.405,
181 + 0.684,
182 + 0.742,
183 + 0.802,
184 + 1,
185 + 0.912,
186 + 0.909,
187 + 0.947,
188 + 1,
189 + 1,
190 + 0.5,
191 + 0.577,
192 + 0.772,
193 + 0.829,
194 + 0.837,
195 + 0.9,
196 + 0.904,
197 + 0.93,
198 + 0.973,
199 + 1,
200 + 1
201 + ))
202 +
203 +head(df)
204 +
205 +pa<-ggplot(df, aes(x=Strategy, y=Score, group=Measure)) +
206 + geom_line(aes(color=Measure))+
207 + geom_point(aes(color=Measure))+
208 + scale_color_manual(values=c("#999999", "#E69F00", "#56B4E9"))+
209 + #scale_color_manual(values=c("#e6194b", "#3cb44b", "#0082c8"))+
210 + #geom_text(aes(label = Score))+
211 + labs(title="Scores by condition (Best model, Run7)",x="Condition", y = "Score")+
212 + theme(
213 + legend.position="top",
214 + # Centrar título: plot.title = element_text(hjust = 0.5),
215 + axis.line = element_line(colour = "gray"),
216 + panel.background = element_blank(),
217 + panel.grid.major = element_blank(),
218 + panel.grid.minor = element_blank(),
219 + panel.border = element_blank()
220 + )
221 +pa
222 +
223 +ggsave(".png")
224 +
1 +# Based on http://www.sthda.com/english/wiki/ggplot2-line-plot-quick-start-guide-r-software-and-data-visualization
2 +
3 +library(ggplot2)
4 +#library(ggpubr)
5 +#library(cowplot)
6 +
7 +######### BEST MODEL ##########
8 +
9 +# Solo condiciones con F1-score > 0
10 +# Run 6 (report_Run6_v11.txt)
11 +df <- data.frame(Measure=rep(c("Precision", "Recall", "F1-score"), each=11),
12 + Strategy=rep(c(
13 + "Air",
14 + "Anti",
15 + "Gtype",
16 + "Med",
17 + "OD",
18 + "pH",
19 + "Phase",
20 + "Supp",
21 + "Technique",
22 + "Temp",
23 + "Vess"
24 + ),3),
25 + Score=c(
26 + 0.565,
27 + 1,
28 + 0.889,
29 + 1,
30 + 1,
31 + 1,
32 + 0.882,
33 + 0.811,
34 + 1,
35 + 0.923,
36 + 1,
37 + 0.377,
38 + 1,
39 + 0.847,
40 + 0.943,
41 + 0.818,
42 + 1,
43 + 1,
44 + 0.799,
45 + 0.913,
46 + 0.828,
47 + 1,
48 + 0.452,
49 + 1,
50 + 0.867,
51 + 0.971,
52 + 0.9,
53 + 1,
54 + 0.938,
55 + 0.805,
56 + 0.955,
57 + 0.873,
58 + 1
59 + ))
60 +
61 +head(df)
62 +
63 +pa<-ggplot(df, aes(x=Strategy, y=Score, group=Measure)) +
64 + geom_line(aes(color=Measure))+
65 + geom_point(aes(color=Measure))+
66 + scale_color_manual(values=c("#999999", "#E69F00", "#56B4E9"))+
67 + #scale_color_manual(values=c("#e6194b", "#3cb44b", "#0082c8"))+
68 + #geom_text(aes(label = Score))+
69 + labs(title="Scores by condition (Best model, Run6)",x="Condition", y = "Score")+
70 + theme(
71 + legend.position="top",
72 + # Centrar título: plot.title = element_text(hjust = 0.5),
73 + axis.line = element_line(colour = "gray"),
74 + panel.background = element_blank(),
75 + panel.grid.major = element_blank(),
76 + panel.grid.minor = element_blank(),
77 + panel.border = element_blank()
78 + )
79 +pa
80 +
81 +ggsave(".png")
82 +
1 +# Based on http://zevross.com/blog/2019/04/02/easy-multi-panel-plots-in-r-using-facet_wrap-and-facet_grid-from-ggplot2/
2 +
3 +library(ggplot2)
4 +#library(ggpubr)
5 +#library(cowplot)
6 +
7 +organism = 'ECO'
8 +
9 +if (organism == 'ECO') {
10 +######### ECO DEVELOPMENT DATASET ##########
11 +
12 +# ECO-DEV-WITH-EFFECT-COMBINATION: Combination of strategies with effect in E. coli development dataset
13 +df <- data.frame(Panel=rep(c("Combination of strategies (effect)", "Separated strategies (effect)", "Combination of strategies (no effect)", "Separated strategies (no effect)"), each=12),
14 + Measure=rep(c("Precision", "Recall", "F1-score"), each=4),
15 + Strategy=c(rep(c("D", "D+V", "D+V+At", "D+V+At+Au"),3),rep(c("D", "V", "At", "Au"),3)),
16 + Score=c(
17 + 0.78, 0.79, 0.81, 0.81, 0.41, 0.56, 0.63, 0.63, 0.53, 0.65, 0.71, 0.71,
18 + 0.78, 0.89, 0.93, 1.00, 0.41, 0.35, 0.13, 0.01, 0.53, 0.50, 0.23, 0.02,
19 + 0.82, 0.82, 0.84, 0.84, 0.55, 0.66, 0.72, 0.72, 0.66, 0.73, 0.78, 0.78,
20 + 0.82, 0.88, 0.94, 1.00, 0.55, 0.39, 0.20, 0.01, 0.66, 0.54, 0.33, 0.02))
21 +filename = "ECO-dev-multi-panel.png"
22 +title_plot = "E. coli development dataset"
23 +} else if (organism == 'STM')
24 +{
25 +######### STM DEVELOPMENT DATASET ##########
26 +
27 +# STM-DEV-WITH-EFFECT-COMBINATION: Combination of strategies with effect in Salmonella evaluation dataset
28 +df <- data.frame(Panel=rep(c("Combination of strategies (effect)", "Separated strategies (effect)", "Combination of strategies (no effect)", "Separated strategies (no effect)"), each=12),
29 + Measure=rep(c("Precision", "Recall", "F1-score"), each=4),
30 + Strategy=c(rep(c("D", "D+V", "D+V+At", "D+V+At+Au"),3),rep(c("D", "V", "At", "Au"),3)),
31 + Score=c(
32 + 0.78, 0.77, 0.76, 0.76, 0.33, 0.49, 0.54, 0.54, 0.47, 0.60, 0.63, 0.63,
33 + 0.78, 0.81, 0.70, 0.88, 0.33, 0.33, 0.10, 0.01, 0.47, 0.47, 0.18, 0.02,
34 + 0.84, 0.82, 0.81, 0.81, 0.47, 0.59, 0.65, 0.65, 0.60, 0.68, 0.72, 0.72,
35 + 0.84, 0.84, 0.77, 0.86, 0.47, 0.40, 0.17, 0.01, 0.60, 0.55, 0.27, 0.02))
36 +filename = "STM-dev-multi-panel.png"
37 +title_plot = "Salmonella evaluation dataset"
38 +}
39 +
40 +head(df)
41 +
42 +pa<-ggplot(df, aes(x=Strategy, y=Score, group=Measure)) +
43 + geom_line(aes(color=Measure))+
44 + geom_point(aes(color=Measure))+
45 + scale_color_manual(values=c("#999999", "#E69F00", "#56B4E9"))+
46 + #scale_color_manual(values=c("#e6194b", "#3cb44b", "#0082c8"))+
47 + geom_text(aes(label = Score))+
48 + labs(title=title_plot,x="Strategies", y = "Score")+
49 + #theme_classic()+
50 + theme(
51 + legend.position="top",
52 + # Centrar título: plot.title = element_text(hjust = 0.5),
53 + axis.line = element_line(colour = "gray"),
54 + panel.background = element_blank(),
55 + panel.grid.major = element_blank(),
56 + panel.grid.minor = element_blank(),
57 + panel.border = element_blank(),
58 + )+
59 + facet_wrap(~Panel, scale="free")
60 +
61 +ggsave(filename)
62 +
1 +#========================================variant10=====================================
2 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run1 --variant 10 > ../../outputs/enero/Run1_v10.txt
3 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run2 --variant 10 --S1 > ../../outputs/enero/Run2_v10.txt
4 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run3 --variant 10 --S2 > ../../outputs/enero/Run3_v10.txt
5 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run4 --variant 10 --S1 --S2 > ../../outputs/enero/Run4_v10.txt
6 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run5 --variant 10 --S3 > ../../outputs/enero/Run5_v10.txt
7 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run6 --variant 10 --S1 --S3 > ../../outputs/enero/Run6_v10.txt
8 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run7 --variant 10 --S2 --S3 > ../../outputs/enero/Run7_v10.txt
9 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run8 --variant 10 --S1 --S2 --S3 > ../../outputs/enero/Run8_v10.txt
10 +#=======================================S4 v10=======================================
11 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run9 --variant 10 --S4 > ../../outputs/enero/Run9_v10.txt
12 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run10 --variant 10 --S4 --S1 > ../../outputs/enero/Run10_v10.txt
13 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run11 --variant 10 --S4 --S2 > ../../outputs/enero/Run11_v10.txt
14 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run12 --variant 10 --S4 --S1 --S2 > ../../outputs/enero/Run12_v10.txt
15 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run13 --variant 10 --S4 --S3 > ../../outputs/enero/Run13_v10.txt
16 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run14 --variant 10 --S4 --S1 --S3 > ../../outputs/enero/Run14_v10.txt
17 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run15 --variant 10 --S4 --S2 --S3 > ../../outputs/enero/Run15_v10.txt
18 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run16 --variant 10 --S4 --S1 --S2 --S3 > ../../outputs/enero/Run16_v10.txt
19 +#========================================variant11=====================================
20 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run1 --variant 11 > ../../outputs/enero/Run1_v11.txt
21 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run2 --variant 11 --S1 > ../../outputs/enero/Run2_v11.txt
22 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run3 --variant 11 --S2 > ../../outputs/enero/Run3_v11.txt
23 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run4 --variant 11 --S1 --S2 > ../../outputs/enero/Run4_v11.txt
24 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run5 --variant 11 --S3 > ../../outputs/enero/Run5_v11.txt
25 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run6 --variant 11 --S1 --S3 > ../../outputs/enero/Run6_v11.txt
26 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run7 --variant 11 --S2 --S3 > ../../outputs/enero/Run7_v11.txt
27 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run8 --variant 11 --S1 --S2 --S3 > ../../outputs/enero/Run8_v11.txt
28 +#=======================================S4 v11=======================================
29 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run9 --variant 11 --S4 > ../../outputs/enero/Run9_v11.txt
30 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run10 --variant 11 --S4 --S1 > ../../outputs/enero/Run10_v11.txt
31 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run11 --variant 11 --S4 --S2 > ../../outputs/enero/Run11_v11.txt
32 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run12 --variant 11 --S4 --S1 --S2 > ../../outputs/enero/Run12_v11.txt
33 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run13 --variant 11 --S4 --S3 > ../../outputs/enero/Run13_v11.txt
34 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run14 --variant 11 --S4 --S1 --S3 > ../../outputs/enero/Run14_v11.txt
35 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run15 --variant 11 --S4 --S2 --S3 > ../../outputs/enero/Run15_v11.txt
36 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run16 --variant 11 --S4 --S1 --S2 --S3 > ../../outputs/enero/Run16_v11.txt
37 +#========================================variant12=====================================
38 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run1 --variant 12 > ../../outputs/enero/Run1_v12.txt
39 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run2 --variant 12 --S1 > ../../outputs/enero/Run2_v12.txt
40 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run3 --variant 12 --S2 > ../../outputs/enero/Run3_v12.txt
41 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run4 --variant 12 --S1 --S2 > ../../outputs/enero/Run4_v12.txt
42 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run5 --variant 12 --S3 > ../../outputs/enero/Run5_v12.txt
43 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run6 --variant 12 --S1 --S3 > ../../outputs/enero/Run6_v12.txt
44 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run7 --variant 12 --S2 --S3 > ../../outputs/enero/Run7_v12.txt
45 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run8 --variant 12 --S1 --S2 --S3 > ../../outputs/enero/Run8_v12.txt
46 +#=======================================S4 v12=======================================
47 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run9 --variant 12 --S4 > ../../outputs/enero/Run9_v12.txt
48 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run10 --variant 12 --S4 --S1 > ../../outputs/enero/Run10_v12.txt
# Run11 (S4 + S2): the log file name must match --reportName, otherwise Run12 overwrites it
python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run11 --variant 12 --S4 --S2 > ../../outputs/enero/Run11_v12.txt
50 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run12 --variant 12 --S4 --S1 --S2 > ../../outputs/enero/Run12_v12.txt
51 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run13 --variant 12 --S4 --S3 > ../../outputs/enero/Run13_v12.txt
52 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run14 --variant 12 --S4 --S1 --S3 > ../../outputs/enero/Run14_v12.txt
53 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run15 --variant 12 --S4 --S2 --S3 > ../../outputs/enero/Run15_v12.txt
54 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run16 --variant 12 --S4 --S1 --S2 --S3 > ../../outputs/enero/Run16_v12.txt
55 +#========================================variant13=====================================
56 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run1 --variant 13 > ../../outputs/enero/Run1_v13.txt
57 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run2 --variant 13 --S1 > ../../outputs/enero/Run2_v13.txt
58 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run3 --variant 13 --S2 > ../../outputs/enero/Run3_v13.txt
59 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run4 --variant 13 --S1 --S2 > ../../outputs/enero/Run4_v13.txt
60 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run5 --variant 13 --S3 > ../../outputs/enero/Run5_v13.txt
61 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run6 --variant 13 --S1 --S3 > ../../outputs/enero/Run6_v13.txt
62 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run7 --variant 13 --S2 --S3 > ../../outputs/enero/Run7_v13.txt
63 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run8 --variant 13 --S1 --S2 --S3 > ../../outputs/enero/Run8_v13.txt
64 +#=======================================S4 v13=======================================
65 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run9 --variant 13 --S4 > ../../outputs/enero/Run9_v13.txt
66 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run10 --variant 13 --S4 --S1 > ../../outputs/enero/Run10_v13.txt
# Run11 (S4 + S2) and Run12 (S4 + S1 + S2): each run writes to its own log file so Run13 does not overwrite them
python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run11 --variant 13 --S4 --S2 > ../../outputs/enero/Run11_v13.txt
python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run12 --variant 13 --S4 --S1 --S2 > ../../outputs/enero/Run12_v13.txt
69 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run13 --variant 13 --S4 --S3 > ../../outputs/enero/Run13_v13.txt
70 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run14 --variant 13 --S4 --S1 --S3 > ../../outputs/enero/Run14_v13.txt
71 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run15 --variant 13 --S4 --S2 --S3 > ../../outputs/enero/Run15_v13.txt
72 +python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run16 --variant 13 --S4 --S1 --S2 --S3 > ../../outputs/enero/Run16_v13.txt
This diff could not be displayed because it is too large.
1 # -*- coding: UTF-8 -*- 1 # -*- coding: UTF-8 -*-
2 2
3 import os 3 import os
4 -from itertools import chain 4 +#from itertools import chain
5 from optparse import OptionParser 5 from optparse import OptionParser
6 from time import time 6 from time import time
7 from collections import Counter 7 from collections import Counter
......
1 +# -*- coding: UTF-8 -*-
2 +
3 +import os # Access operative sistem
4 +#from itertools import chain # No se ocupa
5 +from optparse import OptionParser # Number of transitions
6 +from time import time # Return the time in seconds since the epoch as a float
7 +from collections import Counter # Dict subclass for counting hashable objects
8 +#import re # No se ocupa
9 +
10 +import nltk # Natural Language Toolkit platform to work with human language data
11 +import sklearn # Free software machine learning
12 +import scipy.stats # library of statistical functions
13 +import sys # to exit from Python.
14 +
15 +from sklearn.externals import joblib # provide lightweight pipelining
16 +from sklearn.metrics import make_scorer # Make a scorer from a performance metric or loss function
17 +from sklearn.cross_validation import cross_val_score # Evaluate a score by cross-validation
18 +from sklearn.grid_search import RandomizedSearchCV # Randomized search on hyper parameters
19 +
20 +import sklearn_crfsuite # Thin CRFsuite
21 +from sklearn_crfsuite import scorers # Added scorers.sequence_accuracy
22 +from sklearn_crfsuite import metrics # Add flat recall score to metrics
23 +
24 +from pandas import DataFrame as DF # Contruct dataframe object
25 +from nltk.corpus import stopwords # To exclude top words
26 +
27 +#-------------------------------------------------------------------------------
28 +# Objective
29 +# Training and evaluation of CRFs with sklearn-crfsuite.
30 +#
31 +# Input parameters
32 +# (1) --inputPath Path of training and test data set
33 +# (2) --outputPath Output path to place output files
34 +# (3) --trainingFile File with training data set
35 +# (4) --testFile File with test data set
36 +# (5) --reportName Number of run
37 +# (6) --variant Part of S2 variant
38 +# (7) --nrules Number of crf transitions
39 +# (8) --S1 Inner word features set
40 +# (9) --S2 Complete word features
41 +# (10) --S3 Extended context features
42 +# (11) --S4 Semantic features
43 +# (12) --excludeStopWords
44 +# (13) --excludeSymbols
45 +
46 +# Output
47 +# 1) Best model
48 +# 2) Report
49 +
50 +# Examples
51 +# python3 training_validation_v14.0.1.py
52 +# --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets
53 +# --trainingFile training-data-set-70-NER.txt
54 +# --testFile test-data-set-30-NER.txt
55 +# --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/
56 +# --nrules 500
57 +# --reportName Run1
58 +# --variant 11
59 +# --S1
60 +# --S2
61 +# --S3
62 +# --S4
63 +
64 +# python3 /home/egaytan/automatic-extraction-growth-conditions/CRF/bin/training/training_validation_v14.0.1.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/data-sets --trainingFile training-data-set-70-NER.txt --testFile test-data-set-30-NER.txt --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/ --nrules 500 --reportName Run1 --variant 10 > ../../outputs/enero/Run1_v10.txt
65 +
66 +##################################################################
67 +# FEATURES #
68 +##################################################################
69 +
70 +#================== COMPLETE WORD FEATURES ======================#
71 +
def isGreek(word):
    """Return True when ``word`` is exactly one Greek letter.

    The comparison is made against the 24 upper-case letters and the 25
    lower-case letters (both sigma forms) listed below; anything longer
    than a single letter, or any other character, yields False.
    """
    greek_letters = (
        'Α', 'Β', 'Γ', 'Δ', 'Ε', 'Ζ', 'Η', 'Θ', 'Ι', 'Κ', 'Λ', 'Μ',
        'Ν', 'Ξ', 'Ο', 'Π', 'Ρ', 'Σ', 'Τ', 'Υ', 'Φ', 'Χ', 'Ψ', 'Ω',
        'α', 'β', 'γ', 'δ', 'ε', 'ζ', 'η', 'θ', 'ι', 'κ', 'λ', 'μ',
        'ν', 'ξ', 'ο', 'π', 'ρ', 'ς', 'σ', 'τ', 'υ', 'φ', 'χ', 'ψ', 'ω',
    )
    return word in greek_letters
80 +
81 +#================ INNER OF THE WORD FEATURES ====================#
82 +
def hGreek(word):
    """Return True when ``word`` contains at least one Greek letter.

    Every character of ``word`` is checked against the upper- and
    lower-case Greek letters listed below; an empty word yields False.
    """
    greek_letters = (
        'Α', 'Β', 'Γ', 'Δ', 'Ε', 'Ζ', 'Η', 'Θ', 'Ι', 'Κ', 'Λ', 'Μ',
        'Ν', 'Ξ', 'Ο', 'Π', 'Ρ', 'Σ', 'Τ', 'Υ', 'Φ', 'Χ', 'Ψ', 'Ω',
        'α', 'β', 'γ', 'δ', 'ε', 'ζ', 'η', 'θ', 'ι', 'κ', 'λ', 'μ',
        'ν', 'ξ', 'ο', 'π', 'ρ', 'ς', 'σ', 'τ', 'υ', 'φ', 'χ', 'ψ', 'ω',
    )
    return any(character in greek_letters for character in word)
92 +
def hNumber(word):
    """Return True when ``word`` contains at least one digit character."""
    return any(character.isdigit() for character in word)
99 +
def hUpper(word):
    """Return True when ``word`` contains at least one upper-case character."""
    return any(character.isupper() for character in word)
105 +
def hLower(word):
    """Return True when ``word`` contains at least one lower-case character."""
    return any(character.islower() for character in word)
111 +
112 +#============================FEATURES===========================#
113 +
def word2features(sent, i, S1, S2, S3, S4, v):
    """Build the CRF feature dictionary for the token at position ``i``.

    Every element of ``sent`` is a '|'-separated string. The columns read
    here are word (0), lemma (1), POS tag (2) and NER tag (3).

    Parameters:
        sent: list of '|'-separated token strings forming one sentence.
        i:    index in ``sent`` of the token to describe.
        S1:   when true, add inner-word features plus the prefix features
              selected by ``v``.
        S2:   when true, add complete-word features and the neighbouring words.
        S3:   when true, add lemma and POS tag of the tokens two positions away.
        S4:   when true, add the NER tag of the tokens one and two positions
              away (this requires the NER column on those tokens).
        v:    S1 variant (10, 11, 12 or 13), as an int or a numeric string.

    Returns:
        dict mapping feature name to feature value.
    """
    # The variant arrives from the command line as a string; the comparisons
    # below use ints, so normalise it here. A value that cannot be converted
    # is left as is and simply selects no variant.
    try:
        v = int(v)
    except (TypeError, ValueError):
        pass

    last = len(sent) - 1

    ## Split CoreNLP output by columns
    listElem = sent[i].split('|')
    word = listElem[0]
    lemma = listElem[1]
    postag = listElem[2]

    ## Neighbouring tokens (None when the position falls outside the sentence)
    prev1 = sent[i - 1].split('|') if i > 0 else None
    next1 = sent[i + 1].split('|') if i < last else None
    prev2 = sent[i - 2].split('|') if i > 1 else None
    next2 = sent[i + 2].split('|') if i < last - 1 else None

    #=========================== G =============================#
    ## General features: lemma and POS tag of the token and of its
    ## immediate neighbours
    features = {
        'lemma': lemma,
        'postag': postag
    }
    if prev1 is not None:
        features.update({
            '-1:lemma': prev1[1],
            '-1:postag': prev1[2],
        })
    if next1 is not None:
        features.update({
            '+1:lemma': next1[1],
            '+1:postag': next1[2],
        })

    #=========================== S1 =============================#
    ## Inner word features
    if S1:
        features.update({
            'hUpper': hUpper(word),
            'hLower': hLower(word),
            'hGreek': hGreek(word),
            'symb': word.isalnum()
        })
        #========== Variants of inner word features ============#
        if v == 10:
            ## one- and two-character prefixes of the word
            features['word[:1]'] = word[:1]
            if len(word) > 1:
                features['word[:2]'] = word[:2]

        if v == 11:
            ## one- and two-character prefixes of lemma and POS tag
            features['lemma[:1]'] = lemma[:1]
            features['postag[:1]'] = postag[:1]
            if len(lemma) > 1:
                features['lemma[:2]'] = lemma[:2]
            if len(postag) > 1:
                features['postag[:2]'] = postag[:2]

        if v == 12:
            ## one- and two-character prefixes of word and POS tag
            features['word[:1]'] = word[:1]
            if len(word) > 1:
                features['word[:2]'] = word[:2]
            features['postag[:1]'] = postag[:1]
            if len(postag) > 1:
                features['postag[:2]'] = postag[:2]

        if v == 13:
            ## one- and two-character prefixes of the lemma
            features['lemma[:1]'] = lemma[:1]
            if len(lemma) > 1:
                features['lemma[:2]'] = lemma[:2]

    #=========================== S2 =============================#
    ## Complete word features
    if S2:
        features.update({
            'word': word,
            'isUpper': word.isupper(),
            'isLower': word.islower(),
            'isGreek': isGreek(word),
            'isNumber': word.isdigit()
        })
        if prev1 is not None:
            features['-1:word'] = prev1[0]
        if next1 is not None:
            features['+1:word'] = next1[0]

    #=========================== S3 =============================#
    ## Extended context features: lemma and POS tag two positions away
    if S3:
        if prev2 is not None:
            features['-2:lemma'] = prev2[1]
            features['-2:postag'] = prev2[2]
        if next2 is not None:
            features['+2:lemma'] = next2[1]
            features['+2:postag'] = next2[2]

    #=========================== S4 =============================#
    ## NER features of the surrounding tokens
    if S4:
        if prev1 is not None:
            ## NER tag of the PREVIOUS token (not of the current one)
            features['-1:ner'] = prev1[3]
        if next1 is not None:
            features['+1:ner'] = next1[3]
        if prev2 is not None:
            features['-2:ner'] = prev2[3]
        if next2 is not None:
            features['+2:ner'] = next2[3]

    return features
323 +
def sent2features(sent, S1, S2, S3, S4, v):
    """Return one feature dictionary per token of ``sent``, in sentence order.

    The feature-set switches ``S1``..``S4`` and the variant ``v`` are passed
    unchanged to ``word2features`` for every position of the sentence.
    """
    feature_rows = []
    for position, _ in enumerate(sent):
        feature_rows.append(word2features(sent, position, S1, S2, S3, S4, v))
    return feature_rows
327 +
def sent2labels(sent):
    """Return the label of every token: the text after its last '|'.

    A token without any '|' is returned whole.
    """
    labels = []
    for token in sent:
        # rpartition keeps everything after the final separator
        labels.append(token.rpartition('|')[2])
    return labels
331 +
def sent2tokens(sent):
    """Return the surface word of every token in ``sent``.

    Sentences in this script are lists of '|'-separated strings whose first
    column is the word, so string tokens are split on '|'. Tuple/list tokens
    (word first) are still accepted for backward compatibility.
    """
    tokens = []
    for elem in sent:
        if isinstance(elem, str):
            tokens.append(elem.split('|')[0])
        else:
            tokens.append(elem[0])
    return tokens
334 +
def print_transitions(trans_features, f):
    """Write one line per transition to the file-like object ``f``.

    ``trans_features`` yields ``((label_from, label_to), weight)`` items; each
    is written as the two labels joined by an arrow, followed by the weight
    with six decimals.
    """
    row = "{:6} -> {:7} {:0.6f}\n"
    for label_pair, weight in trans_features:
        source, target = label_pair
        f.write(row.format(source, target, weight))
338 +
def print_state_features(state_features, f):
    """Write one line per state feature to the text file-like object ``f``.

    ``state_features`` yields ``((attr, label), weight)`` items; each is
    written as the weight with six decimals, the label and the attribute name.

    The attribute is written as text. Encoding it to bytes first would, under
    Python 3, put the bytes repr (``b'...'`` with escaped non-ASCII
    characters) into the report instead of the attribute itself.
    """
    for (attr, label), weight in state_features:
        f.write("{:0.6f} {:8} {}\n".format(weight, label, attr))
342 +
343 +
344 +__author__ = 'egaytan'
345 +
346 +##################################################################
347 +# MAIN PROGRAM #
348 +##################################################################
349 +
350 +if __name__ == "__main__":
351 + ## Defining parameters
352 + parser = OptionParser()
353 + parser.add_option("--inputPath", dest="inputPath", help="Path of training data set", metavar="PATH")
354 + parser.add_option("--outputPath", dest="outputPath", help="Output path to place output files", metavar="PATH")
355 + parser.add_option("--trainingFile", dest="trainingFile", help="File with training data set", metavar="FILE")
356 + parser.add_option("--testFile", dest="testFile", help="File with test data set", metavar="FILE")
357 + parser.add_option("--reportName", dest="reportName", help="Report number run", metavar="FILE")
358 + parser.add_option("--variant", dest="variant", help="Report file", metavar="FILE")
359 + parser.add_option("--S1", dest="S1", help="General features", action="store_true", default=False)
360 + parser.add_option("--S2", dest="S2", help="Inner/Complete word features", action="store_true", default=False)
361 + parser.add_option("--S3", dest="S3", help="Extended context features", action="store_true", default=False)
362 + parser.add_option("--S4", dest="S4", help="Semantic features", action="store_true", default=False)
363 + parser.add_option("--excludeStopWords", dest="excludeStopWords",help="Exclude stop words", action="store_true", default=False)
364 + parser.add_option("--excludeSymbols", dest="excludeSymbols", help="Exclude punctuation marks", action="store_true", default=False)
365 + parser.add_option("--nrules", dest="nrules", help="Number of crf rules on report", type="int")
366 +
367 + (options, args) = parser.parse_args()
368 + if len(args) > 0:
369 + parser.error("Any parameter given.")
370 + sys.exit(1)
371 +
372 + print('-------------------------------- PARAMETERS --------------------------------')
373 + print("Path of test and training data sets: " + options.inputPath)
374 + print("Path of outputs: " + options.outputPath)
375 + print("File with training data set: " + str(options.trainingFile))
376 + print("File with test data set: " + str(options.testFile))
377 + print("reportName: " + str(options.reportName))
378 + print("Exclude stop words: " + str(options.excludeStopWords))
379 + print("Levels: " + "S1: " + str(options.S1) + "S2: " + str(options.S2) + "S3: " + str(options.S3) + "S4: " + str(options.S4))
380 + print("Run variant: " + str(options.variant))
381 + print("Number of rules on report file: " + str(options.nrules))
382 +
383 + symbols = ['.', ',', ':', ';', '?', '!', '\'', '"', '<', '>', '(', ')', '-', '_', '/', '\\', '¿', '¡', '+', '{',
384 + '}', '[', ']', '*', '%', '$', '#', '&', '°', '`', '...']
385 + print("Exclude symbols: " + str(options.excludeSymbols))
386 +
387 + print('-------------------------------- PROCESSING --------------------------------')
388 + print('Reading corpus...')
389 + t0 = time()
390 +
391 + sentencesTrainingData = []
392 + sentencesTestData = []
393 +
394 + stopwords = [word for word in stopwords.words('english')]
395 +
396 + with open(os.path.join(options.inputPath, options.trainingFile), "r") as iFile:
397 + for line in iFile.readlines():
398 + listLine = []
399 + line = line.strip('\n')
400 + for token in line.split():
401 + if options.excludeStopWords:
402 + listToken = token.split('|')
403 + lemma = listToken[1]
404 + if lemma in stopwords:
405 + continue
406 + if options.excludeSymbols:
407 + listToken = token.split('|')
408 + lemma = listToken[1]
409 + if lemma in symbols:
410 + continue
411 + listLine.append(token)
412 + sentencesTrainingData.append(listLine)
413 + print(" Sentences training data: " + str(len(sentencesTrainingData)))
414 +
415 + with open(os.path.join(options.inputPath, options.testFile), "r") as iFile:
416 + for line in iFile.readlines():
417 + listLine = []
418 + line = line.strip('\n')
419 + for token in line.split():
420 + if options.excludeStopWords:
421 + listToken = token.split('|')
422 + lemma = listToken[1]
423 + if lemma in stopwords:
424 + continue
425 + if options.excludeSymbols:
426 + listToken = token.split('|')
427 + lemma = listToken[1]
428 + if lemma in symbols:
429 + continue
430 + listLine.append(token)
431 + sentencesTestData.append(listLine)
432 + print(" Sentences test data: " + str(len(sentencesTestData)))
433 +
434 + print("Reading corpus done in: %fs" % (time() - t0))
435 +
436 + print('-------------------------------- FEATURES --------------------------------')
437 +
438 + Dtraning = sent2features(sentencesTrainingData[0], options.S1, options.S2, options.S3, options.S4, int(options.variant))[2]
439 + Dtest = sent2features(sentencesTestData[0], options.S1, options.S2, options.S3, options.S4, int(options.variant))[2]
440 + print('--------------------------Features Training ---------------------------')
441 + print(DF(list(Dtraning.items())))
442 + print('--------------------------- FeaturesTest -----------------------------')
443 + print(DF(list(Dtest.items())))
444 +
445 + t0 = time()
446 +
447 + X_train = [sent2features(s, options.S1, options.S2, options.S3, options.S4, options.variant) for s in sentencesTrainingData]
448 + y_train = [sent2labels(s) for s in sentencesTrainingData]
449 +
450 + X_test = [sent2features(s, options.S1, options.S2, options.S3, options.S4, options.variant) for s in sentencesTestData]
451 + # print X_test
452 + y_test = [sent2labels(s) for s in sentencesTestData]
453 +
454 + '''
455 + Fixed parameters
456 + crf = sklearn_crfsuite.CRF(
457 + algorithm='lbfgs',
458 + c1=0.1,
459 + c2=0.1,
460 + max_iterations=100,
461 + all_pgossible_transitions=True
462 + )
463 + '''
464 + # Hyperparameter Optimization
465 + crf = sklearn_crfsuite.CRF(
466 + algorithm='lbfgs',
467 + max_iterations=100,
468 + all_possible_transitions=True
469 + )
470 + params_space = {
471 + 'c1': scipy.stats.expon(scale=0.5),
472 + 'c2': scipy.stats.expon(scale=0.05),
473 + }
474 +
475 + # Original: labels = list(crf.classes_)
476 + # Original: labels.remove('O')
477 + labels = list(['Gtype', 'Gversion', 'Med', 'Phase', 'Strain', 'Substrain', 'Supp', 'Technique', 'Temp', 'OD', 'Anti', 'Agit', 'Air', 'Vess', 'pH'])
478 +
479 + # use the same metric for evaluation
480 + f1_scorer = make_scorer(metrics.flat_f1_score, average='weighted', labels=labels)
481 +
482 + # search
483 + rs = RandomizedSearchCV(crf, params_space,
484 + cv=5,
485 + verbose=3,
486 + n_jobs=-1,
487 + n_iter=100,
488 + scoring=f1_scorer,
489 + random_state=42)
490 +
491 + rs.fit(X_train, y_train)
492 +
493 + # Fixed parameters
494 + # crf.fit(X_train, y_train)
495 +
496 + # Best hyperparameters
497 + # crf = rs.best_estimator_
498 +
499 + nameReport = str(options.reportName) + '_v'+ str(options.variant) + '.txt'
500 + with open(os.path.join(options.outputPath, "reports", "report_" + nameReport), mode="w") as oFile:
501 + oFile.write("********** TRAINING AND TESTING REPORT **********\n")
502 + oFile.write("Training file: " + options.trainingFile + '\n')
503 + oFile.write('\n')
504 + oFile.write('best params:' + str(rs.best_params_) + '\n')
505 + oFile.write('best CV score:' + str(rs.best_score_) + '\n')
506 + oFile.write('model size: {:0.2f}M\n'.format(rs.best_estimator_.size_ / 1000000))
507 +
508 + print("Training done in: %fs" % (time() - t0))
509 + t0 = time()
510 +
511 + # Replace crf with the best estimator found by the randomized search
512 + crf = rs.best_estimator_
513 +
514 + # Saving model
515 + print(" Saving training model...")
516 + t1 = time()
517 + nameModel = 'model_' + str(options.reportName) + '_v'+ str(options.variant) + '_S1_' + str(options.S1) + '_S2_' + str(options.S2) + '_S3_' + str(options.S3) + '_S4_' + str(options.S4) + '_' + str(options.reportName) + '_v' + str(options.variant) +'.mod'
518 + joblib.dump(crf, os.path.join(options.outputPath, "models", nameModel))
519 + print(" Saving training model done in: %fs" % (time() - t1))
520 +
521 + # Evaluation against test data
522 + y_pred = crf.predict(X_test)
523 + print("*********************************")
524 + print("Prediction done in: %fs" % (time() - t0))
525 +
526 + with open(os.path.join(options.outputPath, "reports", "report_" + nameReport), mode="a") as oFile:
527 + oFile.write('\n')
528 + oFile.write("Flat F1: " + str(metrics.flat_f1_score(y_test, y_pred, average='weighted', labels=labels)))
529 + oFile.write('\n')
530 + # labels = list(crf.classes_)
531 + sorted_labels = sorted(
532 + labels,
533 + key=lambda name: (name[1:], name[0])
534 + )
535 + oFile.write(metrics.flat_classification_report( y_test, y_pred, labels=sorted_labels, digits=3))
536 + oFile.write('\n')
537 +
538 + oFile.write("\nTop likely transitions:\n")
539 + print_transitions(Counter(crf.transition_features_).most_common(options.nrules), oFile)
540 + oFile.write('\n')
541 +
542 + oFile.write("\nTop unlikely transitions:\n")
543 + print_transitions(Counter(crf.transition_features_).most_common()[-options.nrules:], oFile)
544 + oFile.write('\n')
545 +
546 + oFile.write("\nTop positive:\n")
547 + print_state_features(Counter(crf.state_features_).most_common(options.nrules), oFile)
548 + oFile.write('\n')
549 +
550 + oFile.write("\nTop negative:\n")
551 + print_state_features(Counter(crf.state_features_).most_common()[-options.nrules:], oFile)
552 + oFile.write('\n')
553 +