Estefani Gaytan Nunez

upload

...@@ -7,38 +7,36 @@ from pandas import DataFrame as DF ...@@ -7,38 +7,36 @@ from pandas import DataFrame as DF
7 import matplotlib.pyplot as plt 7 import matplotlib.pyplot as plt
8 8
9 # Objective 9 # Objective
10 -# Draw figures of grid reports 10 +# Draw figures of grid reports
11 # 11 #
12 # Input parameters 12 # Input parameters
13 -# --inputPath=PATH Path of input files 13 +# --inputPath Path of input files
14 -# --outputPath=PATH Path to place output figures 14 +# --outputPath Path to place output figures
15 -# --figureName specific figure name for a single run, or first part of the name for multiple figures 15 +# --figureName specific figure name for a single run, or first part of the name for multiple figures
16 -# --inputFile Use it for a single report 16 +# --join boolean, all figures together
17 -# --version CRF-script version of reports
18 # 17 #
19 # Output 18 # Output
20 # training and test data set 19 # training and test data set
21 # 20 #
22 # Examples 21 # Examples
23 # python figures-reports.py 22 # python figures-reports.py
24 -# --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/reports/ 23 +# --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/reports/nov13
25 -# --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/figures/ 24 +# --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/figures/nov13
26 # --figureName FiguresGrid 25 # --figureName FiguresGrid
27 -# --inputFile report_Run1_v11.txt 26 +# --join
28 -# -version v11
29 27
30 -# python figures-reports.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/reports/ --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/figures/ --figureName FiguresGrid_v1 --inputFile report_Run1_v11.txt ..version v11 28 +
29 +
30 +# python figures-reports.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/reports/nov13 --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/figures/nov13 --figureName FiguresGrid --join
31 __author__ = 'egaytan' 31 __author__ = 'egaytan'
32 32
33 #################################################################################### 33 ####################################################################################
34 # FUNCTIONS # 34 # FUNCTIONS #
35 #################################################################################### 35 ####################################################################################
36 -def Filter(rfile, options,v): 36 +
37 - if options[0]=='all': 37 +def savescreen(output, dic, path):
38 - if rfile[0:6]=='report' and rfile[-7:-4]==v: return(True) 38 + if output:
39 - elif rfile in options: 39 + DF.from_dict(dic).to_csv(path+'.csv', sep = "\t", index = True)
40 - return(True)
41 - return(False)
42 40
43 #################################################################################### 41 ####################################################################################
44 # MAIN PROGRAM # 42 # MAIN PROGRAM #
...@@ -50,8 +48,7 @@ if __name__ == '__main__': ...@@ -50,8 +48,7 @@ if __name__ == '__main__':
50 parser.add_option('--inputPath', dest='inputPath', help='Path of output from CoreNLP', metavar='PATH') 48 parser.add_option('--inputPath', dest='inputPath', help='Path of output from CoreNLP', metavar='PATH')
51 parser.add_option('--outputPath', dest='outputPath', help='Path to place output figures', metavar='PATH') 49 parser.add_option('--outputPath', dest='outputPath', help='Path to place output figures', metavar='PATH')
52 parser.add_option('--figureName', dest='figureName', help='Specific or first part of figurename', metavar='FILE') 50 parser.add_option('--figureName', dest='figureName', help='Specific or first part of figurename', metavar='FILE')
53 - parser.add_option('--version', dest='version', help='script version', metavar='FILE') 51 + parser.add_option('--table', dest='table', help='save score-table', action='store_true', default=False)
54 - parser.add_option('--inputFile', dest='inputFile', help='Use it for a specific report files', metavar='FILE', default='all,')
55 52
56 (options, args) = parser.parse_args() 53 (options, args) = parser.parse_args()
57 if len(args) > 0: 54 if len(args) > 0:
...@@ -61,56 +58,40 @@ if __name__ == '__main__': ...@@ -61,56 +58,40 @@ if __name__ == '__main__':
61 print('-------------------------------- PARAMETERS --------------------------------') 58 print('-------------------------------- PARAMETERS --------------------------------')
62 print('Path of output from CoreNLP: ' + str(options.inputPath)) 59 print('Path of output from CoreNLP: ' + str(options.inputPath))
63 print('Path to place output figures: ' + str(options.outputPath)) 60 print('Path to place output figures: ' + str(options.outputPath))
64 - print('Specific or first part of figurename: ' + str(options.figureName)) 61 + print('Figurename: ' + str(options.figureName))
65 - print('CRF-script version: ' + str(options.version))
66 -
67 print('-------------------------------- PROCESSING --------------------------------') 62 print('-------------------------------- PROCESSING --------------------------------')
63 + reportFileList = [ rfile for rfile in os.listdir(options.inputPath) if rfile[0:7] == "report_"]
64 + print(','.join(reportFileList))
68 65
69 - rawInputRepotsList = str(options.inputFile).split(',') 66 + for inputFile in reportFileList:
70 - reportFileList = [ rfile for rfile in os.listdir(options.inputPath) if Filter(rfile, rawInputRepotsList, str(options.version)) ] 67 + scores = df(dict)
71 - scores = df(dict) 68 + for report in reportFileList:
72 - #CV={} 69 + with open(os.path.join(options.inputPath, report), 'r') as File:
73 - print('Report files: ' + str(options.inputFile )) 70 + string = File.read()
74 - print('\n'.join(reportFileList)) 71 + scores[report[7:11]]['CV']=re.findall('best\sCV\sscore\:(\d+\.\d+)', string)[0]
75 - print('----------------------------------- NOTE -----------------------------------') 72 + summaryScores = re.findall('avg\s\/\stotal\s+(\d+\.\d+)\s+(\d+\.\d+)\s+(\d+\.\d+)', string)[0]
76 - print('\n-------- All chosen report files should be in inputPath given---------------\n') 73 + scores[report[7:11]]['precision']=summaryScores[0]
74 + scores[report[7:11]]['recall']=summaryScores[1]
75 + scores[report[7:11]]['f1-score']=summaryScores[2]
77 76
78 - print('------------------------------- SAVING DATA --------------------------------\n')
79 - for report in reportFileList:
80 - with open(os.path.join(options.inputPath, report), 'r') as File:
81 - string = File.read()
82 - scores[report[7:11]]['CV']=re.findall('best\sCV\sscore\:(\d+\.\d+)', string)[0]
83 - summaryScores = re.findall('avg\s\/\stotal\s+(\d+\.\d+)\s+(\d+\.\d+)\s+(\d+\.\d+)', string)[0]
84 - scores[report[7:11]]['precision']=summaryScores[0]
85 - scores[report[7:11]]['recall']=summaryScores[1]
86 - scores[report[7:11]]['f1-score']=summaryScores[2]
87 -
88 print(DF(scores).T) 77 print(DF(scores).T)
89 - print('------------------------------- SAVING TABLE --------------------------------\n') 78 + scoresTable = DF(scores).T
90 - with open(os.path.join(options.inputPath, str(options.figureName) ), 'w') as File: 79 + print('------------------------------- SAVING DATA --------------------------------')
91 - 80 + print('Saving score-table: ' + str(options.table))
92 - scoresTable = DF(scores).T 81 + imageName = os.path.join(options.outputPath, options.figureName)
93 - 82 + savescreen(options.table, scores, imageName)
94 - imageName=os.path.join(options.outputPath, options.figureName) 83 + fig = plt.figure()
95 - ylab = "score", 84 + fig.set_figheight(13)
96 - fig = plt.figure() 85 + fig.set_figwidth(20)
97 - plt.grid(False) 86 + plt.ylim(0.7, 1.1)
98 - plt.rcParams.update({'font.size': 15}) 87 + plt.xlabel("Runs")
99 - fig.set_figheight(13) 88 + plt.ylabel("score")
100 - fig.set_figwidth(20) 89 + plt.rcParams.update()
101 - plt.xlabel("Runs") 90 + plt.grid()
102 - plt.ylabel("score") 91 + plt.plot(scoresTable['precision'],'o--', label='precision', linewidth=3, markersize=15)
103 - plt.xticks(range(8),scoresTable["CV"].index) 92 + plt.plot(scoresTable['f1-score'], 'o--', label='F1', linewidth=3, markersize=15)
104 - plt.plot(scoresTable['CV'], "--", color="red", label="CV") 93 + plt.plot(scoresTable['recall'], 'o--', label='recall' , linewidth=3, markersize=15)
105 - plt.plot(scoresTable['precision'], color="blue", label="precision") 94 + plt.plot(scoresTable['CV'], 'o--', label='CV' , linewidth=3, markersize=15)
106 - plt.plot(scoresTable['f1-score'], color="orange", label="F1") 95 + plt.legend(loc='lower right')
107 - plt.plot(scoresTable['recall'], color="g", label="recall") 96 + plt.xticks(range(8),['run1', 'run2', 'run3', 'run4', 'run5', 'run6', 'run7', 'run8'])
108 - plt.legend(loc='lower right') 97 + fig.savefig(imageName, bbox_inches='tight', pad_inches = 0.5)
109 - plt.tight_layout()
110 - fig.savefig(imageName, pad_inches=0.5)
111 -
112 -
113 -
114 -
115 -
116 -
......
1 +# Based on http://www.sthda.com/english/wiki/ggplot2-line-plot-quick-start-guide-r-software-and-data-visualization
2 +
3 +library(ggplot2)
4 +#library(ggpubr)
5 +#library(cowplot)
6 +
7 +######### BEST MODELS ##########
8 +
9 +# Run1
10 +# Todas las condiciones
11 +dfa <- data.frame(Measure=rep(c("Precision", "Recall", "F1-score"), each=15),
12 + Strategy=rep(c(
13 + "Agit",
14 + "Gversion",
15 + "Substrain",
16 + "Vess",
17 + "OD",
18 + "Anti",
19 + "Supp",
20 + "Air",
21 + "Gtype",
22 + "Med",
23 + "Temp",
24 + "Technique",
25 + "Phase",
26 + "pH",
27 + "Strain"
28 + ),3),
29 + Score=c(
30 + 0,
31 + 0,
32 + 0,
33 + 0,
34 + 1,
35 + 1,
36 + 0.883,
37 + 0.92,
38 + 0.905,
39 + 0.852,
40 + 0.818,
41 + 0.88,
42 + 1,
43 + 1,
44 + 1,
45 + 0,
46 + 0,
47 + 0,
48 + 0,
49 + 0.405,
50 + 0.444,
51 + 0.669,
52 + 0.742,
53 + 0.811,
54 + 0.912,
55 + 1,
56 + 1,
57 + 0.947,
58 + 1,
59 + 1,
60 + 0,
61 + 0,
62 + 0,
63 + 0,
64 + 0.577,
65 + 0.615,
66 + 0.762,
67 + 0.821,
68 + 0.856,
69 + 0.881,
70 + 0.9,
71 + 0.936,
72 + 0.973,
73 + 1,
74 + 1
75 + ))
76 +
77 +# Solo condiciones con F1-score > 0
78 +# Run 1
79 +df <- data.frame(Measure=rep(c("Precision", "Recall", "F1-score"), each=11),
80 + Strategy=rep(c(
81 + "OD",
82 + "Anti",
83 + "Supp",
84 + "Air",
85 + "Gtype",
86 + "Med",
87 + "Temp",
88 + "Technique",
89 + "Phase",
90 + "pH",
91 + "Strain"
92 + ),3),
93 + Score=c(
94 + 1,
95 + 1,
96 + 0.883,
97 + 0.92,
98 + 0.905,
99 + 0.852,
100 + 0.818,
101 + 0.88,
102 + 1,
103 + 1,
104 + 1,
105 + 0.405,
106 + 0.444,
107 + 0.669,
108 + 0.742,
109 + 0.811,
110 + 0.912,
111 + 1,
112 + 1,
113 + 0.947,
114 + 1,
115 + 1,
116 + 0.577,
117 + 0.615,
118 + 0.762,
119 + 0.821,
120 + 0.856,
121 + 0.881,
122 + 0.9,
123 + 0.936,
124 + 0.973,
125 + 1,
126 + 1
127 + ))
128 +
129 +head(df)
130 +
131 +pa<-ggplot(df, aes(x=Strategy, y=Score, group=Measure)) +
132 + geom_line(aes(color=Measure))+
133 + geom_point(aes(color=Measure))+
134 + scale_color_manual(values=c("#999999", "#E69F00", "#56B4E9"))+
135 + #scale_color_manual(values=c("#e6194b", "#3cb44b", "#0082c8"))+
136 + #geom_text(aes(label = Score))+
137 + labs(title="Scores by condition (Best model, Run1)",x="Condition", y = "Score")+
138 + theme(
139 + legend.position="top",
140 + # Centrar título: plot.title = element_text(hjust = 0.5),
141 + axis.line = element_line(colour = "gray"),
142 + panel.background = element_blank(),
143 + panel.grid.major = element_blank(),
144 + panel.grid.minor = element_blank(),
145 + panel.border = element_blank()
146 + )
147 +pa
148 +
149 +ggsave(".png")
150 +
151 +# Solo condiciones con F1-score > 0
152 +# Run 7
153 +df <- data.frame(Measure=rep(c("Precision", "Recall", "F1-score"), each=11),
154 + Strategy=rep(c(
155 + "Anti",
156 + "OD",
157 + "Supp",
158 + "Air",
159 + "Gtype",
160 + "Temp",
161 + "Med",
162 + "Technique",
163 + "Phase",
164 + "pH",
165 + "Strain"
166 + ),3),
167 + Score=c(
168 + 0.571,
169 + 1,
170 + 0.886,
171 + 0.939,
172 + 0.876,
173 + 0.818,
174 + 0.897,
175 + 0.952,
176 + 1,
177 + 1,
178 + 1,
179 + 0.444,
180 + 0.405,
181 + 0.684,
182 + 0.742,
183 + 0.802,
184 + 1,
185 + 0.912,
186 + 0.909,
187 + 0.947,
188 + 1,
189 + 1,
190 + 0.5,
191 + 0.577,
192 + 0.772,
193 + 0.829,
194 + 0.837,
195 + 0.9,
196 + 0.904,
197 + 0.93,
198 + 0.973,
199 + 1,
200 + 1
201 + ))
202 +
203 +head(df)
204 +
205 +pa<-ggplot(df, aes(x=Strategy, y=Score, group=Measure)) +
206 + geom_line(aes(color=Measure))+
207 + geom_point(aes(color=Measure))+
208 + scale_color_manual(values=c("#999999", "#E69F00", "#56B4E9"))+
209 + #scale_color_manual(values=c("#e6194b", "#3cb44b", "#0082c8"))+
210 + #geom_text(aes(label = Score))+
211 + labs(title="Scores by condition (Best model, Run7)",x="Condition", y = "Score")+
212 + theme(
213 + legend.position="top",
214 + # Centrar título: plot.title = element_text(hjust = 0.5),
215 + axis.line = element_line(colour = "gray"),
216 + panel.background = element_blank(),
217 + panel.grid.major = element_blank(),
218 + panel.grid.minor = element_blank(),
219 + panel.border = element_blank()
220 + )
221 +pa
222 +
223 +ggsave(".png")
224 +
1 +# Based on http://www.sthda.com/english/wiki/ggplot2-line-plot-quick-start-guide-r-software-and-data-visualization
2 +
3 +library(ggplot2)
4 +#library(ggpubr)
5 +#library(cowplot)
6 +
7 +######### BEST MODEL ##########
8 +
9 +# Solo condiciones con F1-score > 0
10 +# Run 6 (report_Run6_v11.txt)
11 +df <- data.frame(Measure=rep(c("Precision", "Recall", "F1-score"), each=11),
12 + Strategy=rep(c(
13 + "Air",
14 + "Anti",
15 + "Gtype",
16 + "Med",
17 + "OD",
18 + "pH",
19 + "Phase",
20 + "Supp",
21 + "Technique",
22 + "Temp",
23 + "Vess"
24 + ),3),
25 + Score=c(
26 + 0.565,
27 + 1,
28 + 0.889,
29 + 1,
30 + 1,
31 + 1,
32 + 0.882,
33 + 0.811,
34 + 1,
35 + 0.923,
36 + 1,
37 + 0.377,
38 + 1,
39 + 0.847,
40 + 0.943,
41 + 0.818,
42 + 1,
43 + 1,
44 + 0.799,
45 + 0.913,
46 + 0.828,
47 + 1,
48 + 0.452,
49 + 1,
50 + 0.867,
51 + 0.971,
52 + 0.9,
53 + 1,
54 + 0.938,
55 + 0.805,
56 + 0.955,
57 + 0.873,
58 + 1
59 + ))
60 +
61 +head(df)
62 +
63 +pa<-ggplot(df, aes(x=Strategy, y=Score, group=Measure)) +
64 + geom_line(aes(color=Measure))+
65 + geom_point(aes(color=Measure))+
66 + scale_color_manual(values=c("#999999", "#E69F00", "#56B4E9"))+
67 + #scale_color_manual(values=c("#e6194b", "#3cb44b", "#0082c8"))+
68 + #geom_text(aes(label = Score))+
69 + labs(title="Scores by condition (Best model, Run1)",x="Condition", y = "Score")+
70 + theme(
71 + legend.position="top",
72 + # Centrar título: plot.title = element_text(hjust = 0.5),
73 + axis.line = element_line(colour = "gray"),
74 + panel.background = element_blank(),
75 + panel.grid.major = element_blank(),
76 + panel.grid.minor = element_blank(),
77 + panel.border = element_blank()
78 + )
79 +pa
80 +
81 +ggsave(".png")
82 +
1 +# Based on http://zevross.com/blog/2019/04/02/easy-multi-panel-plots-in-r-using-facet_wrap-and-facet_grid-from-ggplot2/
2 +
3 +library(ggplot2)
4 +#library(ggpubr)
5 +#library(cowplot)
6 +
7 +organism = 'ECO'
8 +
9 +if (organism == 'ECO') {
10 +######### ECO DEVELOPMENT DATASET ##########
11 +
12 +# ECO-DEV-WITH-EFFECT-COMBINATION: Combination of strategies with effect in E. coli development dataset
13 +df <- data.frame(Panel=rep(c("Combination of strategies (effect)", "Separated strategies (effect)", "Combination of strategies (no effect)", "Separated strategies (no effect)"), each=12),
14 + Measure=rep(c("Precision", "Recall", "F1-score"), each=4),
15 + Strategy=c(rep(c("D", "D+V", "D+V+At", "D+V+At+Au"),3),rep(c("D", "V", "At", "Au"),3)),
16 + Score=c(
17 + 0.78, 0.79, 0.81, 0.81, 0.41, 0.56, 0.63, 0.63, 0.53, 0.65, 0.71, 0.71,
18 + 0.78, 0.89, 0.93, 1.00, 0.41, 0.35, 0.13, 0.01, 0.53, 0.50, 0.23, 0.02,
19 + 0.82, 0.82, 0.84, 0.84, 0.55, 0.66, 0.72, 0.72, 0.66, 0.73, 0.78, 0.78,
20 + 0.82, 0.88, 0.94, 1.00, 0.55, 0.39, 0.20, 0.01, 0.66, 0.54, 0.33, 0.02))
21 +filename = "ECO-dev-multi-panel.png"
22 +title_plot = "E. coli development dataset"
23 +} else if (organism == 'STM')
24 +{
25 +######### STM DEVELOPMENT DATASET ##########
26 +
27 +# STM-DEV-WITH-EFFECT-COMBINATION: Combination of strategies with effect in Salmonella evaluation dataset
28 +df <- data.frame(Panel=rep(c("Combination of strategies (effect)", "Separated strategies (effect)", "Combination of strategies (no effect)", "Separated strategies (no effect)"), each=12),
29 + Measure=rep(c("Precision", "Recall", "F1-score"), each=4),
30 + Strategy=c(rep(c("D", "D+V", "D+V+At", "D+V+At+Au"),3),rep(c("D", "V", "At", "Au"),3)),
31 + Score=c(
32 + 0.78, 0.77, 0.76, 0.76, 0.33, 0.49, 0.54, 0.54, 0.47, 0.60, 0.63, 0.63,
33 + 0.78, 0.81, 0.70, 0.88, 0.33, 0.33, 0.10, 0.01, 0.47, 0.47, 0.18, 0.02,
34 + 0.84, 0.82, 0.81, 0.81, 0.47, 0.59, 0.65, 0.65, 0.60, 0.68, 0.72, 0.72,
35 + 0.84, 0.84, 0.77, 0.86, 0.47, 0.40, 0.17, 0.01, 0.60, 0.55, 0.27, 0.02))
36 +filename = "STM-dev-multi-panel.png"
37 +title_plot = "Salmonella evaluation dataset"
38 +}
39 +
40 +head(df)
41 +
42 +pa<-ggplot(df, aes(x=Strategy, y=Score, group=Measure)) +
43 + geom_line(aes(color=Measure))+
44 + geom_point(aes(color=Measure))+
45 + scale_color_manual(values=c("#999999", "#E69F00", "#56B4E9"))+
46 + #scale_color_manual(values=c("#e6194b", "#3cb44b", "#0082c8"))+
47 + geom_text(aes(label = Score))+
48 + labs(title=title_plot,x="Strategies", y = "Score")+
49 + #theme_classic()+
50 + theme(
51 + legend.position="top",
52 + # Centrar título: plot.title = element_text(hjust = 0.5),
53 + axis.line = element_line(colour = "gray"),
54 + panel.background = element_blank(),
55 + panel.grid.major = element_blank(),
56 + panel.grid.minor = element_blank(),
57 + panel.border = element_blank(),
58 + )+
59 + facet_wrap(~Panel, scale="free")
60 +
61 +ggsave(filename)
62 +
This diff is collapsed. Click to expand it.
This diff could not be displayed because it is too large.
1 # -*- coding: UTF-8 -*- 1 # -*- coding: UTF-8 -*-
2 2
3 import os 3 import os
4 -from itertools import chain 4 +#from itertools import chain
5 from optparse import OptionParser 5 from optparse import OptionParser
6 from time import time 6 from time import time
7 from collections import Counter 7 from collections import Counter
......
This diff is collapsed. Click to expand it.