Estefani Gaytan Nunez

upload

......@@ -10,35 +10,33 @@ import matplotlib.pyplot as plt
# Draw figures from grid reports
#
# Input parameters
# --inputPath=PATH Path of inputfiles
# --outputPath=PATH Path to place output figures
# --inputPath Path of inputfiles
# --outputPath Path to place output figures
# --figureName single run specific name figure, multifigure first part of name
# --inputFile Use it for a single report
# --version CRF-script version of reports
# --join boolean, all figures together
#
# Output
# figure of the report scores (plus a tab-separated score table when --table is given)
#
# Examples
# python figures-reports.py
# --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/reports/
# --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/figures/
# --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/reports/nov13
# --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/figures/nov13
# --figureName FiguresGrid
# --inputFile report_Run1_v11.txt
# --version v11
# --join
# python figures-reports.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/reports/ --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/figures/ --figureName FiguresGrid_v1 --inputFile report_Run1_v11.txt --version v11
# python figures-reports.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/reports/nov13 --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/figures/nov13 --figureName FiguresGrid --join
__author__ = 'egaytan'
####################################################################################
# FUNCTIONS #
####################################################################################
def Filter(rfile, options, v):
    """Decide whether a report file was selected for plotting.

    Args:
        rfile: File name to test.
        options: List of requested report names. When its first element is
            'all', every file whose name starts with 'report' and carries
            the version tag ``v`` is selected; otherwise ``rfile`` must
            appear in the list itself.
        v: Version tag compared against ``rfile[-7:-4]``, i.e. the three
            characters that precede a four-character extension
            (for example 'v11' in 'report_Run1_v11.txt').

    Returns:
        True when the file is selected, False otherwise.
    """
    if not options:
        # Nothing was requested, so nothing can match; this also avoids an
        # IndexError on options[0].
        return False
    if options[0] == 'all':
        return rfile[0:6] == 'report' and rfile[-7:-4] == v
    return rfile in options
def savescreen(output, dic, path):
    """Optionally write a score dictionary to disk as a tab-separated table.

    Args:
        output: Flag; nothing is written when it is falsy.
        dic: Dictionary handed to ``DF.from_dict`` to build the table.
        path: Destination path without extension; '.csv' is appended.
    """
    if not output:
        return
    table = DF.from_dict(dic)
    table.to_csv(path + '.csv', sep="\t", index=True)
####################################################################################
# MAIN PROGRAM #
......@@ -50,8 +48,7 @@ if __name__ == '__main__':
parser.add_option('--inputPath', dest='inputPath', help='Path of output from CoreNLP', metavar='PATH')
parser.add_option('--outputPath', dest='outputPath', help='Path to place output figures', metavar='PATH')
parser.add_option('--figureName', dest='figureName', help='Specific or first part of figurename', metavar='FILE')
parser.add_option('--version', dest='version', help='script version', metavar='FILE')
parser.add_option('--inputFile', dest='inputFile', help='Use it for a specific report files', metavar='FILE', default='all,')
parser.add_option('--table', dest='table', help='save score-table', action='store_true', default=False)
(options, args) = parser.parse_args()
if len(args) > 0:
......@@ -61,21 +58,13 @@ if __name__ == '__main__':
print('-------------------------------- PARAMETERS --------------------------------')
print('Path of output from CoreNLP: ' + str(options.inputPath))
print('Path to place output figures: ' + str(options.outputPath))
print('Specific or first part of figurename: ' + str(options.figureName))
print('CRF-script version: ' + str(options.version))
print('Figurename: ' + str(options.figureName))
print('-------------------------------- PROCESSING --------------------------------')
reportFileList = [ rfile for rfile in os.listdir(options.inputPath) if rfile[0:7] == "report_"]
print(','.join(reportFileList))
rawInputRepotsList = str(options.inputFile).split(',')
reportFileList = [ rfile for rfile in os.listdir(options.inputPath) if Filter(rfile, rawInputRepotsList, str(options.version)) ]
for inputFile in reportFileList:
scores = df(dict)
#CV={}
print('Report files: ' + str(options.inputFile ))
print('\n'.join(reportFileList))
print('----------------------------------- NOTE -----------------------------------')
print('\n-------- All chosen report files should be in inputPath given---------------\n')
print('------------------------------- SAVING DATA --------------------------------\n')
for report in reportFileList:
with open(os.path.join(options.inputPath, report), 'r') as File:
string = File.read()
......@@ -86,31 +75,23 @@ if __name__ == '__main__':
scores[report[7:11]]['f1-score']=summaryScores[2]
print(DF(scores).T)
print('------------------------------- SAVING TABLE --------------------------------\n')
with open(os.path.join(options.inputPath, str(options.figureName) ), 'w') as File:
scoresTable = DF(scores).T
imageName=os.path.join(options.outputPath, options.figureName)
ylab = "score",
print('------------------------------- SAVING DATA --------------------------------')
print('Saving score-table: ' + str(options.table))
imageName = os.path.join(options.outputPath, options.figureName)
savescreen(options.table, scores, imageName)
fig = plt.figure()
plt.grid(False)
plt.rcParams.update({'font.size': 15})
fig.set_figheight(13)
fig.set_figwidth(20)
plt.ylim(0.7, 1.1)
plt.xlabel("Runs")
plt.ylabel("score")
plt.xticks(range(8),scoresTable["CV"].index)
plt.plot(scoresTable['CV'], "--", color="red", label="CV")
plt.plot(scoresTable['precision'], color="blue", label="precision")
plt.plot(scoresTable['f1-score'], color="orange", label="F1")
plt.plot(scoresTable['recall'], color="g", label="recall")
plt.rcParams.update()
plt.grid()
plt.plot(scoresTable['precision'],'o--', label='precision', linewidth=3, markersize=15)
plt.plot(scoresTable['f1-score'], 'o--', label='F1', linewidth=3, markersize=15)
plt.plot(scoresTable['recall'], 'o--', label='recall' , linewidth=3, markersize=15)
plt.plot(scoresTable['CV'], 'o--', label='CV' , linewidth=3, markersize=15)
plt.legend(loc='lower right')
plt.tight_layout()
fig.savefig(imageName, pad_inches=0.5)
plt.xticks(range(8),['run1', 'run2', 'run3', 'run4', 'run5', 'run6', 'run7', 'run8'])
fig.savefig(imageName, bbox_inches='tight', pad_inches = 0.5)
......
# Based on http://www.sthda.com/english/wiki/ggplot2-line-plot-quick-start-guide-r-software-and-data-visualization
library(ggplot2)
#library(ggpubr)
#library(cowplot)
######### BEST MODELS ##########
# Run 1
# All conditions (the first four score 0 on every measure)
dfa <- data.frame(
  Measure = rep(c("Precision", "Recall", "F1-score"), each = 15),
  Strategy = rep(
    c("Agit", "Gversion", "Substrain", "Vess", "OD", "Anti", "Supp", "Air",
      "Gtype", "Med", "Temp", "Technique", "Phase", "pH", "Strain"),
    times = 3
  ),
  Score = c(
    # Precision, one value per condition in the order listed in Strategy
    0, 0, 0, 0, 1, 1, 0.883, 0.92, 0.905, 0.852, 0.818, 0.88, 1, 1, 1,
    # Recall
    0, 0, 0, 0, 0.405, 0.444, 0.669, 0.742, 0.811, 0.912, 1, 1, 0.947, 1, 1,
    # F1-score
    0, 0, 0, 0, 0.577, 0.615, 0.762, 0.821, 0.856, 0.881, 0.9, 0.936, 0.973, 1, 1
  )
)
# Only conditions with F1-score > 0
# Run 1
df <- data.frame(
  Measure = rep(c("Precision", "Recall", "F1-score"), each = 11),
  Strategy = rep(
    c("OD", "Anti", "Supp", "Air", "Gtype", "Med", "Temp", "Technique",
      "Phase", "pH", "Strain"),
    times = 3
  ),
  Score = c(
    # Precision, one value per condition in the order listed in Strategy
    1, 1, 0.883, 0.92, 0.905, 0.852, 0.818, 0.88, 1, 1, 1,
    # Recall
    0.405, 0.444, 0.669, 0.742, 0.811, 0.912, 1, 1, 0.947, 1, 1,
    # F1-score
    0.577, 0.615, 0.762, 0.821, 0.856, 0.881, 0.9, 0.936, 0.973, 1, 1
  )
)
# Preview the first rows as a quick sanity check.
head(df)
# Line plot of the Run 1 scores: one line and one point series per Measure,
# drawn over the conditions in df with a fixed three-colour palette.
pa <- ggplot(df, aes(x = Strategy, y = Score, group = Measure)) +
  geom_line(aes(color = Measure)) +
  geom_point(aes(color = Measure)) +
  scale_color_manual(values = c("#999999", "#E69F00", "#56B4E9")) +
  # Alternative palette: scale_color_manual(values = c("#e6194b", "#3cb44b", "#0082c8"))
  # Optional value labels: geom_text(aes(label = Score))
  labs(title = "Scores by condition (Best model, Run1)", x = "Condition", y = "Score") +
  theme(
    legend.position = "top",
    # To centre the title add: plot.title = element_text(hjust = 0.5)
    axis.line = element_line(colour = "gray"),
    panel.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank()
  )
pa
# Save under an explicit, run-specific name and pass the plot explicitly.
# The previous ggsave(".png") produced a hidden file with no base name, so
# every figure saved that way overwrote the same file.
ggsave("scores-by-condition-run1.png", plot = pa)
# Only conditions with F1-score > 0
# Run 7
df <- data.frame(
  Measure = rep(c("Precision", "Recall", "F1-score"), each = 11),
  Strategy = rep(
    c("Anti", "OD", "Supp", "Air", "Gtype", "Temp", "Med", "Technique",
      "Phase", "pH", "Strain"),
    times = 3
  ),
  Score = c(
    # Precision, one value per condition in the order listed in Strategy
    0.571, 1, 0.886, 0.939, 0.876, 0.818, 0.897, 0.952, 1, 1, 1,
    # Recall
    0.444, 0.405, 0.684, 0.742, 0.802, 1, 0.912, 0.909, 0.947, 1, 1,
    # F1-score
    0.5, 0.577, 0.772, 0.829, 0.837, 0.9, 0.904, 0.93, 0.973, 1, 1
  )
)
# Preview the first rows as a quick sanity check.
head(df)
# Line plot of the Run 7 scores: one line and one point series per Measure,
# drawn over the conditions in df with a fixed three-colour palette.
pa <- ggplot(df, aes(x = Strategy, y = Score, group = Measure)) +
  geom_line(aes(color = Measure)) +
  geom_point(aes(color = Measure)) +
  scale_color_manual(values = c("#999999", "#E69F00", "#56B4E9")) +
  # Alternative palette: scale_color_manual(values = c("#e6194b", "#3cb44b", "#0082c8"))
  # Optional value labels: geom_text(aes(label = Score))
  labs(title = "Scores by condition (Best model, Run7)", x = "Condition", y = "Score") +
  theme(
    legend.position = "top",
    # To centre the title add: plot.title = element_text(hjust = 0.5)
    axis.line = element_line(colour = "gray"),
    panel.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank()
  )
pa
# Save under an explicit, run-specific name and pass the plot explicitly.
# The previous ggsave(".png") produced a hidden file with no base name, so
# every figure saved that way overwrote the same file.
ggsave("scores-by-condition-run7.png", plot = pa)
# Based on http://www.sthda.com/english/wiki/ggplot2-line-plot-quick-start-guide-r-software-and-data-visualization
library(ggplot2)
#library(ggpubr)
#library(cowplot)
######### BEST MODEL ##########
# Only conditions with F1-score > 0
# Run 6 (report_Run6_v11.txt)
df <- data.frame(
  Measure = rep(c("Precision", "Recall", "F1-score"), each = 11),
  Strategy = rep(
    c("Air", "Anti", "Gtype", "Med", "OD", "pH", "Phase", "Supp",
      "Technique", "Temp", "Vess"),
    times = 3
  ),
  Score = c(
    # Precision, one value per condition in the order listed in Strategy
    0.565, 1, 0.889, 1, 1, 1, 0.882, 0.811, 1, 0.923, 1,
    # Recall
    0.377, 1, 0.847, 0.943, 0.818, 1, 1, 0.799, 0.913, 0.828, 1,
    # F1-score
    0.452, 1, 0.867, 0.971, 0.9, 1, 0.938, 0.805, 0.955, 0.873, 1
  )
)
# Preview the first rows as a quick sanity check.
head(df)
# Line plot of the Run 6 scores: one line and one point series per Measure,
# drawn over the conditions in df with a fixed three-colour palette.
pa <- ggplot(df, aes(x = Strategy, y = Score, group = Measure)) +
  geom_line(aes(color = Measure)) +
  geom_point(aes(color = Measure)) +
  scale_color_manual(values = c("#999999", "#E69F00", "#56B4E9")) +
  # Alternative palette: scale_color_manual(values = c("#e6194b", "#3cb44b", "#0082c8"))
  # Optional value labels: geom_text(aes(label = Score))
  # The title previously read "Run1" although the data above is Run 6.
  labs(title = "Scores by condition (Best model, Run6)", x = "Condition", y = "Score") +
  theme(
    legend.position = "top",
    # To centre the title add: plot.title = element_text(hjust = 0.5)
    axis.line = element_line(colour = "gray"),
    panel.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank()
  )
pa
# Save under an explicit, run-specific name and pass the plot explicitly;
# the previous ggsave(".png") produced a hidden file with no base name.
ggsave("scores-by-condition-run6.png", plot = pa)
# Based on http://zevross.com/blog/2019/04/02/easy-multi-panel-plots-in-r-using-facet_wrap-and-facet_grid-from-ggplot2/
library(ggplot2)
#library(ggpubr)
#library(cowplot)
# Select which dataset to plot: 'ECO' or 'STM'.
organism <- 'ECO'
if (organism == 'ECO') {
  ######### ECO DEVELOPMENT DATASET ##########
  # Four panels of 12 scores each: within a panel, 4 strategies for each of
  # Precision, Recall and F1-score (Measure and Strategy are recycled by
  # data.frame to the 48 rows of Panel and Score).
  df <- data.frame(
    Panel = rep(c("Combination of strategies (effect)",
                  "Separated strategies (effect)",
                  "Combination of strategies (no effect)",
                  "Separated strategies (no effect)"), each = 12),
    Measure = rep(c("Precision", "Recall", "F1-score"), each = 4),
    Strategy = c(rep(c("D", "D+V", "D+V+At", "D+V+At+Au"), 3),
                 rep(c("D", "V", "At", "Au"), 3)),
    Score = c(
      0.78, 0.79, 0.81, 0.81, 0.41, 0.56, 0.63, 0.63, 0.53, 0.65, 0.71, 0.71,
      0.78, 0.89, 0.93, 1.00, 0.41, 0.35, 0.13, 0.01, 0.53, 0.50, 0.23, 0.02,
      0.82, 0.82, 0.84, 0.84, 0.55, 0.66, 0.72, 0.72, 0.66, 0.73, 0.78, 0.78,
      0.82, 0.88, 0.94, 1.00, 0.55, 0.39, 0.20, 0.01, 0.66, 0.54, 0.33, 0.02))
  filename <- "ECO-dev-multi-panel.png"
  title_plot <- "E. coli development dataset"
} else if (organism == 'STM') {
  ######### STM DEVELOPMENT DATASET ##########
  # Same layout as the ECO branch, with the Salmonella scores.
  df <- data.frame(
    Panel = rep(c("Combination of strategies (effect)",
                  "Separated strategies (effect)",
                  "Combination of strategies (no effect)",
                  "Separated strategies (no effect)"), each = 12),
    Measure = rep(c("Precision", "Recall", "F1-score"), each = 4),
    Strategy = c(rep(c("D", "D+V", "D+V+At", "D+V+At+Au"), 3),
                 rep(c("D", "V", "At", "Au"), 3)),
    Score = c(
      0.78, 0.77, 0.76, 0.76, 0.33, 0.49, 0.54, 0.54, 0.47, 0.60, 0.63, 0.63,
      0.78, 0.81, 0.70, 0.88, 0.33, 0.33, 0.10, 0.01, 0.47, 0.47, 0.18, 0.02,
      0.84, 0.82, 0.81, 0.81, 0.47, 0.59, 0.65, 0.65, 0.60, 0.68, 0.72, 0.72,
      0.84, 0.84, 0.77, 0.86, 0.47, 0.40, 0.17, 0.01, 0.60, 0.55, 0.27, 0.02))
  filename <- "STM-dev-multi-panel.png"
  title_plot <- "Salmonella evaluation dataset"
} else {
  # Fail here with a clear message instead of later, when the plotting code
  # would find df, filename and title_plot undefined.
  stop("Unknown organism '", organism, "': expected 'ECO' or 'STM'")
}
# Preview the first rows of the selected data frame.
head(df)
# Multi-panel line plot: one facet per Panel, one line and point series per
# Measure, with each point labelled by its Score.
pa <- ggplot(df, aes(x = Strategy, y = Score, group = Measure)) +
  geom_line(aes(color = Measure)) +
  geom_point(aes(color = Measure)) +
  scale_color_manual(values = c("#999999", "#E69F00", "#56B4E9")) +
  # Alternative palette: scale_color_manual(values = c("#e6194b", "#3cb44b", "#0082c8"))
  geom_text(aes(label = Score)) +
  labs(title = title_plot, x = "Strategies", y = "Score") +
  # theme_classic() is an alternative to the manual theme below.
  theme(
    legend.position = "top",
    # To centre the title add: plot.title = element_text(hjust = 0.5)
    axis.line = element_line(colour = "gray"),
    panel.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    # No trailing comma after the last element: an empty argument may be
    # rejected depending on the ggplot2 version.
    panel.border = element_blank()
  ) +
  # Spell the argument name in full instead of relying on partial matching.
  facet_wrap(~Panel, scales = "free")
# Pass the plot explicitly rather than depending on the implicit last plot.
ggsave(filename, plot = pa)
This diff is collapsed. Click to expand it.
This diff could not be displayed because it is too large.
# -*- coding: UTF-8 -*-
import os
from itertools import chain
#from itertools import chain
from optparse import OptionParser
from time import time
from collections import Counter
......
This diff is collapsed. Click to expand it.