Estefani Gaytan Nunez

upload

...@@ -110,7 +110,6 @@ if __name__ == '__main__': ...@@ -110,7 +110,6 @@ if __name__ == '__main__':
110 recall[k].append(float(tags[k][1])) 110 recall[k].append(float(tags[k][1]))
111 fscore[k].append(float(tags[k][2])) 111 fscore[k].append(float(tags[k][2]))
112 #support[k].append(tags[k][3]) 112 #support[k].append(tags[k][3])
113 - print(DF(precision))
114 #================================HEATMAP================================# 113 #================================HEATMAP================================#
115 ''' 114 '''
116 plt.clf() 115 plt.clf()
...@@ -126,42 +125,42 @@ if __name__ == '__main__': ...@@ -126,42 +125,42 @@ if __name__ == '__main__':
126 heatmap(DF(precision)) 125 heatmap(DF(precision))
127 fig.savefig(imageName, bbox_inches='tight', pad_inches = 0.5) 126 fig.savefig(imageName, bbox_inches='tight', pad_inches = 0.5)
128 ''' 127 '''
129 - #print(precision) 128 +
130 - #lines = ['-', '--', '-.', ':', '.', ',', 'o', 'v', '^', '<', '>', '1', '2', '3', '4', 's', 'p', '*', 'h', 'H', '+', 'x', 'D', 'd', '|', '_'] 129 +
131 - #================================SINGLE PLOT================================ 130 + print('-----------------------------------PRECISION------------------------------------------------')
131 + print(DF(precision))
132 + #================================ PRECISION ================================
132 lines = ['-','--','-.',':','o','v','^','<','>','s','p','*','H','+','x','D','|'] 133 lines = ['-','--','-.',':','o','v','^','<','>','s','p','*','H','+','x','D','|']
133 - imageName = str(os.path.join(options.outputPath, options.figureName)) + '_' + str(options.version) 134 + imageName = str(os.path.join(options.outputPath, options.figureName)) + '_PRECISION_' + str(options.version)
134 fig = plt.figure() 135 fig = plt.figure()
135 plt.rcParams.update({'font.size': 15}) 136 plt.rcParams.update({'font.size': 15})
137 + plt.title('Precision')
136 fig.set_figheight(13) 138 fig.set_figheight(13)
137 fig.set_figwidth(20) 139 fig.set_figwidth(20)
138 plt.xlabel("Runs") 140 plt.xlabel("Runs")
139 plt.ylabel("score") 141 plt.ylabel("score")
140 plt.ylim(-0.2, 1.2) 142 plt.ylim(-0.2, 1.2)
141 - #lines=['-', '--', '-.', ':', ',', 'o', 'v', '^', '<', '>', '1', '2', '3', '4', 's', 'p', '*', 'h', 'H', '+', 'x', 'D', 'd', '|', '_']
142 - #lines = [ 'r--', 'rs', 'r^', 'r:', 'rH', 'g--', 'gs', 'g^', 'g|', 'gH' , 'b--', 'bs', 'b^', 'b|', 'bH', 'r+']
143 lines = [ 'r--', 'r-.', 'r:', 'g--', 'g-.', 'g:', 'b--', 'b-.', 'b:' , 'm--', 'm-.', 'm:', 'c--', 'c-.', 'c:'] 143 lines = [ 'r--', 'r-.', 'r:', 'g--', 'g-.', 'g:', 'b--', 'b-.', 'b:' , 'm--', 'm-.', 'm:', 'c--', 'c-.', 'c:']
144 for i,k in enumerate(tags.keys()): 144 for i,k in enumerate(tags.keys()):
145 plt.grid(False) 145 plt.grid(False)
146 plt.plot(precision[k], lines[i], label=k, linewidth=4) 146 plt.plot(precision[k], lines[i], label=k, linewidth=4)
147 for a,b in zip(range(8), precision[k]): 147 for a,b in zip(range(8), precision[k]):
148 plt.text(a, b+0.03, str(b), fontsize=10) 148 plt.text(a, b+0.03, str(b), fontsize=10)
149 -
150 plt.legend(loc='lower right') 149 plt.legend(loc='lower right')
151 plt.tight_layout() 150 plt.tight_layout()
152 plt.xticks(range(8),['run1', 'run2', 'run3', 'run4', 'run5', 'run6', 'run7', 'run8']) 151 plt.xticks(range(8),['run1', 'run2', 'run3', 'run4', 'run5', 'run6', 'run7', 'run8'])
153 fig.savefig(imageName, bbox_inches='tight', pad_inches = 0.5) 152 fig.savefig(imageName, bbox_inches='tight', pad_inches = 0.5)
154 - 153 + #VARIANTS
155 - imageName = str(os.path.join(options.outputPath, options.figureName)) + '_variants_' + str(options.version) 154 + imageName = str(os.path.join(options.outputPath, options.figureName)) + '_PRECISION_variants_' + str(options.version)
156 fig = plt.figure() 155 fig = plt.figure()
157 plt.rcParams.update({'font.size': 15}) 156 plt.rcParams.update({'font.size': 15})
157 + plt.title('Precision')
158 fig.set_figheight(13) 158 fig.set_figheight(13)
159 fig.set_figwidth(20) 159 fig.set_figwidth(20)
160 plt.xlabel("Runs") 160 plt.xlabel("Runs")
161 plt.ylabel("score") 161 plt.ylabel("score")
162 plt.ylim(0.4, 1.2) 162 plt.ylim(0.4, 1.2)
163 variantTags = [k for k in tags.keys() if len(set(tags[k]))>2 ] 163 variantTags = [k for k in tags.keys() if len(set(tags[k]))>2 ]
164 - #lines = [ 'r^', 'ro', 'g^', 'go', 'b^', 'bo' , 'm^', 'mo', 'c^', 'co', 'ch', 'rh', 'gh', 'bh','mh']
165 lines = ['r--', 'g--', 'b--', 'm--', 'c--', 'r-.', 'g-.', 'b-.', 'm-.', 'c-.', 'r:', 'g:', 'b:' , 'm:', 'c:'] 164 lines = ['r--', 'g--', 'b--', 'm--', 'c--', 'r-.', 'g-.', 'b-.', 'm-.', 'c-.', 'r:', 'g:', 'b:' , 'm:', 'c:']
166 for i,k in enumerate(variantTags): 165 for i,k in enumerate(variantTags):
167 plt.grid(False) 166 plt.grid(False)
......
1 +from optparse import OptionParser
2 +import re
3 +from collections import defaultdict as df
4 +import os
5 +import random
6 +from pandas import DataFrame as DF
7 +#from seaborn import heatmap
8 +import numpy as np
9 +import numpy.random
10 +import matplotlib.pyplot as plt
11 +
12 +# Objective
13 +# Draw figures from grid reports
14 +#
15 +# Input parameters
16 +# --inputPath=PATH Path of inputfiles
17 +# --outputPath=PATH Path to place output figures
18 +# --figureName single run specific name figure, multifigure first part of name
19 +# --inputFile Use it for a single report
20 +# --version CRF-script version of reports
21 +#
22 +# Output
23 +# Precision, recall and F-score tables printed to stdout, plus line-plot figures saved in outputPath
24 +#
25 +# Examples
26 +# python figures-reports.py
27 +# --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/reports/
28 +# --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/figures/
29 +# --figureName FiguresGrid
30 +# --inputFile report_Run1_v11.txt
31 +# --version v11
32 +
33 +# python figures-tag-report.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/reports/ --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/figures/ --figureName FiguresGrid_v11 --version v11
34 +__author__ = 'egaytan'
35 +
36 +####################################################################################
37 +# FUNCTIONS #
38 +####################################################################################
def Filter(rfile, options, v):
    """Decide whether a report file name should be processed.

    Parameters
    ----------
    rfile : str
        Candidate file name (no directory part).
    options : list of str
        Requested file names. When the first element is ``'all'`` the file
        is selected by naming convention instead of by explicit membership.
    v : str
        Version tag that the file name (without extension) must end with
        when ``options[0] == 'all'``.

    Returns
    -------
    bool
        ``True`` when the file should be used, ``False`` otherwise.
    """
    if options and options[0] == 'all':
        # Compare against the name without its extension so that version
        # tags of any length (e.g. 'v2', 'v11', 'v110') are recognised,
        # instead of relying on a fixed three-character slice.
        stem = os.path.splitext(rfile)[0]
        return rfile.startswith('report') and bool(v) and stem.endswith(v)
    # Explicit list of file names: plain membership test (an empty list
    # therefore selects nothing rather than raising IndexError).
    return rfile in options
45 +
def _plot_scores(score, keys, title, ylim, styles, image_path):
    """Plot one line per key in *keys* and save the figure to *image_path*."""
    fig = plt.figure()
    plt.rcParams.update({'font.size': 15})
    plt.title(title)
    fig.set_figheight(13)
    fig.set_figwidth(20)
    plt.xlabel("Runs")
    plt.ylabel("score")
    plt.ylim(*ylim)
    plt.grid(False)
    n_runs = 0
    for i, k in enumerate(keys):
        values = score[k]
        n_runs = max(n_runs, len(values))
        # Cycle through the styles so more keys than styles cannot raise
        # IndexError.
        plt.plot(values, styles[i % len(styles)], label=k, linewidth=4)
        # Annotate every point with its value, slightly above the marker.
        for a, b in enumerate(values):
            plt.text(a, b + 0.03, str(b), fontsize=10)
    plt.legend(loc='lower right')
    plt.tight_layout()
    # One tick per run actually plotted instead of a hard-coded eight.
    plt.xticks(range(n_runs), ['run%d' % (r + 1) for r in range(n_runs)])
    fig.savefig(image_path, bbox_inches='tight', pad_inches=0.5)
    # Release the figure; otherwise every call leaves two figures open.
    plt.close(fig)


def figures(score, tags, title, imageName, imageNamev):
    """Draw and save two line plots of per-run scores, one line per tag.

    The first plot contains every key of *tags* on a y-range of
    (-0.2, 1.2) and is saved to *imageName*. The second plot contains only
    the "variant" tags on a y-range of (0.4, 1.2) and is saved to
    *imageNamev*.

    Parameters
    ----------
    score : mapping of str -> list of float
        Score values per tag, one value per run.
    tags : dict
        Mapping whose keys are the tag names to plot. A tag counts as a
        variant when ``tags[k]`` holds more than two distinct values.
    title : str
        Title used for both figures.
    imageName : str
        Output path of the all-tags figure.
    imageNamev : str
        Output path of the variant-tags figure.
    """
    all_styles = ['r--', 'r-.', 'r:', 'g--', 'g-.', 'g:', 'b--', 'b-.', 'b:',
                  'm--', 'm-.', 'm:', 'c--', 'c-.', 'c:']
    _plot_scores(score, list(tags.keys()), title, (-0.2, 1.2), all_styles, imageName)

    #-----------------------------------------VARIANTS---------------------------------------------
    # NOTE(review): the variant test inspects tags[k] (the values stored in
    # *tags*), not the per-run values in score[k] -- confirm this is intended.
    variantTags = [k for k in tags.keys() if len(set(tags[k])) > 2]
    variant_styles = ['r--', 'g--', 'b--', 'm--', 'c--', 'r-.', 'g-.', 'b-.',
                      'm-.', 'c-.', 'r:', 'g:', 'b:', 'm:', 'c:']
    # Save to imageNamev: writing to imageName here would overwrite the
    # all-tags figure produced above.
    _plot_scores(score, variantTags, title, (0.4, 1.2), variant_styles, imageNamev)
86 +
87 +
88 +####################################################################################
89 +# MAIN PROGRAM #
90 +####################################################################################
91 +
if __name__ == '__main__':
    # Defining parameters
    parser = OptionParser()
    parser.add_option('--inputPath', dest='inputPath', help='Path of output from CoreNLP', metavar='PATH')
    parser.add_option('--outputPath', dest='outputPath', help='Path to place output figures', metavar='PATH')
    parser.add_option('--figureName', dest='figureName', help='Specific or first part of figurename', metavar='FILE')
    parser.add_option('--version', dest='version', help='script version', metavar='FILE')
    parser.add_option('--inputFile', dest='inputFile', help='Use it for a specific report files', metavar='FILE', default='all,')

    (options, args) = parser.parse_args()
    if len(args) > 0:
        # parser.error() prints the message and terminates the program,
        # so no explicit exit call is needed afterwards.
        parser.error('Unexpected positional arguments.\nFor multiple input files be sure to separate the filenames by comma')

    print('-------------------------------- PARAMETERS --------------------------------')
    print('Path of output from CoreNLP: ' + str(options.inputPath))
    print('Path to place output figures: ' + str(options.outputPath))
    print('Specific or first part of figurename: ' + str(options.figureName))
    print('CRF-script version: ' + str(options.version))

    print('-------------------------------- PROCESSING --------------------------------')

    requestedReports = str(options.inputFile).split(',')
    # Sorted so that the run order on the x axis is deterministic
    # (os.listdir returns entries in arbitrary order).
    reportFileList = sorted(
        rfile for rfile in os.listdir(options.inputPath)
        if Filter(rfile, requestedReports, str(options.version))
    )
    print('Report files: ' + str(options.inputFile))
    print('\n'.join(reportFileList))
    print('----------------------------------- NOTE -----------------------------------')
    print('\n-------- All chosen report files should be in inputPath given---------------\n')

    if not reportFileList:
        # Without any report there is nothing to plot.
        parser.error('No report files found in ' + str(options.inputPath))

    print('------------------------------- SAVING DATA --------------------------------\n')

    # Tag names, in the order in which they are drawn in the figures.
    tagNames = ('OD', 'pH', 'Technique', 'Med', 'Temp', 'Vess', 'Agit', 'Phase',
                'Air', 'Anti', 'Strain', 'Gtype', 'Substrain', 'Supp', 'Gversion')
    # One pattern per tag: the tag name followed by three decimal numbers
    # (read below as precision, recall and f-score).
    tagPatterns = {
        name: re.compile(name + r'\s+(\d+\.\d+)\s+(\d+\.\d+)\s+(\d+\.\d+)')
        for name in tagNames
    }

    precision = df(list)
    recall = df(list)
    fscore = df(list)
    for report in reportFileList:
        tags = {}
        with open(os.path.join(options.inputPath, report), 'r') as File:
            string = File.read()
        for name in tagNames:
            found = tagPatterns[name].search(string)
            if found is None:
                raise ValueError('Tag ' + name + ' not found in report ' + report)
            tags[name] = found.groups()

        for k in tags:
            precision[k].append(float(tags[k][0]))
            recall[k].append(float(tags[k][1]))
            fscore[k].append(float(tags[k][2]))

    # Header, metric name and collected values for each reported score.
    # `tags` is the dict parsed from the last report; figures() uses its keys
    # and values to choose which tags to draw.
    sections = (
        ('------------------------------------------PRECISION----------------------------------------', 'precision', precision),
        ('-------------------------------------------RECALL-----------------------------------------', 'recall', recall),
        ('-------------------------------------------FSCORE-----------------------------------------', 'fscore', fscore),
    )
    baseName = str(os.path.join(options.outputPath, options.figureName))
    for header, metric, values in sections:
        print(header)
        print(DF(values))
        imageName = baseName + '_' + metric + '_' + str(options.version)
        imageNamev = baseName + '_' + metric + '_variants_' + str(options.version)
        figures(values, tags, metric, imageName, imageNamev)
...@@ -423,7 +423,7 @@ if __name__ == "__main__": ...@@ -423,7 +423,7 @@ if __name__ == "__main__":
423 # Saving model 423 # Saving model
424 print(" Saving training model...") 424 print(" Saving training model...")
425 t1 = time() 425 t1 = time()
426 - #nameModel = 'model_S1_' + str(options.S1) + '_S2_' + str(options.S2) + str(options.version) + '_S3_' + str(options.S3) + '.mod' 426 +
427 nameModel = 'model_S1_' + str(options.S1) + '_S2_' + str(options.S2) + '_S3_' + str(options.S3) + '_' + str(options.Gridname) + str(options.version) + '.mod' 427 nameModel = 'model_S1_' + str(options.S1) + '_S2_' + str(options.S2) + '_S3_' + str(options.S3) + '_' + str(options.Gridname) + str(options.version) + '.mod'
428 joblib.dump(crf, os.path.join(options.outputPath, "models", nameModel)) 428 joblib.dump(crf, os.path.join(options.outputPath, "models", nameModel))
429 print(" Saving training model done in: %fs" % (time() - t1)) 429 print(" Saving training model done in: %fs" % (time() - t1))
......