Estefani Gaytan Nunez

upload

......@@ -110,7 +110,6 @@ if __name__ == '__main__':
recall[k].append(float(tags[k][1]))
fscore[k].append(float(tags[k][2]))
#support[k].append(tags[k][3])
print(DF(precision))
#================================HEATMAP================================#
'''
plt.clf()
......@@ -126,42 +125,42 @@ if __name__ == '__main__':
heatmap(DF(precision))
fig.savefig(imageName, bbox_inches='tight', pad_inches = 0.5)
'''
#print(precision)
#lines = ['-', '--', '-.', ':', '.', ',', 'o', 'v', '^', '<', '>', '1', '2', '3', '4', 's', 'p', '*', 'h', 'H', '+', 'x', 'D', 'd', '|', '_']
#================================SINGLE PLOT================================
print('-----------------------------------PRECISION------------------------------------------------')
print(DF(precision))
#================================ PRECISION ================================
lines = ['-','--','-.',':','o','v','^','<','>','s','p','*','H','+','x','D','|']
imageName = str(os.path.join(options.outputPath, options.figureName)) + '_' + str(options.version)
imageName = str(os.path.join(options.outputPath, options.figureName)) + '_PRECISION_' + str(options.version)
fig = plt.figure()
plt.rcParams.update({'font.size': 15})
plt.title('Precision')
fig.set_figheight(13)
fig.set_figwidth(20)
plt.xlabel("Runs")
plt.ylabel("score")
plt.ylim(-0.2, 1.2)
#lines=['-', '--', '-.', ':', ',', 'o', 'v', '^', '<', '>', '1', '2', '3', '4', 's', 'p', '*', 'h', 'H', '+', 'x', 'D', 'd', '|', '_']
#lines = [ 'r--', 'rs', 'r^', 'r:', 'rH', 'g--', 'gs', 'g^', 'g|', 'gH' , 'b--', 'bs', 'b^', 'b|', 'bH', 'r+']
lines = [ 'r--', 'r-.', 'r:', 'g--', 'g-.', 'g:', 'b--', 'b-.', 'b:' , 'm--', 'm-.', 'm:', 'c--', 'c-.', 'c:']
for i,k in enumerate(tags.keys()):
plt.grid(False)
plt.plot(precision[k], lines[i], label=k, linewidth=4)
for a,b in zip(range(8), precision[k]):
plt.text(a, b+0.03, str(b), fontsize=10)
plt.legend(loc='lower right')
plt.tight_layout()
plt.xticks(range(8),['run1', 'run2', 'run3', 'run4', 'run5', 'run6', 'run7', 'run8'])
fig.savefig(imageName, bbox_inches='tight', pad_inches = 0.5)
imageName = str(os.path.join(options.outputPath, options.figureName)) + '_variants_' + str(options.version)
#VARIANTS
imageName = str(os.path.join(options.outputPath, options.figureName)) + '_PRECISION_variants_' + str(options.version)
fig = plt.figure()
plt.rcParams.update({'font.size': 15})
plt.title('Precision')
fig.set_figheight(13)
fig.set_figwidth(20)
plt.xlabel("Runs")
plt.ylabel("score")
plt.ylim(0.4, 1.2)
variantTags = [k for k in tags.keys() if len(set(tags[k]))>2 ]
#lines = [ 'r^', 'ro', 'g^', 'go', 'b^', 'bo' , 'm^', 'mo', 'c^', 'co', 'ch', 'rh', 'gh', 'bh','mh']
lines = ['r--', 'g--', 'b--', 'm--', 'c--', 'r-.', 'g-.', 'b-.', 'm-.', 'c-.', 'r:', 'g:', 'b:' , 'm:', 'c:']
for i,k in enumerate(variantTags):
plt.grid(False)
......
from optparse import OptionParser
import re
from collections import defaultdict as df
import os
import random
from pandas import DataFrame as DF
#from seaborn import heatmap
import numpy as np
import numpy.random
import matplotlib.pyplot as plt
# Objective
# Draw figures of grid reports
#
# Input parameters
# --inputPath=PATH Path of inputfiles
# --outputPath=PATH Path to place output figures
# --figureName single run specific name figure, multifigure first part of name
# --inputFile Use it for a single report
# --version CRF-script version of reports
#
# Output
# Figures of precision, recall and f-score per run, saved in outputPath
#
# Examples
# python figures-reports.py
# --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/reports/
# --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/figures/
# --figureName FiguresGrid
# --inputFile report_Run1_v11.txt
# --version v11
# python figures-tag-report.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/reports/ --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/figures/ --figureName FiguresGrid_v11 --version v11
__author__ = 'egaytan'
####################################################################################
# FUNCTIONS #
####################################################################################
def Filter(rfile, options, v):
    """Tell whether a file name found in the input directory must be processed.

    Parameters
    ----------
    rfile : str
        File name (without directory) to test.
    options : list of str
        Requested report names. When the first item is 'all', every file
        whose name starts with 'report' and whose stem (name without
        extension) ends with the version string is accepted; otherwise only
        the names explicitly listed are accepted.
    v : str
        Version suffix expected at the end of the file stem, e.g. 'v11'.

    Returns
    -------
    bool
        True when the file has to be read, False otherwise.
    """
    # Nothing requested: nothing can match (also avoids indexing an empty list).
    if not options:
        return False
    if options[0] == 'all':
        # Compare against the end of the stem rather than a fixed slice, so
        # the check works for versions of any length ('v9', 'v11', 'v100')
        # and for any extension length.
        stem = os.path.splitext(rfile)[0]
        return bool(v) and rfile.startswith('report') and stem.endswith(v)
    return rfile in options
def _plot_scores(score, keys, title, image_path, styles, ylim):
    """Draw one line per tag (score value against run index) and save the figure."""
    keys = list(keys)
    # Number of runs is taken from the data instead of being fixed to 8.
    n_runs = max((len(score[k]) for k in keys), default=0)

    fig = plt.figure()
    plt.rcParams.update({'font.size': 15})
    plt.title(title)
    fig.set_figheight(13)
    fig.set_figwidth(20)
    plt.xlabel("Runs")
    plt.ylabel("score")
    plt.ylim(*ylim)
    plt.grid(False)
    for i, k in enumerate(keys):
        # Cycle through the styles so more tags than styles cannot raise IndexError.
        plt.plot(score[k], styles[i % len(styles)], label=k, linewidth=4)
        # Write every value just above its point.
        for a, b in enumerate(score[k]):
            plt.text(a, b + 0.03, str(b), fontsize=10)
    if keys:
        plt.legend(loc='lower right')
    plt.tight_layout()
    plt.xticks(range(n_runs), ['run' + str(r + 1) for r in range(n_runs)])
    fig.savefig(image_path, bbox_inches='tight', pad_inches=0.5)
    # Release the figure; otherwise every call keeps two figures alive.
    plt.close(fig)


def figures(score, tags, title, imageName, imageNamev):
    """Save two line plots of a score across runs.

    The first figure contains every tag found in ``tags``; the second one
    (the "variants" figure) contains only the tags whose entry in ``tags``
    holds more than two distinct values.

    Parameters
    ----------
    score : mapping
        Tag name -> list with one score value per run.
    tags : mapping
        Tag name -> sequence of values; its keys select and order the
        plotted tags and its values select the variant tags.
    title : str
        Title written on both figures.
    imageName : str
        Output path of the figure with all tags.
    imageNamev : str
        Output path of the variants figure.
    """
    allStyles = ['r--', 'r-.', 'r:', 'g--', 'g-.', 'g:', 'b--', 'b-.', 'b:',
                 'm--', 'm-.', 'm:', 'c--', 'c-.', 'c:']
    _plot_scores(score, tags.keys(), title, imageName, allStyles, (-0.2, 1.2))
    #-----------------------------------------VARIANTS---------------------------------------------
    variantTags = [k for k in tags.keys() if len(set(tags[k])) > 2]
    variantStyles = ['r--', 'g--', 'b--', 'm--', 'c--', 'r-.', 'g-.', 'b-.', 'm-.', 'c-.',
                     'r:', 'g:', 'b:', 'm:', 'c:']
    # Saved under imageNamev so it no longer overwrites the all-tags figure.
    _plot_scores(score, variantTags, title, imageNamev, variantStyles, (0.4, 1.2))
####################################################################################
# MAIN PROGRAM #
####################################################################################
if __name__ == '__main__':
    # Defining parameters
    parser = OptionParser()
    parser.add_option('--inputPath', dest='inputPath', help='Path of output from CoreNLP', metavar='PATH')
    parser.add_option('--outputPath', dest='outputPath', help='Path to place output figures', metavar='PATH')
    parser.add_option('--figureName', dest='figureName', help='Specific or first part of figurename', metavar='FILE')
    parser.add_option('--version', dest='version', help='script version', metavar='FILE')
    parser.add_option('--inputFile', dest='inputFile', help='Use it for a specific report files', metavar='FILE', default='all,')
    (options, args) = parser.parse_args()
    if len(args) > 0:
        # parser.error() prints the message and terminates the program by
        # itself, so no explicit exit call is needed after it.
        parser.error('Any parameter given.\nFor multi input files be sure to seprate the filenames by coma')

    print('-------------------------------- PARAMETERS --------------------------------')
    print('Path of output from CoreNLP: ' + str(options.inputPath))
    print('Path to place output figures: ' + str(options.outputPath))
    print('Specific or first part of figurename: ' + str(options.figureName))
    print('CRF-script version: ' + str(options.version))
    print('-------------------------------- PROCESSING --------------------------------')

    rawInputRepotsList = str(options.inputFile).split(',')
    # os.listdir() returns names in arbitrary order; sort them naturally
    # (Run2 before Run10) so the position of a report is reproducible.
    reportFileList = sorted(
        (rfile for rfile in os.listdir(options.inputPath)
         if Filter(rfile, rawInputRepotsList, str(options.version))),
        key=lambda name: [int(part) if part.isdigit() else part
                          for part in re.split(r'(\d+)', name)])

    print('Report files: ' + str(options.inputFile ))
    print('\n'.join(reportFileList))
    print('----------------------------------- NOTE -----------------------------------')
    print('\n-------- All chosen report files should be in inputPath given---------------\n')
    print('------------------------------- SAVING DATA --------------------------------\n')

    # Without reports there is nothing to plot (and `tags` would be undefined below).
    if not reportFileList:
        raise SystemExit('No report files found in ' + str(options.inputPath))

    # Tags looked up in every report; this order also fixes the line style of each tag.
    tagNames = ['OD', 'pH', 'Technique', 'Med', 'Temp', 'Vess', 'Agit', 'Phase', 'Air',
                'Anti', 'Strain', 'Gtype', 'Substrain', 'Supp', 'Gversion']
    # Three decimal numbers following the tag name; they are stored below as
    # precision, recall and f-score, in that order.
    scorePattern = r'\s+(\d+\.\d+)\s+(\d+\.\d+)\s+(\d+\.\d+)'

    precision = df(list)
    recall = df(list)
    fscore = df(list)
    for report in reportFileList:
        tags = {}
        with open(os.path.join(options.inputPath, report), 'r') as File:
            string = File.read()
        for name in tagNames:
            found = re.search(re.escape(name) + scorePattern, string)
            if found is None:
                raise ValueError('Scores of tag ' + name + ' not found in report ' + report)
            # Same tuple of three strings that the first findall() hit would give.
            tags[name] = found.groups()
        for k in tags.keys():
            precision[k].append(float(tags[k][0]))
            recall[k].append(float(tags[k][1]))
            fscore[k].append(float(tags[k][2]))

    # One table and one pair of figures per metric; the f-score figures are
    # drawn from the f-score values (not from recall).
    metrics = [
        ('precision', '------------------------------------------PRECISION----------------------------------------', precision),
        ('recall', '-------------------------------------------RECALL-----------------------------------------', recall),
        ('fscore', '-------------------------------------------FSCORE-----------------------------------------', fscore),
    ]
    for metric, header, values in metrics:
        print(header)
        print(DF(values))
        imageName = str(os.path.join(options.outputPath, options.figureName)) + '_' + metric + '_' + str(options.version)
        imageNamev = str(os.path.join(options.outputPath, options.figureName)) + '_' + metric + '_variants_' + str(options.version)
        figures(values, tags, metric, imageName, imageNamev)
......@@ -423,7 +423,7 @@ if __name__ == "__main__":
# Saving model
print(" Saving training model...")
t1 = time()
#nameModel = 'model_S1_' + str(options.S1) + '_S2_' + str(options.S2) + str(options.version) + '_S3_' + str(options.S3) + '.mod'
nameModel = 'model_S1_' + str(options.S1) + '_S2_' + str(options.S2) + '_S3_' + str(options.S3) + '_' + str(options.Gridname) + str(options.version) + '.mod'
joblib.dump(crf, os.path.join(options.outputPath, "models", nameModel))
print(" Saving training model done in: %fs" % (time() - t1))
......