upload

Estefani Gaytan Nunez
Commit 09110bbe338f17e52991b824e28c82ad57339aae 09110bbe 1 parent 61e734de
Showing 21 changed files with 187 additions and 16 deletions
CRF/bin/figures-tag-report.py
CRF/bin/figures-tag-report_v2.py
CRF/bin/training_validation_v10.py
CRF/figures/FiguresGrid_sep10_v10.png
CRF/figures/FiguresGrid_sep17_fscore_v13.png
CRF/figures/FiguresGrid_sep17_precision_v13.png
CRF/figures/FiguresGrid_sep17_precision_variants_v13.png
CRF/figures/FiguresGrid_sep17_recall_v13.png
CRF/figures/FiguresGrid_sep17_v11.png
CRF/figures/FiguresGrid_sep17_v12.png
CRF/figures/FiguresGrid_sep17_v13.png
CRF/figures/FiguresGrid_sep17_variants_v11.png
CRF/figures/FiguresGrid_sep17_variants_v12.png
CRF/outputs/Run_1.txt → CRF/outputs/sep17/Run_1.txt
CRF/outputs/Run_2.txt → CRF/outputs/sep17/Run_2.txt
CRF/outputs/Run_3.txt → CRF/outputs/sep17/Run_3.txt
CRF/outputs/Run_4.txt → CRF/outputs/sep17/Run_4.txt
CRF/outputs/Run_5.txt → CRF/outputs/sep17/Run_5.txt
CRF/outputs/Run_6.txt → CRF/outputs/sep17/Run_6.txt
CRF/outputs/Run_7.txt → CRF/outputs/sep17/Run_7.txt
--- a/CRF/bin/figures-tag-report.py
View file @09110bb
+++ b/CRF/bin/figures-tag-report.py
View file @09110bb
@@ -109,8 +109,7 @@ if __name__ == '__main__':
                 precision[k].append(float(tags[k][0]))
                 recall[k].append(float(tags[k][1]))
                 fscore[k].append(float(tags[k][2]))
-                #support[k].append(tags[k][3])    
+                #support[k].append(tags[k][3])       
-    print(DF(precision))
     #================================HEATMAP================================#
     '''
     plt.clf()
@@ -126,42 +125,42 @@ if __name__ == '__main__':
     heatmap(DF(precision))
     fig.savefig(imageName, bbox_inches='tight', pad_inches = 0.5)
     '''
-    #print(precision)
+
-    #lines = ['-', '--', '-.', ':', '.', ',', 'o', 'v', '^', '<', '>', '1', '2', '3', '4', 's', 'p', '*', 'h', 'H', '+', 'x', 'D', 'd', '|', '_']
+
-    #================================SINGLE PLOT================================
+    print('-----------------------------------PRECISION------------------------------------------------')   
+    print(DF(precision))
+    #================================ PRECISION ================================
     lines = ['-','--','-.',':','o','v','^','<','>','s','p','*','H','+','x','D','|']
-    imageName = str(os.path.join(options.outputPath, options.figureName)) + '_' +  str(options.version)
+    imageName = str(os.path.join(options.outputPath, options.figureName)) + '_PRECISION_' +  str(options.version)
     fig = plt.figure()
     plt.rcParams.update({'font.size': 15})
+    plt.title('Precision')
     fig.set_figheight(13)
     fig.set_figwidth(20)
     plt.xlabel("Runs")   
     plt.ylabel("score")
-    plt.ylim(-0.2, 1.2)
+    plt.ylim(-0.2, 1.2)      
-    #lines=['-', '--', '-.', ':', ',', 'o', 'v', '^', '<', '>', '1', '2', '3', '4', 's', 'p', '*', 'h', 'H', '+', 'x', 'D', 'd', '|', '_']
-    #lines = [ 'r--', 'rs', 'r^', 'r:', 'rH',  'g--', 'gs', 'g^', 'g|', 'gH' , 'b--', 'bs', 'b^', 'b|', 'bH', 'r+']
     lines = [ 'r--', 'r-.', 'r:',  'g--', 'g-.', 'g:', 'b--', 'b-.', 'b:' , 'm--', 'm-.', 'm:', 'c--', 'c-.', 'c:']   
     for i,k in enumerate(tags.keys()):
         plt.grid(False)	
         plt.plot(precision[k], lines[i], label=k, linewidth=4)
         for a,b in zip(range(8), precision[k]):
             plt.text(a, b+0.03, str(b),  fontsize=10)
-
     plt.legend(loc='lower right')
-    plt.tight_layout()
+    plt.tight_layout()    
     plt.xticks(range(8),['run1', 'run2', 'run3', 'run4', 'run5', 'run6', 'run7', 'run8'])
     fig.savefig(imageName, bbox_inches='tight', pad_inches = 0.5)
-
+    #VARIANTS
-    imageName = str(os.path.join(options.outputPath, options.figureName)) + '_variants_' +  str(options.version)
+    imageName = str(os.path.join(options.outputPath, options.figureName)) + '_PRECISION_variants_' +  str(options.version)
     fig = plt.figure()
     plt.rcParams.update({'font.size': 15})
+    plt.title('Precision')
     fig.set_figheight(13)
     fig.set_figwidth(20)
     plt.xlabel("Runs")
     plt.ylabel("score")
     plt.ylim(0.4, 1.2)       
-    variantTags = [k for k in tags.keys() if len(set(tags[k]))>2 ]
+    variantTags = [k for k in tags.keys() if len(set(tags[k]))>2 ]    
-    #lines = [ 'r^', 'ro',  'g^', 'go', 'b^', 'bo' , 'm^', 'mo', 'c^', 'co', 'ch',  'rh', 'gh', 'bh','mh']
     lines =  ['r--', 'g--', 'b--', 'm--', 'c--', 'r-.', 'g-.', 'b-.', 'm-.', 'c-.', 'r:', 'g:', 'b:' , 'm:', 'c:']
     for i,k in enumerate(variantTags):
         plt.grid(False)
--- a/CRF/bin/figures-tag-report_v2.py 0 → 100644
View file @09110bb
+++ b/CRF/bin/figures-tag-report_v2.py 0 → 100644
View file @09110bb
+from optparse import OptionParser
+import re
+from collections import defaultdict as df
+import os
+import random
+from pandas import DataFrame as DF
+#from seaborn import heatmap
+import numpy as np
+import numpy.random
+import matplotlib.pyplot as plt
+
+# Objective
+# Draw figures from grid reports
+#
+# Input parameters
+# --inputPath=PATH              Path of inputfiles
+# --outputPath=PATH             Path to place output figures
+# --figureName            single run specific name figure, multifigure first part of name
+# --inputFile             Use it for a single report
+# --version               CRF-script version of reports
+#
+# Output
+# Precision, recall and f-score figures (all tags and variant tags) saved in outputPath
+#
+# Examples
+# python figures-tag-report_v2.py
+# --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/reports/
+# --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/figures/
+# --figureName FiguresGrid
+# --inputFile report_Run1_v11.txt
+# --version v11
+
+# python figures-tag-report_v2.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/reports/ --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/figures/ --figureName FiguresGrid_v11 --version v11
+__author__ = 'egaytan'
+
+####################################################################################
+#                                   FUNCTIONS                                      #
+####################################################################################
+def Filter(rfile, options, v):
+    """Tell whether the file name rfile is one of the selected report files.
+
+    rfile   -- file name to test
+    options -- list of requested file names; when its first element is 'all'
+               the selection is made by name pattern instead of by listing
+    v       -- version string compared against characters [-7:-4] of rfile
+
+    Returns True when rfile is selected, False otherwise.
+    """
+    # Explicit selection: only the listed file names are accepted
+    if options[0] != 'all':
+        return rfile in options
+    # 'all': accept names starting with 'report' whose [-7:-4] slice equals v
+    return rfile.startswith('report') and rfile[-7:-4] == v
+
+def _plot_scores(score, tag_names, styles, title, ylim, image_path):
+    """Plot one line per tag and save the resulting figure.
+
+    score      -- mapping tag -> list of float scores, one value per run
+    tag_names  -- tags to draw, in plotting order
+    styles     -- matplotlib format strings, reused cyclically
+    title      -- figure title
+    ylim       -- (lower, upper) limits of the y axis
+    image_path -- destination passed to fig.savefig()
+    """
+    fig = plt.figure()
+    plt.rcParams.update({'font.size': 15})
+    plt.title(title)
+    fig.set_figheight(13)
+    fig.set_figwidth(20)
+    plt.xlabel("Runs")
+    plt.ylabel("score")
+    plt.ylim(*ylim)
+    plt.grid(False)
+    n_runs = 0
+    for i, k in enumerate(tag_names):
+        values = score[k]
+        n_runs = max(n_runs, len(values))
+        # Cycle through the styles so more tags than styles cannot raise IndexError
+        plt.plot(values, styles[i % len(styles)], label=k, linewidth=4)
+        # Write each value slightly above its point
+        for a, b in enumerate(values):
+            plt.text(a, b+0.03, str(b), fontsize=10)
+    plt.legend(loc='lower right')
+    plt.tight_layout()
+    # One tick per run actually present instead of a fixed run1..run8
+    plt.xticks(range(n_runs), ['run%d' % (r + 1) for r in range(n_runs)])
+    fig.savefig(image_path, bbox_inches='tight', pad_inches = 0.5)
+    # Release the figure; this function is called once per metric
+    plt.close(fig)
+
+
+def figures(score, tags, title, imageName, imageNamev):
+    """Save two figures for one metric: all tags, and the 'variant' tags only.
+
+    score      -- mapping tag -> list of float scores, one value per run
+    tags       -- mapping tag -> tuple of values; its keys give the tags to
+                  plot and their order
+    title      -- title used for both figures
+    imageName  -- output path of the figure with every tag
+    imageNamev -- output path of the figure with the variant tags
+    """
+    lines = [ 'r--', 'r-.', 'r:',  'g--', 'g-.', 'g:', 'b--', 'b-.', 'b:' , 'm--', 'm-.', 'm:', 'c--', 'c-.', 'c:']
+    _plot_scores(score, list(tags.keys()), lines, title, (-0.2, 1.2), imageName)
+
+    #-----------------------------------------VARIANTS---------------------------------------------
+    # NOTE(review): a tag is kept when its tuple in tags holds more than two
+    # distinct values; presumably the intent was "scores that vary across
+    # runs" -- confirm before changing the criterion.
+    variantTags = [k for k in tags.keys() if len(set(tags[k]))>2 ]
+    lines =  ['r--', 'g--', 'b--', 'm--', 'c--', 'r-.', 'g-.', 'b-.', 'm-.', 'c-.', 'r:', 'g:', 'b:' , 'm:', 'c:']
+    # Saved under imageNamev (it used to overwrite imageName) and titled like the main figure
+    _plot_scores(score, variantTags, lines, title, (0.4, 1.2), imageNamev)
+
+
+####################################################################################
+#                                   MAIN PROGRAM                                  #
+####################################################################################
+
+if __name__ == '__main__':
+    # Defining parameters
+    parser = OptionParser()
+    parser.add_option('--inputPath',  dest='inputPath',   help='Path of output from CoreNLP',           metavar='PATH')
+    parser.add_option('--outputPath', dest='outputPath',  help='Path to place output figures',          metavar='PATH')
+    parser.add_option('--figureName', dest='figureName',  help='Specific or first part of figurename',  metavar='FILE')
+    parser.add_option('--version', dest='version',  help='script version',  metavar='FILE')
+    parser.add_option('--inputFile',  dest='inputFile',   help='Use it for a specific report files',            metavar='FILE', default='all,')
+
+    (options, args) = parser.parse_args()
+    if len(args) > 0:
+        # parser.error() prints the usage message and exits by itself
+        parser.error('Unexpected positional parameter given.\nFor multiple input files be sure to separate the filenames by comma')
+
+    print('-------------------------------- PARAMETERS --------------------------------')
+    print('Path of output from CoreNLP: ' + str(options.inputPath))
+    print('Path to place output figures: ' + str(options.outputPath))
+    print('Specific or first part of figurename: ' + str(options.figureName))
+    print('CRF-script version: ' + str(options.version))
+
+    print('-------------------------------- PROCESSING --------------------------------')
+
+    rawInputRepotsList = str(options.inputFile).split(',')
+    # os.listdir() gives no ordering guarantee; sort the names so the position
+    # of each report in the score lists is reproducible (plain string order,
+    # so e.g. 'Run10' would sort before 'Run2').
+    reportFileList = sorted(rfile for rfile in os.listdir(options.inputPath) if Filter(rfile, rawInputRepotsList, str(options.version)))
+    if not reportFileList:
+        # Without reports there is nothing to plot and tags would stay undefined
+        parser.error('No report file found in inputPath for the given inputFile/version')
+    print('Report files: ' + str(options.inputFile  ))
+    print('\n'.join(reportFileList))
+    print('----------------------------------- NOTE -----------------------------------')
+    print('\n-------- All chosen report files should be in inputPath given---------------\n')
+
+    print('------------------------------- SAVING DATA --------------------------------\n')
+
+    # Tags read from every report; this order is also the plotting order
+    tagNames = ['OD', 'pH', 'Technique', 'Med', 'Temp', 'Vess', 'Agit', 'Phase', 'Air', 'Anti', 'Strain', 'Gtype', 'Substrain', 'Supp', 'Gversion']
+    # "<tag> <precision> <recall> <fscore>": three decimal numbers after the tag name
+    tagPatterns = {k: re.compile(k + r'\s+(\d+\.\d+)\s+(\d+\.\d+)\s+(\d+\.\d+)') for k in tagNames}
+
+    precision = df(list)
+    recall = df(list)
+    fscore = df(list)
+    for report in reportFileList:
+        # tags keeps the (precision, recall, fscore) strings of the current report
+        tags = {}
+        with open(os.path.join(options.inputPath, report), 'r') as File:
+            string = File.read()
+        for k in tagNames:
+            found = tagPatterns[k].search(string)
+            if found is None:
+                raise ValueError('Scores of tag %s not found in report file %s' % (k, report))
+            tags[k] = found.groups()
+            precision[k].append(float(tags[k][0]))
+            recall[k].append(float(tags[k][1]))
+            fscore[k].append(float(tags[k][2]))
+
+
+    # Kept inside the __main__ guard: these statements need options, tags and
+    # the score tables built above, so they must not run on import.
+    # Each entry: (header printed before the table, metric name, score table).
+    for header, metric, table in (
+            ('------------------------------------------PRECISION----------------------------------------', 'precision', precision),
+            ('-------------------------------------------RECALL-----------------------------------------', 'recall', recall),
+            # The f-score figures are drawn from fscore (they used to be drawn from recall)
+            ('-------------------------------------------FSCORE-----------------------------------------', 'fscore', fscore)):
+        print(header)
+        print(DF(table))
+        imageName = str(os.path.join(options.outputPath, options.figureName)) + '_' + metric + '_' +  str(options.version)
+        imageNamev = str(os.path.join(options.outputPath, options.figureName)) + '_' + metric + '_variants_' +  str(options.version)
+        figures(table, tags, metric, imageName, imageNamev)
--- a/CRF/bin/training_validation_v10.py
View file @09110bb
+++ b/CRF/bin/training_validation_v10.py
View file @09110bb
@@ -423,7 +423,7 @@ if __name__ == "__main__":
     # Saving model
     print("     Saving training model...")
     t1 = time()
-    #nameModel = 'model_S1_' + str(options.S1) + '_S2_' + str(options.S2) + str(options.version) + '_S3_' + str(options.S3) + '.mod'
+    
     nameModel = 'model_S1_' + str(options.S1) + '_S2_' + str(options.S2) + '_S3_' + str(options.S3) + '_' + str(options.Gridname)   + str(options.version) + '.mod'
     joblib.dump(crf, os.path.join(options.outputPath, "models", nameModel))
     print("        Saving training model done in: %fs" % (time() - t1))
--- a/CRF/figures/FiguresGrid_sep10_v10.png deleted 100644 → 0
View file @61e734d
+++ b/CRF/figures/FiguresGrid_sep10_v10.png deleted 100644 → 0
View file @61e734d
--- a/CRF/figures/FiguresGrid_sep17_fscore_v13.png 0 → 100644
View file @09110bb
+++ b/CRF/figures/FiguresGrid_sep17_fscore_v13.png 0 → 100644
View file @09110bb
--- a/CRF/figures/FiguresGrid_sep17_precision_v13.png 0 → 100644
View file @09110bb
+++ b/CRF/figures/FiguresGrid_sep17_precision_v13.png 0 → 100644
View file @09110bb
--- a/CRF/figures/FiguresGrid_sep17_precision_variants_v13.png 0 → 100644
View file @09110bb
+++ b/CRF/figures/FiguresGrid_sep17_precision_variants_v13.png 0 → 100644
View file @09110bb
--- a/CRF/figures/FiguresGrid_sep17_recall_v13.png 0 → 100644
View file @09110bb
+++ b/CRF/figures/FiguresGrid_sep17_recall_v13.png 0 → 100644
View file @09110bb
--- a/CRF/figures/FiguresGrid_sep17_v11.png deleted 100644 → 0
View file @61e734d
+++ b/CRF/figures/FiguresGrid_sep17_v11.png deleted 100644 → 0
View file @61e734d
--- a/CRF/figures/FiguresGrid_sep17_v12.png deleted 100644 → 0
View file @61e734d
+++ b/CRF/figures/FiguresGrid_sep17_v12.png deleted 100644 → 0
View file @61e734d
--- a/CRF/figures/FiguresGrid_sep17_v13.png 0 → 100644
View file @09110bb
+++ b/CRF/figures/FiguresGrid_sep17_v13.png 0 → 100644
View file @09110bb
--- a/CRF/figures/FiguresGrid_sep17_variants_v11.png deleted 100644 → 0
View file @61e734d
+++ b/CRF/figures/FiguresGrid_sep17_variants_v11.png deleted 100644 → 0
View file @61e734d
--- a/CRF/figures/FiguresGrid_sep17_variants_v12.png deleted 100644 → 0
View file @61e734d
+++ b/CRF/figures/FiguresGrid_sep17_variants_v12.png deleted 100644 → 0
View file @61e734d
--- a/CRF/outputs/Run_1.txt → CRF/outputs/sep17/Run_1.txt
View file @09110bb
+++ b/CRF/outputs/Run_1.txt → CRF/outputs/sep17/Run_1.txt
View file @09110bb
--- a/CRF/outputs/Run_2.txt → CRF/outputs/sep17/Run_2.txt
View file @09110bb
+++ b/CRF/outputs/Run_2.txt → CRF/outputs/sep17/Run_2.txt
View file @09110bb
--- a/CRF/outputs/Run_3.txt → CRF/outputs/sep17/Run_3.txt
View file @09110bb
+++ b/CRF/outputs/Run_3.txt → CRF/outputs/sep17/Run_3.txt
View file @09110bb
--- a/CRF/outputs/Run_4.txt → CRF/outputs/sep17/Run_4.txt
View file @09110bb
+++ b/CRF/outputs/Run_4.txt → CRF/outputs/sep17/Run_4.txt
View file @09110bb
--- a/CRF/outputs/Run_5.txt → CRF/outputs/sep17/Run_5.txt
View file @09110bb
+++ b/CRF/outputs/Run_5.txt → CRF/outputs/sep17/Run_5.txt
View file @09110bb
--- a/CRF/outputs/Run_6.txt → CRF/outputs/sep17/Run_6.txt
View file @09110bb
+++ b/CRF/outputs/Run_6.txt → CRF/outputs/sep17/Run_6.txt
View file @09110bb
--- a/CRF/outputs/Run_7.txt → CRF/outputs/sep17/Run_7.txt
View file @09110bb
+++ b/CRF/outputs/Run_7.txt → CRF/outputs/sep17/Run_7.txt
View file @09110bb
--- a/CRF/outputs/Run_8.txt → CRF/outputs/sep17/Run_8.txt
View file @09110bb
+++ b/CRF/outputs/Run_8.txt → CRF/outputs/sep17/Run_8.txt
View file @09110bb