# figures-report.py 4.94 KB
from optparse import OptionParser
import re
from collections import defaultdict as df
import os
import random
from pandas import DataFrame as DF
import matplotlib.pyplot as plt

# Objective
# Draw figures from grid reports
#
# Input parameters
# --inputPath=PATH              Path of inputfiles
# --outputPath=PATH             Path to place output figures
# --figureName            single run specific name figure, multifigure first part of name
# --inputFile             Use it for a single report
# --version		  CRF-script version of reports
#
# Output
# Figure (line plot) with the CV, precision, recall and F1 scores of each report
#
# Examples
# python figures-reports.py
# --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/reports/
# --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/figures/
# --figureName FiguresGrid
# --inputFile report_Run1_v11.txt
# --version v11

# python figures-reports.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/reports/ --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/figures/ --figureName FiguresGrid_v1 --inputFile report_Run1_v11.txt --version v11
__author__ = 'egaytan'

####################################################################################
#                                   FUNCTIONS                                      #
####################################################################################
def Filter(rfile, options, v):
    """Tell whether the report file *rfile* was selected by the user.

    Parameters
    ----------
    rfile : str
        File name (without directory) found in the input path.
    options : list of str
        File names requested through ``--inputFile``, already split on
        commas. When the first entry is ``'all'`` the selection is done by
        naming convention instead of by explicit name.
    v : str
        Script version the report must belong to (e.g. ``'v11'``); only
        used in ``'all'`` mode.

    Returns
    -------
    bool
        In ``'all'`` mode: True when the name starts with ``'report'`` and
        its stem (name without extension) ends with *v*. Otherwise: True
        when *rfile* is one of the names listed in *options*.
    """
    if options and options[0] == 'all':
        # Compare against the end of the stem instead of a fixed
        # three-character slice, so versions such as 'v9' or 'v100' and
        # extensions of any length are matched as well.
        stem = os.path.splitext(rfile)[0]
        # An empty version would match every stem, so it never selects.
        return rfile.startswith('report') and bool(v) and stem.endswith(v)
    return rfile in options

####################################################################################
#                                   MAIN PROGRAM                                  #
####################################################################################

if __name__ == '__main__':
    # Command-line entry point: read the scores stored in every selected
    # report file and draw them as a single line plot, one line per metric
    # (CV, precision, recall and F1) and one x position per run.

    # Defining parameters
    parser = OptionParser()
    parser.add_option('--inputPath',  dest='inputPath',   help='Path of output from CoreNLP',           metavar='PATH')
    parser.add_option('--outputPath', dest='outputPath',  help='Path to place output figures',          metavar='PATH')
    parser.add_option('--figureName', dest='figureName',  help='Specific or first part of figurename',  metavar='FILE')
    parser.add_option('--version', dest='version',  help='script version',  metavar='FILE')
    parser.add_option('--inputFile',  dest='inputFile',   help='Use it for a specific report files',            metavar='FILE', default='all,')

    (options, args) = parser.parse_args()
    if len(args) > 0:
        # parser.error() prints the message and terminates the program by
        # itself, so no explicit exit call is needed afterwards.
        parser.error('Any parameter given.\nFor multi input files be sure to seprate the filenames by coma')

    # Fail early with a readable message instead of a TypeError raised
    # later by os.path.join when one of the paths is missing.
    for required in ('inputPath', 'outputPath', 'figureName'):
        if getattr(options, required) is None:
            parser.error('Missing required parameter --' + required)

    print('-------------------------------- PARAMETERS --------------------------------')
    print('Path of output from CoreNLP: ' + str(options.inputPath))
    print('Path to place output figures: ' + str(options.outputPath))
    print('Specific or first part of figurename: ' + str(options.figureName))
    print('CRF-script version: ' + str(options.version))

    print('-------------------------------- PROCESSING --------------------------------')

    rawInputRepotsList = str(options.inputFile).split(',')
    # os.listdir() returns names in arbitrary order; sort them "naturally"
    # (Run2 before Run10) so the x axis of the figure is reproducible.
    # re.split with a capturing group always alternates text and digit
    # tokens, so the int/str items of two keys are compared position-wise
    # without ever mixing types.
    reportFileList = sorted(
        (rfile for rfile in os.listdir(options.inputPath)
         if Filter(rfile, rawInputRepotsList, str(options.version))),
        key=lambda name: [int(tok) if tok.isdigit() else tok
                          for tok in re.split(r'(\d+)', name)])
    scores = df(dict)
    print('Report files: ' + str(options.inputFile))
    print('\n'.join(reportFileList))
    print('----------------------------------- NOTE -----------------------------------')
    print('\n-------- All chosen report files should be in inputPath given---------------\n')

    if not reportFileList:
        parser.error('No report file selected in ' + str(options.inputPath)
                     + ' (check --inputFile and --version)')

    print('------------------------------- SAVING DATA --------------------------------\n')
    # Raw strings keep the regular expressions free of invalid string
    # escapes; the patterns themselves are unchanged.
    cvPattern = re.compile(r'best\sCV\sscore\:(\d+\.\d+)')
    summaryPattern = re.compile(r'avg\s\/\stotal\s+(\d+\.\d+)\s+(\d+\.\d+)\s+(\d+\.\d+)')
    for report in reportFileList:
        with open(os.path.join(options.inputPath, report), 'r') as File:
            string = File.read()
        cvMatch = cvPattern.search(string)
        summaryMatch = summaryPattern.search(string)
        if cvMatch is None or summaryMatch is None:
            # A report without both score lines cannot be plotted.
            print('WARNING: no scores found in ' + report + ', file skipped')
            continue
        # Run label: everything between the leading 'report' and the
        # trailing version in 'report_<run>_<version>.<ext>'. Names that do
        # not follow that convention fall back to the fixed character slice.
        nameParts = os.path.splitext(report)[0].split('_')
        run = '_'.join(nameParts[1:-1]) if len(nameParts) >= 3 else report[7:11]
        # Store numbers, not strings: matplotlib would otherwise treat the
        # scores as categories and draw a non-numeric y axis.
        scores[run]['CV'] = float(cvMatch.group(1))
        scores[run]['precision'] = float(summaryMatch.group(1))
        scores[run]['recall'] = float(summaryMatch.group(2))
        scores[run]['f1-score'] = float(summaryMatch.group(3))

    if not scores:
        parser.error('None of the selected report files contained scores')

    # One row per run, one column per metric.
    scoresTable = DF(scores).T
    print(scoresTable)

    imageName = os.path.join(options.outputPath, options.figureName)
    fig = plt.figure()
    plt.grid(False)
    plt.rcParams.update({'font.size': 15})
    fig.set_figheight(13)
    fig.set_figwidth(20)
    plt.xlabel("Runs")
    plt.ylabel("score")
    # Plot against explicit numeric positions and label them afterwards, so
    # the number of ticks always equals the number of runs actually read.
    positions = range(len(scoresTable))
    plt.plot(positions, scoresTable['CV'].tolist(), "--", color="red", label="CV")
    plt.plot(positions, scoresTable['precision'].tolist(), color="blue", label="precision")
    plt.plot(positions, scoresTable['f1-score'].tolist(), color="orange", label="F1")
    plt.plot(positions, scoresTable['recall'].tolist(), color="g", label="recall")
    plt.xticks(positions, list(scoresTable.index))
    plt.legend(loc='lower right')
    plt.tight_layout()
    fig.savefig(imageName, pad_inches=0.5)