# figures-report.py
from optparse import OptionParser
import re
from collections import defaultdict as df
import os
import random
from pandas import DataFrame as DF
import matplotlib.pyplot as plt

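# Objective
# Draw a figure of the scores found in grid reports
#
# Input parameters
# --inputPath               Path of the input files (report_* files)
# --outputPath              Path to place output figures
# --figureName              Name of the output figure (also used as the name of the score table)
# --table                   boolean, also save the score table as a tab-separated .csv file
#
# Output
# A figure with the precision, recall, F1 and CV score of every report, and optionally the score table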
#
# Examples
# python figures-reports.py
# --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/reports/nov13
# --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/figures/nov13
# --figureName FiguresGrid
# --table



# python figures-reports.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/reports/nov13 --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/figures/nov13 --figureName FiguresGrid --table
__author__ = 'egaytan'

####################################################################################
#                                   FUNCTIONS                                      #
####################################################################################

def savescreen(output, dic, path):
    """Write a table to ``path + '.csv'`` when saving is requested.

    Parameters:
        output: truthy to write the file; falsy to do nothing.
        dic: data handed to ``DataFrame.from_dict`` to build the table.
        path: destination path without extension; '.csv' is appended.

    The file is tab-separated and includes the index column.
    Always returns None.
    """
    # Nothing to do unless the caller asked for the table to be saved.
    if not output:
        return
    table = DF.from_dict(dic)
    destination = path + '.csv'
    table.to_csv(destination, sep="\t", index=True)

####################################################################################
#                                   MAIN PROGRAM                                  #
####################################################################################

# Patterns for the two score lines read from every report file.
# Raw strings keep the regex escapes (\s, \d) out of Python's string-escape handling.
_CV_SCORE_RE = re.compile(r'best\sCV\sscore:(\d+\.\d+)')
_SUMMARY_RE = re.compile(r'avg\s/\stotal\s+(\d+\.\d+)\s+(\d+\.\d+)\s+(\d+\.\d+)')


def parse_report_scores(text):
    """Extract the scores of one report from its text.

    Parameters:
        text: full content of a report file.

    Returns:
        A dict with the keys 'CV', 'precision', 'recall' and 'f1-score',
        whose values are the matched numbers as strings (first match of each
        pattern), or None when either score line is missing.
    """
    cvMatch = _CV_SCORE_RE.search(text)
    summaryMatch = _SUMMARY_RE.search(text)
    if cvMatch is None or summaryMatch is None:
        return None
    precision, recall, f1 = summaryMatch.groups()
    return {
        'CV': cvMatch.group(1),
        'precision': precision,
        'recall': recall,
        'f1-score': f1,
    }


if __name__ == '__main__':
    # Defining parameters
    parser = OptionParser()
    parser.add_option('--inputPath',  dest='inputPath',   help='Path of output from CoreNLP',           metavar='PATH')
    parser.add_option('--outputPath', dest='outputPath',  help='Path to place output figures',          metavar='PATH')
    parser.add_option('--figureName', dest='figureName',  help='Specific or first part of figurename',  metavar='FILE')
    parser.add_option('--table',       dest='table',        help='save score-table',               action='store_true', default=False)

    (options, args) = parser.parse_args()
    if len(args) > 0:
        # parser.error() prints the message and exits by itself.
        parser.error('Any parameter given.\nFor multi input files be sure to seprate the filenames by coma')
    # The three path/name options are used unconditionally below.
    for required in ('inputPath', 'outputPath', 'figureName'):
        if getattr(options, required) is None:
            parser.error('Missing required option --' + required)

    print('-------------------------------- PARAMETERS --------------------------------')
    print('Path of output from CoreNLP: ' + str(options.inputPath))
    print('Path to place output figures: ' + str(options.outputPath))
    print('Figurename: ' + str(options.figureName))
    print('-------------------------------- PROCESSING --------------------------------')
    # Sorted so the order of the runs in the table and on the x axis is
    # reproducible (os.listdir returns entries in arbitrary order).
    reportFileList = sorted(rfile for rfile in os.listdir(options.inputPath) if rfile.startswith("report_"))
    print(','.join(reportFileList))
    if not reportFileList:
        parser.error('No report_* files found in ' + str(options.inputPath))

    # One pass over the reports; each run is keyed by characters 7-15 of
    # the file name (the part right after the "report_" prefix).
    scores = {}
    for report in reportFileList:
        with open(os.path.join(options.inputPath, report), 'r') as File:
            reportScores = parse_report_scores(File.read())
        if reportScores is None:
            print('Skipping ' + report + ': score lines not found')
            continue
        scores[report[7:16]] = reportScores
    if not scores:
        parser.error('None of the report files contained the expected score lines')

    # Rows are runs, columns are the four scores (values kept as strings so
    # the saved table shows them exactly as written in the reports).
    scoresTable = DF(scores).T
    print(scoresTable)
    print('------------------------------- SAVING DATA --------------------------------')
    print('Saving score-table: ' + str(options.table))
    imageName = os.path.join(options.outputPath, options.figureName)
    savescreen(options.table, scoresTable, imageName)

    # Plot numeric values: matplotlib treats string data as categories, which
    # would not agree with the numeric y limits and label positions used here.
    numericScores = scoresTable.astype(float)
    runCount = len(scoresTable)
    positions = list(range(runCount))

    fig = plt.figure()
    fig.set_figheight(13)
    fig.set_figwidth(20)
    plt.ylim(0.7, 1)
    # One x position per run, with a small margin on both sides.
    plt.xlim(-1, runCount + 1)
    plt.xlabel("Runs")
    plt.ylabel("score")
    plt.grid()
    plt.plot(positions, numericScores['precision'], 'co--', label='precision', linewidth=1, markersize=6)
    plt.plot(positions, numericScores['f1-score'], 'bo--', label='F1', linewidth=1, markersize=6)
    plt.plot(positions, numericScores['recall'], 'mo--', label='recall', linewidth=1, markersize=6)
    plt.plot(positions, numericScores['CV'], 'ro--', label='CV', linewidth=1, markersize=6)
    # Annotate every point with the first four characters of its score.
    for column in scoresTable.columns:
        for position, rawScore in enumerate(scoresTable[column]):
            plt.text(position - 0.5, float(rawScore) + 0.0020, rawScore[0:4], fontsize=5)
    plt.legend(loc='upper left')
    plt.xticks(positions, list(scoresTable.index), rotation=90, fontsize=8)
    fig.savefig(imageName, bbox_inches='tight', pad_inches = 0.3)