from optparse import OptionParser
import re
from collections import defaultdict as df
import os
import random
from pandas import DataFrame as DF
import matplotlib.pyplot as plt

# Objective
# Draw figures from the CRF grid-search reports
#
# Input parameters
# --inputPath               Path of the input report files
# --outputPath              Path to place output figures
# --figureName              Specific figure name for a single run; first part of the name for multiple figures
# --table                   Boolean; also save the score table as a tab-separated CSV file
#
# Output
# Score figure (and, optionally, a tab-separated score table)
#
# Examples
# python figures-reports.py
# --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/reports/nov13
# --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/figures/nov13
# --figureName FiguresGrid
# --table



# python figures-reports.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/reports/nov13 --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/figures/nov13 --figureName FiguresGrid --table
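
# Note: the parsing below assumes grid-search reports in the scikit-learn style,
# whose file names start with "report_" (e.g. a hypothetical report_run1_v10.txt)
# and which contain lines such as the following (values are illustrative only):
#
#   best CV score:0.9468
#   avg / total       0.95      0.94      0.94      1200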
__author__ = 'egaytan'

####################################################################################
#                                   FUNCTIONS                                      #
####################################################################################

def savescreen(output, dic, path):
    """Dump the score table as a tab-separated CSV file when output is True."""
    if output:
        DF.from_dict(dic).to_csv(path + '.csv', sep='\t', index=True)
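
# Illustrative only (hypothetical run names and values): calling
#   savescreen(True, scoresTable, '/tmp/FiguresGrid')
# would write /tmp/FiguresGrid.csv shaped roughly like:
#
#               CV      precision   recall   f1-score
#   run1_v10    0.9468  0.95        0.94     0.94
#   run2_v10    0.9471  0.94        0.95     0.94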

####################################################################################
#                                   MAIN PROGRAM                                  #
####################################################################################

if __name__ == '__main__':
    # Defining parameters
    parser = OptionParser()
    parser.add_option('--inputPath',  dest='inputPath',   help='Path of the input report files',        metavar='PATH')
    parser.add_option('--outputPath', dest='outputPath',  help='Path to place output figures',          metavar='PATH')
    parser.add_option('--figureName', dest='figureName',  help='Specific or first part of figure name', metavar='FILE')
    parser.add_option('--table',      dest='table',       help='Save the score table as CSV',           action='store_true', default=False)

    (options, args) = parser.parse_args()
    if len(args) > 0:
        # parser.error() prints the message and exits with a non-zero status
        parser.error('This script takes no positional arguments; use the options listed above.')

    print('-------------------------------- PARAMETERS --------------------------------')
    print('Path of the input report files: ' + str(options.inputPath))
    print('Path to place output figures: ' + str(options.outputPath))
    print('Figurename: ' + str(options.figureName))
    print('-------------------------------- PROCESSING --------------------------------')
    reportFileList = [rfile for rfile in os.listdir(options.inputPath) if rfile.startswith('report_')]
    print(','.join(reportFileList))

    # Collect the CV, precision, recall and F1 scores from every report file.
    # The run identifier is taken from the file name (the characters after "report_").
    scores = df(dict)
    for report in reportFileList:
        with open(os.path.join(options.inputPath, report), 'r') as File:
            string = File.read()
            run = report[7:16]
            scores[run]['CV'] = re.findall(r'best\sCV\sscore\:(\d+\.\d+)', string)[0]
            summaryScores = re.findall(r'avg\s\/\stotal\s+(\d+\.\d+)\s+(\d+\.\d+)\s+(\d+\.\d+)', string)[0]
            scores[run]['precision'] = summaryScores[0]
            scores[run]['recall'] = summaryScores[1]
            scores[run]['f1-score'] = summaryScores[2]
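    # At this point, scores maps each run identifier (sliced from the file name)
    # to its metrics, all kept as strings taken verbatim from the report, e.g.
    # (illustrative values only):
    #   {'<run-id>': {'CV': '0.9468', 'precision': '0.95',
    #                 'recall': '0.94', 'f1-score': '0.94'}, ...}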

    scoresTable = DF(scores).T
    print(scoresTable)
    print('------------------------------- SAVING DATA --------------------------------')
    print('Saving score-table: ' + str(options.table))
    imageName = os.path.join(options.outputPath, options.figureName)
    savescreen(options.table, scoresTable, imageName)
    # One point per run and metric; figure size and y-range were chosen for ~64 runs.
    nRuns = len(scoresTable.index)
    fig = plt.figure()
    fig.set_figheight(13)
    fig.set_figwidth(20)
    plt.ylim(0.7, 1)
    plt.xlim(-1, nRuns + 1)
    plt.xlabel("Runs")
    plt.ylabel("score")
    plt.grid()
    plt.plot(scoresTable['precision'],'co--', label='precision', linewidth=1, markersize=6)
    plt.plot(scoresTable['f1-score'], 'bo--', label='F1', linewidth=1, markersize=6)
    plt.plot(scoresTable['recall'], 'mo--', label='recall'  , linewidth=1, markersize=6)
    plt.plot(scoresTable['CV'], 'ro--', label='CV' , linewidth=1, markersize=6)
    # Annotate each point with its (truncated) score value.
    for metric, values in dict(scoresTable).items():
        for x, value in zip(range(nRuns), values):
            plt.text(x - 0.5, float(value) + 0.00015, value[0:7], fontsize=6)
            # print(x, value)

    plt.legend(loc='upper left')
    plt.xticks(range(nRuns), list(scoresTable.index), rotation=90, fontsize=8)
    #plt.xticks(range(16),['run1', 'run2', 'run3', 'run4', 'run5', 'run6', 'run7', 'run1-NER(9)', 'run2-NER(10)', 'run3-NER(11)', 'run4-NER(12)', 'run5-NER(13)', 'run6-NER(14)', 'run7-NER(15)', 'run8-NER(16)'], rotation=90)  
    #plt.xticks(range(64),['run1_v10', 'run2_v10', 'run3_v10', 'run4_v10', 'run5_v10', 'run6_v10', 'run7_v10', 'run8_v10',	  						'run1_v11', 'run2_v11', 'run3_v11', 'run4_v11', 'run5_v11', 'run6_v11', 'run7_v11', 'run8_v11',	  						'run1_v12', 'run2_v12', 'run3_v12', 'run4_v12', 'run5_v12', 'run6_v12', 'run7_v12', 'run8_v12',	  						'run1_v13', 'run2_v13', 'run3_v13', 'run4_v13', 'run5_v13', 'run6_v13', 'run7_v13', 'run8_v13',	  						'run9_v10', 'run10_v10', 'run11_v10', 'run12_v10', 'run13_v10', 'run14_v10', 'run15_v10', 'run16_v10',  						'run9_v11', 'run10_v11', 'run11_v11', 'run12_v11', 'run13_v11', 'run14_v11', 'run15_v11', 'run16_v11'  						'run9_v12', 'run10_v12', 'run11_v12', 'run12_v12', 'run13_v12', 'run14_v12', 'run15_v12', 'run16_v12','run9_v13', 'run10_v13', 'run11_v13', 'run12_v13', 'run13_v13', 'run14_v13', 'run15_v13', 'run16_v13'], rotation=90)
    fig.savefig(imageName, bbox_inches='tight', pad_inches = 0.5)