figures-report.py
4.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
from optparse import OptionParser
import re
from collections import defaultdict as df
import os
import random
from pandas import DataFrame as DF
import matplotlib.pyplot as plt
# Objective
# Draw figures from grid-search reports
#
# Input parameters
# --inputPath Path of inputfiles
# --outputPath Path to place output figures
# --figureName single run specific name figure, multifigure first part of name
# --table boolean, also save the score table as a tab-separated .csv file
#
# Output
# Figure with the CV, precision, recall and F1 scores of every report (plus the score table when --table is given)
#
# Examples
# python figures-report.py
# --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/reports/nov13
# --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/figures/nov13
# --figureName FiguresGrid
# --table
# python figures-report.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/reports/nov13 --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/figures/nov13 --figureName FiguresGrid --table
__author__ = 'egaytan'
####################################################################################
# FUNCTIONS #
####################################################################################
def savescreen(output, dic, path):
    """Optionally dump a score table to disk as a tab-separated file.

    Parameters:
        output: flag; nothing is written when it is falsy.
        dic: table data accepted by ``DataFrame.from_dict``.
        path: destination path without extension; ``.csv`` is appended.

    Returns:
        None in every case.
    """
    # Guard clause: saving was not requested.
    if not output:
        return
    table = DF.from_dict(dic)
    destination = path + '.csv'
    table.to_csv(destination, sep="\t", index=True)
####################################################################################
# MAIN PROGRAM #
####################################################################################
if __name__ == '__main__':
    # Script entry point: collect the scores of every "report_*" file found in
    # --inputPath, optionally save them as a table, and draw them in one figure
    # written to --outputPath/--figureName.

    # Defining parameters
    parser = OptionParser()
    parser.add_option('--inputPath', dest='inputPath', help='Path of output from CoreNLP', metavar='PATH')
    parser.add_option('--outputPath', dest='outputPath', help='Path to place output figures', metavar='PATH')
    parser.add_option('--figureName', dest='figureName', help='Specific or first part of figurename', metavar='FILE')
    parser.add_option('--table', dest='table', help='save score-table', action='store_true', default=False)
    (options, args) = parser.parse_args()
    if len(args) > 0:
        # parser.error() prints the message and terminates the process itself,
        # so no explicit exit call is needed afterwards.
        parser.error('Any parameter given.\nFor multi input files be sure to seprate the filenames by coma')
    # The three path options are used unconditionally below; fail early with a
    # usage message instead of a TypeError deep inside os.path.join().
    for requiredOption in ('inputPath', 'outputPath', 'figureName'):
        if getattr(options, requiredOption) is None:
            parser.error('Missing required option --' + requiredOption)

    print('-------------------------------- PARAMETERS --------------------------------')
    print('Path of output from CoreNLP: ' + str(options.inputPath))
    print('Path to place output figures: ' + str(options.outputPath))
    print('Figurename: ' + str(options.figureName))
    print('-------------------------------- PROCESSING --------------------------------')

    # os.listdir() returns entries in arbitrary order; sort them so the runs
    # always appear in the same order on the x axis and in the saved table.
    reportFileList = sorted(rfile for rfile in os.listdir(options.inputPath) if rfile.startswith("report_"))
    print(','.join(reportFileList))

    # Compiled once, as raw strings so the regex escapes are not interpreted
    # (and warned about) as string escapes.
    cvPattern = re.compile(r'best\sCV\sscore\:(\d+\.\d+)')
    summaryPattern = re.compile(r'avg\s\/\stotal\s+(\d+\.\d+)\s+(\d+\.\d+)\s+(\d+\.\d+)')

    # One pass over the reports: run name -> {score name -> score text}.
    scores = {}
    for report in reportFileList:
        with open(os.path.join(options.inputPath, report), 'r') as reportFile:
            reportText = reportFile.read()
        cvMatch = cvPattern.search(reportText)
        summaryMatch = summaryPattern.search(reportText)
        if cvMatch is None or summaryMatch is None:
            # A report without the expected score lines cannot be plotted.
            print('WARNING: skipping ' + report + ' (CV score or "avg / total" line not found)')
            continue
        # The run name is the 9 characters that follow the "report_" prefix.
        # NOTE(review): two files sharing those 9 characters overwrite each
        # other here - confirm the naming scheme guarantees uniqueness.
        scores[report[7:16]] = {
            'CV': cvMatch.group(1),
            'precision': summaryMatch.group(1),
            'recall': summaryMatch.group(2),
            'f1-score': summaryMatch.group(3),
        }
    if not scores:
        raise SystemExit('No usable report_* files found in ' + str(options.inputPath))

    # Rows are runs, columns are score names; values are kept as the text read
    # from the reports so the saved table and the point labels show them verbatim.
    scoresTable = DF(scores).T
    print(scoresTable)

    print('------------------------------- SAVING DATA --------------------------------')
    print('Saving score-table: ' + str(options.table))
    imageName = os.path.join(options.outputPath, options.figureName)
    savescreen(options.table, scoresTable, imageName)

    runCount = len(scoresTable)
    runPositions = range(runCount)

    fig = plt.figure()
    fig.set_figheight(13)
    fig.set_figwidth(20)
    # Fixed score window; points below 0.7 fall outside the visible area.
    plt.ylim(0.7, 1)
    # One unit of margin on the left and two on the right of the runs.
    plt.xlim(-1, runCount + 1)
    plt.xlabel("Runs")
    plt.ylabel("score")
    plt.grid()

    # Scores are converted to float for plotting: matplotlib would otherwise
    # treat the strings as categories instead of numeric values.
    seriesStyles = (
        ('precision', 'co--', 'precision'),
        ('f1-score', 'bo--', 'F1'),
        ('recall', 'mo--', 'recall'),
        ('CV', 'ro--', 'CV'),
    )
    for column, lineFormat, legendLabel in seriesStyles:
        plt.plot(runPositions, scoresTable[column].astype(float), lineFormat,
                 label=legendLabel, linewidth=1, markersize=6)

    # Write each value (at most 7 characters) slightly above its marker.
    for column in scoresTable.columns:
        for position, rawScore in enumerate(scoresTable[column]):
            plt.text(position - 0.5, float(rawScore) + 0.0020, rawScore[0:7], fontsize=5)

    plt.legend(loc='upper left')
    # One tick per run, labelled with the run name.
    plt.xticks(runPositions, list(scoresTable.index), rotation=90, fontsize=8)
    fig.savefig(imageName, bbox_inches='tight', pad_inches = 0.3)