figures-report.py
5.57 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
from optparse import OptionParser
import re
from collections import defaultdict as df
import os
import random
from pandas import DataFrame as DF
import matplotlib.pyplot as plt
# Objective
# Draw figures from grid reports
#
# Input parameters
# --inputPath Path of inputfiles
# --outputPath Path to place output figures
# --figureName Name of the figure for a single run; first part of the name for multiple figures
# --table boolean, also save the score table as a tab-separated .csv file
#
# Output
# Figure with the CV, precision, recall and f1-score of every report and, with --table, the score table
#
# Examples
# python figures-report.py
# --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/reports/nov13
# --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/figures/nov13
# --figureName FiguresGrid
# --table
# python figures-report.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/reports/nov13 --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/figures/nov13 --figureName FiguresGrid --table
__author__ = 'egaytan'
####################################################################################
# FUNCTIONS #
####################################################################################
def savescreen(output, dic, path):
    """Optionally dump the score table to a tab-separated file.

    output -- when falsy nothing is written
    dic    -- data accepted by DataFrame.from_dict (the score table)
    path   -- destination path without extension; '.csv' is appended
    """
    if not output:
        return
    table = DF.from_dict(dic)
    destination = path + '.csv'
    # Keep the index column: it carries the row labels of the table.
    table.to_csv(destination, sep="\t", index=True)
def parse_report(text):
    """Extract the summary scores from the text of one grid report.

    text -- full content of a report file

    Returns a dict with the keys 'CV', 'precision', 'recall' and 'f1-score'
    holding the score strings exactly as they are written in the report
    (first occurrence of each pattern).

    Raises ValueError when the 'best CV score' line or the 'avg / total'
    row cannot be found.
    """
    # Raw strings keep the regex escapes (\s, \d, ...) away from Python's
    # own string-escape processing.
    cvMatch = re.search(r'best\sCV\sscore\:(\d+\.\d+)', text)
    summaryMatch = re.search(r'avg\s\/\stotal\s+(\d+\.\d+)\s+(\d+\.\d+)\s+(\d+\.\d+)', text)
    if cvMatch is None:
        raise ValueError("'best CV score' line not found")
    if summaryMatch is None:
        raise ValueError("'avg / total' row not found")
    precision, recall, f1Score = summaryMatch.groups()
    return {
        'CV': cvMatch.group(1),
        'precision': precision,
        'recall': recall,
        'f1-score': f1Score,
    }


####################################################################################
# MAIN PROGRAM #
####################################################################################
if __name__ == '__main__':
    # Defining parameters
    parser = OptionParser()
    parser.add_option('--inputPath', dest='inputPath', help='Path of output from CoreNLP', metavar='PATH')
    parser.add_option('--outputPath', dest='outputPath', help='Path to place output figures', metavar='PATH')
    parser.add_option('--figureName', dest='figureName', help='Specific or first part of figurename', metavar='FILE')
    parser.add_option('--table', dest='table', help='save score-table', action='store_true', default=False)
    (options, args) = parser.parse_args()
    if args:
        # parser.error() prints the message and terminates the process itself,
        # so no explicit exit call is needed afterwards.
        parser.error('Any parameter given.\nFor multi input files be sure to seprate the filenames by coma')
    # All three paths are joined with os.path.join below; fail early with a
    # readable message instead of a TypeError on None.
    missing = [name for name in ('inputPath', 'outputPath', 'figureName') if getattr(options, name) is None]
    if missing:
        parser.error('Missing required option(s): ' + ', '.join('--' + name for name in missing))

    print('-------------------------------- PARAMETERS --------------------------------')
    print('Path of output from CoreNLP: ' + str(options.inputPath))
    print('Path to place output figures: ' + str(options.outputPath))
    print('Figurename: ' + str(options.figureName))

    print('-------------------------------- PROCESSING --------------------------------')
    # os.listdir() returns entries in arbitrary order; sort so that the table
    # and the x axis of the figure are reproducible between executions.
    reportFileList = sorted(rfile for rfile in os.listdir(options.inputPath) if rfile.startswith("report_"))
    if not reportFileList:
        raise SystemExit('No report_* files found in ' + str(options.inputPath))
    print(','.join(reportFileList))

    # One entry per report, keyed by the 9 characters that follow "report_".
    scores = {}
    for report in reportFileList:
        with open(os.path.join(options.inputPath, report), 'r') as File:
            string = File.read()
        try:
            scores[report[7:16]] = parse_report(string)
        except ValueError as error:
            raise SystemExit('Cannot parse ' + report + ': ' + str(error))
    scoresTable = DF(scores).T
    print(scoresTable)

    print('------------------------------- SAVING DATA --------------------------------')
    print('Saving score-table: ' + str(options.table))
    imageName = os.path.join(options.outputPath, options.figureName)
    savescreen(options.table, scoresTable, imageName)

    # The table keeps the scores as the strings found in the reports; the plot
    # needs numbers, otherwise matplotlib treats the y axis as categorical.
    plotTable = scoresTable.astype(float)
    runCount = len(plotTable.index)
    positions = list(range(runCount))

    fig = plt.figure()
    fig.set_figheight(13)
    fig.set_figwidth(20)
    plt.ylim(0.7, 1)
    # Axis limits and ticks follow the number of reports actually found.
    plt.xlim(-1, runCount + 1)
    plt.xlabel("Runs")
    plt.ylabel("score")
    plt.grid()
    plt.plot(positions, plotTable['precision'].tolist(), 'co--', label='precision', linewidth=1, markersize=6)
    plt.plot(positions, plotTable['f1-score'].tolist(), 'bo--', label='F1', linewidth=1, markersize=6)
    plt.plot(positions, plotTable['recall'].tolist(), 'mo--', label='recall', linewidth=1, markersize=6)
    plt.plot(positions, plotTable['CV'].tolist(), 'ro--', label='CV', linewidth=1, markersize=6)
    plt.legend(loc='upper left')
    plt.xticks(positions, list(plotTable.index), rotation=90, fontsize=8)
    fig.savefig(imageName, bbox_inches='tight', pad_inches=0.5)