figures-report.py
5.74 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
from optparse import OptionParser
import re
from collections import defaultdict as df
import os
import random
from pandas import DataFrame as DF
import matplotlib.pyplot as plt
# Objective
# Draw a figure from the scores found in grid-search reports
#
# Input parameters
# --inputPath Path of inputfiles
# --outputPath Path to place output figures
# --figureName single run specific name figure, multifigure first part of name
# --table boolean, also save the score table as a tab-separated <figureName>.csv file
#
# Output
# Figure with the precision, recall, F1 and CV score of every report; with --table, the score table as well
#
# Examples
# python figures-report.py
# --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/reports/nov13
# --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/figures/nov13
# --figureName FiguresGrid
# --table
# python figures-report.py --inputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/reports/nov13 --outputPath /home/egaytan/automatic-extraction-growth-conditions/CRF/figures/nov13 --figureName FiguresGrid --table
# Module metadata: author of this script.
__author__ = 'egaytan'
####################################################################################
# FUNCTIONS #
####################################################################################
def savescreen(output, dic, path):
    """Save a score table as a tab-separated file when requested.

    Parameters:
        output: flag; nothing is written when it is falsy.
        dic: data accepted by ``DataFrame.from_dict`` (the table to save).
        path: destination path without extension; ``'.csv'`` is appended.

    Returns:
        None.
    """
    if not output:
        return
    table = DF.from_dict(dic)
    # The file keeps the '.csv' suffix although the separator is a tab.
    table.to_csv(path + '.csv', sep="\t", index=True)
####################################################################################
# MAIN PROGRAM #
####################################################################################
# Patterns for the two score lines read from every report. Raw strings keep the
# regex escapes (\s, \d) away from Python's own string-escape processing.
_CV_SCORE_RE = re.compile(r'best\sCV\sscore:(\d+\.\d+)')
_SUMMARY_RE = re.compile(r'avg\s/\stotal\s+(\d+\.\d+)\s+(\d+\.\d+)\s+(\d+\.\d+)')


def _parse_report_scores(text):
    """Extract the scores from the text of one report.

    Parameters:
        text: full content of a report file.

    Returns:
        dict with the keys 'CV', 'precision', 'recall' and 'f1-score'; the
        values are the matched numbers, kept as strings exactly as written in
        the report (first match of each pattern).

    Raises:
        ValueError: if the 'best CV score:' line or the 'avg / total' line
            is not present in the text.
    """
    cv_match = _CV_SCORE_RE.search(text)
    if cv_match is None:
        raise ValueError("no 'best CV score:' value found")
    summary_match = _SUMMARY_RE.search(text)
    if summary_match is None:
        raise ValueError("no 'avg / total' summary line found")
    precision, recall, f1_score = summary_match.groups()
    return {
        'CV': cv_match.group(1),
        'precision': precision,
        'recall': recall,
        'f1-score': f1_score,
    }


def _list_report_files(input_path):
    """Return the sorted names of the files in input_path starting with 'report_'."""
    # os.listdir() returns names in arbitrary order; sorting makes the order of
    # the runs on the x axis reproducible.
    return sorted(
        name for name in os.listdir(input_path) if name.startswith('report_'))


def _collect_scores(input_path, report_files):
    """Read every report once and gather its scores.

    Parameters:
        input_path: directory that contains the report files.
        report_files: names of the report files to read.

    Returns:
        Mapping from run label to the dict built by _parse_report_scores.
        The run label is characters 7-15 of the file name, i.e. the nine
        characters that follow the 'report_' prefix.

    Raises:
        ValueError: if a report lacks one of the expected score lines; the
            message names the offending file.
    """
    scores = df(dict)
    for report in report_files:
        with open(os.path.join(input_path, report), 'r') as handle:
            text = handle.read()
        try:
            parsed = _parse_report_scores(text)
        except ValueError as err:
            raise ValueError('%s: %s' % (report, err))
        scores[report[7:16]].update(parsed)
    return scores


def _plot_scores(scores_table, image_name):
    """Plot precision, F1, recall and CV score per run and save the figure.

    Parameters:
        scores_table: DataFrame with one row per run and the string-valued
            columns 'CV', 'precision', 'recall' and 'f1-score'.
        image_name: path handed to Figure.savefig.
    """
    # Size the axes from the data instead of assuming a fixed number of runs.
    run_count = len(scores_table)
    positions = list(range(run_count))

    fig = plt.figure()
    fig.set_figheight(13)
    fig.set_figwidth(20)
    plt.ylim(0.7, 1)
    plt.xlim(-1, run_count + 1)
    plt.xlabel("Runs")
    plt.ylabel("score")
    plt.grid()

    series_styles = (
        ('precision', 'co--', 'precision'),
        ('f1-score', 'bo--', 'F1'),
        ('recall', 'mo--', 'recall'),
        ('CV', 'ro--', 'CV'),
    )
    for column, line_format, label in series_styles:
        # The table holds the scores as text; convert them so they are drawn
        # on a numeric axis rather than being treated as category labels.
        plt.plot(positions, scores_table[column].astype(float), line_format,
                 label=label, linewidth=1, markersize=6)

    # Write every score next to its marker (at most seven characters).
    for column in scores_table.columns:
        for position, score in zip(positions, scores_table[column]):
            plt.text(position - 0.5, float(score) + 0.00015, score[0:7], fontsize=6)

    plt.legend(loc='upper left')
    plt.xticks(positions, list(scores_table.index), rotation=90, fontsize=8)
    fig.savefig(image_name, bbox_inches='tight', pad_inches=0.5)


if __name__ == '__main__':
    # Defining parameters
    parser = OptionParser()
    parser.add_option('--inputPath', dest='inputPath', help='Path of output from CoreNLP', metavar='PATH')
    parser.add_option('--outputPath', dest='outputPath', help='Path to place output figures', metavar='PATH')
    parser.add_option('--figureName', dest='figureName', help='Specific or first part of figurename', metavar='FILE')
    parser.add_option('--table', dest='table', help='save score-table', action='store_true', default=False)
    (options, args) = parser.parse_args()
    if args:
        # parser.error() prints the message and terminates the program itself,
        # so no explicit exit call is needed afterwards.
        parser.error('Any parameter given.\nFor multi input files be sure to seprate the filenames by coma')
    if None in (options.inputPath, options.outputPath, options.figureName):
        # All three paths are joined/opened below; fail early with a clear message.
        parser.error('--inputPath, --outputPath and --figureName are required')

    print('-------------------------------- PARAMETERS --------------------------------')
    print('Path of output from CoreNLP: ' + str(options.inputPath))
    print('Path to place output figures: ' + str(options.outputPath))
    print('Figurename: ' + str(options.figureName))

    print('-------------------------------- PROCESSING --------------------------------')
    reportFileList = _list_report_files(options.inputPath)
    print(','.join(reportFileList))
    if not reportFileList:
        raise SystemExit('No report_* files found in ' + str(options.inputPath))
    scores = _collect_scores(options.inputPath, reportFileList)
    # One row per run, one column per score.
    scoresTable = DF(scores).T
    print(scoresTable)

    print('------------------------------- SAVING DATA --------------------------------')
    print('Saving score-table: ' + str(options.table))
    imageName = os.path.join(options.outputPath, options.figureName)
    savescreen(options.table, scoresTable, imageName)
    _plot_scores(scoresTable, imageName)