Carlos-Francisco Méndez-Cruz

Iris dataset for automatic classification

...@@ -4,40 +4,30 @@ import os ...@@ -4,40 +4,30 @@ import os
4 from time import time 4 from time import time
5 from optparse import OptionParser 5 from optparse import OptionParser
6 from sklearn.naive_bayes import MultinomialNB 6 from sklearn.naive_bayes import MultinomialNB
7 -from sklearn.ensemble import RandomForestClassifier 7 +from sklearn.tree import DecisionTreeClassifier
8 +from sklearn.svm import SVC
8 from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, \ 9 from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, \
9 classification_report 10 classification_report
10 import sys 11 import sys
11 12
12 __author__ = 'CMendezC' 13 __author__ = 'CMendezC'
13 14
14 -# Goal: training and test Iris dataset 15 +# Goal: training and evaluation Iris dataset
15 16
16 # Parameters: 17 # Parameters:
17 # 1) --inputPath Path to read input files. 18 # 1) --inputPath Path to read input files.
18 # 2) --inputTrainingData File to read training data. 19 # 2) --inputTrainingData File to read training data.
19 # 3) --inputTrainingClasses File to read training true classes. 20 # 3) --inputTrainingClasses File to read training true classes.
20 -# 4) --inputTestData File to read test data. 21 +# 4) --inputEvaluationData File to read test data.
21 -# 5) --inputTestClasses File to read test true classes. 22 +# 5) --inputEvaluationClasses File to read test true classes.
22 # 6) --outputPath Path to place output files. 23 # 6) --outputPath Path to place output files.
23 # 7) --outputFile File to place evaluation report. 24 # 7) --outputFile File to place evaluation report.
24 -# 8) --classifier Classifier: MultinomialNB, SVM, RandomForest. 25 +# 8) --classifier Classifier: MultinomialNB, SVM, DecisionTree.
25 26
26 # Output: 27 # Output:
27 # 1) Evaluation report. 28 # 1) Evaluation report.
28 29
29 # Execution: 30 # Execution:
30 -# C:\Anaconda3\python trainingTest_Iris_v2.py
31 -# --inputPath C:\Users\cmendezc\Documents\GENOMICAS\LICENCIATURA_LCGPDCB\dataSet_Iris
32 -# --inputTrainingData training_Data.txt
33 -# --inputTrainingClasses training_TrueClasses.txt
34 -# --inputTestData test_Data.txt
35 -# --inputTestClasses test_TrueClasses.txt
36 -# --outputPath C:\Users\cmendezc\Documents\GENOMICAS\LICENCIATURA_LCGPDCB\dataSet_Iris
37 -# --outputFile report_MultinomialNB.txt
38 -# --classifier MultinomialNB
39 -
40 -# C:\Anaconda3\python trainingTest_Iris_v2.py --inputPath C:\Users\cmendezc\Documents\GENOMICAS\LICENCIATURA_LCGPDCB\dataSet_Iris --inputTrainingData training_Data.txt --inputTrainingClasses training_TrueClasses.txt --inputTestData test_Data.txt --inputTestClasses test_TrueClasses.txt --outputPath C:\Users\cmendezc\Documents\GENOMICAS\LICENCIATURA_LCGPDCB\dataSet_Iris --outputFile report_MultinomialNB.txt --classifier MultinomialNB
41 31
42 ########################################################### 32 ###########################################################
43 # MAIN PROGRAM # 33 # MAIN PROGRAM #
...@@ -52,9 +42,9 @@ if __name__ == "__main__": ...@@ -52,9 +42,9 @@ if __name__ == "__main__":
52 help="File to read training data", metavar="FILE") 42 help="File to read training data", metavar="FILE")
53 parser.add_option("--inputTrainingClasses", dest="inputTrainingClasses", 43 parser.add_option("--inputTrainingClasses", dest="inputTrainingClasses",
54 help="File to read training true classes", metavar="FILE") 44 help="File to read training true classes", metavar="FILE")
55 - parser.add_option("--inputTestData", dest="inputTestData", 45 + parser.add_option("--inputEvaluationData", dest="inputEvaluationData",
56 help="File to read test data", metavar="FILE") 46 help="File to read test data", metavar="FILE")
57 - parser.add_option("--inputTestClasses", dest="inputTestClasses", 47 + parser.add_option("--inputEvaluationClasses", dest="inputEvaluationClasses",
58 help="File to read test true classes", metavar="FILE") 48 help="File to read test true classes", metavar="FILE")
59 parser.add_option("--outputPath", dest="outputPath", 49 parser.add_option("--outputPath", dest="outputPath",
60 help="Path to place output files", metavar="PATH") 50 help="Path to place output files", metavar="PATH")
...@@ -73,8 +63,8 @@ if __name__ == "__main__": ...@@ -73,8 +63,8 @@ if __name__ == "__main__":
73 print("Path to read input files: " + str(options.inputPath)) 63 print("Path to read input files: " + str(options.inputPath))
74 print("File to read training data: " + str(options.inputTrainingData)) 64 print("File to read training data: " + str(options.inputTrainingData))
75 print("File to read training true classes: " + str(options.inputTrainingClasses)) 65 print("File to read training true classes: " + str(options.inputTrainingClasses))
76 - print("File to read test data: " + str(options.inputTestData)) 66 + print("File to read evaluation data: " + str(options.inputEvaluationData))
77 - print("File to read test true classes: " + str(options.inputTestClasses)) 67 + print("File to read evaluation true classes: " + str(options.inputEvaluationClasses))
78 print("Path to place output files: " + str(options.outputPath)) 68 print("Path to place output files: " + str(options.outputPath))
79 print("File to write evaluation report: " + str(options.outputFile)) 69 print("File to write evaluation report: " + str(options.outputFile))
80 print("Classifier: " + str(options.outputFile)) 70 print("Classifier: " + str(options.outputFile))
...@@ -82,24 +72,24 @@ if __name__ == "__main__": ...@@ -82,24 +72,24 @@ if __name__ == "__main__":
82 # Start time 72 # Start time
83 t0 = time() 73 t0 = time()
84 74
85 - print(" Reading training and test data and true classes...") 75 + print(" Reading training and evaluation data and true classes...")
86 trueTrainingClasses = [] 76 trueTrainingClasses = []
87 - trueTestClasses = [] 77 + trueEvaluationClasses = []
88 with open(os.path.join(options.inputPath, options.inputTrainingClasses), encoding='utf8', mode='r') \ 78 with open(os.path.join(options.inputPath, options.inputTrainingClasses), encoding='utf8', mode='r') \
89 as classFile: 79 as classFile:
90 for line in classFile: 80 for line in classFile:
91 line = line.strip('\r\n') 81 line = line.strip('\r\n')
92 trueTrainingClasses.append(line) 82 trueTrainingClasses.append(line)
93 83
94 - with open(os.path.join(options.inputPath, options.inputTestClasses), encoding='utf8', mode='r') \ 84 + with open(os.path.join(options.inputPath, options.inputEvaluationClasses), encoding='utf8', mode='r') \
95 as classFile: 85 as classFile:
96 for line in classFile: 86 for line in classFile:
97 line = line.strip('\r\n') 87 line = line.strip('\r\n')
98 - trueTestClasses.append(line) 88 + trueEvaluationClasses.append(line)
99 - # print(trueTestClasses) 89 + # print(trueEvaluationClasses)
100 90
101 dataTraining = [] 91 dataTraining = []
102 - dataTest = [] 92 + dataEvaluation = []
103 with open(os.path.join(options.inputPath, options.inputTrainingData), encoding='utf8', mode='r') \ 93 with open(os.path.join(options.inputPath, options.inputTrainingData), encoding='utf8', mode='r') \
104 as dataFile: 94 as dataFile:
105 for line in dataFile: 95 for line in dataFile:
...@@ -112,7 +102,7 @@ if __name__ == "__main__": ...@@ -112,7 +102,7 @@ if __name__ == "__main__":
112 dataTraining.append(listFloat) 102 dataTraining.append(listFloat)
113 print(dataTraining) 103 print(dataTraining)
114 104
115 - with open(os.path.join(options.inputPath, options.inputTestData), encoding='utf8', mode='r') \ 105 + with open(os.path.join(options.inputPath, options.inputEvaluationData), encoding='utf8', mode='r') \
116 as dataFile: 106 as dataFile:
117 for line in dataFile: 107 for line in dataFile:
118 listTemp = [] 108 listTemp = []
...@@ -121,21 +111,21 @@ if __name__ == "__main__": ...@@ -121,21 +111,21 @@ if __name__ == "__main__":
121 listTemp = line.split('\t') 111 listTemp = line.split('\t')
122 for elem in listTemp: 112 for elem in listTemp:
123 listFloat.append(float(elem)) 113 listFloat.append(float(elem))
124 - dataTest.append(listFloat) 114 + dataEvaluation.append(listFloat)
125 - print(dataTest) 115 + print(dataEvaluation)
126 print(" Reading data and true classes done!") 116 print(" Reading data and true classes done!")
127 117
128 if options.classifier == "MultinomialNB": 118 if options.classifier == "MultinomialNB":
129 classifier = MultinomialNB() 119 classifier = MultinomialNB()
130 elif options.classifier == "SVM": 120 elif options.classifier == "SVM":
131 - pass 121 + classifier = SVC()
132 - elif options.classifier == "RandomForest": 122 + elif options.classifier == "DecisionTree":
133 - classifier = RandomForestClassifier() 123 + classifier = DecisionTreeClassifier()
134 124
135 print(" Training...") 125 print(" Training...")
136 classifier.fit(dataTraining, trueTrainingClasses) 126 classifier.fit(dataTraining, trueTrainingClasses)
137 print(" Prediction...") 127 print(" Prediction...")
138 - y_pred = classifier.predict(dataTest) 128 + y_pred = classifier.predict(dataEvaluation)
139 print(" Training and predition done!") 129 print(" Training and predition done!")
140 130
141 # for i in range(len(trueClasses)): 131 # for i in range(len(trueClasses)):
...@@ -145,18 +135,18 @@ if __name__ == "__main__": ...@@ -145,18 +135,18 @@ if __name__ == "__main__":
145 with open(os.path.join(options.outputPath, options.outputFile), mode='w', encoding='utf8') as oFile: 135 with open(os.path.join(options.outputPath, options.outputFile), mode='w', encoding='utf8') as oFile:
146 oFile.write('********** EVALUATION REPORT **********\n') 136 oFile.write('********** EVALUATION REPORT **********\n')
147 oFile.write('Classifier: {}\n'.format(options.classifier)) 137 oFile.write('Classifier: {}\n'.format(options.classifier))
148 - oFile.write('Accuracy: {}\n'.format(accuracy_score(trueTestClasses, y_pred))) 138 + oFile.write('Accuracy: {}\n'.format(accuracy_score(trueEvaluationClasses, y_pred)))
149 - oFile.write('Precision: {}\n'.format(precision_score(trueTestClasses, y_pred, average='weighted'))) 139 + oFile.write('Precision: {}\n'.format(precision_score(trueEvaluationClasses, y_pred, average='weighted')))
150 - oFile.write('Recall: {}\n'.format(recall_score(trueTestClasses, y_pred, average='weighted'))) 140 + oFile.write('Recall: {}\n'.format(recall_score(trueEvaluationClasses, y_pred, average='weighted')))
151 - oFile.write('F-score: {}\n'.format(f1_score(trueTestClasses, y_pred, average='weighted'))) 141 + oFile.write('F-score: {}\n'.format(f1_score(trueEvaluationClasses, y_pred, average='weighted')))
152 # oFile.write('{}\t{}\t{}\t{}\n'.format(accuracy_score(trueClasses, y_pred), 142 # oFile.write('{}\t{}\t{}\t{}\n'.format(accuracy_score(trueClasses, y_pred),
153 # precision_score(trueClasses, y_pred, average='weighted'), 143 # precision_score(trueClasses, y_pred, average='weighted'),
154 # recall_score(trueClasses, y_pred, average='weighted'), 144 # recall_score(trueClasses, y_pred, average='weighted'),
155 # f1_score(trueClasses, y_pred, average='weighted'))) 145 # f1_score(trueClasses, y_pred, average='weighted')))
156 oFile.write('Confusion matrix: \n') 146 oFile.write('Confusion matrix: \n')
157 - oFile.write(str(confusion_matrix(trueTestClasses, y_pred)) + '\n') 147 + oFile.write(str(confusion_matrix(trueEvaluationClasses, y_pred)) + '\n')
158 oFile.write('Classification report: \n') 148 oFile.write('Classification report: \n')
159 - oFile.write(classification_report(trueTestClasses, y_pred) + '\n') 149 + oFile.write(classification_report(trueEvaluationClasses, y_pred) + '\n')
160 print(" Saving test report done!") 150 print(" Saving test report done!")
161 151
162 print("Training and test done in: %fs" % (time() - t0)) 152 print("Training and test done in: %fs" % (time() - t0))
......