Showing
1 changed file
with
29 additions
and
39 deletions
... | @@ -4,40 +4,30 @@ import os | ... | @@ -4,40 +4,30 @@ import os |
4 | from time import time | 4 | from time import time |
5 | from optparse import OptionParser | 5 | from optparse import OptionParser |
6 | from sklearn.naive_bayes import MultinomialNB | 6 | from sklearn.naive_bayes import MultinomialNB |
7 | -from sklearn.ensemble import RandomForestClassifier | 7 | +from sklearn.tree import DecisionTreeClassifier |
8 | +from sklearn.svm import SVC | ||
8 | from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, \ | 9 | from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, \ |
9 | classification_report | 10 | classification_report |
10 | import sys | 11 | import sys |
11 | 12 | ||
12 | __author__ = 'CMendezC' | 13 | __author__ = 'CMendezC' |
13 | 14 | ||
14 | -# Goal: training and test Iris dataset | 15 | +# Goal: training and evaluation on the Iris dataset |
15 | 16 | ||
16 | # Parameters: | 17 | # Parameters: |
17 | # 1) --inputPath Path to read input files. | 18 | # 1) --inputPath Path to read input files. |
18 | # 2) --inputTrainingData File to read training data. | 19 | # 2) --inputTrainingData File to read training data. |
19 | # 3) --inputTrainingClasses File to read training true classes. | 20 | # 3) --inputTrainingClasses File to read training true classes. |
20 | -# 4) --inputTestData File to read test data. | 21 | +# 4) --inputEvaluationData File to read evaluation data. |
21 | -# 5) --inputTestClasses File to read test true classes. | 22 | +# 5) --inputEvaluationClasses File to read evaluation true classes. |
22 | # 6) --outputPath Path to place output files. | 23 | # 6) --outputPath Path to place output files. |
23 | # 7) --outputFile File to place evaluation report. | 24 | # 7) --outputFile File to place evaluation report. |
24 | -# 8) --classifier Classifier: MultinomialNB, SVM, RandomForest. | 25 | +# 8) --classifier Classifier: MultinomialNB, SVM, DecisionTree. |
25 | 26 | ||
26 | # Output: | 27 | # Output: |
27 | # 1) Evaluation report. | 28 | # 1) Evaluation report. |
28 | 29 | ||
29 | # Execution: | 30 | # Execution: |
30 | -# C:\Anaconda3\python trainingTest_Iris_v2.py | ||
31 | -# --inputPath C:\Users\cmendezc\Documents\GENOMICAS\LICENCIATURA_LCGPDCB\dataSet_Iris | ||
32 | -# --inputTrainingData training_Data.txt | ||
33 | -# --inputTrainingClasses training_TrueClasses.txt | ||
34 | -# --inputTestData test_Data.txt | ||
35 | -# --inputTestClasses test_TrueClasses.txt | ||
36 | -# --outputPath C:\Users\cmendezc\Documents\GENOMICAS\LICENCIATURA_LCGPDCB\dataSet_Iris | ||
37 | -# --outputFile report_MultinomialNB.txt | ||
38 | -# --classifier MultinomialNB | ||
39 | - | ||
40 | -# C:\Anaconda3\python trainingTest_Iris_v2.py --inputPath C:\Users\cmendezc\Documents\GENOMICAS\LICENCIATURA_LCGPDCB\dataSet_Iris --inputTrainingData training_Data.txt --inputTrainingClasses training_TrueClasses.txt --inputTestData test_Data.txt --inputTestClasses test_TrueClasses.txt --outputPath C:\Users\cmendezc\Documents\GENOMICAS\LICENCIATURA_LCGPDCB\dataSet_Iris --outputFile report_MultinomialNB.txt --classifier MultinomialNB | ||
41 | 31 | ||
42 | ########################################################### | 32 | ########################################################### |
43 | # MAIN PROGRAM # | 33 | # MAIN PROGRAM # |
... | @@ -52,9 +42,9 @@ if __name__ == "__main__": | ... | @@ -52,9 +42,9 @@ if __name__ == "__main__": |
52 | help="File to read training data", metavar="FILE") | 42 | help="File to read training data", metavar="FILE") |
53 | parser.add_option("--inputTrainingClasses", dest="inputTrainingClasses", | 43 | parser.add_option("--inputTrainingClasses", dest="inputTrainingClasses", |
54 | help="File to read training true classes", metavar="FILE") | 44 | help="File to read training true classes", metavar="FILE") |
55 | - parser.add_option("--inputTestData", dest="inputTestData", | 45 | + parser.add_option("--inputEvaluationData", dest="inputEvaluationData", |
56 | help="File to read test data", metavar="FILE") | 46 | help="File to read test data", metavar="FILE") |
57 | - parser.add_option("--inputTestClasses", dest="inputTestClasses", | 47 | + parser.add_option("--inputEvaluationClasses", dest="inputEvaluationClasses", |
58 | help="File to read test true classes", metavar="FILE") | 48 | help="File to read test true classes", metavar="FILE") |
59 | parser.add_option("--outputPath", dest="outputPath", | 49 | parser.add_option("--outputPath", dest="outputPath", |
60 | help="Path to place output files", metavar="PATH") | 50 | help="Path to place output files", metavar="PATH") |
... | @@ -73,8 +63,8 @@ if __name__ == "__main__": | ... | @@ -73,8 +63,8 @@ if __name__ == "__main__": |
73 | print("Path to read input files: " + str(options.inputPath)) | 63 | print("Path to read input files: " + str(options.inputPath)) |
74 | print("File to read training data: " + str(options.inputTrainingData)) | 64 | print("File to read training data: " + str(options.inputTrainingData)) |
75 | print("File to read training true classes: " + str(options.inputTrainingClasses)) | 65 | print("File to read training true classes: " + str(options.inputTrainingClasses)) |
76 | - print("File to read test data: " + str(options.inputTestData)) | 66 | + print("File to read evaluation data: " + str(options.inputEvaluationData)) |
77 | - print("File to read test true classes: " + str(options.inputTestClasses)) | 67 | + print("File to read evaluation true classes: " + str(options.inputEvaluationClasses)) |
78 | print("Path to place output files: " + str(options.outputPath)) | 68 | print("Path to place output files: " + str(options.outputPath)) |
79 | print("File to write evaluation report: " + str(options.outputFile)) | 69 | print("File to write evaluation report: " + str(options.outputFile)) |
80 | print("Classifier: " + str(options.outputFile)) | 70 | print("Classifier: " + str(options.outputFile)) |
... | @@ -82,24 +72,24 @@ if __name__ == "__main__": | ... | @@ -82,24 +72,24 @@ if __name__ == "__main__": |
82 | # Start time | 72 | # Start time |
83 | t0 = time() | 73 | t0 = time() |
84 | 74 | ||
85 | - print(" Reading training and test data and true classes...") | 75 | + print(" Reading training and evaluation data and true classes...") |
86 | trueTrainingClasses = [] | 76 | trueTrainingClasses = [] |
87 | - trueTestClasses = [] | 77 | + trueEvaluationClasses = [] |
88 | with open(os.path.join(options.inputPath, options.inputTrainingClasses), encoding='utf8', mode='r') \ | 78 | with open(os.path.join(options.inputPath, options.inputTrainingClasses), encoding='utf8', mode='r') \ |
89 | as classFile: | 79 | as classFile: |
90 | for line in classFile: | 80 | for line in classFile: |
91 | line = line.strip('\r\n') | 81 | line = line.strip('\r\n') |
92 | trueTrainingClasses.append(line) | 82 | trueTrainingClasses.append(line) |
93 | 83 | ||
94 | - with open(os.path.join(options.inputPath, options.inputTestClasses), encoding='utf8', mode='r') \ | 84 | + with open(os.path.join(options.inputPath, options.inputEvaluationClasses), encoding='utf8', mode='r') \ |
95 | as classFile: | 85 | as classFile: |
96 | for line in classFile: | 86 | for line in classFile: |
97 | line = line.strip('\r\n') | 87 | line = line.strip('\r\n') |
98 | - trueTestClasses.append(line) | 88 | + trueEvaluationClasses.append(line) |
99 | - # print(trueTestClasses) | 89 | + # print(trueEvaluationClasses) |
100 | 90 | ||
101 | dataTraining = [] | 91 | dataTraining = [] |
102 | - dataTest = [] | 92 | + dataEvaluation = [] |
103 | with open(os.path.join(options.inputPath, options.inputTrainingData), encoding='utf8', mode='r') \ | 93 | with open(os.path.join(options.inputPath, options.inputTrainingData), encoding='utf8', mode='r') \ |
104 | as dataFile: | 94 | as dataFile: |
105 | for line in dataFile: | 95 | for line in dataFile: |
... | @@ -112,7 +102,7 @@ if __name__ == "__main__": | ... | @@ -112,7 +102,7 @@ if __name__ == "__main__": |
112 | dataTraining.append(listFloat) | 102 | dataTraining.append(listFloat) |
113 | print(dataTraining) | 103 | print(dataTraining) |
114 | 104 | ||
115 | - with open(os.path.join(options.inputPath, options.inputTestData), encoding='utf8', mode='r') \ | 105 | + with open(os.path.join(options.inputPath, options.inputEvaluationData), encoding='utf8', mode='r') \ |
116 | as dataFile: | 106 | as dataFile: |
117 | for line in dataFile: | 107 | for line in dataFile: |
118 | listTemp = [] | 108 | listTemp = [] |
... | @@ -121,21 +111,21 @@ if __name__ == "__main__": | ... | @@ -121,21 +111,21 @@ if __name__ == "__main__": |
121 | listTemp = line.split('\t') | 111 | listTemp = line.split('\t') |
122 | for elem in listTemp: | 112 | for elem in listTemp: |
123 | listFloat.append(float(elem)) | 113 | listFloat.append(float(elem)) |
124 | - dataTest.append(listFloat) | 114 | + dataEvaluation.append(listFloat) |
125 | - print(dataTest) | 115 | + print(dataEvaluation) |
126 | print(" Reading data and true classes done!") | 116 | print(" Reading data and true classes done!") |
127 | 117 | ||
128 | if options.classifier == "MultinomialNB": | 118 | if options.classifier == "MultinomialNB": |
129 | classifier = MultinomialNB() | 119 | classifier = MultinomialNB() |
130 | elif options.classifier == "SVM": | 120 | elif options.classifier == "SVM": |
131 | - pass | 121 | + classifier = SVC() |
132 | - elif options.classifier == "RandomForest": | 122 | + elif options.classifier == "DecisionTree": |
133 | - classifier = RandomForestClassifier() | 123 | + classifier = DecisionTreeClassifier() |
134 | 124 | ||
135 | print(" Training...") | 125 | print(" Training...") |
136 | classifier.fit(dataTraining, trueTrainingClasses) | 126 | classifier.fit(dataTraining, trueTrainingClasses) |
137 | print(" Prediction...") | 127 | print(" Prediction...") |
138 | - y_pred = classifier.predict(dataTest) | 128 | + y_pred = classifier.predict(dataEvaluation) |
139 | print(" Training and predition done!") | 129 | print(" Training and predition done!") |
140 | 130 | ||
141 | # for i in range(len(trueClasses)): | 131 | # for i in range(len(trueClasses)): |
... | @@ -145,18 +135,18 @@ if __name__ == "__main__": | ... | @@ -145,18 +135,18 @@ if __name__ == "__main__": |
145 | with open(os.path.join(options.outputPath, options.outputFile), mode='w', encoding='utf8') as oFile: | 135 | with open(os.path.join(options.outputPath, options.outputFile), mode='w', encoding='utf8') as oFile: |
146 | oFile.write('********** EVALUATION REPORT **********\n') | 136 | oFile.write('********** EVALUATION REPORT **********\n') |
147 | oFile.write('Classifier: {}\n'.format(options.classifier)) | 137 | oFile.write('Classifier: {}\n'.format(options.classifier)) |
148 | - oFile.write('Accuracy: {}\n'.format(accuracy_score(trueTestClasses, y_pred))) | 138 | + oFile.write('Accuracy: {}\n'.format(accuracy_score(trueEvaluationClasses, y_pred))) |
149 | - oFile.write('Precision: {}\n'.format(precision_score(trueTestClasses, y_pred, average='weighted'))) | 139 | + oFile.write('Precision: {}\n'.format(precision_score(trueEvaluationClasses, y_pred, average='weighted'))) |
150 | - oFile.write('Recall: {}\n'.format(recall_score(trueTestClasses, y_pred, average='weighted'))) | 140 | + oFile.write('Recall: {}\n'.format(recall_score(trueEvaluationClasses, y_pred, average='weighted'))) |
151 | - oFile.write('F-score: {}\n'.format(f1_score(trueTestClasses, y_pred, average='weighted'))) | 141 | + oFile.write('F-score: {}\n'.format(f1_score(trueEvaluationClasses, y_pred, average='weighted'))) |
152 | # oFile.write('{}\t{}\t{}\t{}\n'.format(accuracy_score(trueClasses, y_pred), | 142 | # oFile.write('{}\t{}\t{}\t{}\n'.format(accuracy_score(trueClasses, y_pred), |
153 | # precision_score(trueClasses, y_pred, average='weighted'), | 143 | # precision_score(trueClasses, y_pred, average='weighted'), |
154 | # recall_score(trueClasses, y_pred, average='weighted'), | 144 | # recall_score(trueClasses, y_pred, average='weighted'), |
155 | # f1_score(trueClasses, y_pred, average='weighted'))) | 145 | # f1_score(trueClasses, y_pred, average='weighted'))) |
156 | oFile.write('Confusion matrix: \n') | 146 | oFile.write('Confusion matrix: \n') |
157 | - oFile.write(str(confusion_matrix(trueTestClasses, y_pred)) + '\n') | 147 | + oFile.write(str(confusion_matrix(trueEvaluationClasses, y_pred)) + '\n') |
158 | oFile.write('Classification report: \n') | 148 | oFile.write('Classification report: \n') |
159 | - oFile.write(classification_report(trueTestClasses, y_pred) + '\n') | 149 | + oFile.write(classification_report(trueEvaluationClasses, y_pred) + '\n') |
160 | print(" Saving test report done!") | 150 | print(" Saving test report done!") |
161 | 151 | ||
162 | print("Training and test done in: %fs" % (time() - t0)) | 152 | print("Training and test done in: %fs" % (time() - t0)) | ... | ... |
-
Please register or login to post a comment