Carlos-Francisco Méndez-Cruz

Iris dataset for automatic classification

......@@ -4,40 +4,30 @@ import os
from time import time
from optparse import OptionParser
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, \
classification_report
import sys
__author__ = 'CMendezC'
# Goal: training and test Iris dataset
# Goal: train and evaluate a classifier on the Iris dataset
# Parameters:
# 1) --inputPath Path to read input files.
# 2) --inputTrainingData File to read training data.
# 3) --inputTrainingClasses File to read training true classes.
# 4) --inputTestData File to read test data.
# 5) --inputTestClasses File to read test true classes.
# 4) --inputEvaluationData File to read evaluation data.
# 5) --inputEvaluationClasses File to read evaluation true classes.
# 6) --outputPath Path to place output files.
# 7) --outputFile File to place evaluation report.
# 8) --classifier Classifier: MultinomialNB, SVM, RandomForest.
# 8) --classifier Classifier: MultinomialNB, SVM, DecisionTree.
# Output:
# 1) Evaluation report.
# Execution:
# C:\Anaconda3\python trainingTest_Iris_v2.py
# --inputPath C:\Users\cmendezc\Documents\GENOMICAS\LICENCIATURA_LCGPDCB\dataSet_Iris
# --inputTrainingData training_Data.txt
# --inputTrainingClasses training_TrueClasses.txt
# --inputTestData test_Data.txt
# --inputTestClasses test_TrueClasses.txt
# --outputPath C:\Users\cmendezc\Documents\GENOMICAS\LICENCIATURA_LCGPDCB\dataSet_Iris
# --outputFile report_MultinomialNB.txt
# --classifier MultinomialNB
# C:\Anaconda3\python trainingTest_Iris_v2.py --inputPath C:\Users\cmendezc\Documents\GENOMICAS\LICENCIATURA_LCGPDCB\dataSet_Iris --inputTrainingData training_Data.txt --inputTrainingClasses training_TrueClasses.txt --inputEvaluationData test_Data.txt --inputEvaluationClasses test_TrueClasses.txt --outputPath C:\Users\cmendezc\Documents\GENOMICAS\LICENCIATURA_LCGPDCB\dataSet_Iris --outputFile report_MultinomialNB.txt --classifier MultinomialNB
###########################################################
# MAIN PROGRAM #
......@@ -52,9 +42,9 @@ if __name__ == "__main__":
help="File to read training data", metavar="FILE")
parser.add_option("--inputTrainingClasses", dest="inputTrainingClasses",
help="File to read training true classes", metavar="FILE")
parser.add_option("--inputTestData", dest="inputTestData",
parser.add_option("--inputEvaluationData", dest="inputEvaluationData",
help="File to read test data", metavar="FILE")
parser.add_option("--inputTestClasses", dest="inputTestClasses",
parser.add_option("--inputEvaluationClasses", dest="inputEvaluationClasses",
help="File to read test true classes", metavar="FILE")
parser.add_option("--outputPath", dest="outputPath",
help="Path to place output files", metavar="PATH")
......@@ -73,8 +63,8 @@ if __name__ == "__main__":
print("Path to read input files: " + str(options.inputPath))
print("File to read training data: " + str(options.inputTrainingData))
print("File to read training true classes: " + str(options.inputTrainingClasses))
print("File to read test data: " + str(options.inputTestData))
print("File to read test true classes: " + str(options.inputTestClasses))
print("File to read evaluation data: " + str(options.inputEvaluationData))
print("File to read evaluation true classes: " + str(options.inputEvaluationClasses))
print("Path to place output files: " + str(options.outputPath))
print("File to write evaluation report: " + str(options.outputFile))
# Echo the selected classifier name; the original printed the report file name here by mistake.
print("Classifier: " + str(options.classifier))
......@@ -82,24 +72,24 @@ if __name__ == "__main__":
# Start time
t0 = time()
print(" Reading training and test data and true classes...")
print(" Reading training and evaluation data and true classes...")
trueTrainingClasses = []
trueTestClasses = []
trueEvaluationClasses = []
with open(os.path.join(options.inputPath, options.inputTrainingClasses), encoding='utf8', mode='r') \
as classFile:
for line in classFile:
line = line.strip('\r\n')
trueTrainingClasses.append(line)
with open(os.path.join(options.inputPath, options.inputTestClasses), encoding='utf8', mode='r') \
with open(os.path.join(options.inputPath, options.inputEvaluationClasses), encoding='utf8', mode='r') \
as classFile:
for line in classFile:
line = line.strip('\r\n')
trueTestClasses.append(line)
# print(trueTestClasses)
trueEvaluationClasses.append(line)
# print(trueEvaluationClasses)
dataTraining = []
dataTest = []
dataEvaluation = []
with open(os.path.join(options.inputPath, options.inputTrainingData), encoding='utf8', mode='r') \
as dataFile:
for line in dataFile:
......@@ -112,7 +102,7 @@ if __name__ == "__main__":
dataTraining.append(listFloat)
print(dataTraining)
with open(os.path.join(options.inputPath, options.inputTestData), encoding='utf8', mode='r') \
with open(os.path.join(options.inputPath, options.inputEvaluationData), encoding='utf8', mode='r') \
as dataFile:
for line in dataFile:
listTemp = []
......@@ -121,21 +111,21 @@ if __name__ == "__main__":
listTemp = line.split('\t')
for elem in listTemp:
listFloat.append(float(elem))
dataTest.append(listFloat)
print(dataTest)
dataEvaluation.append(listFloat)
print(dataEvaluation)
print(" Reading data and true classes done!")
if options.classifier == "MultinomialNB":
classifier = MultinomialNB()
elif options.classifier == "SVM":
pass
elif options.classifier == "RandomForest":
classifier = RandomForestClassifier()
classifier = SVC()
elif options.classifier == "DecisionTree":
classifier = DecisionTreeClassifier()
print(" Training...")
classifier.fit(dataTraining, trueTrainingClasses)
print(" Prediction...")
y_pred = classifier.predict(dataTest)
y_pred = classifier.predict(dataEvaluation)
print(" Training and predition done!")
# for i in range(len(trueClasses)):
......@@ -145,18 +135,18 @@ if __name__ == "__main__":
with open(os.path.join(options.outputPath, options.outputFile), mode='w', encoding='utf8') as oFile:
oFile.write('********** EVALUATION REPORT **********\n')
oFile.write('Classifier: {}\n'.format(options.classifier))
oFile.write('Accuracy: {}\n'.format(accuracy_score(trueTestClasses, y_pred)))
oFile.write('Precision: {}\n'.format(precision_score(trueTestClasses, y_pred, average='weighted')))
oFile.write('Recall: {}\n'.format(recall_score(trueTestClasses, y_pred, average='weighted')))
oFile.write('F-score: {}\n'.format(f1_score(trueTestClasses, y_pred, average='weighted')))
oFile.write('Accuracy: {}\n'.format(accuracy_score(trueEvaluationClasses, y_pred)))
oFile.write('Precision: {}\n'.format(precision_score(trueEvaluationClasses, y_pred, average='weighted')))
oFile.write('Recall: {}\n'.format(recall_score(trueEvaluationClasses, y_pred, average='weighted')))
oFile.write('F-score: {}\n'.format(f1_score(trueEvaluationClasses, y_pred, average='weighted')))
# oFile.write('{}\t{}\t{}\t{}\n'.format(accuracy_score(trueClasses, y_pred),
# precision_score(trueClasses, y_pred, average='weighted'),
# recall_score(trueClasses, y_pred, average='weighted'),
# f1_score(trueClasses, y_pred, average='weighted')))
oFile.write('Confusion matrix: \n')
oFile.write(str(confusion_matrix(trueTestClasses, y_pred)) + '\n')
oFile.write(str(confusion_matrix(trueEvaluationClasses, y_pred)) + '\n')
oFile.write('Classification report: \n')
oFile.write(classification_report(trueTestClasses, y_pred) + '\n')
oFile.write(classification_report(trueEvaluationClasses, y_pred) + '\n')
print(" Saving test report done!")
print("Training and test done in: %fs" % (time() - t0))
......