# trainingEvaluation_Iris_v1.py 7.36 KB
# -*- encoding: utf-8 -*-

import os
from time import time
from optparse import OptionParser
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, \
    classification_report
import sys

__author__ = 'CMendezC'

# Goal: train a classifier on the Iris training data and evaluate it on the Iris test data

# Parameters:
# 1) --inputPath Path to read input files.
# 2) --inputTrainingData File to read training data.
# 3) --inputTrainingClasses File to read training true classes.
# 4) --inputTestData File to read test data.
# 5) --inputTestClasses File to read test true classes.
# 6) --outputPath Path to place output files.
# 7) --outputFile File to place evaluation report.
# 8) --classifier Classifier: MultinomialNB, RandomForest (SVM is listed as an option but is not implemented).

# Output:
# 1) Evaluation report.

# Execution:
# C:\Anaconda3\python trainingTest_Iris_v2.py
# --inputPath C:\Users\cmendezc\Documents\GENOMICAS\LICENCIATURA_LCGPDCB\dataSet_Iris
# --inputTrainingData training_Data.txt
# --inputTrainingClasses training_TrueClasses.txt
# --inputTestData test_Data.txt
# --inputTestClasses test_TrueClasses.txt
# --outputPath C:\Users\cmendezc\Documents\GENOMICAS\LICENCIATURA_LCGPDCB\dataSet_Iris
# --outputFile report_MultinomialNB.txt
# --classifier MultinomialNB

# C:\Anaconda3\python trainingTest_Iris_v2.py --inputPath C:\Users\cmendezc\Documents\GENOMICAS\LICENCIATURA_LCGPDCB\dataSet_Iris --inputTrainingData training_Data.txt --inputTrainingClasses training_TrueClasses.txt --inputTestData test_Data.txt --inputTestClasses test_TrueClasses.txt --outputPath C:\Users\cmendezc\Documents\GENOMICAS\LICENCIATURA_LCGPDCB\dataSet_Iris --outputFile report_MultinomialNB.txt --classifier MultinomialNB

###########################################################
#                       MAIN PROGRAM                      #
###########################################################

def read_class_labels(path):
    """Read the true classes from a UTF-8 text file, one class per line.

    Line terminators are removed and blank lines (for example a trailing
    empty line at the end of the file) are skipped, so they do not become
    an empty-string class.

    :param path: path of the file to read.
    :return: list of class labels (str), in file order.
    """
    with open(path, encoding='utf8', mode='r') as classFile:
        stripped = (line.strip('\r\n') for line in classFile)
        return [label for label in stripped if label.strip()]


def read_feature_rows(path):
    """Read a tab-separated matrix of numbers from a UTF-8 text file.

    Each non-blank line is split on tabs and every field is converted with
    float(). Blank lines are skipped instead of failing on float('').

    :param path: path of the file to read.
    :return: list of rows, each row being a list of float values.
    :raises ValueError: if a field cannot be converted to float.
    """
    rows = []
    with open(path, encoding='utf8', mode='r') as dataFile:
        for line in dataFile:
            line = line.strip('\r\n')
            if not line.strip():
                continue
            rows.append([float(elem) for elem in line.split('\t')])
    return rows


if __name__ == "__main__":
    # Parameter definition
    parser = OptionParser()
    parser.add_option("--inputPath", dest="inputPath",
                      help="Path to read input files", metavar="PATH")
    parser.add_option("--inputTrainingData", dest="inputTrainingData",
                      help="File to read training data", metavar="FILE")
    parser.add_option("--inputTrainingClasses", dest="inputTrainingClasses",
                      help="File to read training true classes", metavar="FILE")
    parser.add_option("--inputTestData", dest="inputTestData",
                      help="File to read test data", metavar="FILE")
    parser.add_option("--inputTestClasses", dest="inputTestClasses",
                      help="File to read test true classes", metavar="FILE")
    parser.add_option("--outputPath", dest="outputPath",
                      help="Path to place output files", metavar="PATH")
    parser.add_option("--outputFile", dest="outputFile",
                      help="File to write evaluation report", metavar="FILE")
    parser.add_option("--classifier", dest="classifier",
                      help="Classifier", metavar="CLASSIFIER")

    (options, args) = parser.parse_args()
    # Every parameter is a named option, so the positional list `args` is
    # empty in a normal run; the command line itself must be inspected to
    # detect that nothing was passed. parser.error() prints the usage
    # message and terminates the program.
    if len(sys.argv) <= 1:
        parser.error("None parameters indicated.")

    # All options are used below; report the missing ones as a usage error
    # instead of failing later inside os.path.join().
    requiredOptions = ("inputPath", "inputTrainingData", "inputTrainingClasses",
                       "inputTestData", "inputTestClasses", "outputPath",
                       "outputFile", "classifier")
    missingOptions = ["--" + name for name in requiredOptions
                      if getattr(options, name) is None]
    if missingOptions:
        parser.error("Missing required parameters: " + ", ".join(missingOptions))

    # Classifiers that can be instantiated. SVM is mentioned in the
    # file header but has no implementation, so it is rejected explicitly
    # before any file is read (previously it failed with a NameError at
    # training time).
    classifierFactories = {
        "MultinomialNB": MultinomialNB,
        "RandomForest": RandomForestClassifier,
    }
    if options.classifier not in classifierFactories:
        if options.classifier == "SVM":
            parser.error("Classifier SVM is not implemented.")
        parser.error("Unknown classifier: {}. Available classifiers: {}".format(
            options.classifier, ", ".join(sorted(classifierFactories))))

    # Printing parameter values
    print('-------------------------------- PARAMETERS --------------------------------')
    print("Path to read input files: " + str(options.inputPath))
    print("File to read training data: " + str(options.inputTrainingData))
    print("File to read training true classes: " + str(options.inputTrainingClasses))
    print("File to read test data: " + str(options.inputTestData))
    print("File to read test true classes: " + str(options.inputTestClasses))
    print("Path to place output files: " + str(options.outputPath))
    print("File to write evaluation report: " + str(options.outputFile))
    print("Classifier: " + str(options.classifier))

    # Start time
    t0 = time()

    print("   Reading training and test data and true classes...")
    trueTrainingClasses = read_class_labels(
        os.path.join(options.inputPath, options.inputTrainingClasses))
    trueTestClasses = read_class_labels(
        os.path.join(options.inputPath, options.inputTestClasses))

    dataTraining = read_feature_rows(
        os.path.join(options.inputPath, options.inputTrainingData))
    print(dataTraining)

    dataTest = read_feature_rows(
        os.path.join(options.inputPath, options.inputTestData))
    print(dataTest)
    print("     Reading data and true classes done!")

    # Instantiate the selected classifier with its default settings
    classifier = classifierFactories[options.classifier]()

    print("   Training...")
    classifier.fit(dataTraining, trueTrainingClasses)
    print("   Prediction...")
    y_pred = classifier.predict(dataTest)
    print("     Training and predition done!")

    print("   Saving test report...")
    with open(os.path.join(options.outputPath, options.outputFile), mode='w', encoding='utf8') as oFile:
        oFile.write('**********        EVALUATION REPORT     **********\n')
        oFile.write('Classifier: {}\n'.format(options.classifier))
        oFile.write('Accuracy: {}\n'.format(accuracy_score(trueTestClasses, y_pred)))
        # Precision, recall and F-score are computed with average='weighted'
        oFile.write('Precision: {}\n'.format(precision_score(trueTestClasses, y_pred, average='weighted')))
        oFile.write('Recall: {}\n'.format(recall_score(trueTestClasses, y_pred, average='weighted')))
        oFile.write('F-score: {}\n'.format(f1_score(trueTestClasses, y_pred, average='weighted')))
        oFile.write('Confusion matrix: \n')
        oFile.write(str(confusion_matrix(trueTestClasses, y_pred)) + '\n')
        oFile.write('Classification report: \n')
        oFile.write(classification_report(trueTestClasses, y_pred) + '\n')
    print("     Saving test report done!")

    print("Training and test done in: %fs" % (time() - t0))