# -*- encoding: utf-8 -*-

import os
from time import time
import argparse
from sklearn.naive_bayes import MultinomialNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.linear_model import Perceptron
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, \
    classification_report

# Author:
# Carlos Méndez-Cruz

# Goal: training and evaluation using the Iris dataset

# Arguments:
# 1) --inputPath Path to read input files.
# 2) --inputTrainingData File to read training data.
# 3) --inputTrainingClasses File to read training true classes.
# 4) --inputEvaluationData File to read evaluation data.
# 5) --inputEvaluationClasses File to read evaluation true classes.
# 6) --outputPath Path to place output files.
# 7) --outputFile File to place evaluation report.
# 8) --classifier Classifier: MultinomialNB, SVM, DecisionTree, Perceptron.

# Output:
# 1) Evaluation report.
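#    The report contains accuracy, weighted precision, recall and F-score, the
#    confusion matrix, and a per-class classification report; for SVM and
#    Perceptron it also lists the learned feature weights and per-sample
#    confidence scores, and for SVM the support vectors.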

# Execution:
# python training-evaluation-iris-v1.py
# --inputPath /home/clasificacion/iris-dataset
# --inputTrainingData training-data.txt
# --inputTrainingClasses training-classes.txt
# --inputEvaluationData test-data.txt
# --inputEvaluationClasses test-classes.txt
# --outputPath /home/classification/reports
# --outputFile report-iris-svm.txt
# --classifier SVM

# python training-evaluation-iris-v1.py --inputPath /home/laigen-supervised-learning/iris-data-set --inputTrainingData training-data.txt --inputTrainingClasses training-classes.txt --inputEvaluationData test-data.txt --inputEvaluationClasses test-classes.txt --outputPath /home/laigen-supervised-learning/iris-data-set/reports --outputFile report-iris-svm.txt --classifier SVM
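# Expected input format (as assumed from the parsing code below):
# - Data files: one sample per line, feature values separated by tabs,
#   e.g. "5.1\t3.5\t1.4\t0.2"
# - Class files: one true class label per line, aligned line by line with the
#   corresponding data file, e.g. "Iris-setosa"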

###########################################################
#                       MAIN PROGRAM                      #
###########################################################

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Training and testing Iris data set.')
    parser.add_argument("--inputPath", dest="inputPath",
                      help="Path to read input files", metavar="PATH")
    parser.add_argument("--inputTrainingData", dest="inputTrainingData",
                      help="File to read training data", metavar="FILE")
    parser.add_argument("--inputTrainingClasses", dest="inputTrainingClasses",
                      help="File to read training true classes", metavar="FILE")
    parser.add_argument("--inputEvaluationData", dest="inputEvaluationData",
                      help="File to read evaluation data", metavar="FILE")
    parser.add_argument("--inputEvaluationClasses", dest="inputEvaluationClasses",
                      help="File to read evaluation true classes", metavar="FILE")
    parser.add_argument("--outputPath", dest="outputPath",
                          help="Path to place output files", metavar="PATH")
    parser.add_argument("--outputFile", dest="outputFile",
                      help="File to write evaluation report", metavar="FILE")
    parser.add_argument("--classifier", dest="classifier",
                      help="Classifier", metavar="CLASSIFIER")

    args = parser.parse_args()

    # Printing parameters
    print('-------------------------------- PARAMETERS --------------------------------')
    print("Path to read input files: " + str(args.inputPath))
    print("File to read training data: " + str(args.inputTrainingData))
    print("File to read training true classes: " + str(args.inputTrainingClasses))
    print("File to read evaluation data: " + str(args.inputEvaluationData))
    print("File to read evaluation true classes: " + str(args.inputEvaluationClasses))
    print("Path to place output files: " + str(args.outputPath))
    print("File to write evaluation report: " + str(args.outputFile))
    print("Classifier: " + str(args.classifier))

    # Start time
    t0 = time()

    print("   Reading training and evaluation data, and true classes...")
    trueTrainingClasses = []
    trueEvaluationClasses = []
    with open(os.path.join(args.inputPath, args.inputTrainingClasses), encoding='utf8', mode='r') \
            as classFile:
        for line in classFile:
            line = line.strip('\r\n')
            trueTrainingClasses.append(line)

    with open(os.path.join(args.inputPath, args.inputEvaluationClasses), encoding='utf8', mode='r') \
            as classFile:
        for line in classFile:
            line = line.strip('\r\n')
            trueEvaluationClasses.append(line)
    # print(trueEvaluationClasses)

    dataTraining = []
    dataEvaluation = []
    with open(os.path.join(args.inputPath, args.inputTrainingData), encoding='utf8', mode='r') \
            as dataFile:
        for line in dataFile:
            line = line.strip('\r\n')
            # One sample per line: tab-separated feature values converted to float
            listFloat = [float(elem) for elem in line.split('\t')]
            dataTraining.append(listFloat)
    # print(dataTraining)

    with open(os.path.join(args.inputPath, args.inputEvaluationData), encoding='utf8', mode='r') \
            as dataFile:
        for line in dataFile:
            line = line.strip('\r\n')
            # One sample per line: tab-separated feature values converted to float
            listFloat = [float(elem) for elem in line.split('\t')]
            dataEvaluation.append(listFloat)
    # print(dataEvaluation)
    print("     Reading data and true classes done!")

    if args.classifier == "MultinomialNB":
        classifier = MultinomialNB()
    elif args.classifier == "SVM":
        classifier = SVC(kernel="linear")
    elif args.classifier == "DecisionTree":
        classifier = DecisionTreeClassifier()
    elif args.classifier == "Perceptron":
        classifier = Perceptron()
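    # Note: MultinomialNB is designed for count-like, non-negative features; it
    # runs on the Iris measurements only because they happen to be non-negative.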

    print("   Training...")
    classifier.fit(dataTraining, trueTrainingClasses)
    print("   Prediction...")
    y_pred = classifier.predict(dataEvaluation)
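    # The linear models (Perceptron and linear-kernel SVC) expose decision_function,
    # which here returns one confidence score per class and per sample; these
    # scores are written to the evaluation report below.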
    if args.classifier in ["Perceptron", "SVM"]:
        confidence_scores = classifier.decision_function(dataEvaluation)
    print("     Training and predition done!")

    print("   Saving evaluation report...")
    with open(os.path.join(args.outputPath, args.outputFile), mode='w', encoding='utf8') as oFile:
        oFile.write('**********        EVALUATION REPORT     **********\n')
        oFile.write('Classifier: {}\n'.format(args.classifier))
        oFile.write('Accuracy: {}\n'.format(accuracy_score(trueEvaluationClasses, y_pred)))
        oFile.write('Precision: {}\n'.format(precision_score(trueEvaluationClasses, y_pred, average='weighted')))
        oFile.write('Recall: {}\n'.format(recall_score(trueEvaluationClasses, y_pred, average='weighted')))
        oFile.write('F-score: {}\n'.format(f1_score(trueEvaluationClasses, y_pred, average='weighted')))
        oFile.write('Confusion matrix: \n')
        oFile.write(str(confusion_matrix(trueEvaluationClasses, y_pred)) + '\n')
        oFile.write('Classification report: \n')
        oFile.write(classification_report(trueEvaluationClasses, y_pred) + '\n')
        if args.classifier in ["Perceptron", "SVM"]:
            oFile.write('\nWeights assigned to the features: \n')
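            # For the multiclass Iris data, Perceptron.coef_ holds one weight vector
            # per class (one-vs-rest), whereas SVC.coef_ holds one per pair of
            # classes (one-vs-one), so the two weight matrices differ in shape.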
            oFile.write("{}\n".format(classifier.coef_))
            oFile.write('Confidence scores: \n')
            oFile.write("{}\n".format(confidence_scores))
        if args.classifier == "SVM":
            oFile.write('Number of support vectors per class: \n{}\n'.format(classifier.n_support_))
            oFile.write('Support vectors: \n{}\n'.format(classifier.support_vectors_))

    print("     Saving evaluation report done!")

    print("Training and evaluation done in: %fs" % (time() - t0))