Carlos-Francisco Méndez-Cruz

Iris dataset for automatic classification

from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import MultinomialNB, BernoulliNB
def scores(list1, list2):
    """Count position-wise matches and mismatches between two sequences.

    Args:
        list1: Reference sequence (e.g. the true classes). Its length is the
            denominator of the error ratio.
        list2: Sequence compared against ``list1`` (e.g. predicted classes).

    Returns:
        A list ``[hits, misses, error_ratio]`` where ``hits`` is the number of
        positions holding equal items, ``misses`` is ``len(list1) - hits`` and
        ``error_ratio`` is ``misses / len(list1)`` (``0.0`` for empty input).

    A length mismatch is reported on stdout and the comparison still runs:
    extra items in ``list2`` are ignored, and items of ``list1`` that have no
    counterpart in ``list2`` are counted as misses.
    """
    total = len(list1)
    if total != len(list2):
        print("ERROR. LENGTH MISMATCH")
    # zip() stops at the shorter sequence, so a short list2 cannot raise
    # IndexError the way indexing list2[i] would.
    hits = sum(1 for expected, observed in zip(list1, list2) if expected == observed)
    misses = total - hits
    # Guard the division so empty input does not raise ZeroDivisionError.
    error_ratio = misses / total if total else 0.0
    return [hits, misses, error_ratio]
# Feature matrix: one list of floats per line of the tab-separated data file.
data = []
lista = []
# The path must be a raw string: in a regular literal "\U" starts an
# eight-digit Unicode escape, which makes "C:\Users..." a SyntaxError.
# NOTE(review): "iris-datasetdata.txt" may be missing a path separator
# (e.g. "iris-dataset\data.txt") -- confirm against the actual folder layout.
with open(r"C:\Users\cmendezc\Dropbox (UNAM-CCG)\Actividades_CCG\LICENCIATURA_LCG\BioInfo-I\lcg-bioinfoI-bionlp\clasificacion-automatica\iris-datasetdata.txt", encoding='utf8') \
        as dataFile:
    for line in dataFile:
        line = line.strip('\n')
        lista = line.split('\t')
        listaFloat = [float(elem) for elem in lista]
        data.append(listaFloat)
print(data)

# True class label of each sample, one label per line.
target = []
with open("C:\\Users\\cmendezc\\Documents\\GENOMICAS\\LICENCIATURA_LCGPDCB\\dataSet_Iris\\true_Classes.txt", encoding='utf8') \
        as classFile:
    for line in classFile:
        line = line.strip('\n')
        target.append(line)

myMultinomialNB = MultinomialNB()
myBernoulliNB = BernoulliNB()
# Fit and predict on the same samples; this prediction is replaced by the
# random-forest prediction below before it is scored.
y_pred = myMultinomialNB.fit(data, target).predict(data)

# Uncomment to inspect each sample: true class, predicted class, features.
# for i in range(len(target)):
#     print(str(target[i]) + "\t" + str(y_pred[i]) + "\t" + str(data[i]))

myRandomForest = RandomForestClassifier()
y_pred = myRandomForest.fit(data, target).predict(data)

# results is [hits, misses, error ratio] as returned by scores().
results = scores(target, y_pred)
print("Errores: {}".format(results[1]))
print("Aciertos: {}".format(results[0]))
print("Cociente error: {}".format(results[2]))
\ No newline at end of file
from sklearn import datasets
from sklearn.naive_bayes import MultinomialNB, BernoulliNB
from sklearn.ensemble import RandomForestClassifier
def scores(list1, list2):
    """Count position-wise matches and mismatches between two sequences.

    Args:
        list1: Reference sequence (e.g. the true classes). Its length is the
            denominator of the error ratio.
        list2: Sequence compared against ``list1`` (e.g. predicted classes).

    Returns:
        A list ``[hits, misses, error_ratio]`` where ``hits`` is the number of
        positions holding equal items, ``misses`` is ``len(list1) - hits`` and
        ``error_ratio`` is ``misses / len(list1)`` (``0.0`` for empty input).

    A length mismatch is reported on stdout and the comparison still runs:
    extra items in ``list2`` are ignored, and items of ``list1`` that have no
    counterpart in ``list2`` are counted as misses.
    """
    total = len(list1)
    if total != len(list2):
        print("ERROR. LENGTH MISMATCH")
    # zip() stops at the shorter sequence, so a short list2 cannot raise
    # IndexError the way indexing list2[i] would.
    hits = sum(1 for expected, observed in zip(list1, list2) if expected == observed)
    misses = total - hits
    # Guard the division so empty input does not raise ZeroDivisionError.
    error_ratio = misses / total if total else 0.0
    return [hits, misses, error_ratio]
# Load the Iris dataset that ships with scikit-learn.
iris = datasets.load_iris()

myMultinomialNB = MultinomialNB()
myBernoulliNB = BernoulliNB()

# Train the multinomial naive Bayes model, then predict the training samples.
myMultinomialNB.fit(iris.data, iris.target)
y_pred = myMultinomialNB.predict(iris.data)

# Debugging aid (disabled): true class, predicted class and features per sample.
# for i in range(len(iris.target)):
#     print(str(iris.target[i]) + "\t" + str(y_pred[i]) + "\t" + str(iris.data[i]))

# Train the random forest the same way; its prediction replaces the previous one.
myRandomForest = RandomForestClassifier()
myRandomForest.fit(iris.data, iris.target)
y_pred = myRandomForest.predict(iris.data)

# scores() returns [hits, misses, error ratio]; report misses first.
results = scores(iris.target, y_pred)
for template, position in (
    ("Errores: {}", 1),
    ("Aciertos: {}", 0),
    ("Cociente error: {}", 2),
):
    print(template.format(results[position]))
\ No newline at end of file
# -*- encoding: utf-8 -*-
import os
from time import time
from optparse import OptionParser
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, \
classification_report
import sys
__author__ = 'CMendezC'
# Goal: training and test Iris dataset
# Parameters:
# 1) --inputPath Path to read input files.
# 2) --inputFileData File to read data.
# 3) --inputFileTrueClasses File to read text true classes.
# 4) --outputPath Path to place output files.
# 5) --outputFile File to place evaluation report.
# 6) --classifier Classifier: MultinomialNB, SVM, RandomForest.
# Output:
# 1) Evaluation report.
# Execution:
# C:\Anaconda3\python trainingTest_Iris.py
# --inputPath C:\Users\cmendezc\Documents\GENOMICAS\LICENCIATURA_LCGPDCB\dataSet_Iris
# --inputFileData data.txt
# --inputFileTrueClasses true_Classes.txt
# --outputPath C:\Users\cmendezc\Documents\GENOMICAS\LICENCIATURA_LCGPDCB\dataSet_Iris
# --outputFile report_MultinomialNB.txt
# --classifier MultinomialNB
# C:\Anaconda3\python trainingTest_Iris.py --inputPath C:\Users\cmendezc\Documents\GENOMICAS\LICENCIATURA_LCGPDCB\dataSet_Iris --inputFileData data.txt --inputFileTrueClasses true_Classes.txt --outputPath C:\Users\cmendezc\Documents\GENOMICAS\LICENCIATURA_LCGPDCB\dataSet_Iris --outputFile report_MultinomialNB.txt --classifier MultinomialNB
###########################################################
# MAIN PROGRAM #
###########################################################
# Classifiers this script can actually instantiate. "SVM" is listed in the
# header comment but has no implementation here.
_SUPPORTED_CLASSIFIERS = ("MultinomialNB", "RandomForest")

# Every option is needed to build a path or to pick the estimator.
_REQUIRED_OPTIONS = ("inputPath", "inputFileData", "inputFileTrueClasses",
                     "outputPath", "outputFile", "classifier")


def _read_true_classes(path):
    """Return the class label found on each non-empty line of *path*."""
    with open(path, encoding='utf8', mode='r') as classFile:
        stripped = (line.strip('\r\n') for line in classFile)
        # Skip blank lines (typically a trailing newline) so they do not
        # become a bogus empty class.
        return [label for label in stripped if label]


def _read_data(path):
    """Return one list of floats per non-empty, tab-separated line of *path*."""
    rows = []
    with open(path, encoding='utf8', mode='r') as dataFile:
        for line in dataFile:
            line = line.strip('\r\n')
            if not line:
                # float('') would raise ValueError on a blank line.
                continue
            rows.append([float(elem) for elem in line.split('\t')])
    return rows


def _build_classifier(name):
    """Instantiate the scikit-learn estimator selected by --classifier."""
    if name == "MultinomialNB":
        return MultinomialNB()
    if name == "RandomForest":
        return RandomForestClassifier()
    raise ValueError("Unsupported classifier: {}".format(name))


def main(argv=None):
    """Train a classifier on the given data and write an evaluation report.

    Args:
        argv: Command-line arguments without the program name. ``None`` makes
            optparse read ``sys.argv[1:]``.

    The classifier is fitted on the data file and evaluated by predicting that
    same data, so the report describes performance on the training samples.
    Invalid or missing parameters terminate the program through
    ``parser.error`` (usage message on stderr, exit status 2).
    """
    # Parameter definition
    parser = OptionParser()
    parser.add_option("--inputPath", dest="inputPath",
                      help="Path to read input files", metavar="PATH")
    parser.add_option("--inputFileData", dest="inputFileData",
                      help="File to read data", metavar="FILE")
    parser.add_option("--inputFileTrueClasses", dest="inputFileTrueClasses",
                      help="File to read true classes", metavar="FILE")
    parser.add_option("--outputPath", dest="outputPath",
                      help="Path to place output files", metavar="PATH")
    parser.add_option("--outputFile", dest="outputFile",
                      help="File to write evaluation report", metavar="FILE")
    parser.add_option("--classifier", dest="classifier",
                      help="Classifier", metavar="CLASSIFIER")

    (options, args) = parser.parse_args(argv)
    if len(args) > 0:
        # parser.error() exits by itself, so no explicit sys.exit is needed.
        parser.error("Unexpected positional arguments: " + " ".join(args))

    # Printing parameter values
    print('-------------------------------- PARAMETERS --------------------------------')
    print("Path to read input files: " + str(options.inputPath))
    print("File to read data: " + str(options.inputFileData))
    print("File to read true classes: " + str(options.inputFileTrueClasses))
    print("Path to place output files: " + str(options.outputPath))
    print("File to write evaluation report: " + str(options.outputFile))
    print("Classifier: " + str(options.classifier))

    # Fail early with a usage message instead of a TypeError from
    # os.path.join(None, ...) or a NameError on an unbound classifier.
    missing = ["--" + name for name in _REQUIRED_OPTIONS
               if getattr(options, name) is None]
    if missing:
        parser.error("Missing required parameters: " + ", ".join(missing))
    if options.classifier not in _SUPPORTED_CLASSIFIERS:
        parser.error("Classifier '{}' is not implemented. Choose one of: {}".format(
            options.classifier, ", ".join(_SUPPORTED_CLASSIFIERS)))

    # Start time
    t0 = time()

    print(" Reading data and true classes...")
    trueClasses = _read_true_classes(
        os.path.join(options.inputPath, options.inputFileTrueClasses))
    print(trueClasses)
    data = _read_data(os.path.join(options.inputPath, options.inputFileData))
    print(data)
    print(" Reading data and true classes done!")

    classifier = _build_classifier(options.classifier)

    print(" Training...")
    y_pred = classifier.fit(data, trueClasses).predict(data)
    print(" Training done!")

    print(" Saving test report...")
    # Compute every metric before opening the output file so that a failing
    # metric does not leave a truncated report behind.
    report_lines = [
        '********** EVALUATION REPORT **********\n',
        'Classifier: {}\n'.format(options.classifier),
        'Accuracy: {}\n'.format(accuracy_score(trueClasses, y_pred)),
        'Precision: {}\n'.format(precision_score(trueClasses, y_pred, average='weighted')),
        'Recall: {}\n'.format(recall_score(trueClasses, y_pred, average='weighted')),
        'F-score: {}\n'.format(f1_score(trueClasses, y_pred, average='weighted')),
        'Confusion matrix: \n',
        str(confusion_matrix(trueClasses, y_pred)) + '\n',
        'Classification report: \n',
        classification_report(trueClasses, y_pred) + '\n',
    ]
    with open(os.path.join(options.outputPath, options.outputFile), mode='w', encoding='utf8') as oFile:
        oFile.writelines(report_lines)
    print(" Saving test report done!")

    print("Training and test done in: %fs" % (time() - t0))


if __name__ == "__main__":
    main()