Carlos-Francisco Méndez-Cruz

Iris dataset for automatic classification

1 -from sklearn.naive_bayes import MultinomialNB, BernoulliNB
2 -
def scores(list1, list2):
    """Count agreements and disagreements between two label sequences.

    Elements are compared position by position with ``==``.

    Args:
        list1: Reference sequence (e.g. the true classes). Its length is the
            denominator of the error quotient.
        list2: Sequence compared against ``list1`` (e.g. predictions).

    Returns:
        A list ``[aciertos, errores, cocienteErrores]``: number of matching
        positions, number of non-matching positions, and
        ``errores / len(list1)``.

    Notes:
        When the lengths differ, "ERROR. LENGTH MISMATCH" is printed and the
        comparison continues: positions of ``list1`` that have no counterpart
        in ``list2`` are counted as errors instead of raising ``IndexError``.
        For an empty ``list1`` the quotient is ``0.0`` instead of raising
        ``ZeroDivisionError``.
    """
    total = len(list1)
    if total != len(list2):
        print("ERROR. LENGTH MISMATCH")
    # zip() stops at the shorter sequence, so a short list2 cannot raise;
    # whatever it does not cover is accounted for as an error below.
    aciertos = sum(1 for esperado, predicho in zip(list1, list2)
                   if esperado == predicho)
    errores = total - aciertos
    cocienteErrores = errores / total if total else 0.0
    return [aciertos, errores, cocienteErrores]
15 -
# RandomForestClassifier is used further down, but this script only imports
# the naive Bayes classes, so it is brought into scope here.
from sklearn.ensemble import RandomForestClassifier

# Feature matrix: one row per non-empty line of the data file, each line being
# tab-separated numbers.
# The path is a raw string: in a regular literal "\U" starts a unicode escape
# and the script does not even compile.
# NOTE(review): "iris-datasetdata.txt" may be missing a path separator
# ("iris-dataset\data.txt") - confirm against the real directory layout.
data = []
lista = []
with open(r"C:\Users\cmendezc\Dropbox (UNAM-CCG)\Actividades_CCG\LICENCIATURA_LCG\BioInfo-I\lcg-bioinfoI-bionlp\clasificacion-automatica\iris-datasetdata.txt", encoding='utf8') \
        as dataFile:
    for line in dataFile:
        line = line.strip('\n')
        if not line:
            # A blank (e.g. trailing) line would make float('') fail.
            continue
        lista = line.split('\t')
        data.append([float(elem) for elem in lista])

print(data)

# True class labels: one label per non-empty line, kept as strings.
target = []
with open("C:\\Users\\cmendezc\\Documents\\GENOMICAS\\LICENCIATURA_LCGPDCB\\dataSet_Iris\\true_Classes.txt", encoding='utf8') \
        as classFile:
    for line in classFile:
        line = line.strip('\n')
        if not line:
            # Skipped for symmetry with the data file, so rows and labels
            # stay aligned when both files end with a blank line.
            continue
        target.append(line)

myMultinomialNB = MultinomialNB()
myBernoulliNB = BernoulliNB()

# Fit and predict on the same samples; this result is overwritten by the
# random forest predictions below.
y_pred = myMultinomialNB.fit(data, target).predict(data)

# Per-sample dump, kept disabled:
# for i in range(len(target)):
#     print(str(target[i]) + "\t" + str(y_pred[i]) + "\t" + str(data[i]))

myRandomForest = RandomForestClassifier()
y_pred = myRandomForest.fit(data, target).predict(data)
# scores() returns [hits, errors, error quotient].
results = scores(target, y_pred)
print("Errores: {}".format(results[1]))
print("Aciertos: {}".format(results[0]))
print("Cociente error: {}".format(results[2]))
...\ No newline at end of file ...\ No newline at end of file
1 -from sklearn import datasets
2 -from sklearn.naive_bayes import MultinomialNB, BernoulliNB
3 -from sklearn.ensemble import RandomForestClassifier
4 -
def scores(list1, list2):
    """Count agreements and disagreements between two label sequences.

    Elements are compared position by position with ``==``.

    Args:
        list1: Reference sequence (e.g. the true classes). Its length is the
            denominator of the error quotient.
        list2: Sequence compared against ``list1`` (e.g. predictions).

    Returns:
        A list ``[aciertos, errores, cocienteErrores]``: number of matching
        positions, number of non-matching positions, and
        ``errores / len(list1)``.

    Notes:
        When the lengths differ, "ERROR. LENGTH MISMATCH" is printed and the
        comparison continues: positions of ``list1`` that have no counterpart
        in ``list2`` are counted as errors instead of raising ``IndexError``.
        For an empty ``list1`` the quotient is ``0.0`` instead of raising
        ``ZeroDivisionError``.
    """
    total = len(list1)
    if total != len(list2):
        print("ERROR. LENGTH MISMATCH")
    # zip() stops at the shorter sequence, so a short list2 cannot raise;
    # whatever it does not cover is accounted for as an error below.
    aciertos = sum(1 for esperado, predicho in zip(list1, list2)
                   if esperado == predicho)
    errores = total - aciertos
    cocienteErrores = errores / total if total else 0.0
    return [aciertos, errores, cocienteErrores]
17 -
# Iris dataset as shipped with scikit-learn.
iris = datasets.load_iris()

myMultinomialNB = MultinomialNB()
myBernoulliNB = BernoulliNB()

# Naive Bayes: fit and predict on the same samples. The predictions are
# replaced by the random forest ones further down.
myMultinomialNB.fit(iris.data, iris.target)
y_pred = myMultinomialNB.predict(iris.data)

# Per-sample dump, kept disabled:
# for i in range(len(iris.target)):
#     print(str(iris.target[i]) + "\t" + str(y_pred[i]) + "\t" + str(iris.data[i]))

# Random forest: again fit and predict on the same samples, then report the
# counts produced by scores() as [hits, errors, error quotient].
myRandomForest = RandomForestClassifier()
myRandomForest.fit(iris.data, iris.target)
y_pred = myRandomForest.predict(iris.data)
results = scores(iris.target, y_pred)
for etiqueta, posicion in (("Errores", 1), ("Aciertos", 0), ("Cociente error", 2)):
    print((etiqueta + ": {}").format(results[posicion]))
...\ No newline at end of file ...\ No newline at end of file
1 -# -*- encoding: utf-8 -*-
2 -
3 -import os
4 -from time import time
5 -from optparse import OptionParser
6 -from sklearn.naive_bayes import MultinomialNB
7 -from sklearn.ensemble import RandomForestClassifier
8 -from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, \
9 - classification_report
10 -import sys
11 -
12 -__author__ = 'CMendezC'
13 -
14 -# Goal: train and test a classifier on the Iris dataset
15 -
16 -# Parameters:
17 -# 1) --inputPath Path to read input files.
18 -# 2) --inputFileData File to read data.
19 -# 3) --inputFileTrueClasses File to read text true classes.
20 -# 4) --outputPath Path to place output files.
21 -# 5) --outputFile File to place evaluation report.
22 -# 6) --classifier Classifier: MultinomialNB, SVM, RandomForest.
23 -
24 -# Output:
25 -# 1) Evaluation report.
26 -
27 -# Execution:
28 -# C:\Anaconda3\python trainingTest_Iris.py
29 -# --inputPath C:\Users\cmendezc\Documents\GENOMICAS\LICENCIATURA_LCGPDCB\dataSet_Iris
30 -# --inputFileData data.txt
31 -# --inputFileTrueClasses true_Classes.txt
32 -# --outputPath C:\Users\cmendezc\Documents\GENOMICAS\LICENCIATURA_LCGPDCB\dataSet_Iris
33 -# --outputFile report_MultinomialNB.txt
34 -# --classifier MultinomialNB
35 -
36 -# C:\Anaconda3\python trainingTest_Iris.py --inputPath C:\Users\cmendezc\Documents\GENOMICAS\LICENCIATURA_LCGPDCB\dataSet_Iris --inputFileData data.txt --inputFileTrueClasses true_Classes.txt --outputPath C:\Users\cmendezc\Documents\GENOMICAS\LICENCIATURA_LCGPDCB\dataSet_Iris --outputFile report_MultinomialNB.txt --classifier MultinomialNB
37 -
38 -###########################################################
39 -# MAIN PROGRAM #
40 -###########################################################
41 -
if __name__ == "__main__":
    # Command-line entry point: read the samples and their true classes, fit
    # the requested classifier, predict on the same samples and write an
    # evaluation report.

    # Parameter definition
    parser = OptionParser()
    parser.add_option("--inputPath", dest="inputPath",
                      help="Path to read input files", metavar="PATH")
    parser.add_option("--inputFileData", dest="inputFileData",
                      help="File to read data", metavar="FILE")
    parser.add_option("--inputFileTrueClasses", dest="inputFileTrueClasses",
                      help="File to read true classes", metavar="FILE")
    parser.add_option("--outputPath", dest="outputPath",
                      help="Path to place output files", metavar="PATH")
    parser.add_option("--outputFile", dest="outputFile",
                      help="File to write evaluation report", metavar="FILE")
    parser.add_option("--classifier", dest="classifier",
                      help="Classifier", metavar="CLASSIFIER")

    (options, args) = parser.parse_args()
    # parser.error() prints the usage message and terminates the program, so
    # nothing placed after it in a branch is ever executed.
    if len(args) > 0:
        parser.error("Unexpected positional arguments: {}".format(" ".join(args)))

    # Every option is needed below; a missing one would otherwise surface as
    # an obscure TypeError inside os.path.join().
    requiredOptions = ("inputPath", "inputFileData", "inputFileTrueClasses",
                       "outputPath", "outputFile", "classifier")
    missingOptions = [name for name in requiredOptions
                      if getattr(options, name) is None]
    if missingOptions:
        parser.error("Missing required parameters: {}".format(
            ", ".join("--" + name for name in missingOptions)))

    # Classifiers that actually have an implementation in this script. "SVM"
    # is mentioned in the header comments but has none, so it is rejected here
    # with a clear message instead of failing later with a NameError.
    availableClassifiers = {
        "MultinomialNB": MultinomialNB,
        "RandomForest": RandomForestClassifier,
    }
    if options.classifier not in availableClassifiers:
        parser.error("Classifier not supported: {}. Supported classifiers: {}".format(
            options.classifier, ", ".join(sorted(availableClassifiers))))

    # Printing parameter values
    print('-------------------------------- PARAMETERS --------------------------------')
    print("Path to read input files: " + str(options.inputPath))
    print("File to read data: " + str(options.inputFileData))
    print("File to read true classes: " + str(options.inputFileTrueClasses))
    print("Path to place output files: " + str(options.outputPath))
    print("File to write evaluation report: " + str(options.outputFile))
    print("Classifier: " + str(options.classifier))

    # Start time
    t0 = time()

    print(" Reading data and true classes...")
    # One class label per non-empty line, kept as strings.
    trueClasses = []
    with open(os.path.join(options.inputPath, options.inputFileTrueClasses), encoding='utf8', mode='r') \
            as classFile:
        for line in classFile:
            line = line.strip('\r\n')
            if not line:
                # Skipped for symmetry with the data file, so samples and
                # labels stay aligned when both files end with a blank line.
                continue
            trueClasses.append(line)
    print(trueClasses)

    # One sample per non-empty line: tab-separated numeric values.
    data = []
    with open(os.path.join(options.inputPath, options.inputFileData), encoding='utf8', mode='r') \
            as dataFile:
        for line in dataFile:
            line = line.strip('\r\n')
            if not line:
                # A blank (e.g. trailing) line would make float('') fail.
                continue
            data.append([float(elem) for elem in line.split('\t')])
    print(data)
    print(" Reading data and true classes done!")

    classifier = availableClassifiers[options.classifier]()

    print(" Training...")
    # The model is evaluated on the very samples it was fitted on.
    y_pred = classifier.fit(data, trueClasses).predict(data)
    print(" Training done!")

    print(" Saving test report...")
    with open(os.path.join(options.outputPath, options.outputFile), mode='w', encoding='utf8') as oFile:
        oFile.write('********** EVALUATION REPORT **********\n')
        oFile.write('Classifier: {}\n'.format(options.classifier))
        oFile.write('Accuracy: {}\n'.format(accuracy_score(trueClasses, y_pred)))
        oFile.write('Precision: {}\n'.format(precision_score(trueClasses, y_pred, average='weighted')))
        oFile.write('Recall: {}\n'.format(recall_score(trueClasses, y_pred, average='weighted')))
        oFile.write('F-score: {}\n'.format(f1_score(trueClasses, y_pred, average='weighted')))
        oFile.write('Confusion matrix: \n')
        oFile.write(str(confusion_matrix(trueClasses, y_pred)) + '\n')
        oFile.write('Classification report: \n')
        oFile.write(classification_report(trueClasses, y_pred) + '\n')
    print(" Saving test report done!")

    print("Training and test done in: %fs" % (time() - t0))