Carlos-Francisco Méndez-Cruz / lcg-bioinfoI-bionlp
Authored by Carlos-Francisco Méndez-Cruz, 2019-03-26 12:02:24 -0600
Commit 3742c09504e2b574e2efe82ac7f240f8a0a00f49, 1 parent e879af08
Iris dataset for automatic classification
Showing 4 changed files with 0 additions and 217 deletions
clasificacion-automatica/iris-dataset/trainingTest_Iris_v2.py → clasificacion-automatica/iris-dataset/trainingEvaluation_Iris_v1.py
clasificacion-automatica/iris-dataset/trainingTest_IrisFiles.py
clasificacion-automatica/iris-dataset/trainingTest_Iris_v0.py
clasificacion-automatica/iris-dataset/trainingTest_Iris_v1.py
clasificacion-automatica/iris-dataset/trainingTest_Iris_v2.py → clasificacion-automatica/iris-dataset/trainingEvaluation_Iris_v1.py (file moved, no content changes)
clasificacion-automatica/iris-dataset/trainingTest_IrisFiles.py (deleted, 100644 → 0)
from sklearn.naive_bayes import MultinomialNB, BernoulliNB
from sklearn.ensemble import RandomForestClassifier


def scores(list1, list2):
    # Count matches and mismatches between predicted and true class lists.
    errores = 0
    aciertos = 0
    if len(list1) != len(list2):
        print("ERROR. LENGTH MISMATCH")
    for i in range(len(list1)):
        if list1[i] == list2[i]:
            aciertos += 1
        else:
            errores += 1
    cocienteErrores = errores / len(list1)
    return [aciertos, errores, cocienteErrores]


# Hard-coded input paths; adjust to local copies of data.txt and true_Classes.txt.
data = []
lista = []
with open(r"C:\Users\cmendezc\Dropbox (UNAM-CCG)\Actividades_CCG\LICENCIATURA_LCG\BioInfo-I\lcg-bioinfoI-bionlp\clasificacion-automatica\iris-dataset\data.txt",
          encoding='utf8') as dataFile:
    for line in dataFile:
        listaFloat = []
        line = line.strip('\n')
        lista = line.split('\t')
        for elem in lista:
            listaFloat.append(float(elem))
        data.append(listaFloat)
print(data)

target = []
with open(r"C:\Users\cmendezc\Documents\GENOMICAS\LICENCIATURA_LCGPDCB\dataSet_Iris\true_Classes.txt",
          encoding='utf8') as classFile:
    for line in classFile:
        line = line.strip('\n')
        target.append(line)

myMultinomialNB = MultinomialNB()
myBernoulliNB = BernoulliNB()
y_pred = myMultinomialNB.fit(data, target).predict(data)
'''
for i in range(len(iris.target)):
    print(str(iris.target[i]) + "\t" + str(y_pred[i]) + "\t" + str(iris.data[i]))
'''
myRandomForest = RandomForestClassifier()
y_pred = myRandomForest.fit(data, target).predict(data)

results = scores(target, y_pred)
print("Errores: {}".format(results[1]))
print("Aciertos: {}".format(results[0]))
print("Cociente error: {}".format(results[2]))
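The manual float-parsing loop above can also be done with a single numpy call. A minimal sketch, assuming the same layout as the script (tab-separated feature rows in data.txt, one class label per line in true_Classes.txt); the relative file names here are placeholders for the hard-coded paths, not part of the original code:

import numpy as np

# Hypothetical local copies of the two input files used above; adjust the paths.
data = np.loadtxt("data.txt", delimiter="\t")        # float feature matrix, one sample per row
with open("true_Classes.txt", encoding="utf8") as classFile:
    target = [line.strip() for line in classFile]    # one class label per line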
clasificacion-automatica/iris-dataset/trainingTest_Iris_v0.py (deleted, 100644 → 0)
from sklearn import datasets
from sklearn.naive_bayes import MultinomialNB, BernoulliNB
from sklearn.ensemble import RandomForestClassifier


def scores(list1, list2):
    errores = 0
    aciertos = 0
    if len(list1) != len(list2):
        print("ERROR. LENGTH MISMATCH")
    for i in range(len(list1)):
        if list1[i] == list2[i]:
            aciertos += 1
        else:
            errores += 1
    cocienteErrores = errores / len(list1)
    return [aciertos, errores, cocienteErrores]


iris = datasets.load_iris()
myMultinomialNB = MultinomialNB()
myBernoulliNB = BernoulliNB()
y_pred = myMultinomialNB.fit(iris.data, iris.target).predict(iris.data)
'''
for i in range(len(iris.target)):
    print(str(iris.target[i]) + "\t" + str(y_pred[i]) + "\t" + str(iris.data[i]))
'''
myRandomForest = RandomForestClassifier()
y_pred = myRandomForest.fit(iris.data, iris.target).predict(iris.data)

results = scores(iris.target, y_pred)
print("Errores: {}".format(results[1]))
print("Aciertos: {}".format(results[0]))
print("Cociente error: {}".format(results[2]))
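The hand-rolled scores() helper returns [hits, misses, error ratio]; scikit-learn's own metrics yield the same numbers. A minimal sketch, assuming the same iris data and random-forest fit as in the script above (the variable names mirror the Spanish ones for comparison and are illustrative):

from sklearn import datasets
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

iris = datasets.load_iris()
y_pred = RandomForestClassifier().fit(iris.data, iris.target).predict(iris.data)

aciertos = accuracy_score(iris.target, y_pred, normalize=False)  # count of correct predictions
errores = len(iris.target) - aciertos                            # count of misclassifications
cociente_errores = errores / len(iris.target)                    # error rate, as in scores()
print(aciertos, errores, cociente_errores)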
clasificacion-automatica/iris-dataset/trainingTest_Iris_v1.py (deleted, 100644 → 0)
# -*- encoding: utf-8 -*-

import os
from time import time
from optparse import OptionParser
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, \
    classification_report
import sys

__author__ = 'CMendezC'

# Goal: training and test Iris dataset

# Parameters:
#   1) --inputPath Path to read input files.
#   2) --inputFileData File to read data.
#   3) --inputFileTrueClasses File to read text true classes.
#   4) --outputPath Path to place output files.
#   5) --outputFile File to place evaluation report.
#   6) --classifier Classifier: MultinomialNB, SVM, RandomForest.

# Output:
#   1) Evaluation report.

# Execution:
# C:\Anaconda3\python trainingTest_Iris.py
#   --inputPath C:\Users\cmendezc\Documents\GENOMICAS\LICENCIATURA_LCGPDCB\dataSet_Iris
#   --inputFileData data.txt
#   --inputFileTrueClasses true_Classes.txt
#   --outputPath C:\Users\cmendezc\Documents\GENOMICAS\LICENCIATURA_LCGPDCB\dataSet_Iris
#   --outputFile report_MultinomialNB.txt
#   --classifier MultinomialNB
# C:\Anaconda3\python trainingTest_Iris.py --inputPath C:\Users\cmendezc\Documents\GENOMICAS\LICENCIATURA_LCGPDCB\dataSet_Iris --inputFileData data.txt --inputFileTrueClasses true_Classes.txt --outputPath C:\Users\cmendezc\Documents\GENOMICAS\LICENCIATURA_LCGPDCB\dataSet_Iris --outputFile report_MultinomialNB.txt --classifier MultinomialNB

###########################################################
#                      MAIN PROGRAM                       #
###########################################################

if __name__ == "__main__":
    # Parameter definition
    parser = OptionParser()
    parser.add_option("--inputPath", dest="inputPath",
                      help="Path to read input files", metavar="PATH")
    parser.add_option("--inputFileData", dest="inputFileData",
                      help="File to read data", metavar="FILE")
    parser.add_option("--inputFileTrueClasses", dest="inputFileTrueClasses",
                      help="File to read true classes", metavar="FILE")
    parser.add_option("--outputPath", dest="outputPath",
                      help="Path to place output files", metavar="PATH")
    parser.add_option("--outputFile", dest="outputFile",
                      help="File to write evaluation report", metavar="FILE")
    parser.add_option("--classifier", dest="classifier",
                      help="Classifier", metavar="CLASSIFIER")
    (options, args) = parser.parse_args()
    if len(args) > 0:
        parser.error("None parameters indicated.")
        sys.exit(1)

    # Printing parameter values
    print('-------------------------------- PARAMETERS --------------------------------')
    print("Path to read input files: " + str(options.inputPath))
    print("File to read data: " + str(options.inputFileData))
    print("File to read true classes: " + str(options.inputFileTrueClasses))
    print("Path to place output files: " + str(options.outputPath))
    print("File to write evaluation report: " + str(options.outputFile))
    print("Classifier: " + str(options.classifier))

    # Start time
    t0 = time()

    print(" Reading data and true classes...")
    trueClasses = []
    with open(os.path.join(options.inputPath, options.inputFileTrueClasses), encoding='utf8', mode='r') \
            as classFile:
        for line in classFile:
            line = line.strip('\r\n')
            trueClasses.append(line)
    print(trueClasses)
    data = []
    with open(os.path.join(options.inputPath, options.inputFileData), encoding='utf8', mode='r') \
            as dataFile:
        for line in dataFile:
            listTemp = []
            listFloat = []
            line = line.strip('\r\n')
            listTemp = line.split('\t')
            for elem in listTemp:
                listFloat.append(float(elem))
            data.append(listFloat)
    print(data)
    print(" Reading data and true classes done!")

    if options.classifier == "MultinomialNB":
        classifier = MultinomialNB()
    elif options.classifier == "SVM":
        pass  # SVM not implemented in this script
    elif options.classifier == "RandomForest":
        classifier = RandomForestClassifier()

    print(" Training...")
    y_pred = classifier.fit(data, trueClasses).predict(data)
    print(" Training done!")

    # for i in range(len(trueClasses)):
    #     print(str(trueClasses[i]) + "\t" + str(y_pred[i]))

    print(" Saving test report...")
    with open(os.path.join(options.outputPath, options.outputFile), mode='w', encoding='utf8') as oFile:
        oFile.write('********** EVALUATION REPORT **********\n')
        oFile.write('Classifier: {}\n'.format(options.classifier))
        oFile.write('Accuracy: {}\n'.format(accuracy_score(trueClasses, y_pred)))
        oFile.write('Precision: {}\n'.format(precision_score(trueClasses, y_pred, average='weighted')))
        oFile.write('Recall: {}\n'.format(recall_score(trueClasses, y_pred, average='weighted')))
        oFile.write('F-score: {}\n'.format(f1_score(trueClasses, y_pred, average='weighted')))
        # oFile.write('{}\t{}\t{}\t{}\n'.format(accuracy_score(trueClasses, y_pred),
        #                                       precision_score(trueClasses, y_pred, average='weighted'),
        #                                       recall_score(trueClasses, y_pred, average='weighted'),
        #                                       f1_score(trueClasses, y_pred, average='weighted')))
        oFile.write('Confusion matrix:\n')
        oFile.write(str(confusion_matrix(trueClasses, y_pred)) + '\n')
        oFile.write('Classification report:\n')
        oFile.write(classification_report(trueClasses, y_pred) + '\n')
    print(" Saving test report done!")

    print("Training and test done in: %fs" % (time() - t0))
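Note that the script fits and predicts on the same samples, so the report measures training error rather than generalization. A minimal sketch of a held-out evaluation with train_test_split, assuming data and trueClasses have been read as in the script above; the 80/20 split, the random_state, and the choice of RandomForestClassifier are illustrative assumptions, not part of the original:

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Hypothetical held-out evaluation; data and trueClasses as loaded by the script above.
X_train, X_test, y_train, y_test = train_test_split(
    data, trueClasses, test_size=0.2, random_state=42)
classifier = RandomForestClassifier()
y_pred = classifier.fit(X_train, y_train).predict(X_test)
print(classification_report(y_test, y_pred))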