Toggle navigation
Toggle navigation
This project
Loading...
Sign in
Carlos-Francisco Méndez-Cruz
/
lcg-bioinfoI-bionlp
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Snippets
Network
Create a new issue
Builds
Commits
Issue Boards
Authored by
Carlos-Francisco Méndez-Cruz
2019-03-26 12:13:34 -0600
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
caac90815d7683b31b1c452242a6bf45264e68cc
caac9081
1 parent
3742c095
Iris dataset for automatic classification
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
29 additions
and
39 deletions
clasificacion-automatica/iris-dataset/trainingEvaluation_Iris_v1.py
clasificacion-automatica/iris-dataset/trainingEvaluation_Iris_v1.py
View file @
caac908
...
...
@@ -4,40 +4,30 @@ import os
from
time
import
time
from
optparse
import
OptionParser
from
sklearn.naive_bayes
import
MultinomialNB
from
sklearn.ensemble
import
RandomForestClassifier
from
sklearn.tree
import
DecisionTreeClassifier
from
sklearn.svm
import
SVC
from
sklearn.metrics
import
accuracy_score
,
precision_score
,
recall_score
,
f1_score
,
confusion_matrix
,
\
classification_report
import
sys
__author__
=
'CMendezC'
# Goal: training and
test
Iris dataset
# Goal: training and
evaluation
Iris dataset
# Parameters:
# 1) --inputPath Path to read input files.
# 2) --inputTrainingData File to read training data.
# 3) --inputTrainingClasses File to read training true classes.
# 4) --input
Test
Data File to read test data.
# 5) --input
Test
Classes File to read test true classes.
# 4) --input
Evaluation
Data File to read evaluation data.
# 5) --input
Evaluation
Classes File to read evaluation true classes.
# 6) --outputPath Path to place output files.
# 7) --outputFile File to place evaluation report.
# 8) --classifier Classifier: MultinomialNB, SVM,
RandomForest
.
# 8) --classifier Classifier: MultinomialNB, SVM,
DecisionTree
.
# Output:
# 1) Evaluation report.
# Execution:
# C:\Anaconda3\python trainingEvaluation_Iris_v1.py
# --inputPath C:\Users\cmendezc\Documents\GENOMICAS\LICENCIATURA_LCGPDCB\dataSet_Iris
# --inputTrainingData training_Data.txt
# --inputTrainingClasses training_TrueClasses.txt
# --inputEvaluationData test_Data.txt
# --inputEvaluationClasses test_TrueClasses.txt
# --outputPath C:\Users\cmendezc\Documents\GENOMICAS\LICENCIATURA_LCGPDCB\dataSet_Iris
# --outputFile report_MultinomialNB.txt
# --classifier MultinomialNB
# C:\Anaconda3\python trainingEvaluation_Iris_v1.py --inputPath C:\Users\cmendezc\Documents\GENOMICAS\LICENCIATURA_LCGPDCB\dataSet_Iris --inputTrainingData training_Data.txt --inputTrainingClasses training_TrueClasses.txt --inputEvaluationData test_Data.txt --inputEvaluationClasses test_TrueClasses.txt --outputPath C:\Users\cmendezc\Documents\GENOMICAS\LICENCIATURA_LCGPDCB\dataSet_Iris --outputFile report_MultinomialNB.txt --classifier MultinomialNB
###########################################################
# MAIN PROGRAM #
...
...
@@ -52,9 +42,9 @@ if __name__ == "__main__":
help
=
"File to read training data"
,
metavar
=
"FILE"
)
parser
.
add_option
(
"--inputTrainingClasses"
,
dest
=
"inputTrainingClasses"
,
help
=
"File to read training true classes"
,
metavar
=
"FILE"
)
parser
.
add_option
(
"--input
TestData"
,
dest
=
"inputTest
Data"
,
parser
.
add_option
(
"--input
EvaluationData"
,
dest
=
"inputEvaluation
Data"
,
help
=
"File to read test data"
,
metavar
=
"FILE"
)
parser
.
add_option
(
"--input
TestClasses"
,
dest
=
"inputTest
Classes"
,
parser
.
add_option
(
"--input
EvaluationClasses"
,
dest
=
"inputEvaluation
Classes"
,
help
=
"File to read test true classes"
,
metavar
=
"FILE"
)
parser
.
add_option
(
"--outputPath"
,
dest
=
"outputPath"
,
help
=
"Path to place output files"
,
metavar
=
"PATH"
)
...
...
@@ -73,8 +63,8 @@ if __name__ == "__main__":
print
(
"Path to read input files: "
+
str
(
options
.
inputPath
))
print
(
"File to read training data: "
+
str
(
options
.
inputTrainingData
))
print
(
"File to read training true classes: "
+
str
(
options
.
inputTrainingClasses
))
print
(
"File to read
test data: "
+
str
(
options
.
inputTest
Data
))
print
(
"File to read
test true classes: "
+
str
(
options
.
inputTest
Classes
))
print
(
"File to read
evaluation data: "
+
str
(
options
.
inputEvaluation
Data
))
print
(
"File to read
evaluation true classes: "
+
str
(
options
.
inputEvaluation
Classes
))
print
(
"Path to place output files: "
+
str
(
options
.
outputPath
))
print
(
"File to write evaluation report: "
+
str
(
options
.
outputFile
))
print
(
"Classifier: "
+
str
(
options
.
outputFile
))
...
...
@@ -82,24 +72,24 @@ if __name__ == "__main__":
# Start time
t0
=
time
()
print
(
" Reading training and
test
data and true classes..."
)
print
(
" Reading training and
evaluation
data and true classes..."
)
trueTrainingClasses
=
[]
true
Test
Classes
=
[]
true
Evaluation
Classes
=
[]
with
open
(
os
.
path
.
join
(
options
.
inputPath
,
options
.
inputTrainingClasses
),
encoding
=
'utf8'
,
mode
=
'r'
)
\
as
classFile
:
for
line
in
classFile
:
line
=
line
.
strip
(
'
\r\n
'
)
trueTrainingClasses
.
append
(
line
)
with
open
(
os
.
path
.
join
(
options
.
inputPath
,
options
.
input
Test
Classes
),
encoding
=
'utf8'
,
mode
=
'r'
)
\
with
open
(
os
.
path
.
join
(
options
.
inputPath
,
options
.
input
Evaluation
Classes
),
encoding
=
'utf8'
,
mode
=
'r'
)
\
as
classFile
:
for
line
in
classFile
:
line
=
line
.
strip
(
'
\r\n
'
)
true
Test
Classes
.
append
(
line
)
# print(true
Test
Classes)
true
Evaluation
Classes
.
append
(
line
)
# print(true
Evaluation
Classes)
dataTraining
=
[]
data
Test
=
[]
data
Evaluation
=
[]
with
open
(
os
.
path
.
join
(
options
.
inputPath
,
options
.
inputTrainingData
),
encoding
=
'utf8'
,
mode
=
'r'
)
\
as
dataFile
:
for
line
in
dataFile
:
...
...
@@ -112,7 +102,7 @@ if __name__ == "__main__":
dataTraining
.
append
(
listFloat
)
print
(
dataTraining
)
with
open
(
os
.
path
.
join
(
options
.
inputPath
,
options
.
input
Test
Data
),
encoding
=
'utf8'
,
mode
=
'r'
)
\
with
open
(
os
.
path
.
join
(
options
.
inputPath
,
options
.
input
Evaluation
Data
),
encoding
=
'utf8'
,
mode
=
'r'
)
\
as
dataFile
:
for
line
in
dataFile
:
listTemp
=
[]
...
...
@@ -121,21 +111,21 @@ if __name__ == "__main__":
listTemp
=
line
.
split
(
'
\t
'
)
for
elem
in
listTemp
:
listFloat
.
append
(
float
(
elem
))
data
Test
.
append
(
listFloat
)
print
(
data
Test
)
data
Evaluation
.
append
(
listFloat
)
print
(
data
Evaluation
)
print
(
" Reading data and true classes done!"
)
if
options
.
classifier
==
"MultinomialNB"
:
classifier
=
MultinomialNB
()
elif
options
.
classifier
==
"SVM"
:
pass
elif
options
.
classifier
==
"
RandomForest
"
:
classifier
=
RandomForest
Classifier
()
classifier
=
SVC
()
elif
options
.
classifier
==
"
DecisionTree
"
:
classifier
=
DecisionTree
Classifier
()
print
(
" Training..."
)
classifier
.
fit
(
dataTraining
,
trueTrainingClasses
)
print
(
" Prediction..."
)
y_pred
=
classifier
.
predict
(
data
Test
)
y_pred
=
classifier
.
predict
(
data
Evaluation
)
print
(
" Training and predition done!"
)
# for i in range(len(trueClasses)):
...
...
@@ -145,18 +135,18 @@ if __name__ == "__main__":
with
open
(
os
.
path
.
join
(
options
.
outputPath
,
options
.
outputFile
),
mode
=
'w'
,
encoding
=
'utf8'
)
as
oFile
:
oFile
.
write
(
'********** EVALUATION REPORT **********
\n
'
)
oFile
.
write
(
'Classifier: {}
\n
'
.
format
(
options
.
classifier
))
oFile
.
write
(
'Accuracy: {}
\n
'
.
format
(
accuracy_score
(
true
Test
Classes
,
y_pred
)))
oFile
.
write
(
'Precision: {}
\n
'
.
format
(
precision_score
(
true
Test
Classes
,
y_pred
,
average
=
'weighted'
)))
oFile
.
write
(
'Recall: {}
\n
'
.
format
(
recall_score
(
true
Test
Classes
,
y_pred
,
average
=
'weighted'
)))
oFile
.
write
(
'F-score: {}
\n
'
.
format
(
f1_score
(
true
Test
Classes
,
y_pred
,
average
=
'weighted'
)))
oFile
.
write
(
'Accuracy: {}
\n
'
.
format
(
accuracy_score
(
true
Evaluation
Classes
,
y_pred
)))
oFile
.
write
(
'Precision: {}
\n
'
.
format
(
precision_score
(
true
Evaluation
Classes
,
y_pred
,
average
=
'weighted'
)))
oFile
.
write
(
'Recall: {}
\n
'
.
format
(
recall_score
(
true
Evaluation
Classes
,
y_pred
,
average
=
'weighted'
)))
oFile
.
write
(
'F-score: {}
\n
'
.
format
(
f1_score
(
true
Evaluation
Classes
,
y_pred
,
average
=
'weighted'
)))
# oFile.write('{}\t{}\t{}\t{}\n'.format(accuracy_score(trueClasses, y_pred),
# precision_score(trueClasses, y_pred, average='weighted'),
# recall_score(trueClasses, y_pred, average='weighted'),
# f1_score(trueClasses, y_pred, average='weighted')))
oFile
.
write
(
'Confusion matrix:
\n
'
)
oFile
.
write
(
str
(
confusion_matrix
(
true
Test
Classes
,
y_pred
))
+
'
\n
'
)
oFile
.
write
(
str
(
confusion_matrix
(
true
Evaluation
Classes
,
y_pred
))
+
'
\n
'
)
oFile
.
write
(
'Classification report:
\n
'
)
oFile
.
write
(
classification_report
(
true
Test
Classes
,
y_pred
)
+
'
\n
'
)
oFile
.
write
(
classification_report
(
true
Evaluation
Classes
,
y_pred
)
+
'
\n
'
)
print
(
" Saving test report done!"
)
print
(
"Training and test done in:
%
fs"
%
(
time
()
-
t0
))
...
...
Please
register
or
login
to post a comment