Toggle navigation
Toggle navigation
This project
Loading...
Sign in
Carlos-Francisco Méndez-Cruz
/
lcg-bioinfoI-bionlp
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Snippets
Network
Create a new issue
Builds
Commits
Issue Boards
Authored by
Carlos-Francisco Méndez-Cruz
2018-09-19 23:25:47 -0500
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
698e530858e543bcec0dc94024f6c66682f72cb0
698e5308
1 parent
4d7657b9
Training and testing binding thrombin dataset
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
19 additions
and
21 deletions
clasificacion-automatica/binding-thrombin-dataset/training-validation-binding-thrombin.py
clasificacion-automatica/binding-thrombin-dataset/training-validation-binding-thrombin.py
View file @
698e530
...
...
@@ -44,6 +44,7 @@ __author__ = 'CMendezC'
# --classifier SVM
# source activate python3
# python training-validation-binding-thrombin.py --inputPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/binding-thrombin-dataset --inputTrainingData thrombin.data --inputTestingData Thrombin.testset --inputTestingClasses Thrombin.testset.class --outputModelPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/binding-thrombin-dataset/models --outputModelFile SVM-model.mod --outputReportPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/binding-thrombin-dataset/reports --outputReportFile SVM.txt --classifier SVM
###########################################################
# MAIN PROGRAM #
...
...
@@ -72,22 +73,19 @@ if __name__ == "__main__":
help
=
"Classifier"
,
metavar
=
"NAME"
,
choices
=
(
'BernoulliNB'
,
'SVM'
,
'NearestCentroid'
),
default
=
'SVM'
)
(
options
,
args
)
=
parser
.
parse_args
()
if
len
(
args
)
>
0
:
parser
.
error
(
"None parameters indicated."
)
sys
.
exit
(
1
)
args
=
parser
.
parse_args
()
# Printing parameter values
print
(
'-------------------------------- PARAMETERS --------------------------------'
)
print
(
"Path to read input files: "
+
str
(
option
s
.
inputPath
))
print
(
"File to read training data: "
+
str
(
option
s
.
inputTrainingData
))
print
(
"File to read testing data: "
+
str
(
option
s
.
inputTestingData
))
print
(
"File to read testing classes: "
+
str
(
option
s
.
inputTestingClasses
))
print
(
"Path to place output model: "
+
str
(
option
s
.
outputModelPath
))
print
(
"File to place output model: "
+
str
(
option
s
.
outputModelFile
))
print
(
"Path to place evaluation report: "
+
str
(
option
s
.
outputReportPath
))
print
(
"File to place evaluation report: "
+
str
(
option
s
.
outputReportFile
))
print
(
"Classifier: "
+
str
(
option
s
.
outputFile
))
print
(
"Path to read input files: "
+
str
(
arg
s
.
inputPath
))
print
(
"File to read training data: "
+
str
(
arg
s
.
inputTrainingData
))
print
(
"File to read testing data: "
+
str
(
arg
s
.
inputTestingData
))
print
(
"File to read testing classes: "
+
str
(
arg
s
.
inputTestingClasses
))
print
(
"Path to place output model: "
+
str
(
arg
s
.
outputModelPath
))
print
(
"File to place output model: "
+
str
(
arg
s
.
outputModelFile
))
print
(
"Path to place evaluation report: "
+
str
(
arg
s
.
outputReportPath
))
print
(
"File to place evaluation report: "
+
str
(
arg
s
.
outputReportFile
))
print
(
"Classifier: "
+
str
(
arg
s
.
outputFile
))
# Start time
t0
=
time
()
...
...
@@ -95,7 +93,7 @@ if __name__ == "__main__":
print
(
" Reading training data and true classes..."
)
trainingClasses
=
[]
trainingData
=
[]
with
open
(
os
.
path
.
join
(
options
.
inputPath
,
option
s
.
inputTrainingData
),
encoding
=
'utf8'
,
mode
=
'r'
)
\
with
open
(
os
.
path
.
join
(
args
.
inputPath
,
arg
s
.
inputTrainingData
),
encoding
=
'utf8'
,
mode
=
'r'
)
\
as
iFile
:
for
line
in
iFile
:
line
=
line
.
strip
(
'
\r\n
'
)
...
...
@@ -113,14 +111,14 @@ if __name__ == "__main__":
print
(
" Reading testing data and true classes..."
)
testingClasses
=
[]
testingData
=
[]
with
open
(
os
.
path
.
join
(
options
.
inputPath
,
option
s
.
inputTestingData
),
encoding
=
'utf8'
,
mode
=
'r'
)
\
with
open
(
os
.
path
.
join
(
args
.
inputPath
,
arg
s
.
inputTestingData
),
encoding
=
'utf8'
,
mode
=
'r'
)
\
as
iFile
:
for
line
in
iFile
:
line
=
line
.
strip
(
'
\r\n
'
)
listLine
=
line
.
split
(
','
)
testingData
.
append
(
listLine
)
testingMatrix
=
csr_matrix
(
testingData
,
dtype
=
'double'
)
with
open
(
os
.
path
.
join
(
options
.
inputPath
,
option
s
.
inputTestingClasses
),
encoding
=
'utf8'
,
mode
=
'r'
)
\
with
open
(
os
.
path
.
join
(
args
.
inputPath
,
arg
s
.
inputTestingClasses
),
encoding
=
'utf8'
,
mode
=
'r'
)
\
as
iFile
:
for
line
in
iFile
:
line
=
line
.
strip
(
'
\r\n
'
)
...
...
@@ -131,11 +129,11 @@ if __name__ == "__main__":
print
(
"Number of testing class I: {}"
.
format
(
trainingClasses
.
count
(
'I'
)))
print
(
"Shape of testing matrix: {}"
.
format
(
testingMatrix
.
shape
))
if
option
s
.
classifier
==
"MultinomialNB"
:
if
arg
s
.
classifier
==
"MultinomialNB"
:
classifier
=
BernoulliNB
()
elif
option
s
.
classifier
==
"SVM"
:
elif
arg
s
.
classifier
==
"SVM"
:
classifier
=
SVC
()
elif
option
s
.
classifier
==
"NearestCentroid"
:
elif
arg
s
.
classifier
==
"NearestCentroid"
:
classifier
=
NearestCentroid
()
print
(
" Training..."
)
...
...
@@ -147,9 +145,9 @@ if __name__ == "__main__":
print
(
" Done!"
)
print
(
" Saving report..."
)
with
open
(
os
.
path
.
join
(
options
.
outputPath
,
option
s
.
outputFile
),
mode
=
'w'
,
encoding
=
'utf8'
)
as
oFile
:
with
open
(
os
.
path
.
join
(
args
.
outputPath
,
arg
s
.
outputFile
),
mode
=
'w'
,
encoding
=
'utf8'
)
as
oFile
:
oFile
.
write
(
'********** EVALUATION REPORT **********
\n
'
)
oFile
.
write
(
'Classifier: {}
\n
'
.
format
(
option
s
.
classifier
))
oFile
.
write
(
'Classifier: {}
\n
'
.
format
(
arg
s
.
classifier
))
oFile
.
write
(
'Accuracy: {}
\n
'
.
format
(
accuracy_score
(
testingClasses
,
y_pred
)))
oFile
.
write
(
'Precision: {}
\n
'
.
format
(
precision_score
(
testingClasses
,
y_pred
,
average
=
'weighted'
)))
oFile
.
write
(
'Recall: {}
\n
'
.
format
(
recall_score
(
testingClasses
,
y_pred
,
average
=
'weighted'
)))
...
...
Please
register
or
login
to post a comment