Toggle navigation
Toggle navigation
This project
Loading...
Sign in
Carlos-Francisco Méndez-Cruz
/
lcg-bioinfoI-bionlp
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Snippets
Network
Create a new issue
Builds
Commits
Issue Boards
Authored by
Carlos-Francisco Méndez-Cruz
2018-09-20 01:49:17 -0500
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
1788888a0381484c0346e10951af1dbdb29869ba
1788888a
1 parent
9c8914a1
Training, crossvalidation and testing binding thrombin dataset
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
43 additions
and
7 deletions
clasificacion-automatica/binding-thrombin-dataset/training-crossvalidation-testing-binding-thrombin.py
clasificacion-automatica/binding-thrombin-dataset/training-crossvalidation-testing-binding-thrombin.py
View file @
1788888
...
...
@@ -5,11 +5,13 @@ from time import time
import
argparse
from
sklearn.naive_bayes
import
BernoulliNB
from
sklearn.svm
import
SVC
from
sklearn.neighbors
import
NearestCentroid
from
sklearn.neighbors
import
KNeighborsClassifier
from
sklearn.metrics
import
accuracy_score
,
precision_score
,
recall_score
,
f1_score
,
confusion_matrix
,
\
classification_report
from
sklearn.externals
import
joblib
from
sklearn
import
model_selection
from
scipy.sparse
import
csr_matrix
import
scipy
__author__
=
'CMendezC'
...
...
@@ -47,7 +49,7 @@ __author__ = 'CMendezC'
# --kernel linear
# source activate python3
# python training-testing-binding-thrombin.py --inputPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/clasificacion-automatica/binding-thrombin-dataset --inputTrainingData thrombin.data --inputTestingData Thrombin.testset --inputTestingClasses Thrombin.testset.class --outputModelPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/clasificacion-automatica/binding-thrombin-dataset/models --outputModelFile SVM-model.mod --outputReportPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/clasificacion-automatica/binding-thrombin-dataset/reports --outputReportFile SVM.txt --classifier SVM
# python training-testing-binding-thrombin.py --inputPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/clasificacion-automatica/binding-thrombin-dataset --inputTrainingData thrombin.data --inputTestingData Thrombin.testset --inputTestingClasses Thrombin.testset.class --outputModelPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/clasificacion-automatica/binding-thrombin-dataset/models --outputModelFile SVM-linear-model.mod --outputReportPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/clasificacion-automatica/binding-thrombin-dataset/reports --outputReportFile SVM-linear.txt --classifier SVM --kernel linear
###########################################################
# MAIN PROGRAM #
...
...
@@ -77,6 +79,9 @@ if __name__ == "__main__":
choices
=
(
'BernoulliNB'
,
'SVM'
,
'NearestCentroid'
),
default
=
'SVM'
)
parser
.
add_argument
(
"--saveData"
,
dest
=
"saveData"
,
action
=
'store_true'
,
help
=
"Save matrices"
)
parser
.
add_argument
(
"--kernel"
,
dest
=
"kernel"
,
help
=
"Kernel SVM"
,
metavar
=
"NAME"
,
choices
=
(
'linear'
,
'rbf'
,
'poly'
),
default
=
'linear'
)
args
=
parser
.
parse_args
()
...
...
@@ -92,6 +97,7 @@ if __name__ == "__main__":
print
(
"File to place evaluation report: "
+
str
(
args
.
outputReportFile
))
print
(
"Classifier: "
+
str
(
args
.
classifier
))
print
(
"Save matrices: "
+
str
(
args
.
saveData
))
print
(
"Kernel: "
+
str
(
args
.
kernel
))
# Start time
t0
=
time
()
...
...
@@ -157,15 +163,45 @@ if __name__ == "__main__":
print
(
" Number of testing class I: {}"
.
format
(
y_test
.
count
(
'I'
)))
print
(
" Shape of testing matrix: {}"
.
format
(
X_test
.
shape
))
if
args
.
classifier
==
"BernoulliNB"
:
classifier
=
BernoulliNB
()
elif
args
.
classifier
==
"SVM"
:
jobs
=
-
1
paramGrid
=
[]
nIter
=
20
crossV
=
10
print
(
"Defining randomized grid search..."
)
if
args
.
classifier
==
'SVM'
:
# SVM
classifier
=
SVC
()
elif
args
.
classifier
==
"NearestCentroid"
:
classifier
=
NearestCentroid
()
if
args
.
kernel
==
'rbf'
:
paramGrid
=
{
'C'
:
scipy
.
stats
.
expon
(
scale
=
100
),
'gamma'
:
scipy
.
stats
.
expon
(
scale
=.
1
),
'kernel'
:
[
'rbf'
],
'class_weight'
:
[
'balanced'
,
None
]}
elif
args
.
kernel
==
'linear'
:
paramGrid
=
{
'C'
:
scipy
.
stats
.
expon
(
scale
=
100
),
'kernel'
:
[
'linear'
],
'class_weight'
:
[
'balanced'
,
None
]}
elif
args
.
kernel
==
'poly'
:
paramGrid
=
{
'C'
:
scipy
.
stats
.
expon
(
scale
=
100
),
'gamma'
:
scipy
.
stats
.
expon
(
scale
=.
1
),
'degree'
:
[
2
,
3
],
'kernel'
:
[
'poly'
],
'class_weight'
:
[
'balanced'
,
None
]}
myClassifier
=
model_selection
.
RandomizedSearchCV
(
classifier
,
paramGrid
,
n_iter
=
nIter
,
cv
=
crossV
,
n_jobs
=
jobs
,
verbose
=
3
)
elif
args
.
classifier
==
'BernoulliNB'
:
# BernoulliNB
classifier
=
BernoulliNB
()
paramGrid
=
{
'alpha'
:
scipy
.
stats
.
expon
(
scale
=
1.0
)}
myClassifier
=
model_selection
.
RandomizedSearchCV
(
classifier
,
paramGrid
,
n_iter
=
nIter
,
cv
=
crossV
,
n_jobs
=
jobs
,
verbose
=
3
)
elif
args
.
classifier
==
'kNN'
:
# kNN
classifier
=
KNeighborsClassifier
()
paramGrid
=
{
'n_neighbors '
:
[
3
,
5
,
7
]}
myClassifier
=
model_selection
.
RandomizedSearchCV
(
classifier
,
paramGrid
,
n_iter
=
nIter
,
cv
=
crossV
,
n_jobs
=
jobs
,
verbose
=
3
)
else
:
print
(
"Bad classifier"
)
exit
()
print
(
" Done!"
)
print
(
"Training..."
)
classifier
.
fit
(
X_train
,
y_train
)
...
...
Please
register
or
login
to post a comment