Toggle navigation
Toggle navigation
This project
Loading...
Sign in
Carlos-Francisco Méndez-Cruz
/
lcg-bioinfoI-bionlp
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Snippets
Network
Create a new issue
Builds
Commits
Issue Boards
Authored by
Carlos-Francisco Méndez-Cruz
2018-09-20 02:18:08 -0500
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
1cbaf97199237aa4f72ef1debea9d041a94fffbf
1cbaf971
1 parent
c3c3b7ae
Training, crossvalidation and testing binding thrombin dataset
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
28 additions
and
2 deletions
clasificacion-automatica/binding-thrombin-dataset/training-crossvalidation-testing-binding-thrombin.py
clasificacion-automatica/binding-thrombin-dataset/training-crossvalidation-testing-binding-thrombin.py
View file @
1cbaf97
...
...
@@ -10,7 +10,9 @@ from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_sc
classification_report
from
sklearn.externals
import
joblib
from
sklearn
import
model_selection
from
sklearn.feature_selection
import
SelectKBest
,
chi2
from
scipy.sparse
import
csr_matrix
from
sklearn.decomposition
import
TruncatedSVD
import
scipy
__author__
=
'CMendezC'
...
...
@@ -29,13 +31,14 @@ __author__ = 'CMendezC'
# 9) --classifier Classifier: BernoulliNB, SVM, NearestCentroid.
# 10) --saveData Save matrices
# 11) --kernel SVM kernel: linear, rbf, poly.
# 12) --reduction Feature selection or dimensionality reduction: SVD200, SVD300, CHI250, CHI2100.
# Output:
# 1) Classification model and evaluation report.
# Execution:
# python training-testing-binding-thrombin.py
# python training-
crossvalidation-
testing-binding-thrombin.py
# --inputPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/clasificacion-automatica/binding-thrombin-dataset
# --inputTrainingData thrombin.data
# --inputTestingData Thrombin.testset
...
...
@@ -47,9 +50,10 @@ __author__ = 'CMendezC'
# --classifier SVM
# --saveData
# --kernel linear
# --reduction SVD200
# source activate python3
# python training-
testing-binding-thrombin.py --inputPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/clasificacion-automatica/binding-thrombin-dataset --inputTrainingData thrombin.data --inputTestingData Thrombin.testset --inputTestingClasses Thrombin.testset.class --outputModelPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/clasificacion-automatica/binding-thrombin-dataset/models --outputModelFile SVM-linear-model.mod --outputReportPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/clasificacion-automatica/binding-thrombin-dataset/reports --outputReportFile SVM-linear.txt --classifier SVM --kernel linear
# python training-
crossvalidation-testing-binding-thrombin.py --inputPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/clasificacion-automatica/binding-thrombin-dataset --inputTrainingData thrombin.data --inputTestingData Thrombin.testset --inputTestingClasses Thrombin.testset.class --outputModelPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/clasificacion-automatica/binding-thrombin-dataset/models --outputModelFile SVM-linear-model.mod --outputReportPath /home/compu2/bionlp/lcg-bioinfoI-bionlp/clasificacion-automatica/binding-thrombin-dataset/reports --outputReportFile SVM-linear.txt --classifier SVM --kernel linear --reduction SVD200
###########################################################
# MAIN PROGRAM #
...
...
@@ -82,6 +86,9 @@ if __name__ == "__main__":
parser
.
add_argument
(
"--kernel"
,
dest
=
"kernel"
,
help
=
"Kernel SVM"
,
metavar
=
"NAME"
,
choices
=
(
'linear'
,
'rbf'
,
'poly'
),
default
=
'linear'
)
parser
.
add_argument
(
"--reduction"
,
dest
=
"reduction"
,
help
=
"Feature selection or dimensionality reduction"
,
metavar
=
"NAME"
,
choices
=
(
'SVD200'
,
'SVD300'
,
'CHI250'
,
'CHI2100'
),
default
=
'SVD200'
)
args
=
parser
.
parse_args
()
...
...
@@ -98,6 +105,7 @@ if __name__ == "__main__":
print
(
"Classifier: "
+
str
(
args
.
classifier
))
print
(
"Save matrices: "
+
str
(
args
.
saveData
))
print
(
"Kernel: "
+
str
(
args
.
kernel
))
print
(
"Reduction: "
+
str
(
args
.
reduction
))
# Start time
t0
=
time
()
...
...
@@ -163,6 +171,24 @@ if __name__ == "__main__":
print
(
" Number of testing class I: {}"
.
format
(
y_test
.
count
(
'I'
)))
print
(
" Shape of testing matrix: {}"
.
format
(
X_test
.
shape
))
# Feature selection and dimensionality reduction
if
args
.
reduction
is
not
None
:
print
(
'Performing dimensionality reduction or feature selection...'
,
args
.
reduction
)
if
args
.
reduction
==
'SVD200'
:
reduc
=
TruncatedSVD
(
n_components
=
200
,
random_state
=
42
)
X_train
=
reduc
.
fit_transform
(
X_train
)
if
args
.
reduction
==
'SVD300'
:
reduc
=
TruncatedSVD
(
n_components
=
300
,
random_state
=
42
)
X_train
=
reduc
.
fit_transform
(
X_train
)
elif
args
.
reduction
==
'CHI250'
:
reduc
=
SelectKBest
(
chi2
,
k
=
50
)
X_train
=
reduc
.
fit_transform
(
X_train
,
y_train
)
elif
args
.
reduction
==
'CHI2100'
:
reduc
=
SelectKBest
(
chi2
,
k
=
100
)
X_train
=
reduc
.
fit_transform
(
X_train
,
y_train
)
print
(
" Done!"
)
print
(
' New shape of training matrix: '
,
X_train
.
shape
)
jobs
=
-
1
paramGrid
=
[]
nIter
=
20
...
...
Please
register
or
login
to post a comment