Toggle navigation
Toggle navigation
This project
Loading...
Sign in
Carlos-Francisco Méndez-Cruz
/
lcg-bioinfoI-bionlp
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Snippets
Network
Create a new issue
Builds
Commits
Issue Boards
Authored by
Carlos-Francisco Méndez-Cruz
2018-09-28 02:43:53 -0500
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
01169cd8127badfb299f2428b1b5cdede02edd10
01169cd8
1 parent
32da8634
Training, crossvalidation and testing dataset
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
36 additions
and
36 deletions
clasificacion-automatica/binding-thrombin-dataset/imb-training-crossvalidation-testing-binding-thrombin.py
clasificacion-automatica/binding-thrombin-dataset/imb-training-crossvalidation-testing-binding-thrombin.py
View file @
01169cd
...
...
@@ -157,38 +157,6 @@ if __name__ == "__main__":
print
(
" Number of training class I: {}"
.
format
(
y_train
.
count
(
'I'
)))
print
(
" Shape of training matrix: {}"
.
format
(
X_train
.
shape
))
print
(
"Reading testing data and true classes..."
)
X_test
=
None
if
args
.
saveData
:
y_test
=
[]
testingData
=
[]
with
open
(
os
.
path
.
join
(
args
.
inputPath
,
args
.
inputTestingData
),
encoding
=
'utf8'
,
mode
=
'r'
)
\
as
iFile
:
for
line
in
iFile
:
line
=
line
.
strip
(
'
\r\n
'
)
listLine
=
line
.
split
(
','
)
testingData
.
append
(
listLine
[
1
:])
X_test
=
csr_matrix
(
testingData
,
dtype
=
'double'
)
with
open
(
os
.
path
.
join
(
args
.
inputPath
,
args
.
inputTestingClasses
),
encoding
=
'utf8'
,
mode
=
'r'
)
\
as
iFile
:
for
line
in
iFile
:
line
=
line
.
strip
(
'
\r\n
'
)
y_test
.
append
(
line
)
print
(
" Saving matrix and classes..."
)
joblib
.
dump
(
X_test
,
os
.
path
.
join
(
args
.
outputModelPath
,
args
.
inputTestingData
+
'.jlb'
))
joblib
.
dump
(
y_test
,
os
.
path
.
join
(
args
.
outputModelPath
,
args
.
inputTestingClasses
+
'.class.jlb'
))
print
(
" Done!"
)
else
:
print
(
" Loading matrix and classes..."
)
X_test
=
joblib
.
load
(
os
.
path
.
join
(
args
.
outputModelPath
,
args
.
inputTestingData
+
'.jlb'
))
y_test
=
joblib
.
load
(
os
.
path
.
join
(
args
.
outputModelPath
,
args
.
inputTestingClasses
+
'.class.jlb'
))
print
(
" Done!"
)
print
(
" Number of testing classes: {}"
.
format
(
len
(
y_test
)))
print
(
" Number of testing class A: {}"
.
format
(
y_test
.
count
(
'A'
)))
print
(
" Number of testing class I: {}"
.
format
(
y_test
.
count
(
'I'
)))
print
(
" Shape of testing matrix: {}"
.
format
(
X_test
.
shape
))
# Feature selection and dimensional reduction
if
args
.
reduction
is
not
None
:
print
(
'Performing dimensionality reduction or feature selection...'
,
args
.
reduction
)
...
...
@@ -252,12 +220,44 @@ if __name__ == "__main__":
X_train
,
y_train
=
sm
.
fit_sample
(
X_train
,
y_train
)
print
(
" After transformtion with {}"
.
format
(
args
.
imbalanced
))
print
(
" Number of t
esting classes: {}"
.
format
(
len
(
y_test
)))
print
(
" Number of t
esting class A: {}"
.
format
(
y_test
.
count
(
'A'
)))
print
(
" Number of t
esting class I: {}"
.
format
(
y_test
.
count
(
'I'
)))
print
(
" Shape of t
esting matrix: {}"
.
format
(
X_test
.
shape
))
print
(
" Number of t
raining classes: {}"
.
format
(
len
(
y_train
)))
print
(
" Number of t
raining class A: {}"
.
format
(
y_train
.
count
(
'A'
)))
print
(
" Number of t
raining class I: {}"
.
format
(
y_train
.
count
(
'I'
)))
print
(
" Shape of t
raining matrix: {}"
.
format
(
X_train
.
shape
))
print
(
" Data transformation done in :
%
fs"
%
(
time
()
-
t1
))
print
(
"Reading testing data and true classes..."
)
X_test
=
None
if
args
.
saveData
:
y_test
=
[]
testingData
=
[]
with
open
(
os
.
path
.
join
(
args
.
inputPath
,
args
.
inputTestingData
),
encoding
=
'utf8'
,
mode
=
'r'
)
\
as
iFile
:
for
line
in
iFile
:
line
=
line
.
strip
(
'
\r\n
'
)
listLine
=
line
.
split
(
','
)
testingData
.
append
(
listLine
[
1
:])
X_test
=
csr_matrix
(
testingData
,
dtype
=
'double'
)
with
open
(
os
.
path
.
join
(
args
.
inputPath
,
args
.
inputTestingClasses
),
encoding
=
'utf8'
,
mode
=
'r'
)
\
as
iFile
:
for
line
in
iFile
:
line
=
line
.
strip
(
'
\r\n
'
)
y_test
.
append
(
line
)
print
(
" Saving matrix and classes..."
)
joblib
.
dump
(
X_test
,
os
.
path
.
join
(
args
.
outputModelPath
,
args
.
inputTestingData
+
'.jlb'
))
joblib
.
dump
(
y_test
,
os
.
path
.
join
(
args
.
outputModelPath
,
args
.
inputTestingClasses
+
'.class.jlb'
))
print
(
" Done!"
)
else
:
print
(
" Loading matrix and classes..."
)
X_test
=
joblib
.
load
(
os
.
path
.
join
(
args
.
outputModelPath
,
args
.
inputTestingData
+
'.jlb'
))
y_test
=
joblib
.
load
(
os
.
path
.
join
(
args
.
outputModelPath
,
args
.
inputTestingClasses
+
'.class.jlb'
))
print
(
" Done!"
)
print
(
" Number of testing classes: {}"
.
format
(
len
(
y_test
)))
print
(
" Number of testing class A: {}"
.
format
(
y_test
.
count
(
'A'
)))
print
(
" Number of testing class I: {}"
.
format
(
y_test
.
count
(
'I'
)))
print
(
" Shape of testing matrix: {}"
.
format
(
X_test
.
shape
))
jobs
=
-
1
paramGrid
=
[]
nIter
=
20
...
...
Please
register
or
login
to post a comment