Toggle navigation
Toggle navigation
This project
Loading...
Sign in
Carlos-Francisco Méndez-Cruz
/
deep-learning-workshop
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Snippets
Network
Create a new issue
Builds
Commits
Issue Boards
Authored by
Carlos-Francisco Méndez-Cruz
2019-05-08 14:42:57 -0500
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
ab0e732639cb3824627a62d5239eab62a023b821
ab0e7326
1 parent
e3482dca
Deep Learning Workshop
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
5 additions
and
2 deletions
data-sets/get-hga-training-test-py27-v1.py
data-sets/get-hga-training-test-py27-v2.py
data-sets/get-hga-training-test-py27-v1.py
View file @
ab0e732
...
...
@@ -94,6 +94,7 @@ if __name__ == "__main__":
# Fill sequence with X char to get max length
# One-hot-encoding of sequences
sequences_adjust
=
[]
for
sequence
,
label
in
zip
(
sequences
,
labels
):
if
len
(
sequence
)
<
max_length
:
# print("sequence: {}".format(sequence))
...
...
@@ -102,6 +103,7 @@ if __name__ == "__main__":
# print("sequence_adjust: {}".format(sequence_adjust))
else
:
sequence_adjust
=
sequence
+
'ACGTX'
sequences_adjust
.
append
(
sequence_adjust
)
print
(
"Length sequence_adjust: {}"
.
format
(
len
(
sequence_adjust
)))
integer_encoded
=
integer_encoder
.
fit_transform
(
list
(
sequence_adjust
))
print
(
"integer_encoded.classes_: {}"
.
format
(
integer_encoder
.
classes_
))
...
...
@@ -115,7 +117,8 @@ if __name__ == "__main__":
np
.
set_printoptions
(
threshold
=
40
)
input_features
=
np
.
stack
(
input_features
)
print
(
"Example sequence
\n
-----------------------"
)
print
(
'DNA Sequence #1:
\n
'
,
sequences
[
0
][:
10
],
'...'
,
sequences
[
0
][
-
10
:])
# print('DNA Sequence #1:\n', sequences[0][:10], '...', sequences[0][-10:])
print
(
'DNA Sequence #1:
\n
'
,
sequences_adjust
[
0
][:
10
],
'...'
,
sequences_adjust
[
0
][
-
10
:])
print
(
'One hot encoding of Sequence #1:
\n
'
,
input_features
[
0
]
.
T
)
# One-hot-encoding of labels
...
...
data-sets/get-hga-training-test-py27-v2.py
View file @
ab0e732
...
...
@@ -24,7 +24,7 @@
# --outputTraining hga-sequences-training.txt
# --outputTest hga-sequences-test.txt
# --outputPath /mnt/Genoma/amedina/cmendez/gitlab-deep-learning-workshop/data-sets/human-genome-annotation
# python get-hga-training-test-py27-v2.py --inputFile hga-sequences-
toy
.txt --inputPath /mnt/Genoma/amedina/cmendez/gitlab-deep-learning-workshop/data-sets/human-genome-annotation --outputTraining hga-sequences-training.txt --outputTest hga-sequences-test.txt --outputPath /mnt/Genoma/amedina/cmendez/gitlab-deep-learning-workshop/data-sets/human-genome-annotation
# python get-hga-training-test-py27-v2.py --inputFile hga-sequences-
1000
.txt --inputPath /mnt/Genoma/amedina/cmendez/gitlab-deep-learning-workshop/data-sets/human-genome-annotation --outputTraining hga-sequences-training.txt --outputTest hga-sequences-test.txt --outputPath /mnt/Genoma/amedina/cmendez/gitlab-deep-learning-workshop/data-sets/human-genome-annotation
import
argparse
import
pandas
as
pd
...
...
Please
register
or
login
to post a comment