Toggle navigation
Toggle navigation
This project
Loading...
Sign in
Carlos-Francisco Méndez-Cruz
/
deep-learning-workshop
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Snippets
Network
Create a new issue
Builds
Commits
Issue Boards
Authored by
Carlos-Francisco Méndez-Cruz
2019-05-08 18:00:44 -0500
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
7a86d0757336936dcd51085fc006c90d192e51a4
7a86d075
1 parent
9f82e896
Deep Learning Workshop
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
16 additions
and
2 deletions
data-sets/get-hga-training-test-py27-v1.py
data-sets/get-hga-training-test-py27-v1.py
View file @
7a86d07
...
...
@@ -77,18 +77,32 @@ if __name__ == "__main__":
# Pull the sequence strings and their class labels out of the filtered frame.
# NOTE(review): df_filtered is built earlier in the script — presumably a
# pandas DataFrame with 'sequence' and 'label' columns; confirm upstream.
sequences = df_filtered['sequence']
labels = df_filtered['label']

# Per-class counters and length statistics for the two label classes.
count_exon = 0
count_utr = 0
total_exon_length = 0
total_utr_length = 0
max_exon_length = 0
max_utr_length = 0

# Getting the max length of sequences, plus per-class counts and total lengths.
for sequence, label in zip(sequences, labels):
    if label == "exon":
        count_exon += 1
        # BUG FIX: original read "total_exon_length += total_exon_length",
        # which doubles a zero-initialized accumulator and so stays 0 forever.
        # Accumulate the length of the current sequence instead.
        total_exon_length += len(sequence)
        if len(sequence) > max_exon_length:
            max_exon_length = len(sequence)
    elif label == "utr":
        count_utr += 1
        # BUG FIX: same self-addition defect as the exon branch.
        total_utr_length += len(sequence)
        if len(sequence) > max_utr_length:
            max_utr_length = len(sequence)

# Report the collected statistics.
print("Max exon length: {}".format(max_exon_length))
print("Max utr length: {}".format(max_utr_length))
print("Count exon: {}".format(count_exon))
print("Count utr: {}".format(count_utr))
print("Total exon length: {}".format(total_exon_length))
print("Total utr length: {}".format(total_utr_length))

# NOTE(review): quit() aborts the script right after printing the stats —
# this looks like temporary debug instrumentation; the rest of the pipeline
# (sequence padding / encoding below) never runs while it is in place.
quit()
if
max_exon_length
>
max_utr_length
:
max_length
=
max_exon_length
...
...
@@ -108,9 +122,9 @@ if __name__ == "__main__":
else
:
sequence_adjust
=
sequence
+
'ACGTX'
sequences_adjust
.
append
(
sequence_adjust
)
print
(
"Length sequence_adjust: {}"
.
format
(
len
(
sequence_adjust
)))
#
print("Length sequence_adjust: {}".format(len(sequence_adjust)))
integer_encoded
=
integer_encoder
.
fit_transform
(
list
(
sequence_adjust
))
print
(
"integer_encoded.classes_: {}"
.
format
(
integer_encoder
.
classes_
))
#
print("integer_encoded.classes_: {}".format(integer_encoder.classes_))
integer_encoded
=
np
.
array
(
integer_encoded
)
.
reshape
(
-
1
,
1
)
# print("integer_encoded: {}".format(integer_encoded))
one_hot_encoded
=
one_hot_encoder
.
fit_transform
(
integer_encoded
)
...
...
Please
register
or
log in
to post a comment