Toggle navigation
Toggle navigation
This project
Loading...
Sign in
Carlos-Francisco Méndez-Cruz
/
deep-learning-workshop
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Snippets
Network
Create a new issue
Builds
Commits
Issue Boards
Authored by
Carlos-Francisco Méndez-Cruz
2019-05-08 18:08:29 -0500
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
4d536d405a43ac0f5ddefc59712ab3dd17846e2a
4d536d40
1 parent
dc0e4cbb
Deep Learning Workshop
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
3 additions
and
1 deletions
data-sets/get-hga-sequences-py3.py
data-sets/get-hga-sequences-py3.py
View file @
4d536d4
...
...
@@ -86,7 +86,8 @@ if __name__ == "__main__":
for
row
in
reader
:
# print(row)
filename
=
os
.
path
.
join
(
args
.
fastaPath
,
"Homo_sapiens.GRCh38.dna.chromosome.{}.fa"
.
format
(
row
[
'seqname'
]))
# We use only
if
row
[
'feature'
]
in
[
"exon"
,
"five_prime_utr"
,
"three_prime_utr"
]:
# We use only exon, five_prime_utr, and three_prime_utr
sequence
=
get_sequence
(
filename
,
int
(
row
[
'start'
]),
int
(
row
[
'end'
]))
# Features in HGA:
# exon
...
...
@@ -115,6 +116,7 @@ if __name__ == "__main__":
print
(
"{} rows processed."
.
format
(
i
))
if
i
==
10000
:
break
print
(
"Count exons {} and utr {}"
.
format
(
total_exon
,
total_utr
))
print
(
"Length media exons {} and utr {}"
.
format
(
length_total_exon
/
total_exon
,
length_total_utr
/
total_utr
))
with
open
(
os
.
path
.
join
(
args
.
outputPath
,
args
.
outputFile
),
mode
=
"w"
)
as
oFile
:
oFile
.
write
(
"seqname
\t
start
\t
end
\t
length
\t
sequence
\t
label
\n
"
)
...
...
Please
register
or
login
to post a comment