Carlos-Francisco Méndez-Cruz

Deep Learning Workshop

......@@ -86,7 +86,8 @@ if __name__ == "__main__":
for row in reader:
# print(row)
filename = os.path.join(args.fastaPath, "Homo_sapiens.GRCh38.dna.chromosome.{}.fa".format(row['seqname']))
# We use only exon, five_prime_utr, and three_prime_utr features
if row['feature'] in ["exon", "five_prime_utr", "three_prime_utr"]:
# We use only exon, five_prime_utr, and three_prime_utr
sequence = get_sequence(filename, int(row['start']), int(row['end']))
# Features in HGA:
# exon
......@@ -115,6 +116,7 @@ if __name__ == "__main__":
print("{} rows processed.".format(i))
if i == 10000:
break
print("Count exons {} and utr {}".format(total_exon, total_utr))
print("Length media exons {} and utr {}".format(length_total_exon/total_exon, length_total_utr/total_utr))
with open(os.path.join(args.outputPath, args.outputFile), mode="w") as oFile:
oFile.write("seqname\tstart\tend\tlength\tsequence\tlabel\n")
......