Showing
1 changed file
with
16 additions
and
1 deletion
... | @@ -80,9 +80,22 @@ if __name__ == "__main__": | ... | @@ -80,9 +80,22 @@ if __name__ == "__main__": |
80 | for row in reader: | 80 | for row in reader: |
81 | # print(row) | 81 | # print(row) |
82 | filename = os.path.join(args.fastaPath, "Homo_sapiens.GRCh38.dna.chromosome.{}.fa".format(row['seqname'])) | 82 | filename = os.path.join(args.fastaPath, "Homo_sapiens.GRCh38.dna.chromosome.{}.fa".format(row['seqname'])) |
83 | + # We use only | ||
83 | sequence = get_sequence(filename, int(row['start']), int(row['end'])) | 84 | sequence = get_sequence(filename, int(row['start']), int(row['end'])) |
84 | - if row['feature'] == args.feature: | 85 | + # Features in HGA: |
86 | + # exon | ||
87 | + # feature | ||
88 | + # five_prime_utr | ||
89 | + # gene | ||
90 | + # Selenocysteine | ||
91 | + # start_codon | ||
92 | + # stop_codon | ||
93 | + # three_prime_utr | ||
94 | + | ||
95 | + if row['feature'] == "exon": | ||
85 | label = row['feature'] | 96 | label = row['feature'] |
97 | + elif row['feature'] in ["five_prime_utr", "three_prime_utr"]: | ||
98 | + label = "utr" | ||
86 | else: | 99 | else: |
87 | label = "other" | 100 | label = "other" |
88 | new_row = "{}\t{}\t{}\t{}\t{}\n".format(row['seqname'], row['start'], row['end'], sequence, label) | 101 | new_row = "{}\t{}\t{}\t{}\t{}\n".format(row['seqname'], row['start'], row['end'], sequence, label) |
... | @@ -90,6 +103,8 @@ if __name__ == "__main__": | ... | @@ -90,6 +103,8 @@ if __name__ == "__main__": |
90 | i += 1 | 103 | i += 1 |
91 | if (i % 100) == 0: | 104 | if (i % 100) == 0: |
92 | print("{} rows processed.".format(i)) | 105 | print("{} rows processed.".format(i)) |
106 | + if i == 10000: | ||
107 | + break | ||
93 | 108 | ||
94 | with open(os.path.join(args.outputPath, args.outputFile), mode="w") as oFile: | 109 | with open(os.path.join(args.outputPath, args.outputFile), mode="w") as oFile: |
95 | oFile.write("seqname\tstart\tend\tsequence\tlabel\n") | 110 | oFile.write("seqname\tstart\tend\tsequence\tlabel\n") | ... | ... |
-
Please register or login to post a comment