Showing 1 changed file with 7 additions and 6 deletions
... | @@ -86,15 +86,16 @@ if __name__ == "__main__": | ... | @@ -86,15 +86,16 @@ if __name__ == "__main__": |
86 | print("Max exon length: {}".format(max_exon_length)) | 86 | print("Max exon length: {}".format(max_exon_length)) |
87 | print("Max utr length: {}".format(max_utr_length)) | 87 | print("Max utr length: {}".format(max_utr_length)) |
88 | 88 | ||
89 | + if max_exon_length > max_utr_length: | ||
90 | + max_length = max_exon_length | ||
91 | + else: | ||
92 | + max_length = max_utr_length | ||
93 | + | ||
89 | # Fill sequence with X char to get max length | 94 | # Fill sequence with X char to get max length |
90 | # One-hot-encoding of sequences | 95 | # One-hot-encoding of sequences |
91 | for sequence, label in zip(sequences, labels): | 96 | for sequence, label in zip(sequences, labels): |
92 | - if label == "exon": | 97 | + if len(sequence) < max_length: |
93 | - if len(sequence) < max_exon_length: | 98 | + sequence_adjust = sequence.ljust(max_length + len(sequence), 'X') |
94 | - sequence_adjust = sequence.ljust(max_exon_length + len(sequence), 'X') | ||
95 | - elif label == "utr": | ||
96 | - if len(sequence) < max_utr_length: | ||
97 | - sequence_adjust = sequence.ljust(max_utr_length + len(sequence), 'X') | ||
98 | print("Length sequence_adjust: {}".format(len(sequence_adjust))) | 99 | print("Length sequence_adjust: {}".format(len(sequence_adjust))) |
99 | integer_encoded = integer_encoder.fit_transform(list(sequence_adjust)) | 100 | integer_encoded = integer_encoder.fit_transform(list(sequence_adjust)) |
100 | integer_encoded = np.array(integer_encoded).reshape(-1, 1) | 101 | integer_encoded = np.array(integer_encoded).reshape(-1, 1) | ... | ... |
-
Please register or login to post a comment