Carlos-Francisco Méndez-Cruz

Deep Learning Workshop

...@@ -86,15 +86,16 @@ if __name__ == "__main__": ...@@ -86,15 +86,16 @@ if __name__ == "__main__":
86 print("Max exon length: {}".format(max_exon_length)) 86 print("Max exon length: {}".format(max_exon_length))
87 print("Max utr length: {}".format(max_utr_length)) 87 print("Max utr length: {}".format(max_utr_length))
88 88
89 + if max_exon_length > max_utr_length:
90 + max_length = max_exon_length
91 + else:
92 + max_length = max_utr_length
93 +
89 # Fill sequence with X char to get max length 94 # Fill sequence with X char to get max length
90 # One-hot-encoding of sequences 95 # One-hot-encoding of sequences
91 for sequence, label in zip(sequences, labels): 96 for sequence, label in zip(sequences, labels):
92 - if label == "exon": 97 + if len(sequence) < max_length:
93 - if len(sequence) < max_exon_length: 98 + sequence_adjust = sequence.ljust(max_length + len(sequence), 'X')
94 - sequence_adjust = sequence.ljust(max_exon_length + len(sequence), 'X')
95 - elif label == "utr":
96 - if len(sequence) < max_utr_length:
97 - sequence_adjust = sequence.ljust(max_utr_length + len(sequence), 'X')
98 print("Length sequence_adjust: {}".format(len(sequence_adjust))) 99 print("Length sequence_adjust: {}".format(len(sequence_adjust)))
99 integer_encoded = integer_encoder.fit_transform(list(sequence_adjust)) 100 integer_encoded = integer_encoder.fit_transform(list(sequence_adjust))
100 integer_encoded = np.array(integer_encoded).reshape(-1, 1) 101 integer_encoded = np.array(integer_encoded).reshape(-1, 1)
......