Carlos-Francisco Méndez-Cruz

Deep Learning Workshop

@@ -86,18 +86,17 @@ if __name__ == "__main__":
     print("Max exon length: {}".format(max_exon_length))
     print("Max utr length: {}".format(max_utr_length))
 
-    quit()
-
     # Fill sequence with X char to get max length
     # One-hot-encoding of sequences
     for sequence, label in zip(sequences, labels):
         if label == "exon":
             if len(sequence) < max_exon_length:
-                sequence.ljust(max_exon_length + len(sequence), 'X')
+                sequence_adjust = sequence.ljust(max_exon_length + len(sequence), 'X')
         elif label == "utr":
             if len(sequence) < max_utr_length:
-                sequence.ljust(max_utr_length + len(sequence), 'X')
+                sequence_adjust = sequence.ljust(max_utr_length + len(sequence), 'X')
-        integer_encoded = integer_encoder.fit_transform(list(sequence))
+        print("Length sequence_adjust: {}".format(len(sequence_adjust)))
+        integer_encoded = integer_encoder.fit_transform(list(sequence_adjust))
         integer_encoded = np.array(integer_encoded).reshape(-1, 1)
         one_hot_encoded = one_hot_encoder.fit_transform(integer_encoded)
         input_features.append(one_hot_encoded.toarray())
......