Carlos-Francisco Méndez-Cruz

Deep Learning Workshop

......@@ -86,18 +86,17 @@ if __name__ == "__main__":
print("Max exon length: {}".format(max_exon_length))
print("Max utr length: {}".format(max_utr_length))
quit()
# Right-pad every sequence with 'X' up to the maximum length of its class
# ("exon" or "utr"), then one-hot encode the padded sequence and collect the
# resulting array in input_features.
#
# str.ljust takes the TOTAL target width, so the width passed is the class
# maximum itself (not maximum + current length). ljust returns the string
# unchanged when it is already at least that wide, so no length guard is
# needed and sequence_adjust is always bound for the current sequence.
pad_widths = {"exon": max_exon_length, "utr": max_utr_length}
for sequence, label in zip(sequences, labels):
    # A label other than "exon"/"utr" gets width 0, i.e. the sequence is used
    # unpadded instead of silently reusing the previous iteration's value.
    sequence_adjust = sequence.ljust(pad_widths.get(label, 0), 'X')
    print("Length sequence_adjust: {}".format(len(sequence_adjust)))
    # NOTE(review): both encoders are re-fitted on every sequence, so the
    # integer code / one-hot column assigned to a character depends on which
    # characters that particular sequence contains -- confirm whether fitting
    # once on the full alphabet was intended.
    integer_encoded = integer_encoder.fit_transform(list(sequence_adjust))
    # Column vector: one row per character, as a 2-D array for the encoder.
    integer_encoded = np.array(integer_encoded).reshape(-1, 1)
    one_hot_encoded = one_hot_encoder.fit_transform(integer_encoded)
    input_features.append(one_hot_encoded.toarray())
......