Carlos-Francisco Méndez-Cruz

Deep Learning Workshop

......@@ -77,18 +77,32 @@ if __name__ == "__main__":
# Collect per-label statistics (count, total length, max length) for the
# "exon" and "utr" sequences of the filtered data frame and print them.
sequences = df_filtered['sequence']
labels = df_filtered['label']
count_exon = 0
count_utr = 0
total_exon_length = 0
total_utr_length = 0
max_exon_length = 0
max_utr_length = 0
# Single pass: count, sum lengths and track the longest sequence per label.
# Rows whose label is neither "exon" nor "utr" are ignored.
for sequence, label in zip(sequences, labels):
    if label == "exon":
        sequence_length = len(sequence)
        count_exon += 1
        # Add the length of this sequence to the running total.
        total_exon_length += sequence_length
        if sequence_length > max_exon_length:
            max_exon_length = sequence_length
    elif label == "utr":
        sequence_length = len(sequence)
        count_utr += 1
        # Add the length of this sequence to the running total.
        total_utr_length += sequence_length
        if sequence_length > max_utr_length:
            max_utr_length = sequence_length
print("Max exon length: {}".format(max_exon_length))
print("Max utr length: {}".format(max_utr_length))
print("Count exon: {}".format(count_exon))
print("Count utr: {}".format(count_utr))
print("Total exon length: {}".format(total_exon_length))
print("Total utr length: {}".format(total_utr_length))
# NOTE(review): the script stops here after reporting the statistics, so the
# statements that follow are not executed while this call is present.
quit()
if max_exon_length > max_utr_length:
max_length = max_exon_length
......@@ -108,9 +122,9 @@ if __name__ == "__main__":
else:
sequence_adjust = sequence + 'ACGTX'
sequences_adjust.append(sequence_adjust)
print("Length sequence_adjust: {}".format(len(sequence_adjust)))
# print("Length sequence_adjust: {}".format(len(sequence_adjust)))
integer_encoded = integer_encoder.fit_transform(list(sequence_adjust))
print("integer_encoded.classes_: {}".format(integer_encoder.classes_))
# print("integer_encoded.classes_: {}".format(integer_encoder.classes_))
integer_encoded = np.array(integer_encoded).reshape(-1, 1)
# print("integer_encoded: {}".format(integer_encoded))
one_hot_encoded = one_hot_encoder.fit_transform(integer_encoded)
......