Carlos-Francisco Méndez-Cruz

Deep Learning Workshop

...@@ -77,18 +77,32 @@ if __name__ == "__main__": ...@@ -77,18 +77,32 @@ if __name__ == "__main__":
77 sequences = df_filtered['sequence'] 77 sequences = df_filtered['sequence']
78 labels = df_filtered['label'] 78 labels = df_filtered['label']
79 79
80 + count_exon = 0
81 + count_utr = 0
82 + total_exon_length = 0
83 + total_utr_length = 0
80 max_exon_length = 0 84 max_exon_length = 0
81 max_utr_length = 0 85 max_utr_length = 0
82 # Getting the max length of sequences 86 # Getting the max length of sequences
83 for sequence, label in zip(sequences, labels): 87 for sequence, label in zip(sequences, labels):
84 if label == "exon": 88 if label == "exon":
89 + count_exon += 1
90 + total_exon_length += len(sequence)
85 if len(sequence) > max_exon_length: 91 if len(sequence) > max_exon_length:
86 max_exon_length = len(sequence) 92 max_exon_length = len(sequence)
87 elif label == "utr": 93 elif label == "utr":
94 + count_utr += 1
95 + total_utr_length += len(sequence)
88 if len(sequence) > max_utr_length: 96 if len(sequence) > max_utr_length:
89 max_utr_length = len(sequence) 97 max_utr_length = len(sequence)
90 print("Max exon length: {}".format(max_exon_length)) 98 print("Max exon length: {}".format(max_exon_length))
91 print("Max utr length: {}".format(max_utr_length)) 99 print("Max utr length: {}".format(max_utr_length))
100 + print("Count exon: {}".format(count_exon))
101 + print("Count utr: {}".format(count_utr))
102 + print("Total exon length: {}".format(total_exon_length))
103 + print("Total utr length: {}".format(total_utr_length))
104 +
105 + quit()
92 106
93 if max_exon_length > max_utr_length: 107 if max_exon_length > max_utr_length:
94 max_length = max_exon_length 108 max_length = max_exon_length
...@@ -108,9 +122,9 @@ if __name__ == "__main__": ...@@ -108,9 +122,9 @@ if __name__ == "__main__":
108 else: 122 else:
109 sequence_adjust = sequence + 'ACGTX' 123 sequence_adjust = sequence + 'ACGTX'
110 sequences_adjust.append(sequence_adjust) 124 sequences_adjust.append(sequence_adjust)
111 - print("Length sequence_adjust: {}".format(len(sequence_adjust))) 125 + # print("Length sequence_adjust: {}".format(len(sequence_adjust)))
112 integer_encoded = integer_encoder.fit_transform(list(sequence_adjust)) 126 integer_encoded = integer_encoder.fit_transform(list(sequence_adjust))
113 - print("integer_encoded.classes_: {}".format(integer_encoder.classes_)) 127 + # print("integer_encoded.classes_: {}".format(integer_encoder.classes_))
114 integer_encoded = np.array(integer_encoded).reshape(-1, 1) 128 integer_encoded = np.array(integer_encoded).reshape(-1, 1)
115 # print("integer_encoded: {}".format(integer_encoded)) 129 # print("integer_encoded: {}".format(integer_encoded))
116 one_hot_encoded = one_hot_encoder.fit_transform(integer_encoded) 130 one_hot_encoded = one_hot_encoder.fit_transform(integer_encoded)
......