Showing 1 changed file with 16 additions and 2 deletions
... | @@ -77,18 +77,32 @@ if __name__ == "__main__": | ... | @@ -77,18 +77,32 @@ if __name__ == "__main__": |
77 | sequences = df_filtered['sequence'] | 77 | sequences = df_filtered['sequence'] |
78 | labels = df_filtered['label'] | 78 | labels = df_filtered['label'] |
79 | 79 | ||
80 | + count_exon = 0 | ||
81 | + count_utr = 0 | ||
82 | + total_exon_length = 0 | ||
83 | + total_utr_length = 0 | ||
80 | max_exon_length = 0 | 84 | max_exon_length = 0 |
81 | max_utr_length = 0 | 85 | max_utr_length = 0 |
82 | # Getting the max length of sequences | 86 | # Getting the max length of sequences |
83 | for sequence, label in zip(sequences, labels): | 87 | for sequence, label in zip(sequences, labels): |
84 | if label == "exon": | 88 | if label == "exon": |
89 | + count_exon += 1 | ||
90 | + total_exon_length += total_exon_length | ||
85 | if len(sequence) > max_exon_length: | 91 | if len(sequence) > max_exon_length: |
86 | max_exon_length = len(sequence) | 92 | max_exon_length = len(sequence) |
87 | elif label == "utr": | 93 | elif label == "utr": |
94 | + count_utr += 1 | ||
95 | + total_utr_length += total_utr_length | ||
88 | if len(sequence) > max_utr_length: | 96 | if len(sequence) > max_utr_length: |
89 | max_utr_length = len(sequence) | 97 | max_utr_length = len(sequence) |
90 | print("Max exon length: {}".format(max_exon_length)) | 98 | print("Max exon length: {}".format(max_exon_length)) |
91 | print("Max utr length: {}".format(max_utr_length)) | 99 | print("Max utr length: {}".format(max_utr_length)) |
100 | + print("Count exon: {}".format(count_exon)) | ||
101 | + print("Count utr: {}".format(count_utr)) | ||
102 | + print("Total exon length: {}".format(total_exon_length)) | ||
103 | + print("Total utr length: {}".format(total_utr_length)) | ||
104 | + | ||
105 | + quit() | ||
92 | 106 | ||
93 | if max_exon_length > max_utr_length: | 107 | if max_exon_length > max_utr_length: |
94 | max_length = max_exon_length | 108 | max_length = max_exon_length |
... | @@ -108,9 +122,9 @@ if __name__ == "__main__": | ... | @@ -108,9 +122,9 @@ if __name__ == "__main__": |
108 | else: | 122 | else: |
109 | sequence_adjust = sequence + 'ACGTX' | 123 | sequence_adjust = sequence + 'ACGTX' |
110 | sequences_adjust.append(sequence_adjust) | 124 | sequences_adjust.append(sequence_adjust) |
111 | - print("Length sequence_adjust: {}".format(len(sequence_adjust))) | 125 | + # print("Length sequence_adjust: {}".format(len(sequence_adjust))) |
112 | integer_encoded = integer_encoder.fit_transform(list(sequence_adjust)) | 126 | integer_encoded = integer_encoder.fit_transform(list(sequence_adjust)) |
113 | - print("integer_encoded.classes_: {}".format(integer_encoder.classes_)) | 127 | + # print("integer_encoded.classes_: {}".format(integer_encoder.classes_)) |
114 | integer_encoded = np.array(integer_encoded).reshape(-1, 1) | 128 | integer_encoded = np.array(integer_encoded).reshape(-1, 1) |
115 | # print("integer_encoded: {}".format(integer_encoded)) | 129 | # print("integer_encoded: {}".format(integer_encoded)) |
116 | one_hot_encoded = one_hot_encoder.fit_transform(integer_encoded) | 130 | one_hot_encoded = one_hot_encoder.fit_transform(integer_encoded) | ... | ... |
-
Please register or login to post a comment