Showing 2 changed files with 5 additions and 2 deletions
... | @@ -94,6 +94,7 @@ if __name__ == "__main__": | ... | @@ -94,6 +94,7 @@ if __name__ == "__main__": |
94 | 94 | ||
95 | # Fill sequence with X char to get max length | 95 | # Fill sequence with X char to get max length |
96 | # One-hot-encoding of sequences | 96 | # One-hot-encoding of sequences |
97 | + sequences_adjust = [] | ||
97 | for sequence, label in zip(sequences, labels): | 98 | for sequence, label in zip(sequences, labels): |
98 | if len(sequence) < max_length: | 99 | if len(sequence) < max_length: |
99 | # print("sequence: {}".format(sequence)) | 100 | # print("sequence: {}".format(sequence)) |
... | @@ -102,6 +103,7 @@ if __name__ == "__main__": | ... | @@ -102,6 +103,7 @@ if __name__ == "__main__": |
102 | # print("sequence_adjust: {}".format(sequence_adjust)) | 103 | # print("sequence_adjust: {}".format(sequence_adjust)) |
103 | else: | 104 | else: |
104 | sequence_adjust = sequence + 'ACGTX' | 105 | sequence_adjust = sequence + 'ACGTX' |
106 | + sequences_adjust.append(sequence_adjust) | ||
105 | print("Length sequence_adjust: {}".format(len(sequence_adjust))) | 107 | print("Length sequence_adjust: {}".format(len(sequence_adjust))) |
106 | integer_encoded = integer_encoder.fit_transform(list(sequence_adjust)) | 108 | integer_encoded = integer_encoder.fit_transform(list(sequence_adjust)) |
107 | print("integer_encoded.classes_: {}".format(integer_encoder.classes_)) | 109 | print("integer_encoded.classes_: {}".format(integer_encoder.classes_)) |
... | @@ -115,7 +117,8 @@ if __name__ == "__main__": | ... | @@ -115,7 +117,8 @@ if __name__ == "__main__": |
115 | np.set_printoptions(threshold=40) | 117 | np.set_printoptions(threshold=40) |
116 | input_features = np.stack(input_features) | 118 | input_features = np.stack(input_features) |
117 | print("Example sequence\n-----------------------") | 119 | print("Example sequence\n-----------------------") |
118 | - print('DNA Sequence #1:\n', sequences[0][:10], '...', sequences[0][-10:]) | 120 | + # print('DNA Sequence #1:\n', sequences[0][:10], '...', sequences[0][-10:]) |
121 | + print('DNA Sequence #1:\n', sequences_adjust[0][:10], '...', sequences_adjust[0][-10:]) | ||
119 | print('One hot encoding of Sequence #1:\n', input_features[0].T) | 122 | print('One hot encoding of Sequence #1:\n', input_features[0].T) |
120 | 123 | ||
121 | # One-hot-encoding of labels | 124 | # One-hot-encoding of labels | ... | ... |
... | @@ -24,7 +24,7 @@ | ... | @@ -24,7 +24,7 @@ |
24 | # --outputTraining hga-sequences-training.txt | 24 | # --outputTraining hga-sequences-training.txt |
25 | # --outputTest hga-sequences-test.txt | 25 | # --outputTest hga-sequences-test.txt |
26 | # --outputPath /mnt/Genoma/amedina/cmendez/gitlab-deep-learning-workshop/data-sets/human-genome-annotation | 26 | # --outputPath /mnt/Genoma/amedina/cmendez/gitlab-deep-learning-workshop/data-sets/human-genome-annotation |
27 | -# python get-hga-training-test-py27-v2.py --inputFile hga-sequences-toy.txt --inputPath /mnt/Genoma/amedina/cmendez/gitlab-deep-learning-workshop/data-sets/human-genome-annotation --outputTraining hga-sequences-training.txt --outputTest hga-sequences-test.txt --outputPath /mnt/Genoma/amedina/cmendez/gitlab-deep-learning-workshop/data-sets/human-genome-annotation | 27 | +# python get-hga-training-test-py27-v2.py --inputFile hga-sequences-1000.txt --inputPath /mnt/Genoma/amedina/cmendez/gitlab-deep-learning-workshop/data-sets/human-genome-annotation --outputTraining hga-sequences-training.txt --outputTest hga-sequences-test.txt --outputPath /mnt/Genoma/amedina/cmendez/gitlab-deep-learning-workshop/data-sets/human-genome-annotation |
28 | 28 | ||
29 | import argparse | 29 | import argparse |
30 | import pandas as pd | 30 | import pandas as pd | ... | ... |
-
Please register or login to post a comment