Deep Learning Workshop

Carlos-Francisco Méndez-Cruz
Commit ab0e732639cb3824627a62d5239eab62a023b821 ab0e7326 1 parent e3482dca
Showing 2 changed files with 5 additions and 2 deletions
data-sets/get-hga-training-test-py27-v1.py
data-sets/get-hga-training-test-py27-v2.py
--- a/data-sets/get-hga-training-test-py27-v1.py
View file @ab0e732
+++ b/data-sets/get-hga-training-test-py27-v1.py
View file @ab0e732
@@ -94,6 +94,7 @@ if __name__ == "__main__":
     # Pad each sequence with the 'X' character until it reaches max length
     # One-hot-encoding of sequences
+    sequences_adjust = []
     for sequence, label in zip(sequences, labels):
         if len(sequence) < max_length:
             # print("sequence: {}".format(sequence))
@@ -102,6 +103,7 @@ if __name__ == "__main__":
             # print("sequence_adjust: {}".format(sequence_adjust))
         else:
             sequence_adjust = sequence + 'ACGTX'
+        sequences_adjust.append(sequence_adjust)
         print("Length sequence_adjust: {}".format(len(sequence_adjust)))
         integer_encoded = integer_encoder.fit_transform(list(sequence_adjust))
         print("integer_encoded.classes_: {}".format(integer_encoder.classes_))
@@ -115,7 +117,8 @@ if __name__ == "__main__":
     np.set_printoptions(threshold=40)
     input_features = np.stack(input_features)
     print("Example sequence\n-----------------------")
-    print('DNA Sequence #1:\n', sequences[0][:10], '...', sequences[0][-10:])
+    # print('DNA Sequence #1:\n', sequences[0][:10], '...', sequences[0][-10:])
+    print('DNA Sequence #1:\n', sequences_adjust[0][:10], '...', sequences_adjust[0][-10:])
     print('One hot encoding of Sequence #1:\n', input_features[0].T)
     # One-hot-encoding of labels
--- a/data-sets/get-hga-training-test-py27-v2.py
View file @ab0e732
+++ b/data-sets/get-hga-training-test-py27-v2.py
View file @ab0e732
@@ -24,7 +24,7 @@
 # --outputTraining hga-sequences-training.txt
 # --outputTest hga-sequences-test.txt
 # --outputPath /mnt/Genoma/amedina/cmendez/gitlab-deep-learning-workshop/data-sets/human-genome-annotation
-# python get-hga-training-test-py27-v2.py --inputFile hga-sequences-toy.txt --inputPath /mnt/Genoma/amedina/cmendez/gitlab-deep-learning-workshop/data-sets/human-genome-annotation --outputTraining hga-sequences-training.txt --outputTest hga-sequences-test.txt --outputPath /mnt/Genoma/amedina/cmendez/gitlab-deep-learning-workshop/data-sets/human-genome-annotation
+# python get-hga-training-test-py27-v2.py --inputFile hga-sequences-1000.txt --inputPath /mnt/Genoma/amedina/cmendez/gitlab-deep-learning-workshop/data-sets/human-genome-annotation --outputTraining hga-sequences-training.txt --outputTest hga-sequences-test.txt --outputPath /mnt/Genoma/amedina/cmendez/gitlab-deep-learning-workshop/data-sets/human-genome-annotation
 import argparse
 import pandas as pd