Deep Learning Workshop

Carlos-Francisco Méndez-Cruz
Commit 7be1f7bc45b74b64f9ea5fe33d8b9a132cc89bd7 7be1f7bc 1 parent 381b4a02
Showing 3 changed files with 13 additions and 12 deletions
data-sets/get-hga-sequences.py
data-sets/get-hga-training-test.py
data-sets/human-genome-annotation/hga-sequences.txt → data-sets/human-genome-annotation/hga-sequences-toy.txt
--- a/data-sets/get-hga-sequences.py
View file @7be1f7b
+++ b/data-sets/get-hga-sequences.py
View file @7be1f7b
@@ -13,21 +13,21 @@
 # Run:
 # c:\Anaconda3\python get-hga-data-set.py
 # --feature gene
- # --outputFile hga-sequences.txt
+ # --outputFile hga-sequences-toy.txt
 # --outputPath C:\Users\cmendezc\Documents\GENOMICAS\DEEP_LEARNING\gitlab-deep-learning-workshop\data-sets\human-genome-annotation
 # --hgaFile some-rows-example-human-genome-annotation.csv
 # --hgaPath C:\Users\cmendezc\Documents\GENOMICAS\DEEP_LEARNING\gitlab-deep-learning-workshop\data-sets\human-genome-annotation
 # --fastaPath C:\Users\cmendezc\Documents\GENOMICAS\DEEP_LEARNING\gitlab-deep-learning-workshop\data-sets\fasta-files
- # c:\Anaconda3\python get-hga-data-set.py --feature gene --outputFile hga-sequences.txt --outputPath C:\Users\cmendezc\Documents\GENOMICAS\DEEP_LEARNING\gitlab-deep-learning-workshop\data-sets\human-genome-annotation --hgaFile some-rows-example-human-genome-annotation.csv --hgaPath C:\Users\cmendezc\Documents\GENOMICAS\DEEP_LEARNING\gitlab-deep-learning-workshop\data-sets\human-genome-annotation --fastaPath C:\Users\cmendezc\Documents\GENOMICAS\DEEP_LEARNING\gitlab-deep-learning-workshop\data-sets\fasta-files
+ # c:\Anaconda3\python get-hga-data-set.py --feature gene --outputFile hga-sequences-toy.txt --outputPath C:\Users\cmendezc\Documents\GENOMICAS\DEEP_LEARNING\gitlab-deep-learning-workshop\data-sets\human-genome-annotation --hgaFile some-rows-example-human-genome-annotation.csv --hgaPath C:\Users\cmendezc\Documents\GENOMICAS\DEEP_LEARNING\gitlab-deep-learning-workshop\data-sets\human-genome-annotation --fastaPath C:\Users\cmendezc\Documents\GENOMICAS\DEEP_LEARNING\gitlab-deep-learning-workshop\data-sets\fasta-files
 
 # python3 get-hga-sequences.py
 # --feature gene
- # --outputFile hga-sequences.txt
+ # --outputFile hga-sequences-toy.txt
 # --outputPath /home/cmendezc/gitlab-deep-learning-workshop/data-sets/human-genome-annotation
 # --hgaFile Homo_sapiens.GRCh38.92.csv
 # --hgaPath /home/cmendezc/gitlab-deep-learning-workshop/data-sets/human-genome-annotation
 # --fastaPath /home/cmendezc/data-FASTA-Homo_sapiens.GRCh38.dna
- # python3 get-hga-sequences.py --feature gene --outputFile hga-sequences.txt --outputPath /home/cmendezc/gitlab-deep-learning-workshop/data-sets/human-genome-annotation --hgaFile Homo_sapiens.GRCh38.92.csv --hgaPath /home/cmendezc/gitlab-deep-learning-workshop/data-sets/human-genome-annotation --fastaPath /home/cmendezc/data-FASTA-Homo_sapiens.GRCh38.dna
+ # python3 get-hga-sequences.py --feature gene --outputFile hga-sequences-toy.txt --outputPath /home/cmendezc/gitlab-deep-learning-workshop/data-sets/human-genome-annotation --hgaFile Homo_sapiens.GRCh38.92.csv --hgaPath /home/cmendezc/gitlab-deep-learning-workshop/data-sets/human-genome-annotation --fastaPath /home/cmendezc/data-FASTA-Homo_sapiens.GRCh38.dna
 
 import argparse
 # from Bio import SeqIO
--- a/data-sets/get-hga-training-test.py
View file @7be1f7b
+++ b/data-sets/get-hga-training-test.py
View file @7be1f7b
@@ -2,18 +2,19 @@
 # obtained from FASTA and HGA data sets (see script get-hga-sequences.py)
 
 # Input tab-separated format:
- # Sequences: hga-sequences.txt
+ # Sequences: hga-sequences-toy.txt
 
 # Output one-hot encoding format:
 # Each sequence as a one-hot encoded array or matrix (TODO: specify which)
 
 # Run:
- # c:\Anaconda3\python get-hga-training-test.py
- # --inputFile hga-sequences.txt
- # --inputPath C:\Users\cmendezc\Documents\GENOMICAS\DEEP_LEARNING\gitlab-deep-learning-workshop\data-sets\human-genome-annotation
- # --outputTrainFile hga-sequences-training.txt
- # --outputTestFile hga-sequences-test.txt
- # --outputPath C:\Users\cmendezc\Documents\GENOMICAS\DEEP_LEARNING\gitlab-deep-learning-workshop\data-sets\human-genome-annotation
+ # python3 get-hga-training-test.py
+ # --inputFile hga-sequences-toy.txt
+ # --inputPath /home/cmendezc/gitlab-deep-learning-workshop/data-sets/human-genome-annotation
+ # --outputTraining hga-sequences-training.txt
+ # --outputTest hga-sequences-test.txt
+ # --outputPath /home/cmendezc/gitlab-deep-learning-workshop/data-sets/human-genome-annotation
+ # python3 get-hga-training-test.py --inputFile hga-sequences-toy.txt --inputPath /home/cmendezc/gitlab-deep-learning-workshop/data-sets/human-genome-annotation --outputTraining hga-sequences-training.txt --outputTest hga-sequences-test.txt --outputPath /home/cmendezc/gitlab-deep-learning-workshop/data-sets/human-genome-annotation
 
 import argparse
 import pandas as pd
@@ -54,7 +55,7 @@ if __name__ == "__main__":
     sequences = []
 
     # Read file with sequences
-     with open(os.path.join(args.inputFile, args.inputPath), mode="r", encoding="utf-8") as tabfile:
+     with open(os.path.join(args.inputPath, args.inputFile), mode="r", encoding="utf-8") as tabfile:
         df = pd.read_csv(tabfile, delimiter='\t')
         sequences = df['sequence']
         labels = df['label']
--- a/data-sets/human-genome-annotation/hga-sequences.txt → data-sets/human-genome-annotation/hga-sequences-toy.txt
View file @7be1f7b
+++ b/data-sets/human-genome-annotation/hga-sequences.txt → data-sets/human-genome-annotation/hga-sequences-toy.txt
View file @7be1f7b