Carlos-Francisco Méndez-Cruz

Deep Learning Workshop

......@@ -13,21 +13,21 @@
# Run:
# c:\Anaconda3\python get-hga-data-set.py
# --feature gene
# --outputFile hga-sequences.txt
# --outputFile hga-sequences-toy.txt
# --outputPath C:\Users\cmendezc\Documents\GENOMICAS\DEEP_LEARNING\gitlab-deep-learning-workshop\data-sets\human-genome-annotation
# --hgaFile some-rows-example-human-genome-annotation.csv
# --hgaPath C:\Users\cmendezc\Documents\GENOMICAS\DEEP_LEARNING\gitlab-deep-learning-workshop\data-sets\human-genome-annotation
# --fastaPath C:\Users\cmendezc\Documents\GENOMICAS\DEEP_LEARNING\gitlab-deep-learning-workshop\data-sets\fasta-files
# c:\Anaconda3\python get-hga-data-set.py --feature gene --outputFile hga-sequences.txt --outputPath C:\Users\cmendezc\Documents\GENOMICAS\DEEP_LEARNING\gitlab-deep-learning-workshop\data-sets\human-genome-annotation --hgaFile some-rows-example-human-genome-annotation.csv --hgaPath C:\Users\cmendezc\Documents\GENOMICAS\DEEP_LEARNING\gitlab-deep-learning-workshop\data-sets\human-genome-annotation --fastaPath C:\Users\cmendezc\Documents\GENOMICAS\DEEP_LEARNING\gitlab-deep-learning-workshop\data-sets\fasta-files
# c:\Anaconda3\python get-hga-data-set.py --feature gene --outputFile hga-sequences-toy.txt --outputPath C:\Users\cmendezc\Documents\GENOMICAS\DEEP_LEARNING\gitlab-deep-learning-workshop\data-sets\human-genome-annotation --hgaFile some-rows-example-human-genome-annotation.csv --hgaPath C:\Users\cmendezc\Documents\GENOMICAS\DEEP_LEARNING\gitlab-deep-learning-workshop\data-sets\human-genome-annotation --fastaPath C:\Users\cmendezc\Documents\GENOMICAS\DEEP_LEARNING\gitlab-deep-learning-workshop\data-sets\fasta-files
# python3 get-hga-sequences.py
# --feature gene
# --outputFile hga-sequences.txt
# --outputFile hga-sequences-toy.txt
# --outputPath /home/cmendezc/gitlab-deep-learning-workshop/data-sets/human-genome-annotation
# --hgaFile Homo_sapiens.GRCh38.92.csv
# --hgaPath /home/cmendezc/gitlab-deep-learning-workshop/data-sets/human-genome-annotation
# --fastaPath /home/cmendezc/data-FASTA-Homo_sapiens.GRCh38.dna
# python3 get-hga-sequences.py --feature gene --outputFile hga-sequences.txt --outputPath /home/cmendezc/gitlab-deep-learning-workshop/data-sets/human-genome-annotation --hgaFile Homo_sapiens.GRCh38.92.csv --hgaPath /home/cmendezc/gitlab-deep-learning-workshop/data-sets/human-genome-annotation --fastaPath /home/cmendezc/data-FASTA-Homo_sapiens.GRCh38.dna
# python3 get-hga-sequences.py --feature gene --outputFile hga-sequences-toy.txt --outputPath /home/cmendezc/gitlab-deep-learning-workshop/data-sets/human-genome-annotation --hgaFile Homo_sapiens.GRCh38.92.csv --hgaPath /home/cmendezc/gitlab-deep-learning-workshop/data-sets/human-genome-annotation --fastaPath /home/cmendezc/data-FASTA-Homo_sapiens.GRCh38.dna
import argparse
# from Bio import SeqIO
......
......@@ -2,18 +2,19 @@
# obtained from FASTA and HGA data sets (see script get-hga-sequences.py)
# Input tab-separated format:
# Sequences: hga-sequences.txt
# Sequences: hga-sequences-toy.txt
# Output one-hot encoding format:
# Each sequence as a one-hot encoding WHAT array or matrix
# Run:
# c:\Anaconda3\python get-hga-training-test.py
# --inputFile hga-sequences.txt
# --inputPath C:\Users\cmendezc\Documents\GENOMICAS\DEEP_LEARNING\gitlab-deep-learning-workshop\data-sets\human-genome-annotation
# --outputTrainFile hga-sequences-training.txt
# --outputTestFile hga-sequences-test.txt
# --outputPath C:\Users\cmendezc\Documents\GENOMICAS\DEEP_LEARNING\gitlab-deep-learning-workshop\data-sets\human-genome-annotation
# python3 get-hga-training-test.py
# --inputFile hga-sequences-toy.txt
# --inputPath /home/cmendezc/gitlab-deep-learning-workshop/data-sets/human-genome-annotation
# --outputTraining hga-sequences-training.txt
# --outputTest hga-sequences-test.txt
# --outputPath /home/cmendezc/gitlab-deep-learning-workshop/data-sets/human-genome-annotation
# python3 get-hga-training-test.py --inputFile hga-sequences-toy.txt --inputPath /home/cmendezc/gitlab-deep-learning-workshop/data-sets/human-genome-annotation --outputTraining hga-sequences-training.txt --outputTest hga-sequences-test.txt --outputPath /home/cmendezc/gitlab-deep-learning-workshop/data-sets/human-genome-annotation
import argparse
import pandas as pd
......@@ -54,7 +55,7 @@ if __name__ == "__main__":
sequences = []
# Read file with sequences
with open(os.path.join(args.inputFile, args.inputPath), mode="r", encoding="utf-8") as tabfile:
with open(os.path.join(args.inputPath, args.inputFile), mode="r", encoding="utf-8") as tabfile:
df = pd.read_csv(tabfile, delimiter='\t')
sequences = df['sequence']
labels = df['label']
......