# get-hga-data-set.py
# 789 Bytes
# Build the source data set by combining the Human Genome Annotation data set (CSV)
# with FASTA files, to obtain the sequence corresponding to each object in the human
# genome using the "start" and "end" columns from human-genome-annotation.
# Input files:
# FASTA all chromosomes: /home/cmendezc/data-FASTA-Homo_sapiens.GRCh38.dna
# Output tab-separated format:
# Start End Sequence Feature
import argparse
def build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for this script.

    Returns:
        An ``ArgumentParser`` with a single required ``--fastaPath`` option
        whose value is stored as a string in ``args.fastaPath``.
    """
    parser = argparse.ArgumentParser(
        description='Get source data set for Human Genome Annotation.')
    # The option must consume a value (the default "store" action). The
    # previous store_const/sum/max settings came from the argparse docs
    # example and made it impossible to pass a path at all.
    parser.add_argument('--fastaPath', dest='fastaPath', required=True,
                        help='Path to the FASTA files (all chromosomes).')
    return parser


if __name__ == "__main__":
    args = build_parser().parse_args()
    # The former print(args.accumulate(args.integers)) referenced attributes
    # this parser never defines and always raised AttributeError; report the
    # parsed path instead.
    print(args.fastaPath)