# get-hga-data-set.py

# Build the source data set by combining the Human Genome Annotation data set
# (CSV) with FASTA files: the "start" and "end" columns of each annotation row
# select the sequence of the corresponding object in the human genome.

# Input files:
# FASTA all chromosomes: /home/cmendezc/data-FASTA-Homo_sapiens.GRCh38.dna

# Output tab-separated format:
# Start End Sequence Feature

import argparse

# Location of the all-chromosomes FASTA data named in the file header; used
# when --fastaPath is not given on the command line.
DEFAULT_FASTA_PATH = '/home/cmendezc/data-FASTA-Homo_sapiens.GRCh38.dna'


def build_parser():
    """Return the command-line parser for this script.

    The parser accepts a single optional ``--fastaPath`` option whose value
    is stored as a string in ``fastaPath``; it defaults to
    ``DEFAULT_FASTA_PATH``.
    """
    parser = argparse.ArgumentParser(
        description='Get source data set for Human Genome Annotation.')
    # The option previously used action='store_const' with const=sum and
    # default=max (left over from the argparse tutorial), so it could never
    # carry a path. It now takes one string value.
    parser.add_argument('--fastaPath', dest='fastaPath',
                        default=DEFAULT_FASTA_PATH,
                        help='path to the FASTA files for all chromosomes '
                             '(default: %(default)s)')
    return parser


def main(argv=None):
    """Parse the command line and print the FASTA path that will be used.

    Args:
        argv: Argument list to parse; ``None`` makes argparse read
            ``sys.argv[1:]``.
    """
    args = build_parser().parse_args(argv)
    # NOTE(review): the annotation/FASTA join described in the file header is
    # not implemented in the visible code; only the parsed path is reported.
    print(args.fastaPath)


if __name__ == "__main__":
    main()