# soft-2-xml.py
# -*- coding: UTF-8 -*-

from optparse import OptionParser
import os
import sys

__author__ = 'CMendezC'

# Objective: convert soft file to XML file:
#   include headings, tags, substitute & and <

# Parameters:
#   1) --inputPath input path
#   2) --outputPath output path

# Output:
#   1) XML File with soft file content

# Execution:
# python soft-2-xml.py
# --inputPath C:\Users\cmendezc\Documents\GENOMICAS\gitlab_automatic-extraction-growth-conditions\data-sets\soft-data-additional
# --outputPath C:\Users\cmendezc\Documents\GENOMICAS\gitlab_automatic-extraction-growth-conditions\data-sets\xml-data
# python soft-2-xml.py --inputPath C:\Users\cmendezc\Documents\GENOMICAS\gitlab_automatic-extraction-growth-conditions\data-sets\soft-data --outputPath C:\Users\cmendezc\Documents\GENOMICAS\gitlab_automatic-extraction-growth-conditions\data-sets\xml-data
# Additional files
# python soft-2-xml.py --inputPath C:\Users\cmendezc\Documents\GENOMICAS\gitlab_automatic-extraction-growth-conditions\data-sets\soft-data-additional --outputPath C:\Users\cmendezc\Documents\GENOMICAS\gitlab_automatic-extraction-growth-conditions\data-sets\xml-data

###########################################################
#                       MAIN PROGRAM                      #
###########################################################

# Only "&" and "<" are escaped: they are the characters that cannot appear
# literally in XML character data. ">" and quotes are passed through unchanged.
_XML_ESCAPES = str.maketrans({"&": "&amp;", "<": "&lt;"})

# Wrapper written around the content of every converted SOFT file.
_XML_HEADER = (
    '<?xml version="1.0" encoding="UTF-8"?>\n\n'
    '<gse xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"\n'
    'xsi:noNamespaceSchemaLocation="esquema-gcs.xsd">\n\n'
)
_XML_FOOTER = "\n</gse>\n"

_SOFT_SUFFIX = ".soft"
_FAMILY_SUFFIX = "_family.soft"


def _xml_name_for(soft_name):
    """Return the output file name for *soft_name*.

    Only the trailing ".soft" extension is swapped for ".xml"; any other
    ".soft" occurrence inside the name is left untouched.
    """
    return soft_name[:-len(_SOFT_SUFFIX)] + ".xml"


def _convert_soft_file(soft_path, xml_path):
    """Copy *soft_path* into *xml_path*, escaped and wrapped in a <gse> element."""
    # Undecodable bytes are replaced rather than aborting the conversion.
    with open(soft_path, "r", encoding="utf-8", errors="replace") as iFile:
        with open(xml_path, "w", encoding="utf-8") as oFile:
            oFile.write(_XML_HEADER)
            for line in iFile:
                oFile.write(line.translate(_XML_ESCAPES))
            oFile.write(_XML_FOOTER)


def main(argv=None):
    """Convert every ``*_family.soft`` file under --inputPath to XML.

    The input directory is walked recursively; each converted file is written
    directly into --outputPath (flat), so files with the same name in
    different subdirectories overwrite each other.

    Parameters:
        argv: list of command-line arguments; ``None`` means ``sys.argv[1:]``.

    Returns:
        The number of files converted.

    Exits with status 2 (via ``OptionParser.error``) when the arguments are
    invalid or either directory does not exist.
    """
    # Parameter definition
    parser = OptionParser()
    parser.add_option("--inputPath", dest="inputPath",
                      help="Path to read input files", metavar="PATH")
    parser.add_option("--outputPath", dest="outputPath",
                      help="Path to place output files", metavar="PATH")

    (options, args) = parser.parse_args(argv)
    # parser.error() prints the usage message and exits on its own.
    if args:
        parser.error("Unexpected positional arguments: {}".format(" ".join(args)))
    if options.inputPath is None or options.outputPath is None:
        parser.error("Both --inputPath and --outputPath are required.")
    if not os.path.isdir(options.inputPath):
        parser.error("Input path is not a directory: {}".format(options.inputPath))
    if not os.path.isdir(options.outputPath):
        parser.error("Output path is not a directory: {}".format(options.outputPath))

    # Printing parameter values
    print('-------------------------------- PARAMETERS --------------------------------')
    print("Path to read input files: " + str(options.inputPath))
    print("Path to place output files: " + str(options.outputPath))

    # Walk directory to read files
    processedFiles = 0
    for path, _dirs, files in os.walk(options.inputPath):
        for f in files:
            if not f.endswith(_FAMILY_SUFFIX):
                continue
            print("Processing...{}/{}".format(path, f))
            # Join with the directory currently being walked, not the root,
            # so that files found in subdirectories are opened correctly.
            _convert_soft_file(os.path.join(path, f),
                               os.path.join(options.outputPath, _xml_name_for(f)))
            processedFiles += 1
    print("Processed files: {}".format(processedFiles))
    return processedFiles


if __name__ == "__main__":
    main()