Carlos-Francisco Méndez-Cruz

Transform SOFT files into XML files

......@@ -18,9 +18,11 @@ __author__ = 'CMendezC'
# Execution:
# python soft-2-xml.py
# --inputPath C:\Users\cmendezc\Documents\GENOMICAS\gitlab_automatic-extraction-growth-conditions\data-sets\soft-data
# --inputPath C:\Users\cmendezc\Documents\GENOMICAS\gitlab_automatic-extraction-growth-conditions\data-sets\soft-data-additional
# --outputPath C:\Users\cmendezc\Documents\GENOMICAS\gitlab_automatic-extraction-growth-conditions\data-sets\xml-data
# python soft-2-xml.py --inputPath C:\Users\cmendezc\Documents\GENOMICAS\gitlab_automatic-extraction-growth-conditions\data-sets\soft-data --outputPath C:\Users\cmendezc\Documents\GENOMICAS\gitlab_automatic-extraction-growth-conditions\data-sets\xml-data
# Additional files
# python soft-2-xml.py --inputPath C:\Users\cmendezc\Documents\GENOMICAS\gitlab_automatic-extraction-growth-conditions\data-sets\soft-data-additional --outputPath C:\Users\cmendezc\Documents\GENOMICAS\gitlab_automatic-extraction-growth-conditions\data-sets\xml-data
###########################################################
# MAIN PROGRAM #
......@@ -45,13 +47,14 @@ if __name__ == "__main__":
print("Path to place output files: " + str(options.outputPath))
# Walk directory to read files
processedFiles = 0
for path, dirs, files in os.walk(options.inputPath):
for f in files:
if f.endswith("_family.txt"):
if f.endswith("_family.soft"):
print("Processing...{}/{}".format(options.inputPath, f))
softText = ''
with open(os.path.join(options.inputPath, f), "r", encoding="utf-8", errors="replace") as iFile:
with open(os.path.join(options.outputPath, f.replace(".txt", ".xml")), "w",
with open(os.path.join(options.outputPath, f.replace(".soft", ".xml")), "w",
encoding="utf-8") as oFile:
oFile.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n\n<gse xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\nxsi:noNamespaceSchemaLocation=\"esquema-gcs.xsd\">\n\n")
for line in iFile:
......@@ -62,4 +65,5 @@ if __name__ == "__main__":
# line = line.replace("\'", "&apos;")
oFile.write(line)
oFile.write("\n</gse>\n")
processedFiles+=1
print("Processed files: {}".format(processedFiles))
......