soft-2-xml.py
3.29 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# -*- coding: UTF-8 -*-
from optparse import OptionParser
import os
import sys
__author__ = 'CMendezC'
# Objective: convert soft file to XML file:
# include headings, tags, substitute & and <
# Parameters:
# 1) --inputPath input path
# 2) --outputPath output path
# Output:
# 1) XML File with soft file content
# Execution:
# python soft-2-xml.py
# --inputPath C:\Users\cmendezc\Documents\GENOMICAS\gitlab_automatic-extraction-growth-conditions\data-sets\soft-data-additional
# --outputPath C:\Users\cmendezc\Documents\GENOMICAS\gitlab_automatic-extraction-growth-conditions\data-sets\xml-data
# python soft-2-xml.py --inputPath C:\Users\cmendezc\Documents\GENOMICAS\gitlab_automatic-extraction-growth-conditions\data-sets\soft-data --outputPath C:\Users\cmendezc\Documents\GENOMICAS\gitlab_automatic-extraction-growth-conditions\data-sets\xml-data
# Additional files
# python soft-2-xml.py --inputPath C:\Users\cmendezc\Documents\GENOMICAS\gitlab_automatic-extraction-growth-conditions\data-sets\soft-data-additional --outputPath C:\Users\cmendezc\Documents\GENOMICAS\gitlab_automatic-extraction-growth-conditions\data-sets\xml-data
###########################################################
# MAIN PROGRAM #
###########################################################
def escape_soft_line(line):
    """Return *line* with the XML markup characters ``&`` and ``<`` escaped.

    Only these two characters are substituted, as stated in the header of
    this script; ``>``, double quotes and single quotes are written unchanged.
    """
    # "&" has to be replaced first: otherwise the "&" introduced by "&lt;"
    # would itself be escaped again.
    return line.replace("&", "&amp;").replace("<", "&lt;")


def xml_name_for(softName):
    """Return the output file name for *softName* (``.soft`` -> ``.xml``).

    *softName* must end with ``.soft``. Only that trailing extension is
    swapped, so a ``.soft`` substring elsewhere in the name is preserved.
    """
    return softName[:-len(".soft")] + ".xml"


def convert_soft_file(softPath, xmlPath):
    """Copy the text of *softPath* into *xmlPath* wrapped in a ``<gse>`` element.

    The input is read as UTF-8 with undecodable bytes replaced, every line is
    passed through :func:`escape_soft_line`, and the result is written as UTF-8.
    """
    with open(softPath, "r", encoding="utf-8", errors="replace") as iFile:
        with open(xmlPath, "w", encoding="utf-8") as oFile:
            oFile.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n\n<gse xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\nxsi:noNamespaceSchemaLocation=\"esquema-gcs.xsd\">\n\n")
            for line in iFile:
                oFile.write(escape_soft_line(line))
            oFile.write("\n</gse>\n")


def main():
    """Convert every ``*_family.soft`` file under --inputPath to XML in --outputPath."""
    # Parameter definition
    parser = OptionParser()
    parser.add_option("--inputPath", dest="inputPath",
                      help="Path to read input files", metavar="PATH")
    parser.add_option("--outputPath", dest="outputPath",
                      help="Path to place output files", metavar="PATH")
    (options, args) = parser.parse_args()
    if args:
        # Positional arguments are not accepted. parser.error() prints the
        # usage message and terminates the program itself.
        parser.error("None parameter entered.")
    if not options.inputPath or not options.outputPath:
        # Fail with a usage error instead of a TypeError on a None path.
        parser.error("Both --inputPath and --outputPath are required.")

    # Printing parameter values
    print('-------------------------------- PARAMETERS --------------------------------')
    print("Path to read input files: " + str(options.inputPath))
    print("Path to place output files: " + str(options.outputPath))

    # Walk directory to read files
    processedFiles = 0
    for path, dirs, files in os.walk(options.inputPath):
        for f in files:
            if not f.endswith("_family.soft"):
                continue
            print("Processing...{}/{}".format(path, f))
            # Join with the directory currently being walked so that files
            # found in subdirectories of inputPath can actually be opened.
            # All XML files are still written directly into outputPath.
            convert_soft_file(os.path.join(path, f),
                              os.path.join(options.outputPath, xml_name_for(f)))
            processedFiles += 1
    print("Processed files: {}".format(processedFiles))


###########################################################
# MAIN PROGRAM #
###########################################################
if __name__ == "__main__":
    main()