Carlos-Francisco Méndez-Cruz

Transform soft file into an XML file

......@@ -19,8 +19,8 @@ __author__ = 'CMendezC'
# Execution:
# python extract-manually-tagged-gcs.py
# --inputPath C:\Users\cmendezc\Documents\GENOMICAS\gitlab_automatic-extraction-growth-conditions\data-sets\tagged-xml-data
# --outputPath C:\Users\cmendezc\Documents\GENOMICAS\gitlab_automatic-extraction-growth-conditions\data-sets\report-manually-tagged-gcs
# c:\anaconda3\python extract-manually-tagged-gcs.py --inputPath C:\Users\cmendezc\Documents\GENOMICAS\gitlab_automatic-extraction-growth-conditions\data-sets\tagged-xml-data --outputPath C:\Users\cmendezc\Documents\GENOMICAS\gitlab_automatic-extraction-growth-conditions\data-sets\report-manually-tagged-gcs
# --outputPath C:\Users\cmendezc\Documents\GENOMICAS\gitlab_automatic-extraction-growth-conditions\report-manually-tagged-gcs
# c:\anaconda3\python extract-manually-tagged-gcs.py --inputPath C:\Users\cmendezc\Documents\GENOMICAS\gitlab_automatic-extraction-growth-conditions\data-sets\tagged-xml-data --outputPath C:\Users\cmendezc\Documents\GENOMICAS\gitlab_automatic-extraction-growth-conditions\report-manually-tagged-gcs
###########################################################
# MAIN PROGRAM #
......@@ -52,8 +52,9 @@ if __name__ == "__main__":
for path, dirs, files in os.walk(options.inputPath):
for f in files:
if f.endswith("_family.xml"):
print("Processing...{}/{}".format(options.inputPath, f))
with open(os.path.join(options.inputPath, f), "r", encoding="utf-8") as iFile:
print("Processing...{} {}".format(options.inputPath, f))
#with open(os.path.join(options.inputPath, f), "r", encoding="utf-8") as iFile:
with open(os.path.join(options.inputPath, f), "r") as iFile:
for line in iFile:
line = line.strip('\n')
result = regexSerie.match(line)
......@@ -98,7 +99,8 @@ if __name__ == "__main__":
print("New tag: {} and content: {}".format(tag, content.encode(encoding='utf-8', errors='replace')))
# print(hashGcs)
tags = ["Technique", "Orgn", "Strain", "Substrain", "Gversion", "Gtype", "Phase", "Phase", "Air", "Med", "Temp", "Supp"]
with open(os.path.join(options.outputPath, f.replace(".xml", ".report.csv")), "w", encoding="utf-8") as oFile:
#with open(os.path.join(options.outputPath, f.replace(".xml", ".report.csv")), "w", encoding="utf-8") as oFile:
with open(os.path.join(options.outputPath, f.replace(".xml", ".report.csv")), "w") as oFile:
output = 'Serie\tSample\t'
for tag in tags:
output = output + tag + '\t'
......