Carlos-Francisco Méndez-Cruz

Transform soft file into XML file

...@@ -19,8 +19,8 @@ __author__ = 'CMendezC' ...@@ -19,8 +19,8 @@ __author__ = 'CMendezC'
19 # Execution: 19 # Execution:
20 # python extract-manually-tagged-gcs.py 20 # python extract-manually-tagged-gcs.py
21 # --inputPath C:\Users\cmendezc\Documents\GENOMICAS\gitlab_automatic-extraction-growth-conditions\data-sets\tagged-xml-data 21 # --inputPath C:\Users\cmendezc\Documents\GENOMICAS\gitlab_automatic-extraction-growth-conditions\data-sets\tagged-xml-data
22 -# --outputPath C:\Users\cmendezc\Documents\GENOMICAS\gitlab_automatic-extraction-growth-conditions\data-sets\report-manually-tagged-gcs 22 +# --outputPath C:\Users\cmendezc\Documents\GENOMICAS\gitlab_automatic-extraction-growth-conditions\report-manually-tagged-gcs
23 -# c:\anaconda3\python extract-manually-tagged-gcs.py --inputPath C:\Users\cmendezc\Documents\GENOMICAS\gitlab_automatic-extraction-growth-conditions\data-sets\tagged-xml-data --outputPath C:\Users\cmendezc\Documents\GENOMICAS\gitlab_automatic-extraction-growth-conditions\data-sets\report-manually-tagged-gcs 23 +# c:\anaconda3\python extract-manually-tagged-gcs.py --inputPath C:\Users\cmendezc\Documents\GENOMICAS\gitlab_automatic-extraction-growth-conditions\data-sets\tagged-xml-data --outputPath C:\Users\cmendezc\Documents\GENOMICAS\gitlab_automatic-extraction-growth-conditions\report-manually-tagged-gcs
24 24
25 ########################################################### 25 ###########################################################
26 # MAIN PROGRAM # 26 # MAIN PROGRAM #
...@@ -52,8 +52,9 @@ if __name__ == "__main__": ...@@ -52,8 +52,9 @@ if __name__ == "__main__":
52 for path, dirs, files in os.walk(options.inputPath): 52 for path, dirs, files in os.walk(options.inputPath):
53 for f in files: 53 for f in files:
54 if f.endswith("_family.xml"): 54 if f.endswith("_family.xml"):
55 - print("Processing...{}/{}".format(options.inputPath, f)) 55 + print("Processing...{} {}".format(options.inputPath, f))
56 - with open(os.path.join(options.inputPath, f), "r", encoding="utf-8") as iFile: 56 + #with open(os.path.join(options.inputPath, f), "r", encoding="utf-8") as iFile:
57 + with open(os.path.join(options.inputPath, f), "r") as iFile:
57 for line in iFile: 58 for line in iFile:
58 line = line.strip('\n') 59 line = line.strip('\n')
59 result = regexSerie.match(line) 60 result = regexSerie.match(line)
...@@ -98,7 +99,8 @@ if __name__ == "__main__": ...@@ -98,7 +99,8 @@ if __name__ == "__main__":
98 print("New tag: {} and content: {}".format(tag, content.encode(encoding='utf-8', errors='replace'))) 99 print("New tag: {} and content: {}".format(tag, content.encode(encoding='utf-8', errors='replace')))
99 # print(hashGcs) 100 # print(hashGcs)
100 tags = ["Technique", "Orgn", "Strain", "Substrain", "Gversion", "Gtype", "Phase", "Phase", "Air", "Med", "Temp", "Supp"] 101 tags = ["Technique", "Orgn", "Strain", "Substrain", "Gversion", "Gtype", "Phase", "Phase", "Air", "Med", "Temp", "Supp"]
101 - with open(os.path.join(options.outputPath, f.replace(".xml", ".report.csv")), "w", encoding="utf-8") as oFile: 102 + #with open(os.path.join(options.outputPath, f.replace(".xml", ".report.csv")), "w", encoding="utf-8") as oFile:
103 + with open(os.path.join(options.outputPath, f.replace(".xml", ".report.csv")), "w") as oFile:
102 output = 'Serie\tSample\t' 104 output = 'Serie\tSample\t'
103 for tag in tags: 105 for tag in tags:
104 output = output + tag + '\t' 106 output = output + tag + '\t'
......