Toggle navigation
Toggle navigation
This project
Loading...
Sign in
Carlos-Francisco Méndez-Cruz
/
automatic-extraction-growth-conditions
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Authored by
Carlos-Francisco Méndez-Cruz
2018-10-12 12:32:14 -0500
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
37fdbca494304004bd85566223ec76047800d21c
37fdbca4
1 parent
ec363d7c
Transform soft file into XML file
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
7 additions
and
5 deletions
scripts/extract-manually-tagged-gcs.py
scripts/extract-manually-tagged-gcs.py
View file @
37fdbca
...
...
@@ -19,8 +19,8 @@ __author__ = 'CMendezC'
# Execution:
# python extract-manually-tagged-gcs.py
# --inputPath C:\Users\cmendezc\Documents\GENOMICAS\gitlab_automatic-extraction-growth-conditions\data-sets\tagged-xml-data
# --outputPath C:\Users\cmendezc\Documents\GENOMICAS\gitlab_automatic-extraction-growth-conditions\data-sets\report-manually-tagged-gcs
# c:\anaconda3\python extract-manually-tagged-gcs.py --inputPath C:\Users\cmendezc\Documents\GENOMICAS\gitlab_automatic-extraction-growth-conditions\data-sets\tagged-xml-data --outputPath C:\Users\cmendezc\Documents\GENOMICAS\gitlab_automatic-extraction-growth-conditions\data-sets\report-manually-tagged-gcs
# --outputPath C:\Users\cmendezc\Documents\GENOMICAS\gitlab_automatic-extraction-growth-conditions\report-manually-tagged-gcs
# c:\anaconda3\python extract-manually-tagged-gcs.py --inputPath C:\Users\cmendezc\Documents\GENOMICAS\gitlab_automatic-extraction-growth-conditions\data-sets\tagged-xml-data --outputPath C:\Users\cmendezc\Documents\GENOMICAS\gitlab_automatic-extraction-growth-conditions\report-manually-tagged-gcs
###########################################################
# MAIN PROGRAM #
...
...
@@ -52,8 +52,9 @@ if __name__ == "__main__":
for
path
,
dirs
,
files
in
os
.
walk
(
options
.
inputPath
):
for
f
in
files
:
if
f
.
endswith
(
"_family.xml"
):
print
(
"Processing...{}/{}"
.
format
(
options
.
inputPath
,
f
))
with
open
(
os
.
path
.
join
(
options
.
inputPath
,
f
),
"r"
,
encoding
=
"utf-8"
)
as
iFile
:
print
(
"Processing...{} {}"
.
format
(
options
.
inputPath
,
f
))
#with open(os.path.join(options.inputPath, f), "r", encoding="utf-8") as iFile:
with
open
(
os
.
path
.
join
(
options
.
inputPath
,
f
),
"r"
)
as
iFile
:
for
line
in
iFile
:
line
=
line
.
strip
(
'
\n
'
)
result
=
regexSerie
.
match
(
line
)
...
...
@@ -98,7 +99,8 @@ if __name__ == "__main__":
print
(
"New tag: {} and content: {}"
.
format
(
tag
,
content
.
encode
(
encoding
=
'utf-8'
,
errors
=
'replace'
)))
# print(hashGcs)
tags
=
[
"Technique"
,
"Orgn"
,
"Strain"
,
"Substrain"
,
"Gversion"
,
"Gtype"
,
"Phase"
,
"Phase"
,
"Air"
,
"Med"
,
"Temp"
,
"Supp"
]
with
open
(
os
.
path
.
join
(
options
.
outputPath
,
f
.
replace
(
".xml"
,
".report.csv"
)),
"w"
,
encoding
=
"utf-8"
)
as
oFile
:
#with open(os.path.join(options.outputPath, f.replace(".xml", ".report.csv")), "w", encoding="utf-8") as oFile:
with
open
(
os
.
path
.
join
(
options
.
outputPath
,
f
.
replace
(
".xml"
,
".report.csv"
)),
"w"
)
as
oFile
:
output
=
'Serie
\t
Sample
\t
'
for
tag
in
tags
:
output
=
output
+
tag
+
'
\t
'
...
...
Please register or log in to post a comment