Toggle navigation
Toggle navigation
This project
Loading...
Sign in
Carlos-Francisco Méndez-Cruz
/
automatic-extraction-growth-conditions
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Graphs
Network
Create a new issue
Commits
Issue Boards
Authored by
Estefani Gaytan Nunez
2019-03-22 01:46:20 -0600
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
de368eeff75632d4703add509c26cbb634283fcc
de368eef
1 parent
1670dced
CoreNLP parsed files and words labeled
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
59 additions
and
0 deletions
training-evaluation-data-sets/bin/parsed_sentences_from_labels_v3.py
training-evaluation-data-sets/bin/parsed_sentences_from_labels_v3.py
0 → 100644
View file @
de368ee
#!/bin/python2.7
# Closing tags. Each one maps to 'O', the label the script switches to when
# it reads the tag token. Built with dict.fromkeys so the shared value is
# written once instead of eighteen times.
out_labels = dict.fromkeys(
    (
        '</Air>',
        '</Gtype>',
        '</Gversion>',
        '</Med>',
        '</Orgn>',
        '</Phase>',
        '</Sample>',
        '</Serie>',
        '</Strain>',
        '</Substrain>',
        '</Supp>',
        '</Technique>',
        '</Temp>',
        '</Name>',
        '</OD>',
        '</Anti>',
        '</Agit>',
        '</Vess>',
    ),
    'O',
)
# Opening tags. Each '<X>' maps to the label 'X' that the script switches to
# when it reads the tag token. Keys are derived from the label names so a
# key and its value cannot drift apart.
in_labels = {
    '<%s>' % label_name: label_name
    for label_name in (
        'Air',
        'Gtype',
        'Gversion',
        'Med',
        'Orgn',
        'Phase',
        'Sample',
        'Serie',
        'Strain',
        'Substrain',
        'Supp',
        'Technique',
        'Temp',
        'Name',
        'OD',
        'Anti',
        'Agit',
        'Vess',
    )
}
import
re
# "Contenido" column of "/home/egaytan/Dropbox/PGC/data-sets/file_output/exit_file.txt"
# Default input (tab-separated, '.conll' per its file name) and output paths.
inpath = '/home/egaytan/Dropbox/PGC/data-sets_1/content_colum_data_set.tsv.conll'
outpath = '/home/egaytan/Dropbox/PGC/data-sets_1/sentences_labeled_v1.tsv'
# Label attached to tokens until the first opening tag is read.
flag = 'O'
# Token for which a newline is written instead of a labelled token.
RECORD_BREAK_TOKEN = "PGCGROWTHCONDITIONS"


def label_conll_lines(lines, open_tags, close_tags, label='O'):
    """Yield the output chunks for an iterable of tab-separated lines.

    The second column of each line is the token that drives the labelling:

    * a key of ``close_tags`` or ``open_tags`` switches the current label to
      the mapped value and produces no output (``close_tags`` wins if a token
      is in both, matching the original statement order);
    * ``RECORD_BREAK_TOKEN`` yields ``'\\n'``;
    * any other token yields columns 2-4 joined with ``'|'``, followed by
      ``'|'``, the current label and a single space.

    Lines with fewer than two columns (for example blank lines) are skipped.

    Args:
        lines: iterable of text lines, with or without a trailing newline.
        open_tags: mapping from opening-tag token to the label it starts.
        close_tags: mapping from closing-tag token to the label it restores.
        label: label in effect before any tag has been read.

    Yields:
        Strings to be written to the output file, in order.
    """
    for line in lines:
        # Strip the line terminator first. Otherwise it stays glued to the
        # last column and leaks into the output on short lines, or keeps a
        # tag in the last column from matching.
        fields = line.rstrip('\r\n').split('\t')
        if len(fields) < 2:
            continue
        token = fields[1]
        if token in close_tags:
            label = close_tags[token]
        elif token in open_tags:
            label = open_tags[token]
        elif token == RECORD_BREAK_TOKEN:
            yield '\n'
        else:
            yield '|'.join(fields[1:4]) + '|' + label + ' '


def main(in_path=inpath, out_path=outpath):
    """Label every token of ``in_path`` and write the result to ``out_path``.

    Uses the module-level ``in_labels`` / ``out_labels`` tables and ``flag``
    as the initial label.
    """
    # Open the input first so a missing input does not truncate an existing
    # output file.
    with open(in_path, 'r') as input_file, open(out_path, 'w') as out:
        out.writelines(
            label_conll_lines(input_file, in_labels, out_labels, flag)
        )


if __name__ == '__main__':
    main()
#print('\t'.join(line.split('\t')[1:4])+'\t'+flag)
\ No newline at end of file
Please
register
or
login
to post a comment