Toggle navigation
Toggle navigation
This project
Loading...
Sign in
Carlos-Francisco Méndez-Cruz
/
nlp-preprocessing-pipeline
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Snippets
Network
Create a new issue
Builds
Commits
Issue Boards
Authored by
Carlos-Francisco Méndez-Cruz
2018-03-07 17:55:06 -0600
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
802aed3e04693f47e7fdcdc87396ab23897f1e10
802aed3e
1 parent
520f6a89
New terminological tagging for CRFs
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
3 additions
and
3 deletions
biologicalTermTagging-CRF.py
biologicalTermTagging-CRF.py
View file @
802aed3
...
...
@@ -104,6 +104,7 @@ if __name__ == "__main__":
hashTerms
[
key
]
.
append
(
lineHyp
.
capitalize
())
hashTermsOrig
[
key
]
.
append
(
line
.
capitalize
())
print
(
' Terms read {} size: {}'
.
format
(
key
,
len
(
hashTerms
[
key
])))
print
(
' Terms read {} size: {}'
.
format
(
key
,
len
(
hashTermsOrig
[
key
])))
#regularWords = words.words('en')
print
()
...
...
@@ -128,7 +129,6 @@ if __name__ == "__main__":
if
len
(
listLine1
)
<
3
:
continue
word
=
listLine1
[
0
]
print
(
"Word: {}"
.
format
(
word
))
pos
=
listLine1
[
1
]
listLine2
=
listLine1
[
2
]
.
split
(
' '
)
lemma
=
listLine2
[
0
]
...
...
@@ -136,9 +136,9 @@ if __name__ == "__main__":
for
termTag
in
hashTerms
:
if
word
in
hashTerms
[
termTag
]:
wordOrig
=
word
.
replace
(
'-'
,
' '
)
print
(
"Word: {}"
.
format
(
word
))
print
(
"WordOrig: {}"
.
format
(
wordOrig
))
#print("Word: {}".format(word))
if
wordOrig
in
hashTermsOrig
[
termTag
]:
print
(
"WordOrig: {}"
.
format
(
wordOrig
))
line
=
''
for
w
,
l
in
zip
(
word
.
split
(
'-'
),
lemma
.
split
(
'-'
)):
line
+=
w
+
'
\t
'
+
listLine1
[
1
]
+
'
\t
'
+
l
+
' '
+
termTag
+
' TermTag'
+
'
\n
'
...
...
Please
register
or
login
to post a comment