Toggle navigation
Toggle navigation
This project
Loading...
Sign in
Carlos-Francisco Méndez-Cruz
/
conditional-random-fields
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Snippets
Network
Create a new issue
Builds
Commits
Issue Boards
Authored by
Carlos-Francisco Méndez-Cruz
2018-03-08 02:07:56 -0600
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
4791fb144629a2e8a67a6e1b01f836af2f3ece6f
4791fb14
1 parent
19c2bdea
Obtaining training and test data sets
Hide whitespace changes
Inline
Side-by-side
Showing 1 changed file with 6 additions and 5 deletions
training-validation-v1.py
training-validation-v1.py
View file @ 4791fb1
...
...
@@ -332,14 +332,15 @@ if __name__ == "__main__":
     sentencesTrainingData = []
     sentencesTestData = []
-    stopwords = [word.decode('utf-8') for word in stopwords.words('english')]
+    # Original: stopwords = [word.decode('utf-8') for word in stopwords.words('english')]
+    stopwords = [word for word in stopwords.words('english')]
     with open(os.path.join(options.inputPath, options.trainingFile), "r") as iFile:
     # with open(os.path.join(options.inputPath, options.trainingFile), "r", encoding="utf-8", errors='replace') as iFile:
         for line in iFile.readlines():
             listLine = []
-            line = line.decode("utf-8")
-            for token in line.strip('\n').split():
+            line = line.strip('\n')
+            for token in line.split():
                 if options.filterStopWords:
                     listToken = token.split('|')
                     lemma = listToken[1]
...
...
@@ -366,8 +367,8 @@ if __name__ == "__main__":
# with open(os.path.join(options.inputPath, options.testFile), "r", encoding="utf-8", errors='replace') as iFile:
         for line in iFile.readlines():
             listLine = []
-            line = line.decode("utf-8")
-            for token in line.strip('\n').split():
+            line = line.strip('\n')
+            for token in line.split():
                 if options.filterStopWords:
                     listToken = token.split('|')
                     lemma = listToken[1]
...
...
Please register or login to post a comment