Toggle navigation
Toggle navigation
This project
Loading...
Sign in
Carlos-Francisco Méndez-Cruz
/
conditional-random-fields
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Snippets
Network
Create a new issue
Builds
Commits
Issue Boards
Authored by
Carlos-Francisco Méndez-Cruz
2018-03-08 12:19:16 -0600
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
19ea1278e4af6cfc624fc1829a302a9373d5d3f0
19ea1278
1 parent
37bda8b8
Training validation script
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
10 additions
and
1 deletion
training-validation-v1.py
training-validation-v1.py
View file @
19ea127
...
...
@@ -5,6 +5,7 @@ from itertools import chain
from
optparse
import
OptionParser
from
time
import
time
from
collections
import
Counter
import
re
import
nltk
import
sklearn
...
...
@@ -48,6 +49,12 @@ from nltk.corpus import stopwords
#################################
# FUNCTIONS #
#################################
def endsConLow(word):
    """Tell whether the last character of *word* falls outside a fixed set.

    The test is a regular-expression search for one character that is NOT
    a lowercase ASCII vowel (``aeiou``), NOT an uppercase ASCII letter
    (``A-Z``) and NOT a digit (``0-9``), anchored at the end of the string.

    NOTE(review): the name suggests "ends in a lowercase consonant", but the
    negated class also accepts punctuation, whitespace and non-ASCII
    letters -- confirm that this is the intended feature.

    :param word: string to inspect.
    :return: ``True`` when the pattern matches, ``False`` otherwise
        (including for the empty string).
    """
    # Same pattern as before; the match object is turned into a plain bool
    # instead of branching on it.
    return re.search(r'[^aeiouA-Z0-9]$', word) is not None
def
word2features
(
sent
,
i
):
listElem
=
sent
[
i
]
.
split
(
'|'
)
...
...
@@ -69,6 +76,7 @@ def word2features(sent, i):
'word[:3]'
:
word
[:
3
],
'word[:2]'
:
word
[:
2
],
'word[:1]'
:
word
[:
1
],
'endsConLow()'
:
endsConLow
(
word
),
}
'''
if i > 0:
...
...
@@ -196,7 +204,8 @@ if __name__ == "__main__":
print
(
"Exclude stop words: "
+
str
(
options
.
excludeStopWords
))
symbols
=
[
'.'
,
','
,
':'
,
';'
,
'?'
,
'!'
,
'
\'
'
,
'"'
,
'<'
,
'>'
,
'('
,
')'
,
'-'
,
'_'
,
'/'
,
'
\\
'
,
'¿'
,
'¡'
,
'+'
,
'{'
,
'}'
,
'['
,
']'
,
'*'
,
'
%
'
,
'$'
,
'#'
,
'&'
,
'°'
,
'`'
,
'...'
]
print
(
"Exclude symbols "
+
str
(
symbols
)
+
': '
+
str
(
options
.
excludeSymbols
))
#print("Exclude symbols " + str(symbols) + ': ' + str(options.excludeSymbols))
print
(
"Exclude symbols: "
+
str
(
options
.
excludeSymbols
))
print
(
'-------------------------------- PROCESSING --------------------------------'
)
print
(
'Reading corpus...'
)
...
...
Please
register
or
login
to post a comment