Toggle navigation
Toggle navigation
This project
Loading...
Sign in
Carlos-Francisco Méndez-Cruz
/
lcg-bioinfoI-bionlp
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Snippets
Network
Create a new issue
Builds
Commits
Issue Boards
Authored by
Carlos-Francisco Méndez-Cruz
2018-09-12 22:39:24 -0500
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
8b0e819e0aa4c34b0d752e0a164afc44e64f60c1
8b0e819e
1 parent
d7ae81db
Feature extraction and vectorizer three sentences
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
3 additions
and
1 deletions
representaciones-vectoriales/extraccion-caracteristicas-vectorizacion.py
representaciones-vectoriales/extraccion-caracteristicas-vectorizacion.py
View file @
8b0e819
...
...
@@ -17,6 +17,7 @@ __author__ = 'CMendezC'
# 1) --inputPath Path to read input files.
# 2) --outputPath Path to save output files.
# 3) --vectorizer Vectorizer: b=binary, f=frequency, t=tf-idf.
# 4) --feature Extracted feature from documents: word, lemma, pos, ner
# Output:
# 1) Report with dictionary, vectors, cosine similarity matrix.
...
...
@@ -47,7 +48,7 @@ if __name__ == "__main__":
choices
=
(
'b'
,
'f'
,
't'
),
default
=
'b'
)
parser
.
add_argument
(
"--feature"
,
dest
=
"feature"
,
required
=
True
,
help
=
"Feature: word, lemma, pos"
,
metavar
=
"TEXT"
,
choices
=
(
'word'
,
'lemma'
,
'pos'
),
default
=
'b'
)
choices
=
(
'word'
,
'lemma'
,
'pos'
,
'ner'
),
default
=
'b'
)
args
=
parser
.
parse_args
()
...
...
@@ -56,6 +57,7 @@ if __name__ == "__main__":
print
(
"Path to read input files: "
+
str
(
args
.
inputPath
))
print
(
"Path to place output files: "
+
str
(
args
.
outputPath
))
print
(
"Vectorizer: "
+
str
(
args
.
vectorizer
))
print
(
"Feature: "
+
str
(
args
.
feature
))
# Start time
t0
=
time
()
...
...
Please
register
or
login
to post a comment