Feature extraction and vectorizer three sentences

Carlos-Francisco Méndez-Cruz
Commit 8b0e819e0aa4c34b0d752e0a164afc44e64f60c1 8b0e819e 1 parent d7ae81db
Showing 1 changed file with 3 additions and 1 deletions
representaciones-vectoriales/extraccion-caracteristicas-vectorizacion.py
--- a/representaciones-vectoriales/extraccion-caracteristicas-vectorizacion.py
View file @8b0e819
+++ b/representaciones-vectoriales/extraccion-caracteristicas-vectorizacion.py
View file @8b0e819
@@ -17,6 +17,7 @@ __author__ = 'CMendezC'
 # 1) --inputPath Path to read input files.
 # 2) --outputPath Path to save output files.
 # 3) --vectorizer Vectorizer: b=binary, f=frequency, t=tf-idf.
+ # 4) --feature Feature extracted from documents: word, lemma, pos, ner.
 
 # Output:
 # 1) Report with dictionary, vectors, cosine similarity matrix.
@@ -47,7 +48,7 @@ if __name__ == "__main__":
                       choices=('b', 'f', 't'), default='b')
     parser.add_argument("--feature", dest="feature", required=True,
                       help="Feature: word, lemma, pos", metavar="TEXT",
-                       choices=('word', 'lemma', 'pos'), default='b')
+                       choices=('word', 'lemma', 'pos', 'ner'), default='b')
 
     args = parser.parse_args()
 
@@ -56,6 +57,7 @@ if __name__ == "__main__":
     print("Path to read input files: " + str(args.inputPath))
     print("Path to place output files: " + str(args.outputPath))
     print("Vectorizer: " + str(args.vectorizer))
+     print("Feature: " + str(args.feature))
 
     # Start time
     t0 = time()