Carlos-Francisco Méndez-Cruz

Feature extraction and vectorizer three sentences

...@@ -17,6 +17,7 @@ __author__ = 'CMendezC' ...@@ -17,6 +17,7 @@ __author__ = 'CMendezC'
17 # 1) --inputPath Path to read input files. 17 # 1) --inputPath Path to read input files.
18 # 2) --outputPath Path to save output files. 18 # 2) --outputPath Path to save output files.
19 # 3) --vectorizer Vectorizer: b=binary, f=frequency, t=tf-idf. 19 # 3) --vectorizer Vectorizer: b=binary, f=frequency, t=tf-idf.
20 +# 4) --feature Feature extracted from documents: word, lemma, pos, ner.
20 21
21 # Output: 22 # Output:
22 # 1) Report with dictionary, vectors, cosine similarity matrix. 23 # 1) Report with dictionary, vectors, cosine similarity matrix.
...@@ -47,7 +48,7 @@ if __name__ == "__main__": ...@@ -47,7 +48,7 @@ if __name__ == "__main__":
47 choices=('b', 'f', 't'), default='b') 48 choices=('b', 'f', 't'), default='b')
48 parser.add_argument("--feature", dest="feature", required=True, 49 parser.add_argument("--feature", dest="feature", required=True,
49 help="Feature: word, lemma, pos", metavar="TEXT", 50 help="Feature: word, lemma, pos", metavar="TEXT",
50 - choices=('word', 'lemma', 'pos'), default='b') 51 + choices=('word', 'lemma', 'pos', 'ner'), default='b')
51 52
52 args = parser.parse_args() 53 args = parser.parse_args()
53 54
...@@ -56,6 +57,7 @@ if __name__ == "__main__": ...@@ -56,6 +57,7 @@ if __name__ == "__main__":
56 print("Path to read input files: " + str(args.inputPath)) 57 print("Path to read input files: " + str(args.inputPath))
57 print("Path to place output files: " + str(args.outputPath)) 58 print("Path to place output files: " + str(args.outputPath))
58 print("Vectorizer: " + str(args.vectorizer)) 59 print("Vectorizer: " + str(args.vectorizer))
60 + print("Feature: " + str(args.feature))
59 61
60 # Start time 62 # Start time
61 t0 = time() 63 t0 = time()
......