Carlos-Francisco Méndez-Cruz

Feature extraction and vectorizer for three sentences

......@@ -17,6 +17,7 @@ __author__ = 'CMendezC'
# 1) --inputPath Path to read input files.
# 2) --outputPath Path to save output files.
# 3) --vectorizer Vectorizer: b=binary, f=frequency, t=tf-idf.
# 4) --feature Extracted feature from documents: word, lemma, pos, ner.
# Output:
# 1) Report with dictionary, vectors, cosine similarity matrix.
......@@ -47,7 +48,7 @@ if __name__ == "__main__":
choices=('b', 'f', 't'), default='b')
parser.add_argument("--feature", dest="feature", required=True,
help="Feature: word, lemma, pos", metavar="TEXT",
choices=('word', 'lemma', 'pos'), default='b')
choices=('word', 'lemma', 'pos', 'ner'), default='b')
args = parser.parse_args()
......@@ -56,6 +57,7 @@ if __name__ == "__main__":
print("Path to read input files: " + str(args.inputPath))
print("Path to place output files: " + str(args.outputPath))
print("Vectorizer: " + str(args.vectorizer))
print("Feature: " + str(args.feature))
# Start time
t0 = time()
......