Showing 1 changed file with 4 additions and 3 deletions
... | @@ -58,15 +58,16 @@ if __name__ == "__main__": | ... | @@ -58,15 +58,16 @@ if __name__ == "__main__": |
58 | print("Reading documents...") | 58 | print("Reading documents...") |
59 | documents = [] | 59 | documents = [] |
60 | # Read documents from input path | 60 | # Read documents from input path |
61 | - for path, dirs, files in os.walk(args.outputPath): | 61 | + for path, dirs, files in os.walk(args.inputPath): |
62 | for file in files: | 62 | for file in files: |
63 | with open(os.path.join(args.inputPath, file), mode="r", encoding="utf-8") as iFile: | 63 | with open(os.path.join(args.inputPath, file), mode="r", encoding="utf-8") as iFile: |
64 | print("...{}".format(file)) | 64 | print("...{}".format(file)) |
65 | # Add file to document list | 65 | # Add file to document list |
66 | documents.append(iFile.read()) | 66 | documents.append(iFile.read()) |
67 | + print(" Documents: {}".format(len(documents))) | ||
67 | 68 | ||
68 | # Create vectorizer | 69 | # Create vectorizer |
69 | - print('Vectorizer: {}'.format(args.vectorizer)) | 70 | + print(' Vectorizer: {}'.format(args.vectorizer)) |
70 | if args.vectorizer == "b": | 71 | if args.vectorizer == "b": |
71 | # Binary vectorizer | 72 | # Binary vectorizer |
72 | vectorizer = CountVectorizer(ngram_range=(1, 1), binary=True) | 73 | vectorizer = CountVectorizer(ngram_range=(1, 1), binary=True) |
... | @@ -78,7 +79,7 @@ if __name__ == "__main__": | ... | @@ -78,7 +79,7 @@ if __name__ == "__main__": |
78 | vectorizer = TfidfVectorizer(ngram_range=(1, 1)) | 79 | vectorizer = TfidfVectorizer(ngram_range=(1, 1)) |
79 | 80 | ||
80 | matrix = csr_matrix(vectorizer.fit_transform(documents), dtype='double') | 81 | matrix = csr_matrix(vectorizer.fit_transform(documents), dtype='double') |
81 | - print(' matrix.shape: ', matrix.shape) | 82 | + print(' matrix.shape: ', matrix.shape) |
82 | 83 | ||
83 | with open(os.path.join(args.outputPath, "report-vectorizer.{}.txt".format(args.vectorizer)), encoding="utf-8", mode="w") as oFile: | 84 | with open(os.path.join(args.outputPath, "report-vectorizer.{}.txt".format(args.vectorizer)), encoding="utf-8", mode="w") as oFile: |
84 | oFile.write("Vectorizer: {}".format(args.vectorizer)) | 85 | oFile.write("Vectorizer: {}".format(args.vectorizer)) | ... | ... |
-
Please register or log in to post a comment