Showing 1 changed file with 6 additions and 0 deletions
... | @@ -7,6 +7,7 @@ import argparse | ... | @@ -7,6 +7,7 @@ import argparse |
7 | import sys | 7 | import sys |
8 | from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer | 8 | from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer |
9 | from scipy.sparse import csr_matrix | 9 | from scipy.sparse import csr_matrix |
10 | +from sklearn.metrics.pairwise import cosine_similarity | ||
10 | 11 | ||
11 | __author__ = 'CMendezC' | 12 | __author__ = 'CMendezC' |
12 | 13 | ||
... | @@ -81,10 +82,15 @@ if __name__ == "__main__": | ... | @@ -81,10 +82,15 @@ if __name__ == "__main__": |
81 | matrix = csr_matrix(vectorizer.fit_transform(documents), dtype='double') | 82 | matrix = csr_matrix(vectorizer.fit_transform(documents), dtype='double') |
82 | print(' matrix.shape: ', matrix.shape) | 83 | print(' matrix.shape: ', matrix.shape) |
83 | 84 | ||
85 | + similarityMatrix = cosine_similarity(matrix) | ||
86 | + print(" Cosine similarity matrix shape: {}".format(similarityMatrix.shape)) | ||
87 | + | ||
84 | with open(os.path.join(args.outputPath, "report-vectorizer.{}.txt".format(args.vectorizer)), encoding="utf-8", mode="w") as oFile: | 88 | with open(os.path.join(args.outputPath, "report-vectorizer.{}.txt".format(args.vectorizer)), encoding="utf-8", mode="w") as oFile: |
85 | oFile.write("Vectorizer: {}\n".format(args.vectorizer)) | 89 | oFile.write("Vectorizer: {}\n".format(args.vectorizer)) |
86 | oFile.write(str(vectorizer.get_feature_names())) | 90 | oFile.write(str(vectorizer.get_feature_names())) |
87 | oFile.write("\n") | 91 | oFile.write("\n") |
88 | oFile.write(str(matrix.toarray())) | 92 | oFile.write(str(matrix.toarray())) |
93 | + oFile.write("\n") | ||
94 | + oFile.write(str(similarityMatrix.toarray())) | ||
89 | 95 | ||
90 | print("Feature extraction and vectorizer in: %fs" % (time() - t0)) | 96 | print("Feature extraction and vectorizer in: %fs" % (time() - t0)) | ... | ... |
-
Please register or login to post a comment