Showing
1 changed file
with
155 additions
and
0 deletions
sentence-representation/plot_Vectors.py
0 → 100644
# -*- coding: UTF-8 -*-
import os
from optparse import OptionParser
import sys
from time import time
import re
import numpy as np
import matplotlib

# The backend must be chosen before pyplot is imported. This script only
# writes plot files (it never opens a window), so the non-interactive 'Agg'
# backend is sufficient and does not depend on a Qt installation.
matplotlib.use('Agg')

import matplotlib.pyplot as plt
from sklearn.metrics.pairwise import cosine_similarity


__author__ = 'CMendezC'

# Objective: Read labelled vectors from a text file, compute their pairwise
#   cosine similarity and save the similarity matrix as a heatmap.

# Parameters:
#   1) --vectorPath Path to read vectors.
#   2) --vectorFile File to read vectors.
#   3) --outputPath Path to place plot files.
#   4) --outputFormat Plot file format: pdf, png
#   5) --absoluteValue Employ absolute values in vectors

# Output:
#   1) Heatmap plot file written to --outputPath. The file name is the vector
#      file name with its extension replaced by the output format
#      ('.abs.<format>' when --absoluteValue is given).

# Execution:
# C:\Anaconda3\python plot_Vectors.py
#   --outputPath C:\Users\cmendezc\Documents\GENOMICAS\GENSOR_UNITS\wordEmbeddings\lsa\plots
#   --vectorPath C:\Users\cmendezc\Documents\GENOMICAS\GENSOR_UNITS\wordEmbeddings\lsa
#   --vectorFile GU_lsa_local_vectors_2T.txt
#   --absoluteValue
#   --outputFormat pdf
#
# The same command line has been used with the following vector files:
#   GU_lsa_local_vectors_2T.txt
#   GU_lsa_local_vectors_10T.txt
#   GU_lsa_local_vectors_36T.txt
#   GU_lsa_local_vectors_88T.txt
#   GU_lsa_local_vectors_120T.txt


def _read_vectors(vector_file_path, absolute_value):
    """Read labelled vectors from a whitespace-separated text file.

    Each non-blank line holds a label followed by the vector components.
    Labels are truncated to their first 12 characters. Blank lines are
    skipped.

    :param vector_file_path: path of the UTF-8 text file to read.
    :param absolute_value: when true, store the absolute value of every
        component.
    :return: tuple ``(labels, vectors)``: a list of label strings and a
        list of lists of floats, in file order.
    :raises ValueError: if a component cannot be parsed as a float.
    """
    labels = []
    vectors = []
    with open(vector_file_path, mode="r", encoding='utf8') as iFile:
        for line in iFile:
            fields = line.split()
            if not fields:
                # Blank line: there is no label to index.
                continue
            values = [float(elem) for elem in fields[1:]]
            if absolute_value:
                values = [abs(value) for value in values]
            labels.append(fields[0][:12])
            vectors.append(values)
    return labels, vectors


def _build_plot_file_name(vector_file, output_format, absolute_value):
    """Return the plot file name derived from the vector file name.

    The extension of ``vector_file`` is always replaced, so the result can
    never be equal to the input file name (which could overwrite the input
    when both paths are the same directory).
    """
    base_name = os.path.splitext(vector_file)[0]
    suffix = '.abs.' if absolute_value else '.'
    return base_name + suffix + output_format


def _plot_heatmap(similarity_matrix, labels):
    """Draw ``similarity_matrix`` as a 16x16 inch heatmap and return the figure.

    Rows and columns are labelled with ``labels``; column labels are placed
    on top and rotated 90 degrees, and the first row is drawn at the top.
    """
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.pcolor(similarity_matrix, cmap=plt.cm.Reds)
    fig.set_size_inches(16, 16)
    ax.set_frame_on(False)
    # Cells span [i, i + 1], so +0.5 centres each tick on its cell.
    ax.set_yticks(np.arange(similarity_matrix.shape[0]) + 0.5, minor=False)
    ax.set_xticks(np.arange(similarity_matrix.shape[1]) + 0.5, minor=False)
    ax.invert_yaxis()
    ax.xaxis.tick_top()
    ax.set_xticklabels(labels, minor=False, size='xx-small')
    ax.set_yticklabels(labels, minor=False, size='xx-small')
    plt.setp(ax.get_xticklabels(), rotation=90)
    ax.grid(False)
    # Hide the tick marks but keep the labels. tick_params is used instead of
    # the Tick.tick1On / tick2On attributes, which newer Matplotlib removed.
    ax.tick_params(axis='both', which='major',
                   bottom=False, top=False, left=False, right=False)
    fig.tight_layout()
    return fig


###########################################################
#                       MAIN PROGRAM                      #
###########################################################

if __name__ == "__main__":
    # Parameter definition
    parser = OptionParser()
    parser.add_option("--vectorPath", dest="vectorPath",
                      help="Path to read vector file", metavar="PATH")
    parser.add_option("--vectorFile", dest="vectorFile",
                      help="File to read vectors", metavar="FILE")
    parser.add_option("--outputPath", dest="outputPath",
                      help="Path to place plot files", metavar="PATH")
    parser.add_option("--outputFormat", dest="outputFormat", choices=('pdf', 'png'),
                      help="Plot output format", metavar="FORMAT")
    parser.add_option("--absoluteValue", default=False,
                      action="store_true", dest="absoluteValue",
                      help="Use vector absolute values?")

    (options, args) = parser.parse_args()
    # parser.error() prints the usage message and exits by itself.
    if len(args) > 0:
        parser.error("Unexpected positional arguments: " + " ".join(args))
    for requiredOption in ("vectorPath", "vectorFile", "outputPath", "outputFormat"):
        if getattr(options, requiredOption) is None:
            parser.error("Missing required option --" + requiredOption)

    # Printing parameter values
    print('-------------------------------- PARAMETERS --------------------------------')
    print("Path to read vector file: " + str(options.vectorPath))
    print("File to read vectors: " + str(options.vectorFile))
    print("Path to write plots: " + str(options.outputPath))
    print("Plot output format: " + str(options.outputFormat))
    print("Use vector absolute values? " + str(options.absoluteValue))

    print("Reading vectors...")
    listLabels, listVectors = _read_vectors(
        os.path.join(options.vectorPath, options.vectorFile),
        options.absoluteValue)
    print(" Reading vectors done!")
    print(" Len vectors: " + str(len(listVectors)))
    print(" Len labels: " + str(len(listLabels)))

    # cosine_similarity needs a non-empty rectangular matrix; fail early with
    # a readable message instead of an error raised deep inside the library.
    if not listVectors or not listVectors[0]:
        sys.exit("No vectors found in file: " + str(options.vectorFile))
    if len(set(len(vector) for vector in listVectors)) > 1:
        sys.exit("Vectors do not all have the same length in file: "
                 + str(options.vectorFile))

    similarityMatrix = cosine_similarity(np.array(listVectors))
    print("similarityMatrix shape: {}".format(similarityMatrix.shape))

    t0 = time()
    print("Plotting heatmap...")
    fig = _plot_heatmap(similarityMatrix, listLabels)
    fileName = _build_plot_file_name(options.vectorFile, options.outputFormat,
                                     options.absoluteValue)
    fig.savefig(os.path.join(options.outputPath, fileName),
                format=options.outputFormat)
    plt.close(fig)

    print(" Plotting heatmap done in %fs" % (time() - t0))
-
Please register or login to post a comment