Toggle navigation
Toggle navigation
This project
Loading...
Sign in
Carlos-Francisco Méndez-Cruz
/
sentence-representation-word-embeddings
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Snippets
Network
Create a new issue
Builds
Commits
Issue Boards
Authored by
Carlos-Francisco Méndez-Cruz
2018-03-09 11:52:08 -0600
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
32a7185ba5506518f7a245ddbba2012ece4c3af9
32a7185b
1 parent
9bcf2f7a
WISSE example
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
155 additions
and
0 deletions
sentence-representation/plot_Vectors.py
sentence-representation/plot_Vectors.py
0 → 100644
View file @
32a7185
# -*- coding: UTF-8 -*-
import
os
from
optparse
import
OptionParser
import
sys
from
time
import
time
import
matplotlib.pyplot
as
plt
import
re
import
numpy
as
np
import
matplotlib
from
sklearn.metrics.pairwise
import
cosine_similarity
matplotlib
.
use
(
'Qt4Agg'
)
__author__
=
'CMendezC'
# Objective: Plot vectors into 2D and 3D
# with a color for vectors using different transformations
# Parameters:
# 1) --vectorPath Path to read vectors.
# 2) --vectorFile File to read vectors.
# 3) --outputPath Path to place plot files.
# 4) --outputFormat Plot file format: pdf, png
# 5) --absoluteValue Employ absolute values in vectors
# Ouput:
# 1) Plots
# Execution:
# C:\Anaconda3\python plot_Vectors_LSA.py
# --outputPath C:\Users\cmendezc\Documents\GENOMICAS\GENSOR_UNITS\wordEmbeddings\lsa\plots
# --vectorPath C:\Users\cmendezc\Documents\GENOMICAS\GENSOR_UNITS\wordEmbeddings\lsa
# --vectorFile GU_lsa_local_vectors_2T.txt
# --absoluteValue
# --outputFormat pdf
# C:\Anaconda3\python plot_Vectors_LSA.py --outputPath C:\Users\cmendezc\Documents\GENOMICAS\GENSOR_UNITS\wordEmbeddings\lsa\plots --vectorPath C:\Users\cmendezc\Documents\GENOMICAS\GENSOR_UNITS\wordEmbeddings\lsa --vectorFile GU_lsa_local_vectors_2T.txt --absoluteValue --outputFormat pdf
# C:\Anaconda3\python plot_Vectors_LSA.py --outputPath C:\Users\cmendezc\Documents\GENOMICAS\GENSOR_UNITS\wordEmbeddings\lsa\plots --vectorPath C:\Users\cmendezc\Documents\GENOMICAS\GENSOR_UNITS\wordEmbeddings\lsa --vectorFile GU_lsa_local_vectors_10T.txt --absoluteValue --outputFormat pdf
# C:\Anaconda3\python plot_Vectors_LSA.py --outputPath C:\Users\cmendezc\Documents\GENOMICAS\GENSOR_UNITS\wordEmbeddings\lsa\plots --vectorPath C:\Users\cmendezc\Documents\GENOMICAS\GENSOR_UNITS\wordEmbeddings\lsa --vectorFile GU_lsa_local_vectors_36T.txt --absoluteValue --outputFormat pdf
# C:\Anaconda3\python plot_Vectors_LSA.py --outputPath C:\Users\cmendezc\Documents\GENOMICAS\GENSOR_UNITS\wordEmbeddings\lsa\plots --vectorPath C:\Users\cmendezc\Documents\GENOMICAS\GENSOR_UNITS\wordEmbeddings\lsa --vectorFile GU_lsa_local_vectors_88T.txt --absoluteValue --outputFormat pdf
# C:\Anaconda3\python plot_Vectors_LSA.py --outputPath C:\Users\cmendezc\Documents\GENOMICAS\GENSOR_UNITS\wordEmbeddings\lsa\plots --vectorPath C:\Users\cmendezc\Documents\GENOMICAS\GENSOR_UNITS\wordEmbeddings\lsa --vectorFile GU_lsa_local_vectors_120T.txt --absoluteValue --outputFormat pdf
###########################################################
# MAIN PROGRAM #
###########################################################
if
__name__
==
"__main__"
:
# Parameter definition
parser
=
OptionParser
()
parser
.
add_option
(
"--vectorPath"
,
dest
=
"vectorPath"
,
help
=
"Path to read vector file"
,
metavar
=
"PATH"
)
parser
.
add_option
(
"--vectorFile"
,
dest
=
"vectorFile"
,
help
=
"File to read vectors"
,
metavar
=
"FILE"
)
parser
.
add_option
(
"--outputPath"
,
dest
=
"outputPath"
,
help
=
"Path to place clustering classified files"
,
metavar
=
"PATH"
)
parser
.
add_option
(
"--outputFormat"
,
dest
=
"outputFormat"
,
choices
=
(
'pdf'
,
'png'
),
help
=
"Plot output format"
,
metavar
=
"PATH"
)
parser
.
add_option
(
"--absoluteValue"
,
default
=
False
,
action
=
"store_true"
,
dest
=
"absoluteValue"
,
help
=
"Use vector absolute values?"
)
(
options
,
args
)
=
parser
.
parse_args
()
if
len
(
args
)
>
0
:
parser
.
error
(
"None parameters indicated."
)
sys
.
exit
(
1
)
# Printing parameter values
print
(
'-------------------------------- PARAMETERS --------------------------------'
)
print
(
"Path to read vector file: "
+
str
(
options
.
vectorPath
))
print
(
"File to read vectors: "
+
str
(
options
.
vectorFile
))
print
(
"Path to write plots: "
+
str
(
options
.
outputPath
))
print
(
"Plot output format: "
+
str
(
options
.
outputFormat
))
print
(
"Use vector absolute values? "
+
str
(
options
.
absoluteValue
))
#regexLen = re.compile(r'_(?P<vectorLen>[0-9]+)T')
listVectors
=
[]
listLabels
=
[]
print
(
"Reading vectors..."
)
#result = regexLen.search(options.vectorFile)
#vectorLen = 0
#if result:
# vectorLen = int(result.group('vectorLen'))
# print("Vector vectorLen: {}".format(vectorLen))
#else:
# print("None vectorLen mentioned within name file!")
# quit()
with
open
(
os
.
path
.
join
(
options
.
vectorPath
,
options
.
vectorFile
),
mode
=
"r"
,
encoding
=
'utf8'
)
as
iFile
:
for
line
in
iFile
.
readlines
():
line
=
line
.
strip
(
'
\r\n
'
)
listLine
=
line
.
split
()
# print("Len listLine: {}".format(len(listLine)))
label
=
listLine
[
0
][:
12
]
# print(" Label: {}".format(label))
vector
=
[]
listValues
=
listLine
[
1
:]
# print(" Len listValues: {}".format(len(listValues)))
#if len(listValues) != vectorLen:
# print("Vector vectorLen does not match: {}".format(label))
# continue
for
elem
in
listValues
:
if
options
.
absoluteValue
:
vector
.
append
(
abs
(
float
(
elem
)))
else
:
vector
.
append
(
float
(
elem
))
listLabels
.
append
(
label
)
listVectors
.
append
(
vector
)
print
(
" Reading vectors done!"
)
print
(
" Len vectors: "
+
str
(
len
(
listVectors
)))
print
(
" Len labels: "
+
str
(
len
(
listLabels
)))
similarityMatrix
=
cosine_similarity
(
np
.
array
(
listVectors
))
print
(
"similarityMatrix shape: {}"
.
format
(
similarityMatrix
.
shape
))
t0
=
time
()
print
(
"Plotting heatmap..."
)
# fig, ax = plt.subplots()
fig
=
plt
.
figure
()
ax
=
fig
.
add_subplot
(
111
)
# heatmap = ax.pcolor(similarityMatrix, cmap=plt.cm.Reds, alpha=0.8)
heatmap
=
ax
.
pcolor
(
similarityMatrix
,
cmap
=
plt
.
cm
.
Reds
)
fig
=
plt
.
gcf
()
fig
.
set_size_inches
(
16
,
16
)
ax
.
set_frame_on
(
False
)
ax
.
set_yticks
(
np
.
arange
(
similarityMatrix
.
shape
[
0
])
+
0.5
,
minor
=
False
)
ax
.
set_xticks
(
np
.
arange
(
similarityMatrix
.
shape
[
1
])
+
0.5
,
minor
=
False
)
ax
.
invert_yaxis
()
ax
.
xaxis
.
tick_top
()
ax
.
set_xticklabels
(
listLabels
,
minor
=
False
,
size
=
'xx-small'
)
ax
.
set_yticklabels
(
listLabels
,
minor
=
False
,
size
=
'xx-small'
)
plt
.
xticks
(
rotation
=
90
)
ax
.
grid
(
False
)
# Turn off all the ticks
ax
=
plt
.
gca
()
for
t
in
ax
.
xaxis
.
get_major_ticks
():
t
.
tick1On
=
False
t
.
tick2On
=
False
for
t
in
ax
.
yaxis
.
get_major_ticks
():
t
.
tick1On
=
False
t
.
tick2On
=
False
fig
.
tight_layout
()
if
options
.
absoluteValue
:
fileName
=
options
.
vectorFile
.
replace
(
'.txt'
,
'.abs.'
+
options
.
outputFormat
)
else
:
fileName
=
options
.
vectorFile
.
replace
(
'.txt'
,
'.'
+
options
.
outputFormat
)
fig
.
savefig
(
os
.
path
.
join
(
options
.
outputPath
,
fileName
))
# plt.axis('tight')
# plt.show()
# plt.savefig('test.png', bbox_inches='tight')
print
(
" Plotting heatmap done in
%
fs"
%
(
time
()
-
t0
))
Please
register
or
login
to post a comment