Toggle navigation
Toggle navigation
This project
Loading...
Sign in
Carlos-Francisco Méndez-Cruz
/
lcg-faaa
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Snippets
Network
Create a new issue
Builds
Commits
Issue Boards
Authored by
Carlos-Francisco Méndez-Cruz
2018-10-04 23:14:31 -0500
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Commit
24fffe97d159697bcdbd285e8296b69059a6aca4
24fffe97
1 parent
f1c1a26a
LSA soft clustering
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
115 additions
and
0 deletions
agrupamiento-datos-categoricos/plot_Vectors_LSA_structured_heatmap.py
agrupamiento-datos-categoricos/plot_Vectors_LSA_structured_heatmap.py
0 → 100644
View file @
24fffe9
import
os
from
optparse
import
OptionParser
import
sys
from
time
import
time
import
re
import
numpy
as
np
# Objective: Assign each vector to a group according to the component with the highest absolute value
# Parameters:
# 1) --vectorPath Path to read vectors.
# 2) --vectorFile File to read vectors.
# 3) --outputPath Path to place output files.
# 4) --groups Number of groups
# Output:
# 1) File with groups and plots
# Execution:
# python plot_Vectors_LSA_structured_heatmap.py --outputPath /home/compu2/bionlp/lcg-faaa/agrupamiento-datos-categoricos --vectorPath /home/compu2/bionlp/lcg-faaa/agrupamiento-datos-categoricos --vectorFile vectors_file.txt --groups 2
###########################################################
# MAIN PROGRAM #
###########################################################
def getGroup(v):
    """Return the group index for vector *v*.

    The group is the position of the largest component of *v*. When the
    largest component equals 0, ``len(v)`` is returned instead, i.e. one
    index past the last real component, acting as an extra "no group" bucket.
    """
    # Guard clause: a non-zero maximum means a regular component wins.
    if np.max(v) != 0:
        return np.argmax(v)
    return len(v)
def getGroupSign(v):
    """Return ``(group_index, sign_label)`` for vector *v*.

    The group is the position of the component with the largest absolute
    value. The label is that component's original (signed) value, rendered
    with ``str`` and cut to its first 5 characters, wrapped in parentheses.
    When every component is 0 the group is ``len(v)`` and the label is
    ``'(+/-)'``.
    """
    magnitudes = list(map(abs, v))
    # All-zero vector: no component dominates, use the extra bucket.
    if np.max(magnitudes) == 0:
        return len(magnitudes), '(+/-)'
    winner = np.argmax(magnitudes)
    # Keep the signed value (not the magnitude) so the label shows direction.
    return winner, '(' + str(v[winner])[:5] + ')'
if __name__ == "__main__":
    # Script entry point: read labelled vectors from a tab-separated file,
    # assign each vector to the group of its dominant component (largest
    # absolute value) and write two reports to the output path:
    #   <name>.grps<ext>       one "group<TAB>label" line per vector
    #   <name>.grps-rows<ext>  one "group<TAB>label, label, ..." line per group

    # Parameter definition
    parser = OptionParser()
    parser.add_option("--vectorPath", dest="vectorPath",
                      help="Path to read vector file", metavar="PATH")
    parser.add_option("--vectorFile", dest="vectorFile",
                      help="File to read vectors", metavar="FILE")
    parser.add_option("--outputPath", dest="outputPath",
                      help="Path to place clustering classified files", metavar="PATH")
    parser.add_option("--groups", type="int",
                      dest="groups", default=0,
                      help="Groups", metavar="N")

    (options, args) = parser.parse_args()
    if len(args) > 0:
        # Positional arguments are not accepted. parser.error() prints the
        # message and terminates the process itself, so no explicit exit
        # call is needed afterwards.
        parser.error("None parameters indicated.")

    # Fail early with a usage message instead of a TypeError from
    # os.path.join() when a path option was omitted.
    for required in ("vectorPath", "vectorFile", "outputPath"):
        if getattr(options, required) is None:
            parser.error("Missing required parameter: --" + required)

    # Printing parameter values
    print('-------------------------------- PARAMETERS --------------------------------')
    print("Path to read vector file: " + str(options.vectorPath))
    print("File to read vectors: " + str(options.vectorFile))
    print("Output path: " + str(options.outputPath))
    print("Groups:" + str(options.groups))

    listVectors = []
    listLabels = []
    listGroup = []
    # Every vector must have exactly one component per group.
    vectorLen = int(options.groups)

    t0 = time()
    with open(os.path.join(options.vectorPath, options.vectorFile), mode="r", encoding='utf8') as iFile:
        # Iterate the file lazily instead of loading every line at once.
        for line in iFile:
            if line.startswith("#"):
                continue
            line = line.strip('\r\n')
            if not line:
                # Blank lines (e.g. a trailing newline) carry no vector.
                continue
            listLine = line.split('\t')
            if len(listLine) < 2:
                # No tab separator: there is no values column to parse.
                print("Line without vector values skipped: {}".format(line))
                continue
            label = listLine[0]
            listValues = listLine[1].split()
            if len(listValues) != vectorLen:
                print("Vector vectorLen does not match: {}".format(label))
                continue
            vectorOrig = [float(elem) for elem in listValues]
            listVectors.append([abs(value) for value in vectorOrig])
            # getGroupSign returns the (index, sign label) pair consumed
            # below; getGroup returns a bare index and cannot be subscripted.
            group = getGroupSign(vectorOrig)
            # Normalise NumPy integers to plain int for sorting and output.
            listGroup.append(int(group[0]))
            listLabels.append(label + group[1])

    print(" Reading vectors done!")
    print(" Len vectors: " + str(len(listVectors)))
    print(" Len labels: " + str(len(listLabels)))

    # Insert the suffix before the extension. Unlike str.replace('.txt', ...)
    # this never yields the input file name, so the input cannot be
    # overwritten when the file does not end in '.txt'.
    baseName, extension = os.path.splitext(options.vectorFile)
    sortedPairs = sorted(zip(listGroup, listLabels))

    with open(os.path.join(options.outputPath, baseName + '.grps' + extension), mode='w', encoding='utf8') as oFile:
        for g, l in sortedPairs:
            oFile.write('{}\t{}\n'.format(g, l))

    # Collect labels per group. sortedPairs is ordered by group, so the dict
    # keeps groups in ascending order and only groups that actually occur
    # produce a row (no spurious empty row for group 0).
    labelsByGroup = {}
    for g, l in sortedPairs:
        labelsByGroup.setdefault(g, []).append(l)

    with open(os.path.join(options.outputPath, baseName + '.grps-rows' + extension), mode='w', encoding='utf8') as oFile:
        for g, groupLabels in labelsByGroup.items():
            oFile.write('{}\t{}\n'.format(g, ', '.join(groupLabels)))

    print(" Processing done in %fs" % (time() - t0))
Please
register
or
login
to post a comment