Showing
1 changed file
with
115 additions
and
0 deletions
1 | +import os | ||
2 | +from optparse import OptionParser | ||
3 | +import sys | ||
4 | +from time import time | ||
5 | +import re | ||
6 | +import numpy as np | ||
7 | + | ||
8 | +# Objective: Obtain groups according to the component with the highest absolute value | ||
9 | + | ||
10 | +# Parameters: | ||
11 | +# 1) --vectorPath Path to read vectors. | ||
12 | +# 2) --vectorFile File to read vectors. | ||
13 | +# 3) --outputPath Path to place output files. | ||
14 | +# 4) --groups Number of groups | ||
15 | + | ||
16 | +# Output: | ||
17 | +# 1) File with groups and plots | ||
18 | + | ||
19 | +# Execution: | ||
20 | +# python plot_Vectors_LSA_structured_heatmap.py --outputPath /home/compu2/bionlp/lcg-faaa/agrupamiento-datos-categoricos --vectorPath /home/compu2/bionlp/lcg-faaa/agrupamiento-datos-categoricos --vectorFile vectors_file.txt --groups 2 | ||
21 | + | ||
22 | +########################################################### | ||
23 | +# MAIN PROGRAM # | ||
24 | +########################################################### | ||
25 | + | ||
def getGroup(v):
    """Return the group index of vector ``v`` based on its largest component.

    Parameters:
        v: sequence of numbers (list or 1-D array).

    Returns:
        int: position of the maximum component. When the maximum is exactly
        0, ``len(v)`` is returned instead, i.e. one past the last valid
        index, which acts as an extra "no group" bucket. An empty vector
        also yields ``len(v)`` (0) instead of raising ``ValueError``.
    """
    # np.argmax / np.max raise ValueError on empty input; treat an empty
    # vector like the "no dominant component" case.
    if len(v) == 0:
        return 0
    # A single argmax pass is enough: the value at that position is the max.
    index_max = int(np.argmax(v))
    if v[index_max] == 0:
        return len(v)
    return index_max
32 | + | ||
def getGroupSign(v):
    """Return the group index of ``v`` by absolute value, plus a value tag.

    Parameters:
        v: sequence of numbers (list or 1-D array).

    Returns:
        tuple ``(index_max, sign)``:
          * ``index_max`` (int): position of the component with the largest
            absolute value, or ``len(v)`` when every component is 0 (or the
            vector is empty).
          * ``sign`` (str): the winning component rendered in parentheses,
            e.g. ``'(-0.75)'``, or ``'(+/-)'`` when there is no winner.
    """
    # np.max / np.argmax raise ValueError on empty input; report "no group".
    if len(v) == 0:
        return 0, '(+/-)'
    index_max = int(np.argmax(np.abs(v)))
    value = v[index_max]
    if value == 0:
        return len(v), '(+/-)'
    text = str(value)
    if 'e' in text:
        # Scientific notation: slicing the repr would cut off the exponent
        # (1.2345e-10 -> '1.234'), so format it compactly instead.
        text = '{:.1e}'.format(value)
    else:
        # Plain decimal repr: keep the original 5-character abbreviation.
        text = text[:5]
    return index_max, '(' + text + ')'
43 | + | ||
if __name__ == "__main__":
    # Command-line driver: reads "label<TAB>v1 v2 ... vN" lines, assigns each
    # vector to the group of its component with the largest absolute value
    # and writes two reports (one label per line, and one group per line).

    # Parameter definition
    parser = OptionParser()
    parser.add_option("--vectorPath", dest="vectorPath",
                      help="Path to read vector file", metavar="PATH")
    parser.add_option("--vectorFile", dest="vectorFile",
                      help="File to read vectors", metavar="FILE")
    parser.add_option("--outputPath", dest="outputPath",
                      help="Path to place clustering classified files", metavar="PATH")
    parser.add_option("--groups", type="int",
                      dest="groups", default=0,
                      help="Groups", metavar="N")

    (options, args) = parser.parse_args()
    if args:
        # parser.error() prints usage plus the message and exits by itself.
        parser.error("Unexpected positional arguments: " + " ".join(args))
    # Fail early with a clear message instead of a TypeError in os.path.join.
    for required in ("vectorPath", "vectorFile", "outputPath"):
        if not getattr(options, required):
            parser.error("Missing required option --" + required)

    # Printing parameter values
    print('-------------------------------- PARAMETERS --------------------------------')
    print("Path to read vector file: " + str(options.vectorPath))
    print("File to read vectors: " + str(options.vectorFile))
    print("Output path: " + str(options.outputPath))
    print("Groups:" + str(options.groups))

    listVectors = []
    listLabels = []
    listGroup = []
    vectorLen = int(options.groups)
    t0 = time()
    with open(os.path.join(options.vectorPath, options.vectorFile), mode="r", encoding='utf8') as iFile:
        # Iterate the file lazily; no need to load every line up front.
        for line in iFile:
            if line.startswith("#"):
                continue
            line = line.strip('\r\n')
            if not line:
                # Blank lines carry no vector.
                continue
            listLine = line.split('\t')
            if len(listLine) < 2:
                print("Line without tab-separated vector skipped: {}".format(line))
                continue
            label = listLine[0]
            listValues = listLine[1].split()
            if len(listValues) != vectorLen:
                print("Vector vectorLen does not match: {}".format(label))
                continue
            vectorOrig = [float(elem) for elem in listValues]
            listVectors.append([abs(value) for value in vectorOrig])
            # getGroupSign returns (group index, value tag); getGroup returns a
            # bare index and cannot be unpacked into the two values used below.
            groupIndex, groupSign = getGroupSign(vectorOrig)
            listGroup.append(groupIndex)
            listLabels.append(label + groupSign)
    print(" Reading vectors done!")
    print(" Len vectors: " + str(len(listVectors)))
    print(" Len labels: " + str(len(listLabels)))

    # Derive output names from the input name. Only a trailing ".txt" is
    # dropped; any other name keeps its full text so the output can never
    # collide with (and overwrite) the input file.
    baseName, extension = os.path.splitext(options.vectorFile)
    if extension != '.txt':
        baseName = options.vectorFile

    pairs = sorted(zip(listGroup, listLabels))

    with open(os.path.join(options.outputPath, baseName + '.grps.txt'), mode='w', encoding='utf8') as oFile:
        for g, l in pairs:
            oFile.write('{}\t{}\n'.format(g, l))

    # Collect labels per group so only groups that actually have members are
    # written (no empty placeholder row for group 0).
    labelsByGroup = {}
    for g, l in pairs:
        labelsByGroup.setdefault(g, []).append(l)

    with open(os.path.join(options.outputPath, baseName + '.grps-rows.txt'), mode='w', encoding='utf8') as oFile:
        for g in sorted(labelsByGroup):
            oFile.write('{}\t{}\n'.format(g, ', '.join(labelsByGroup[g])))

    print(" Processing done in %fs" % (time() - t0))
-
Please register or login to post a comment