Showing
1 changed file
with
115 additions
and
0 deletions
| 1 | +import os | ||
| 2 | +from optparse import OptionParser | ||
| 3 | +import sys | ||
| 4 | +from time import time | ||
| 5 | +import re | ||
| 6 | +import numpy as np | ||
| 7 | + | ||
| 8 | +# Objective: Assign each vector to a group according to the component with the highest absolute value | ||
| 9 | + | ||
| 10 | +# Parameters: | ||
| 11 | +# 1) --vectorPath Path to read vectors. | ||
| 12 | +# 2) --vectorFile File to read vectors. | ||
| 13 | +# 3) --outputPath Path to place output files. | ||
| 14 | +# 4) --groups Number of groups | ||
| 15 | + | ||
| 16 | +# Output: | ||
| 17 | +# 1) Two files with groups: one label per line (.grps.txt) and one group per row (.grps-rows.txt) | ||
| 18 | + | ||
| 19 | +# Execution: | ||
| 20 | +# python plot_Vectors_LSA_structured_heatmap.py --outputPath /home/compu2/bionlp/lcg-faaa/agrupamiento-datos-categoricos --vectorPath /home/compu2/bionlp/lcg-faaa/agrupamiento-datos-categoricos --vectorFile vectors_file.txt --groups 2 | ||
| 21 | + | ||
| 22 | +########################################################### | ||
| 23 | +# MAIN PROGRAM # | ||
| 24 | +########################################################### | ||
| 25 | + | ||
def getGroup(v):
    """Return the index of the largest component of ``v``.

    When the largest component equals zero, ``len(v)`` is returned instead,
    i.e. an index one past the last valid component.
    """
    largest = np.max(v)
    return np.argmax(v) if largest != 0 else len(v)
| 32 | + | ||
def getGroupSign(v):
    """Return ``(index, sign)`` for the component of ``v`` with the largest absolute value.

    ``sign`` is the first five characters of the textual form of that
    component, wrapped in parentheses. When every component is zero the
    result is ``(len(v), '(+/-)')``.
    """
    magnitudes = list(map(abs, v))
    if np.max(magnitudes) != 0:
        position = np.argmax(magnitudes)
        # Index the original vector so the reported text keeps its sign.
        return position, '(' + str(v[position])[:5] + ')'
    return len(magnitudes), '(+/-)'
| 43 | + | ||
if __name__ == "__main__":
    # Script entry point: read labelled vectors (one "label<TAB>v1 v2 ..." per
    # line), assign each to the group of its largest-magnitude component, and
    # write the grouping to two text files in the output path.

    # Parameter definition
    parser = OptionParser()
    parser.add_option("--vectorPath", dest="vectorPath",
                      help="Path to read vector file", metavar="PATH")
    parser.add_option("--vectorFile", dest="vectorFile",
                      help="File to read vectors", metavar="FILE")
    parser.add_option("--outputPath", dest="outputPath",
                      help="Path to place clustering classified files", metavar="PATH")
    parser.add_option("--groups", type="int",
                      dest="groups", default=0,
                      help="Groups", metavar="N")

    (options, args) = parser.parse_args()
    # parser.error() prints the usage message and terminates the program
    # itself, so no explicit sys.exit() is needed after it.
    if args:
        parser.error("Unexpected positional arguments: " + " ".join(args))
    for required in ("vectorPath", "vectorFile", "outputPath"):
        if getattr(options, required) is None:
            parser.error("Missing required parameter --" + required)
    if options.groups < 1:
        parser.error("--groups must be a positive integer")

    # Printing parameter values
    print('-------------------------------- PARAMETERS --------------------------------')
    print("Path to read vector file: " + str(options.vectorPath))
    print("File to read vectors: " + str(options.vectorFile))
    print("Output path: " + str(options.outputPath))
    print("Groups:" + str(options.groups))

    listVectors = []   # absolute-value version of every accepted vector
    listLabels = []    # label of each vector with the winning value appended
    listGroup = []     # group index of each vector
    vectorLen = options.groups
    t0 = time()
    with open(os.path.join(options.vectorPath, options.vectorFile), mode="r", encoding='utf8') as iFile:
        for line in iFile:
            if line.startswith("#"):
                continue
            line = line.strip('\r\n')
            if not line.strip():
                # Blank lines carry no vector.
                continue
            listLine = line.split('\t')
            label = listLine[0]
            if len(listLine) < 2:
                # No tab separator: there is no values column to parse.
                print("Vector values missing: {}".format(label))
                continue
            listValues = listLine[1].split()
            if len(listValues) != vectorLen:
                print("Vector vectorLen does not match: {}".format(label))
                continue
            try:
                vectorOrig = [float(elem) for elem in listValues]
            except ValueError:
                print("Vector has non-numeric values: {}".format(label))
                continue
            listVectors.append([abs(value) for value in vectorOrig])
            # getGroupSign returns the (index, sign) pair used below;
            # getGroup returns a bare index and cannot be unpacked.
            group, sign = getGroupSign(vectorOrig)
            listGroup.append(group)
            listLabels.append(label + sign)
    print("  Reading vectors done!")
    print("  Len vectors: " + str(len(listVectors)))
    print("  Len labels: " + str(len(listLabels)))

    # Derive output names from the extension-less input name so the output can
    # never coincide with the input file, even when it does not end in '.txt'.
    baseName = os.path.splitext(options.vectorFile)[0]
    sortedPairs = sorted(zip(listGroup, listLabels))

    # One "group<TAB>label" line per vector, ordered by group and then label.
    with open(os.path.join(options.outputPath, baseName + '.grps.txt'), mode='w', encoding='utf8') as oFile:
        for g, l in sortedPairs:
            oFile.write('{}\t{}\n'.format(g, l))

    # One "group<TAB>label, label, ..." row per group that actually occurs.
    labelsByGroup = {}
    for g, l in sortedPairs:
        labelsByGroup.setdefault(g, []).append(l)
    with open(os.path.join(options.outputPath, baseName + '.grps-rows.txt'), mode='w', encoding='utf8') as oFile:
        for g in sorted(labelsByGroup):
            oFile.write('{}\t{}\n'.format(g, ', '.join(labelsByGroup[g])))

    print("  Processing done in %fs" % (time() - t0))
-
Please register or login to post a comment