Ignacio Arroyo

Added cluster persistence plots

1 -,iarroyof,bisnaga,19.02.2018 13:28,file:///home/iarroyof/.config/libreoffice/4;
...\ No newline at end of file ...\ No newline at end of file
1 +,iarroyof,bisnaga,20.02.2018 21:25,file:///home/iarroyof/.config/libreoffice/4;
...\ No newline at end of file ...\ No newline at end of file
......
1 +
2 +from pdb import set_trace as st
3 +import os
4 +import matplotlib.pyplot as plt
5 +
6 +
def _parse_cluster_line(line):
    """Parse one cluster line into a ``{TF name: [weight]}`` dict.

    The line is split on commas; each item is expected to look like
    ``NAME(weight)``.  The first item may be preceded by a group id and a
    tab, which is skipped because only purely alphabetic tokens (hyphens
    allowed) are accepted as names.
    """
    members = {}
    for item in line.split(","):
        name_part = item.strip().partition("(")[0]
        name = [s for s in name_part.partition("\t")
                if s.replace("-", "").isalpha()][0]
        weight = item.partition("(")[-1].strip().strip(")")
        members[name] = [float(weight)]
    return members


def _read_clusters(gropus_dir, index):
    """Return the lines of partition *index*'s cluster file, minus the last."""
    name = "gus_originales_" + str(index).zfill(3) + ".cls"
    with open(os.path.join(gropus_dir, name), "r") as f:
        lines = f.readlines()
    # The original code always discards the final line of the file.
    return lines[:-1]


def analyze_pair(group_anlized, gropus_dir, rev, eigenvectors):
    """Track how a group of TFs grows through coarser partitions.

    The group ``group_anlized`` is read from the cluster file of partition
    ``rev``.  Then partitions ``rev - 1`` down to ``2`` are scanned: whenever
    a cluster contains every TF tracked so far, its members are merged into
    the tracked set.  One snapshot is recorded per partition.

    Parameters
    ----------
    group_anlized : int
        Id of the group to follow (first tab-separated field of its line).
    gropus_dir : str
        Directory holding the ``gus_originales_NNN.cls`` files.
    rev : int
        Index of the starting partition.
    eigenvectors : str
        Path of the eigenvector file; the line starting with
        ``"(<group_anlized - 1>,"`` is looked up and printed.

    Returns
    -------
    dict
        ``"part"``: tuple of partition indices, ``"tfs"``: tuple of TF-name
        lists, ``"n_tfs"``: tuple of TF counts.  All three are empty tuples
        when ``rev <= 2`` (no partition to scan).

    Raises
    ------
    IndexError
        If the eigenvector line or the group line cannot be found.
    """
    eigen_prefix = "(" + str(group_anlized - 1) + ","
    with open(eigenvectors) as f:
        eigenvector = next(
            (line for line in f if line.startswith(eigen_prefix)), None)
    if eigenvector is None:
        raise IndexError("no eigenvector line starting with %r in %s"
                         % (eigen_prefix, eigenvectors))

    # Match the whole id field: a bare prefix test would let group 1 pick
    # up the line that belongs to group 10, 11, ...
    group_id = str(group_anlized)
    tfs = next((c for c in _read_clusters(gropus_dir, rev)
                if c.split("\t")[0].strip() == group_id), None)
    if tfs is None:
        raise IndexError("group %s not found in partition %s"
                         % (group_id, rev))

    print("The analized group = %s" % tfs.split('\t')[0])
    print("The corresponding TFs: %s" % tfs.split('\t')[1])
    print("The corresponding eigenvector: %s" % eigenvector)

    tracked = _parse_cluster_line(tfs)

    history = []
    for i in reversed(range(2, rev)):
        for line in _read_clusters(gropus_dir, i):
            if not line.strip():
                continue
            members = _parse_cluster_line(line)
            # Only a cluster that contains every tracked TF is merged.
            if set(tracked) <= set(members):
                for name, weights in members.items():
                    if name in tracked:
                        tracked[name] += weights
                    else:
                        tracked[name] = weights

        history.append((i, list(tracked), len(tracked)))

    # zip() is an iterator on Python 3, so unpack instead of subscripting.
    if history:
        parts, tf_lists, counts = zip(*history)
    else:
        parts = tf_lists = counts = ()

    return {"part": parts, "tfs": tf_lists, "n_tfs": counts}
70 +
71 +
def get_cmap(n, name='hsv'):
    """Return the colormap *name* resampled to *n* entries.

    The result is callable with an integer index in ``0 .. n-1`` and yields
    an RGBA color; *name* must be a registered Matplotlib colormap name.

    ``plt.get_cmap`` is used instead of ``plt.cm.get_cmap`` because the
    latter was deprecated in Matplotlib 3.7 and removed in 3.9, while the
    pyplot function accepts the same ``(name, lut)`` arguments.
    """
    return plt.get_cmap(name, n)
76 +
# Index of the starting partition passed to analyze_pair.
rev = 120

eigenvectors = "one-by-one/eigen_vectors/eigenBOs_120-eigens.txt"
gropus_dir = "one-by-one/groups/"

# Candidates that start with 2 or 3 TFs in the 120-group partition.
groups_analyzed = [
    97, 80, 74, 68, 63, 53, 52, 49, 47, 44, 43, 40, 39, 38, 37,
    36, 34, 32, 31, 30, 29, 27, 26, 24, 23, 21, 20, 19, 18, 15,
    13, 12, 9,
]
# One color per analysed group.
cmap = get_cmap(len(groups_analyzed))

plots = []

for g in groups_analyzed:
    partition = analyze_pair(g, gropus_dir, rev, eigenvectors)
    plots.append(partition)

fig = plt.figure()
ax1 = fig.add_subplot(111)

for i, p in enumerate(plots):
    # Each curve is labelled with the TF names of its first snapshot.
    label = " ".join(p['tfs'][0])
    # The "Zur ZntR" curve is drawn thicker and with diamond markers.
    width, mark = (4, "D") if label == "Zur ZntR" else (2, "")
    ax1.plot(p['part'], p['n_tfs'], c=cmap(i), linewidth=width,
             marker=mark, label=label)

plt.legend(loc='upper right')
plt.title("TF pair persistence through partitions of model resolution")
plt.xlabel("Partition")
plt.ylabel("Number of TFs")
plt.show()
This diff is collapsed. Click to expand it.