Showing
4 changed files
with
119 additions
and
1 deletions
analyze_pair.py
0 → 100644
1 | + | ||
2 | +from pdb import set_trace as st | ||
3 | +import os | ||
4 | +import matplotlib.pyplot as plt | ||
5 | + | ||
6 | + | ||
def _read_groups(gropus_dir, n):
    """Return the lines of the ``n``-group ``.cls`` file, minus its last line."""
    path = os.path.join(gropus_dir,
                        "gus_originales_" + str(n).zfill(3) + ".cls")
    with open(path, 'r') as f:
        # The final line of each file is discarded, as the original code did.
        return f.readlines()[:-1]


def _first_startswith(lines, prefix):
    """Return the first line beginning with ``prefix``; IndexError if none."""
    for line in lines:
        if line.startswith(prefix):
            return line
    raise IndexError("no line starts with %r" % prefix)


def _parse_cluster(line):
    """Parse one comma-separated ``NAME(score)`` line into ``{name: [score]}``."""
    cluster = {}
    for entry in line.split(","):
        # The first entry of a line is presumably prefixed by "<group id>\t";
        # keep the first purely alphabetic piece (hyphens are tolerated).
        name = [piece
                for piece in entry.strip().partition("(")[0].partition("\t")
                if piece.replace('-', '').isalpha()][0]
        cluster[name] = [float(entry.partition("(")[-1].strip().strip(")"))]
    return cluster


def analyze_pair(group_anlized, gropus_dir, rev, eigenvectors):
    """Track how the TFs of one group merge as the partition gets coarser.

    The TFs of group ``group_anlized`` are read from the ``rev``-group file
    ``gus_originales_<rev>.cls`` in ``gropus_dir``.  Partitions ``rev - 1``
    down to ``2`` are then scanned in that order; whenever a cluster contains
    every tracked TF, the remaining members of that cluster are added to the
    tracked set.  One sample is recorded per scanned partition.

    Args:
        group_anlized: integer id of the group to follow.  The group line is
            selected by the prefix ``str(group_anlized)``; the eigenvector
            line by the prefix ``"(<group_anlized - 1>,"``.
        gropus_dir: directory holding the ``gus_originales_NNN.cls`` files
            (a trailing path separator is optional).
        rev: number of groups of the starting partition.
        eigenvectors: path of the eigenvector text file; the matching line is
            only printed.

    Returns:
        dict with three parallel tuples: ``"part"`` (partition sizes),
        ``"tfs"`` (list of tracked TF names after each partition) and
        ``"n_tfs"`` (their counts).  All three are empty when ``rev <= 2``.

    Raises:
        IndexError: if no eigenvector/group line matches, or a line cannot be
            parsed into TF names.
        ValueError: if a score is not a valid float.
    """
    with open(eigenvectors) as f:
        eigenvector = _first_startswith(
            f, "(" + str(group_anlized - 1) + ",")

    # NOTE(review): prefix matching relies on the group lines being listed in
    # ascending id order (e.g. "9" would also match "97") -- confirm.
    tfs = _first_startswith(_read_groups(gropus_dir, rev), str(group_anlized))

    print("The analized group = %s" % tfs.split('\t')[0])
    print("The corresponding TFs: %s" % tfs.split('\t')[1])
    print("The corresponding eigenvector: %s" % eigenvector)

    tracked = _parse_cluster(tfs)

    partition = []
    for i in reversed(range(2, rev)):
        for line in _read_groups(gropus_dir, i):
            if not line.strip():
                continue  # tolerate blank lines inside a partition file
            cluster = _parse_cluster(line)
            if set(tracked) <= set(cluster):
                # Every tracked TF lives in this cluster: accumulate its
                # scores and absorb the cluster's other members.
                for name in list(tracked):
                    tracked[name] += cluster[name]
                # Iterate the cluster in file order (not a set) so the order
                # of newly added names is deterministic.
                for name, scores in cluster.items():
                    if name not in tracked:
                        tracked[name] = scores

        # One sample per partition, taken after all of its clusters were seen.
        partition.append((i, list(tracked), len(tracked)))

    if partition:
        # zip() is an iterator on Python 3, so unpack instead of indexing it.
        parts, tf_names, n_tfs = zip(*partition)
    else:
        parts = tf_names = n_tfs = ()

    return {"part": parts, "tfs": tf_names, "n_tfs": n_tfs}
70 | + | ||
71 | + | ||
def get_cmap(n, name='hsv'):
    '''Returns a colormap with n entries, so that each index in 0, 1, ..., n-1
    maps to its own RGB color; the keyword argument name must be a standard
    mpl colormap name.

    Uses plt.get_cmap rather than plt.cm.get_cmap: the latter was deprecated
    in Matplotlib 3.7 and removed in 3.9, while the pyplot function accepts
    the same (name, lut) arguments.
    NOTE(review): 'hsv' is cyclic, so the first and last colors look alike.'''
    return plt.get_cmap(name, n)
76 | + | ||
# Number of groups in the partition the candidate groups are taken from.
rev = 120

eigenvectors = "one-by-one/eigen_vectors/eigenBOs_120-eigens.txt"
gropus_dir = "one-by-one/groups/"

# Candidates that start with 2 or 3 TFs in the 120-group partition.
groups_analyzed = [97, 80, 74, 68, 63, 53, 52, 49, 47, 44, 43, 40, 39, 38, 37,
                   36, 34, 32, 31, 30, 29, 27, 26, 24, 23, 21, 20, 19, 18, 15,
                   13, 12, 9]
cmap = get_cmap(len(groups_analyzed))

# One persistence record per candidate group, in the order listed above.
plots = [analyze_pair(g, gropus_dir, rev, eigenvectors)
         for g in groups_analyzed]

fig = plt.figure()
ax1 = fig.add_subplot(111)

for i, p in enumerate(plots):
    # The legend label is the TF set recorded for the first scanned partition.
    label = " ".join(p['tfs'][0])
    # The Zur/ZntR pair is drawn thicker and with diamond markers.
    highlighted = label == "Zur ZntR"
    ax1.plot(p['part'], p['n_tfs'], c=cmap(i),
             linewidth=4 if highlighted else 2,
             marker="D" if highlighted else "",
             label=label)

plt.legend(loc='upper right')
plt.title("TF pair persistence through partitions of model resolution")
plt.xlabel("Partition")
plt.ylabel("Number of TFs")
plt.show()
114 | + | ||
115 | +#print(TFs.keys()) | ||
116 | +#print(TFss.keys()) | ||
117 | +#partition = zip(*partition) | ||
118 | +#print(partition) |
csv
0 → 100644
This diff is collapsed. Click to expand it.
persistence.png
0 → 100644

162 KB
-
Please register or login to post a comment