# analyze_pair.py
from pdb import set_trace as st
import os
import matplotlib.pyplot as plt
def _read_cluster_lines(gropus_dir, index):
    """Return the lines of ``gus_originales_<index>.cls`` without the last one.

    The final line of each file is discarded, as the original code did
    (presumably a trailer line rather than a cluster -- confirm against the
    file format).
    """
    name = "gus_originales_" + str(index).zfill(3) + ".cls"
    # os.path.join works whether or not gropus_dir ends with a separator.
    with open(os.path.join(gropus_dir, name), 'r') as f:
        lines = f.readlines()
    return lines[:-1]


def _parse_cluster(line):
    """Parse one cluster line into a ``{TF name: [weight]}`` dict.

    A line is a comma-separated list of ``NAME(weight)`` entries; the first
    entry is preceded by the group id and a tab character.
    """
    members = {}
    for entry in line.split(","):
        # The first entry still carries "<id><TAB>"; keep the first piece
        # that is purely alphabetic once hyphens are ignored.
        pieces = entry.strip().partition("(")[0].partition("\t")
        names = [s for s in pieces if s.replace('-', '').isalpha()]
        if not names:
            raise IndexError("no TF name found in entry %r" % entry)
        weight = entry.partition("(")[-1].strip().strip(")")
        members[names[0]] = [float(weight)]
    return members


def analyze_pair(group_anlized, gropus_dir, rev, eigenvectors):
    """Follow the TFs of one group through partitions ``rev - 1`` down to 2.

    The group is looked up in ``gus_originales_<rev>.cls``.  Then, for every
    partition file from ``rev - 1`` down to ``2``, each cluster that contains
    all TFs tracked so far is merged into the tracked set, and the state
    after each file is recorded.

    Parameters:
        group_anlized: integer id of the group to follow, as written in the
            first tab-separated field of the ``rev`` partition file.
        gropus_dir: directory holding the ``gus_originales_NNN.cls`` files.
        rev: index of the starting partition file.
        eigenvectors: path of a text file with one line starting with
            ``"(<group_anlized - 1>,"``; that line is only printed.

    Returns:
        dict with three parallel tuples, one element per partition file read:
        ``"part"`` (partition indices), ``"tfs"`` (lists of tracked TF names)
        and ``"n_tfs"`` (their lengths).  All three are empty when
        ``rev <= 2``.

    Raises:
        IndexError: if the eigenvector line or the group line is missing, or
            a cluster entry has no TF name.
        ValueError: if a weight cannot be converted to ``float``.
    """
    eigen_prefix = "(" + str(group_anlized - 1) + ","
    with open(eigenvectors) as f:
        eigenvector = next(
            (ln for ln in f if ln.startswith(eigen_prefix)), None)
    if eigenvector is None:
        raise IndexError("no eigenvector line starts with %r" % eigen_prefix)

    # Compare the whole id field: a bare startswith() would let group 1
    # match the line of group 10.
    group_id = str(group_anlized)
    tfs = next((c for c in _read_cluster_lines(gropus_dir, rev)
                if c.split("\t", 1)[0].strip() == group_id), None)
    if tfs is None:
        raise IndexError("group %s not found in partition %s"
                         % (group_id, rev))

    print("The analized group = %s" % tfs.split('\t')[0])
    print("The corresponding TFs: %s" % tfs.split('\t')[1])
    print("The corresponding eigenvector: %s" % eigenvector)

    TFs = _parse_cluster(tfs)
    history = []
    for i in reversed(range(2, rev)):
        for c in _read_cluster_lines(gropus_dir, i):
            cluster = _parse_cluster(c)
            if set(TFs) <= set(cluster):
                # Members of this cluster not tracked yet, in file order so
                # the result does not depend on set iteration order.
                newcomers = [n for n in cluster if n not in TFs]
                for k in list(TFs):
                    TFs[k] += cluster[k]
                for n in newcomers:
                    TFs[n] = cluster[n]
        history.append((i, list(TFs), len(TFs)))

    if not history:
        return {"part": (), "tfs": (), "n_tfs": ()}
    # Unpack the zip explicitly: on Python 3 a zip object cannot be indexed.
    part, tfs_history, n_tfs = zip(*history)
    return {"part": part, "tfs": tfs_history, "n_tfs": n_tfs}
def get_cmap(n, name='hsv'):
    '''Return the colormap called name, resampled to n lookup entries.

    The result is callable with an index in 0, 1, ..., n-1 and returns an
    RGBA color; name must be a registered matplotlib colormap name.'''
    # matplotlib.cm.get_cmap was deprecated in Matplotlib 3.7 and removed in
    # 3.9; pyplot.get_cmap accepts the same (name, lut) arguments.
    return plt.get_cmap(name, n)
# Input locations and the index of the starting partition.
rev = 120
eigenvectors = "one-by-one/eigen_vectors/eigenBOs_120-eigens.txt"
gropus_dir = "one-by-one/groups/"

# Candidates that start with 2 or 3 TFs in the 120-group partition.
groups_analyzed = [
    97, 80, 74, 68, 63, 53, 52, 49, 47, 44, 43, 40, 39, 38, 37,
    36, 34, 32, 31, 30, 29, 27, 26, 24, 23, 21, 20, 19, 18, 15,
    13, 12, 9,
]

# One color per analyzed group.
cmap = get_cmap(len(groups_analyzed))

# Trace every candidate group through the partitions.
plots = [
    analyze_pair(group_anlized=g, gropus_dir=gropus_dir,
                 rev=rev, eigenvectors=eigenvectors)
    for g in groups_analyzed
]

fig = plt.figure()
ax1 = fig.add_subplot(111)
for i, p in enumerate(plots):
    # The legend label is the TF list recorded for the first partition read.
    label = " ".join(p['tfs'][0])
    # The Zur/ZntR pair is drawn thicker and with diamond markers.
    highlighted = label == "Zur ZntR"
    ax1.plot(p['part'], p['n_tfs'], c=cmap(i),
             linewidth=4 if highlighted else 2,
             marker="D" if highlighted else "",
             label=label)

plt.legend(loc='upper right')
plt.title("TF pair persistence through partitions of model resolution")
plt.xlabel("Partition")
plt.ylabel("Number of TFs")
plt.show()