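# analyze_pair.py
#
# NOTE: the first lines of this file were repository-viewer residue
# ("4.01 KB", "Raw Blame History Permalink"), not Python source; they are
# kept here as comments so that the module parses.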


from pdb import set_trace as st
import os
import matplotlib.pyplot as plt


def _parse_cluster(line):
    """Parse one cluster line into a ``{name: [weight]}`` dictionary.

    The line is split on commas and every piece is expected to look like
    ``NAME(weight)``.  The first piece may carry a tab-separated prefix in
    front of the name; it is ignored because only fragments that are purely
    alphabetic (hyphens allowed) are accepted as names.

    Raises:
        IndexError: if a piece contains no acceptable name.
        ValueError: if the text after "(" cannot be converted to float.
    """
    cluster = {}
    for piece in line.split(","):
        head = piece.strip().partition("(")[0]
        name = [s for s in head.partition("\t")
                if s.replace('-', '').isalpha()][0]
        weight = float(piece.partition("(")[-1].strip().strip(")"))
        cluster[name] = [weight]
    return cluster


def _read_cluster_lines(path):
    """Return the lines of a ``.cls`` file without its final line."""
    with open(path, 'r') as f:
        lines = f.readlines()
    # The final line is always discarded, as the original code did
    # (presumably a non-cluster trailer line -- TODO confirm).
    return lines[:-1]


def _starts_with_number(line, number):
    """Tell whether *line* starts with *number* not followed by a digit."""
    prefix = str(number)
    following = line[len(prefix):len(prefix) + 1]
    return line.startswith(prefix) and not following.isdigit()


def analyze_pair(group_anlized, gropus_dir, rev, eigenvectors):
    """Follow the TFs of one group through the partitions below ``rev``.

    The group is looked up in ``gus_originales_<rev>.cls`` (``rev`` is
    zero-padded to three digits).  The files for ``rev - 1`` down to ``2``
    are then read in that order; whenever a cluster contains every TF
    tracked so far, its weights are appended to the tracked ones and its
    additional TFs join the tracked set.

    Args:
        group_anlized: number that starts the wanted line of the ``rev``
            file.  The eigenvector line is the first one starting with
            ``"(<group_anlized - 1>,"``; it is only printed.
        gropus_dir: directory holding the ``gus_originales_NNN.cls`` files.
        rev: number of the starting partition file.
        eigenvectors: path of the eigenvector text file.

    Returns:
        dict with three parallel tuples, one entry per partition visited:
        ``"part"`` (partition number), ``"tfs"`` (list of tracked TF names
        after that partition) and ``"n_tfs"`` (length of that list).  All
        three are empty when ``rev <= 2``.

    Raises:
        IndexError: if the eigenvector line or the group line is missing,
            or a cluster line cannot be parsed.
    """
    eigen_prefix = "(" + str(group_anlized - 1) + ","
    with open(eigenvectors) as f:
        eigenvector = [c for c in f if c.startswith(eigen_prefix)][0]

    init_gfile = "gus_originales_" + str(rev).zfill(3) + ".cls"
    groups = _read_cluster_lines(os.path.join(gropus_dir, init_gfile))
    # Match the whole number: a bare startswith("2") would also accept "21".
    tfs = [c for c in groups if _starts_with_number(c, group_anlized)][0]

    print("The analized group = %s" % tfs.split('\t')[0])
    print("The corresponding TFs: %s" % tfs.split('\t')[1])
    print("The corresponding eigenvector: %s" % eigenvector)

    TFs = _parse_cluster(tfs)

    partition = []
    for i in reversed(range(2, rev)):
        name = "gus_originales_" + str(i).zfill(3) + ".cls"
        for c in _read_cluster_lines(os.path.join(gropus_dir, name)):
            TFss = _parse_cluster(c)
            if not set(TFs) <= set(TFss):
                continue
            # The cluster holds every tracked TF: extend the known weights
            # and adopt its other members.  Walking TFss in file order
            # keeps the key order of TFs reproducible between runs.
            for k, weights in TFss.items():
                if k in TFs:
                    TFs[k] += weights
                else:
                    TFs[k] = weights

        partition.append((i, list(TFs), len(TFs)))

    # zip() is an iterator on Python 3, so unpack it instead of indexing.
    if partition:
        parts, tf_lists, counts = zip(*partition)
    else:
        parts, tf_lists, counts = (), (), ()

    return {"part": parts, "tfs": tf_lists, "n_tfs": counts}


def get_cmap(n, name='hsv'):
    '''Returns the colormap called name, resampled to n entries, so that
    calling it with an index in 0, 1, ..., n-1 yields the RGB(A) color of
    that entry; the keyword argument name must be a standard mpl colormap
    name.'''
    # plt.get_cmap takes the same (name, lut) arguments as the former
    # plt.cm.get_cmap, which was deprecated in Matplotlib 3.7 and removed
    # in 3.9.
    return plt.get_cmap(name, n)

# Partition used as the starting point and the input locations.
rev = 120

eigenvectors = "one-by-one/eigen_vectors/eigenBOs_120-eigens.txt"
gropus_dir = "one-by-one/groups/"

# Candidates that start with 2 or 3 TFs in the 120-group partition.
groups_analyzed = [
    97, 80, 74, 68, 63, 53, 52, 49, 47, 44, 43, 40, 39, 38, 37,
    36, 34, 32, 31, 30, 29, 27, 26, 24, 23, 21, 20, 19, 18, 15,
    13, 12, 9,
]
cmap = get_cmap(len(groups_analyzed))

# One analyze_pair() result per candidate group, in the order listed above.
plots = [
    analyze_pair(group_anlized=group_id, gropus_dir=gropus_dir,
                 rev=rev, eigenvectors=eigenvectors)
    for group_id in groups_analyzed
]

fig = plt.figure()
ax1 = fig.add_subplot(111)

for index, record in enumerate(plots):
    # The curve is labelled with the TF names of its first recorded entry.
    label = " ".join(record['tfs'][0])
    # The "Zur ZntR" curve is drawn thicker and with diamond markers.
    highlighted = label == "Zur ZntR"
    ax1.plot(record['part'], record['n_tfs'], c=cmap(index),
             linewidth=4 if highlighted else 2,
             marker="D" if highlighted else "",
             label=label)

#ax1.scatter(x[40:],y[40:], s=10, c='r', marker="o", label='second')
plt.legend(loc='upper right')
plt.title("TF pair persistence through partitions of model resolution")
plt.xlabel("Partition")
plt.ylabel("Number of TFs")
plt.show()

#print(TFs.keys())
#print(TFss.keys())
#partition = zip(*partition)
#print(partition)