Ignacio Arroyo

Added cluster persistence plots

1 -,iarroyof,bisnaga,19.02.2018 13:28,file:///home/iarroyof/.config/libreoffice/4;
...\ No newline at end of file ...\ No newline at end of file
1 +,iarroyof,bisnaga,20.02.2018 21:25,file:///home/iarroyof/.config/libreoffice/4;
...\ No newline at end of file ...\ No newline at end of file
......
1 +
2 +from pdb import set_trace as st
3 +import os
4 +import matplotlib.pyplot as plt
5 +
6 +
def _parse_cluster(line):
    """Parse one cluster line into a ``{tf_name: [weight]}`` dictionary.

    A line is a comma-separated list of ``NAME(weight)`` entries; the first
    entry is still prefixed by ``<group id>\\t``.  A name is the first piece of
    the entry that is purely alphabetic once hyphens are removed.

    Raises:
        IndexError: if an entry has no alphabetic name.
        ValueError: if a weight cannot be converted to ``float``.
    """
    members = {}
    for entry in line.split(","):
        pieces = entry.strip().partition("(")[0].partition("\t")
        name = [s for s in pieces if s.replace('-', '').isalpha()][0]
        weight = entry.partition("(")[-1].strip().strip(")")
        members[name] = [float(weight)]
    return members


def analyze_pair(group_anlized, gropus_dir, rev, eigenvectors):
    """Trace how the TFs of one group get absorbed across coarser partitions.

    The group ``group_anlized`` is read from ``gus_originales_<rev>.cls`` in
    ``gropus_dir``.  Then every partition file from ``rev - 1`` down to ``2``
    is scanned: whenever a cluster contains all TFs tracked so far, its
    weights are appended to the tracked TFs and its extra TFs are added to
    the tracked set.

    Args:
        group_anlized: group identifier as written at the start of a row of
            the ``.cls`` file; the eigenvector row is looked up with
            ``group_anlized - 1``.
        gropus_dir: directory holding the ``gus_originales_NNN.cls`` files.
        rev: number of the starting partition file.
        eigenvectors: path of the eigenvector text file (only used to print
            the row matching the analysed group).

    Returns:
        dict with three parallel tuples: ``"part"`` (partition numbers),
        ``"tfs"`` (list of tracked TF names at that partition) and
        ``"n_tfs"`` (how many TFs were tracked).  The tuples are empty when
        there is no partition below ``rev`` to visit.

    Raises:
        IndexError: if the eigenvector row or the group row is missing.
    """
    dic_part = {}

    eigen_prefix = "(" + str(group_anlized - 1) + ","
    with open(eigenvectors) as f:
        eigenvector = next(
            (row for row in f if row.startswith(eigen_prefix)), None)
    if eigenvector is None:
        raise IndexError("no eigenvector row starts with %r" % eigen_prefix)

    init_gfile = "gus_originales_" + str(rev).zfill(3) + ".cls"
    with open(os.path.join(gropus_dir, init_gfile), 'r') as f:
        # The last line of every .cls file is dropped, as in the original.
        groups = f.readlines()[:-1]

    # Prefer an exact match on the id column so that group "2" is never
    # confused with "20"; fall back to the historical prefix match for files
    # whose first column is not a bare id.
    wanted = str(group_anlized)
    matches = [c for c in groups if c.split("\t")[0].strip() == wanted]
    if not matches:
        matches = [c for c in groups if c.startswith(wanted)]
    if not matches:
        raise IndexError("group %s not found in %s" % (wanted, init_gfile))
    tfs = matches[0]

    print("The analized group = %s" % tfs.split('\t')[0])
    print("The corresponding TFs: %s" % tfs.split('\t')[1])
    print("The corresponding eigenvector: %s" % eigenvector)

    TFs = _parse_cluster(tfs)

    partition = []
    for i in reversed(range(2, rev)):
        name = "gus_originales_" + str(i).zfill(3) + ".cls"
        with open(os.path.join(gropus_dir, name), 'r') as f:
            clusters = f.readlines()[:-1]

        for c in clusters:
            TFss = _parse_cluster(c)
            # Only merge with a cluster that contains every tracked TF.
            if set(TFs) <= set(TFss):
                newcomers = set(TFss) - set(TFs)
                for k in list(TFs):
                    TFs[k] += TFss[k]
                for n in newcomers:
                    TFs[n] = TFss[n]

        # NOTE(review): the indentation of the original was lost; the
        # snapshot is taken once per partition here -- confirm that it was
        # not meant to be taken only when a containing cluster is found.
        partition.append((i, list(TFs), len(TFs)))

    # zip() is an iterator on Python 3, so it must be materialised before it
    # is indexed; an empty history yields three empty tuples.
    columns = list(zip(*partition)) if partition else [(), (), ()]
    dic_part["part"] = columns[0]
    dic_part["tfs"] = columns[1]
    dic_part["n_tfs"] = columns[2]

    return dic_part
70 +
71 +
def get_cmap(n, name='hsv'):
    """Return the colormap ``name`` resampled to ``n`` entries.

    Calling the returned colormap with an integer index in ``0 .. n-1``
    gives the RGBA colour stored at that position of its lookup table.

    Args:
        n: number of entries in the lookup table.
        name: name of a registered Matplotlib colormap.
    """
    # ``plt.cm.get_cmap`` was deprecated in Matplotlib 3.7 and removed in
    # 3.9; ``plt.get_cmap`` takes the same (name, lut) arguments and is
    # available in both old and new releases.
    return plt.get_cmap(name, n)
76 +
# Finest partition used as the starting point of every trace.
rev = 120

eigenvectors = "one-by-one/eigen_vectors/eigenBOs_120-eigens.txt"
gropus_dir = "one-by-one/groups/"

# Candidate groups that start with 2 or 3 TFs in the 120-group partition.
groups_analyzed = [
    97, 80, 74, 68, 63, 53, 52, 49, 47, 44, 43, 40, 39, 38, 37,
    36, 34, 32, 31, 30, 29, 27, 26, 24, 23, 21, 20, 19, 18, 15,
    13, 12, 9,
]
cmap = get_cmap(len(groups_analyzed))

# One trace (dict with "part", "tfs" and "n_tfs") per candidate group.
plots = [
    analyze_pair(group_anlized=g, gropus_dir=gropus_dir, rev=rev,
                 eigenvectors=eigenvectors)
    for g in groups_analyzed
]

fig = plt.figure()
ax1 = fig.add_subplot(111)

for i, p in enumerate(plots):
    # Each curve is labelled with the TFs recorded in its first snapshot.
    label = " ".join(p['tfs'][0])
    # The Zur/ZntR curve is drawn thicker and with diamond markers.
    width, mark = (4, "D") if label == "Zur ZntR" else (2, "")
    ax1.plot(p['part'], p['n_tfs'], c=cmap(i), linewidth=width, marker=mark,
             label=label)

#ax1.scatter(x[40:],y[40:], s=10, c='r', marker="o", label='second')
plt.legend(loc='upper right')
plt.title("TF pair persistence through partitions of model resolution")
plt.xlabel("Partition")
plt.ylabel("Number of TFs")
plt.show()
114 +
115 +#print(TFs.keys())
116 +#print(TFss.keys())
117 +#partition = zip(*partition)
118 +#print(partition)
1 +-------------------------------------------------
2 +protein] hpr phosphorylated_ext h [glycine-cleavage 6-phosphate l-glutamate complex ketoglutarate co2
3 +phosphorylated_ext hpr 6-phosphate ptsh protein] h [glycine-cleavage arbutin-6p complex l-glutamate
4 +l-glutamate protein] gaba h [glycine-cleavage complex gaba_ext n6-lipoyl-l-lysine l-glutamate_ext trans-rxn-92
5 +ribose-1-phosphate inosine adenosine xanthosine guanosine deoxy-d-ribose vitamin xanthosine_ext adenine 1-phosphate
6 +trans-rxn-92 drug drug_ext l-glutamate ketoglutarate acrab acrab_mrna marrab_mrna marrab gaba
7 +csgdefg csgdefg_mrna phosphate csga rxn-5068 csga_ext csge-csgg flhdc_mrna flhd-flhc flhdc
8 +phosphate gluconate glyceraldehyde-p xylulose-p csgdefg_mrna csgdefg 6-p fructose-6-p arbutin-6p dhap
9 +vitamin nitrite fumarate menaquinol k2 aspartate phosphate oxygen ribose-1-phosphate gluconate
10 +phosphate 6-phosphate acetyl-coa beta-d-glucose-6-p xylulose gluconate arbutin-6p 6-p ribulose-5-p xylulose-p
11 +gluconate 6-phosphate arbutin-6p salicin-6-p beta-d-glucose-6-p 6-p xylulose-p bglg bglgfb bglgfb_mrna
12 +acpp trans hexadecenoyl-[acp] malonyl-s-acp acetyl-coa ester phosphate coenzyme fatty beta-hydroxymyristoyl-[acp]
13 +oxygen ketoglutarate acid flda thioredoxin vitamin acpp csgdefg_mrna csgdefg nitrite
14 +thioredoxin udpg udp-galactose d-glucose flda 1-phosphate galactose-1-p disulfide amylose acetyl-coa
15 +udpg udp-galactose thioredoxin d-glucose 1-phosphate oxygen galactose-1-p acetyl-coa alanine ketoglutarate
16 +gluconate xylulose ribulose-5-p 3-keto-l-gulonate ribulose 5p xylulose-p kdgp thioredoxin lactaldehyde
17 +alanine desulfurase] persulfide [l-cysteine l-cysteine cysteines protein unsulfurated donor] [sulfur
18 +thioredoxin flda flhd-flhc flhdc_mrna flhdc rela-mazef_mrna rela-mazef mazf-maze 7,8-dihydrofolate disulfide
19 +flhd-flhc flhdc_mrna flhdc flhc flhd csgdefg_mrna csgdefg gltbdf_mrna gltbdf thioredoxin
20 +acetyl-coa acpp ketoglutarate tdcabcdefg_mrna tdcabcdefg trans coenzyme hyaabcdef_mrna hyaabcdef hexadecenoyl-[acp]
21 +tdcabcdefg tdcabcdefg_mrna tdca serine serine_ext dehydroalanine propanoyl-p tdce threonine_ext tdcd
22 +poly-beta-1,6-n-acetyl-d-glucosamine n-deacetylated partially poly-beta-1,6-n-acetyl-d-glucosamine_ext 7,8-dihydrofolate mazefg_mrna mazefg acetyl-coa biodex coenzyme
23 +relbe-hokd_mrna relb-rele relbe-hokd relb rele hokd 7,8-dihydrofolate yefm-yoeb poly-beta-1,6-n-acetyl-d-glucosamine yoeb-yefm
24 +poly-beta-1,6-n-acetyl-d-glucosamine 7,8-dihydrofolate guanosine-diphosphate mazefg_mrna mazefg n-deacetylated partially trna poly-beta-1,6-n-acetyl-d-glucosamine_ext glyceraldehyde-p
25 +zinc zinc_ext zint poly-beta-1,6-n-acetyl-d-glucosamine zint_mrna acetyl-coa n-deacetylated partially poly-beta-1,6-n-acetyl-d-glucosamine_ext coenzyme
26 +yefm-yoeb yefm-yoeb_mrna yoeb-yefm yefm yoeb zinc zinc_ext zint poly-beta-1,6-n-acetyl-d-glucosamine acetyl-coa
27 +poly-beta-1,6-n-acetyl-d-glucosamine zinc partially n-deacetylated acetyl-coa poly-beta-1,6-n-acetyl-d-glucosamine_ext zinc_ext coenzyme fmn flda
28 +glyceraldehyde-p dhap 1,6-diphosphate tagatofuranose phosphate fructose-6-p 3-keto-l-gulonate ribulose-5-p tagaaldol-rxn sedoheptulose-7-p
29 +biodex lysine ketoglutarate 1-_ext cadba_mrna cadba thioredoxin acid lysine_ext cadc
30 +dhap fructose-6-p lactaldehyde rhasr rhasr_mrna rhamnose rhar xylulose-p glyceraldehyde-p rhas
31 +carbonate methionine oxygen succinate fumarate aminoacrylate glutamine hyaabcdef_mrna hyaabcdef aspartate
32 +fructuronate uidabc_mrna uidabc uxur mannonate hyaabcdef_mrna hyaabcdef uxuab_mrna uxuab appcbxa
33 +hyaabcdef_mrna hyaabcdef fructuronate appcbxa_mrna appcbxa uidabc_mrna uidabc flda uxur methionine
34 +glycerate tartronate-s-ald ureidoglycolate glyoxylate allantoate 5-kdg allantoin alls methionine 2-p-d-glycerate
35 +methionine ketoglutarate rela-mazef rela-mazef_mrna 7,8-dihydrofolate biodex amylose thioredoxin hipb-hipa lysine
36 +oxygen methionine ketoglutarate 2,3-dihydroxycinnamic acetyl-coa biodex 2,3-dhp fmn gaba lysine
37 +7,8-dihydrofolate amylose rela-mazef_mrna rela-mazef trna hipb-hipa methylene-h4pteglu(n) hipba hipba_mrna ppgpp
38 +fmn flavin amylose sulfite mononucleotide sulfide thioredoxin d-glucose flda maltohexaose
39 +amylose fmn acid flavin d-glucose 1-phosphate 2,3-dihydroxycinnamic carbonate maltohexaose fumarate
40 +fructose-6-p 1,6-diphosphate tagatofuranose rhasr rhasr_mrna rhar rhamnose fumarate rhas tagaaldol-rxn
41 +puta flda glutamate-semialdehyde 5-carboxylate pyrroline thioredoxin proline qsebc_mrna qsebc fumarate
42 +hyfabcdefghijr-focb_mrna hyfabcdefghijr-focb hydrogenase hydrogen electron hyfg-hyfi-hyfa-hyfb-hyfc-hyfd-hyfe-hyff-hyfh mvhmeth-rxn 3 aidb vitamin
43 +carbonate fumarate puta glutamate-semialdehyde 5-carboxylate pyrroline flda succinate_ext vitamin malate
44 +flda lactate with peptidoglycan glycolate sorbitol lactate_ext nascent srlaebd-gutm-srlr-gutq srlaebd-gutm-srlr-gutq_mrna
45 +ion cobalt nickel nickel_ext arsenite nikabcder_mrna nikabcder melibiose nikd-nike-nikb-nikc-nika ion_ext
46 +aidb carbonate marrab_mrna marrab co2 hyfabcdefghijr-focb_mrna hyfabcdefghijr-focb aidb_mrna dna hydrogenase
47 +lactose melibiose marrab marrab_mrna lactose_ext melibiose_ext flda sgrst-seta sgrst-seta_mrna lactate
48 +aidb sorbitol flda srlaebd-gutm-srlr-gutq_mrna srlaebd-gutm-srlr-gutq tryptophan carbonate marrab_mrna marrab diacetylchitobiose
49 +tryptophan indole flda serine fructose-6-p vitamin chorismate shikimate shikimate-5-p tryptophan_ext
50 +lactose melibiose marrab_mrna marrab lactose_ext flda lactate melibiose_ext sgrst-seta sgrst-seta_mrna
51 +aicar 5\'-p-ribosylglycinamide faicar thf polyglutamate 5-phosphoribosylamine glycine aidb lactate lactose
52 +aidb lactate lipid glycolate aicar acetyl-coa 5\'-p-ribosylglycinamide i carbonate faicar
53 +ribulose dhap 1,6-diphosphate tagatofuranose glycoaldehyde 3-keto-l-gulonate lactaldehyde rhasr_mrna rhasr rhar
54 +ribulose beta-neu5ac n-acetylneuraminate short-chain nickel alpha-neu5ac 3-keto-l-gulonate fe+2 alcohol lactate
55 +fructose-6-p cu(i) cu(i)_ext carbonate silver alpha-d-mannose-6-p silver_ext trans-rxn-90 cuscfba_mrna cuscfba
56 +aidb (2s,3s)-2-methylcitrate carbonate cu(i) nemra-gloa nemra-gloa_mrna propanoyl-coa nemr cu(i)_ext tartrate
57 +fliaz-tcyj_mrna fliaz-tcyj flifghijk_mrna flifghijk aidb flda glyoxylate vitamin i-cit coenzyme
58 +ureidoglycolate allantoate glycerate glycerol-3-phosphate lactate arsenite fliaz-tcyj fliaz-tcyj_mrna fe+2 flifghijk_mrna
59 +3-keto-l-gulonate 5p 7 p-enol-pyruvate aidb carbonate tartrate ureidoglycolate ribulose dhap
60 +ureidoglycolate hyacinthin allantoate glycerate phenylacetate flda acetyl-coa 5-kdg coenzyme allantoin
61 +fe+2 mn(ii) flda mn(ii)_ext 3-keto-l-gulonate ftsq acetyl-coa hns beta-neu5ac 5p
62 +tartrate mn(ii) hyacinthin arsenite fe+2 nemra-gloa nemra-gloa_mrna nemr (2s,3s)-2-methylcitrate mn(ii)_ext
63 +arsenite glycerol-3-phosphate hyacinthin tartrate melibiose alanine sgrst-seta sgrst-seta_mrna rcho alcohol
64 +hyacinthin tartrate glycerol-3-phosphate arsenite phenylacetate aspartate mn(ii) 2,3-dihydroxycinnamic fe+2 rpos
65 +uidabc uidabc_mrna hyacinthin exut ftsq tartrate ureidoglycolate rpos i-cit lyase
66 +hyacinthin melibiose betaine glycerol-3-phosphate sgrst-seta_mrna sgrst-seta choline melibiose_ext mn(ii) lactate
67 +rpos rpos_mrna rpob-rpoc-rpoa-rpos hns dna dinj-yafq hns_mrna ureidoglycolate double hyacinthin
68 +ftsq ftsq-ftsb-ftsl rna tartrate dinj-yafq norr gadw lactate norvw_mrna norvw
69 +norr norvw norvw_mrna hyacinthin ureidoglycolate dinj-yafq rna norr_mrna flrd betaine
70 +ftsq dinj-yafq ftsq-ftsb-ftsl gadw yafq-dinj dinj-yafq_mrna rna ftsz ftsa gadw_mrna
71 +lysa rna lysa_mrna dinj-yafq yafq-dinj dinj-yafq_mrna rpos yafq dinj hns
72 +bola rcho alcohol rna lysa arsenite poly-beta-1,6-n-acetyl-d-glucosamine lysa_mrna rpos melibiose
73 +maly bola ftsq lysa rna rcho nemra-gloa_mrna nemra-gloa nemr ftsq-ftsb-ftsl
74 +rna lysa rcho alcohol lysa_mrna bola rpos hns dinj-yafq guanylate
75 +rcho lysa alcohol betaine lysa_mrna choline rna aldehyde nemr nemra-gloa
76 +rcho alcohol arsenite rna hns betaine rpos choline aldehyde dna
77 +hns dna lysa rpos hns_mrna double aidb rcho betaine stranded
78 +tartrate betaine hyacinthin nemr nemra-gloa_mrna nemra-gloa choline norr aidb aldehyde
79 +uidabc uidabc_mrna arsenite melibiose hyacinthin sgrst-seta sgrst-seta_mrna dinj-yafq 7 uxur
80 +tartrate hyacinthin serine dsdc (2s,3s)-2-methylcitrate propanoyl-coa aspartate acetyl-coa melibiose succinate_ext
81 +serine dsdc mn(ii) arsenite serine_ext mn(ii)_ext mdtabcd-baesr mdtabcd-baesr_mrna dehydroalanine uidabc
82 +mn(ii) mdtabcd-baesr_mrna mdtabcd-baesr mn(ii)_ext tartrate fe+2 dps mntp mannitol-1-p marrab
83 +arsenite moaabcde_mrna moaabcde cpmp fe+2 mn(ii) synthase small molybdopterin citcdefxg_mrna
84 +serine dsdc vitamin tartrate serine_ext (2s,3s)-2-methylcitrate mn(ii) propanoyl-coa fe+2 methylisocitrate
85 +mannitol-1-p mtladr mtladr_mrna 6-phosphate mtlr alpha-d-mannose-6-p moaabcde_mrna moaabcde cpmp mannitol_ext
86 +pimelyl-[acp] melibiose ester arsenite alanine malonyl-s-acp mdtabcd-baesr_mrna mdtabcd-baesr methyl dinj-yafq
87 +glycerol-3-phosphate arsenite vitamin mannitol-1-p mdtabcd-baesr mdtabcd-baesr_mrna mtladr_mrna mtladr gltbdf_mrna gltbdf
88 +alanine vitamin dinj-yafq lipid aspartate tyrosine flda i citcdefxg_mrna citcdefxg
89 +aspartate soxr glycerol-3-phosphate fumc fumhydr-rxn fumac_mrna fumac soxs thf polyglutamate
90 +qsebc qsebc_mrna gltbdf_mrna gltbdf glycerol-3-phosphate qseb gltd-gltb aspartate puta ftsq
91 +pimelyl-[acp] ester methyl co2 ferredoxin [2fe-2s] vitamin flda gltbdf_mrna gltbdf
92 +glrr glycerol-3-phosphate lactaldehyde ribulose glycoaldehyde xylopyranose xylab_mrna xylab aspartate tyrosine
93 +arsenite aspartate glycerol-3-phosphate short-chain rcho thf polyglutamate p-enol-pyruvate n5-methyl-tetrahydrofolate hyacinthin
94 +mdtabcd-baesr mdtabcd-baesr_mrna malonyl-coa vitamin glrr acetyl-coa-carboxyltransfer-rxn accd-acca-accc-accb accbc_mrna accbc accb
95 +glrr vitamin aspartate alanine short-chain (2s,3s)-2-methylcitrate arsenite thf polyglutamate soxr
96 +glrr putrescine_ext putrescine 5-phosphate gaba vitamin glycerol-3-phosphate tyrosine phosphate acetyl-coa
97 +p-enol-pyruvate glrr ribulose 7 2 lactaldehyde vitamin tyrosine glycoaldehyde beta-neu5ac
98 +diacetylchitobiose beta-d-cellobiose-6\'-p ptsh lipid phosphorylated_ext hpr i isocitrate-dehydrogenase 6\'-phosphate 6-phosphate
99 +putrescine_ext glrr putrescine n2-succinylornithine alanine n2-succinylarginine arginine gaba 2-acetamido-5-oxopentanoate tyrosine
100 +alanine lactose vitamin tyrosine qsebc qsebc_mrna aminoacrylate melibiose pimelyl-[acp] d-glucose
101 +alanine pimelyl-[acp] lactose melibiose ester putrescine_ext putrescine aminoacrylate tyrosine qsebc_mrna
102 +nitrate_ext vitamin narghji narghji_mrna tyrosine menaquinol_ext te0 narg-narh-nari rxn0-7124 rxn-15119
103 +kdgp eda 4-hydroxy-2-oxoglutarate 5-ketogluconate edd-eda_mrna edd-eda glyceraldehyde-p idonate 6-p sedoheptulose-7-p
104 +d-glucose succinate aminoacrylate n2-succinylornithine ketoglutarate amylose tyrosine succ-s-ald aidb n2-succinylarginine
105 +lactate glcdefgba glcdefgba_mrna glycolate lactate_ext 5-kdg vitamin kdgp isocitrate-dehydrogenase 6-phosphate
106 +cobalt nikabcder nikabcder_mrna nikd-nike-nikb-nikc-nika abc-20-rxn ion arsenite rcnab rcnab_mrna malonyl-s-acp
107 +putrescine_ext putrescine cobalt carbonate malonyl-s-acp fatty malonyl-coa carbamate tyrosine carbamoyl-p
108 +carbonate carbamate putrescine_ext cobalt putrescine alanine carbamoyl-p glutamine malonyl-coa pimelyl-[acp]
109 +6-phosphate beta-d-cellobiose-6\'-p salicin-6-p prpr glycerate glucose-6-p amylose lactose beta-d-cellobiose_ext arbutin-6p
110 +lactose sgrst-seta sgrst-seta_mrna melibiose cobalt alanine glucopyranose beta-d-cellobiose-6\'-p lactose_ext nikabcder
111 +malonyl-s-acp 5-phosphate fatty alpha-d-mannose-6-p beta-d-glucose-6-p co2 ribulose-5p ribose-5p ester chorismate
112 +amylose lactose d-glucose alpha-glucose 1-phosphate succinate 6-phosphate beta-d-cellobiose-6\'-p maltohexaose aspartate
113 +tyrosine amylose maltohexaose aminoacrylate qsebc_mrna qsebc flifghijk flifghijk_mrna d-glucose alpha-glucose
114 +alpha-d-mannose-6-p ribulose-5p ribose-5p beta-d-glucose-6-p 5-phosphate malonyl-s-acp phenylacetate suc-coa hyacinthin fatty
115 +aspartate tyrosine alpha-d-mannose-6-p 5-phosphate tartrate hydrogenase fumarate oxygen ribulose-5p acetyl-coa
116 +hydrogenase 5-phosphate 3 tyrosine hyfabcdefghijr-focb hyfabcdefghijr-focb_mrna mvhmeth-rxn hyfg-hyfi-hyfa-hyfb-hyfc-hyfd-hyfe-hyff-hyfh electron formate
117 +6-phosphate diacetylchitobiose 5-phosphate hydrogenase sorbitol n-monoacetylchitobiose trer alpha-d-glucosamine 6\'-phosphate chitobiose_ext
118 +5-phosphate hydrogenase flic flifghijk flifghijk_mrna 6-phosphate fliaz-tcyj_mrna fliaz-tcyj gade-mdtef_mrna gade-mdtef
119 +5-phosphate aspartate phenylacetate suc-coa coenzyme alpha-d-mannose-6-p succinate 6-phosphate vitamin hyacinthin
120 +5-phosphate flic acetyl-coa csgbac_mrna csgbac aspartate fliaz-tcyj_mrna fliaz-tcyj flifghijk flifghijk_mrna
121 +glycerate ptsh dehydrogenase e2 [pyruvate h [glycine-cleavage 5-kdg 6-phosphate complex
122 +-------------------------------------------------