Ignacio Arroyo

Added cluster persistence plots

,iarroyof,bisnaga,19.02.2018 13:28,file:///home/iarroyof/.config/libreoffice/4;
\ No newline at end of file
,iarroyof,bisnaga,20.02.2018 21:25,file:///home/iarroyof/.config/libreoffice/4;
\ No newline at end of file
......
from pdb import set_trace as st
import os
import matplotlib.pyplot as plt
def _read_cluster_lines(path):
    """Return the lines of a ``.cls`` file without its last line.

    The last line is always discarded, as the original code did.
    NOTE(review): presumably that line is a trailer/summary row -- confirm.
    """
    with open(path, 'r') as f:
        return f.readlines()[:-1]


def _parse_cluster_line(line):
    """Parse one cluster line into a ``{TF name: [value]}`` dict.

    The line is split on commas and every piece is expected to look like
    ``Name(number)``.  The first piece may carry a tab-separated prefix
    before the name; it is skipped because only purely alphabetic tokens
    (hyphens ignored) are accepted as names.

    Raises:
        IndexError: a piece contains no alphabetic name.
        ValueError: the text in parentheses is not a float.
    """
    parsed = {}
    for item in line.split(","):
        name = [s for s in item.strip().partition("(")[0].partition("\t")
                if s.replace('-', '').isalpha()][0]
        parsed[name] = [float(item.partition("(")[-1].strip().strip(")"))]
    return parsed


def analyze_pair(group_anlized, gropus_dir, rev, eigenvectors):
    """Track how the cluster holding a group's TFs grows across partitions.

    The TFs of group ``group_anlized`` are read from the partition file
    ``gus_originales_<rev>.cls``.  Partitions ``rev - 1`` down to ``2`` are
    then scanned; whenever a cluster contains every tracked TF, the TFs of
    that cluster are merged into the tracked set and one record is stored.

    Args:
        group_anlized: integer id of the group to follow.  The eigenvector
            line is looked up with ``group_anlized - 1``.
        gropus_dir: directory containing the ``gus_originales_NNN.cls``
            files (with or without a trailing separator).
        rev: number of the starting partition file.
        eigenvectors: path of the eigenvector text file; the matching line
            is only printed.

    Returns:
        dict with three parallel tuples: ``"part"`` (partition numbers),
        ``"tfs"`` (list of tracked TF names at that partition) and
        ``"n_tfs"`` (how many TFs were tracked at that partition).

    Raises:
        IndexError: the eigenvector line or the group line is missing, or
            no scanned partition has a cluster containing the group's TFs.
    """
    group_id = str(group_anlized)

    eigen_prefix = "(" + str(group_anlized - 1) + ","
    with open(eigenvectors) as f:
        eigen_lines = [c for c in f if c.startswith(eigen_prefix)]
    if not eigen_lines:
        raise IndexError("no eigenvector line starts with %r in %s"
                         % (eigen_prefix, eigenvectors))
    eigenvector = eigen_lines[0]

    init_gfile = "gus_originales_" + str(rev).zfill(3) + ".cls"
    groups = _read_cluster_lines(os.path.join(gropus_dir, init_gfile))
    # Prefer an exact match on the first tab-delimited field: a bare prefix
    # test lets group "9" match the line of group "97".  The prefix test is
    # kept as a fallback for files whose first field is formatted otherwise.
    candidates = ([c for c in groups
                   if c.split("\t", 1)[0].strip() == group_id]
                  or [c for c in groups if c.startswith(group_id)])
    if not candidates:
        raise IndexError("group %s not found in %s" % (group_id, init_gfile))
    tfs = candidates[0]

    print("The analized group = %s" % tfs.split('\t')[0])
    print("The corresponding TFs: %s" % tfs.split('\t')[1])
    print("The corresponding eigenvector: %s" % eigenvector)

    tracked = _parse_cluster_line(tfs)
    partition = []
    for i in reversed(range(2, rev)):
        name = "gus_originales_" + str(i).zfill(3) + ".cls"
        for c in _read_cluster_lines(os.path.join(gropus_dir, name)):
            cluster = _parse_cluster_line(c)
            if not set(tracked) <= set(cluster):
                continue  # this cluster does not hold every tracked TF
            for k in list(tracked):
                tracked[k] += cluster[k]
            # Add the cluster's other members in file order so the
            # resulting TF order is reproducible between runs.
            for n in cluster:
                if n not in tracked:
                    tracked[n] = cluster[n]
            # NOTE(review): the source lost its indentation; the record is
            # stored only when a containing cluster is found -- confirm.
            partition.append((i, list(tracked), len(tracked)))

    if not partition:
        raise IndexError("no partition below %s contains all TFs of group %s"
                         % (rev, group_id))
    # zip() is an iterator on Python 3, so unpack it instead of indexing.
    parts, tf_lists, counts = zip(*partition)
    return {"part": parts, "tfs": tf_lists, "n_tfs": counts}
def get_cmap(n, name='hsv'):
    '''Return the colormap called `name`, resampled to `n` entries.

    The result is callable: passing an integer index in 0, 1, ..., n-1
    yields the color of that entry.  `name` must be a standard matplotlib
    colormap name.
    '''
    # matplotlib.cm.get_cmap was removed in matplotlib 3.9; the pyplot
    # function takes the same (name, lut) arguments and is still available.
    return plt.get_cmap(name, n)
# Starting partition and input locations.
rev = 120
eigenvectors = "one-by-one/eigen_vectors/eigenBOs_120-eigens.txt"
gropus_dir = "one-by-one/groups/"
# Candidates that start with 2 or 3 TFs in the 120-group partition.
groups_analyzed = [
    97, 80, 74, 68, 63, 53, 52, 49, 47, 44, 43, 40, 39, 38, 37,
    36, 34, 32, 31, 30, 29, 27, 26, 24, 23, 21, 20, 19, 18, 15,
    13, 12, 9,
]
cmap = get_cmap(len(groups_analyzed))

# One persistence record per analyzed group, in the order listed above.
plots = [
    analyze_pair(group_anlized=gid, gropus_dir=gropus_dir,
                 rev=rev, eigenvectors=eigenvectors)
    for gid in groups_analyzed
]

fig = plt.figure()
ax1 = fig.add_subplot(111)
for idx, series in enumerate(plots):
    # The legend entry is the TF list of the first recorded partition.
    legend_text = " ".join(series['tfs'][0])
    # The Zur/ZntR pair is drawn thicker and with diamond markers.
    highlighted = legend_text == "Zur ZntR"
    ax1.plot(series['part'], series['n_tfs'], c=cmap(idx),
             linewidth=4 if highlighted else 2,
             marker="D" if highlighted else "",
             label=legend_text)

plt.legend(loc='upper right')
plt.title("TF pair persistence through partitions of model resolution")
plt.xlabel("Partition")
plt.ylabel("Number of TFs")
plt.show()
-------------------------------------------------
protein] hpr phosphorylated_ext h [glycine-cleavage 6-phosphate l-glutamate complex ketoglutarate co2
phosphorylated_ext hpr 6-phosphate ptsh protein] h [glycine-cleavage arbutin-6p complex l-glutamate
l-glutamate protein] gaba h [glycine-cleavage complex gaba_ext n6-lipoyl-l-lysine l-glutamate_ext trans-rxn-92
ribose-1-phosphate inosine adenosine xanthosine guanosine deoxy-d-ribose vitamin xanthosine_ext adenine 1-phosphate
trans-rxn-92 drug drug_ext l-glutamate ketoglutarate acrab acrab_mrna marrab_mrna marrab gaba
csgdefg csgdefg_mrna phosphate csga rxn-5068 csga_ext csge-csgg flhdc_mrna flhd-flhc flhdc
phosphate gluconate glyceraldehyde-p xylulose-p csgdefg_mrna csgdefg 6-p fructose-6-p arbutin-6p dhap
vitamin nitrite fumarate menaquinol k2 aspartate phosphate oxygen ribose-1-phosphate gluconate
phosphate 6-phosphate acetyl-coa beta-d-glucose-6-p xylulose gluconate arbutin-6p 6-p ribulose-5-p xylulose-p
gluconate 6-phosphate arbutin-6p salicin-6-p beta-d-glucose-6-p 6-p xylulose-p bglg bglgfb bglgfb_mrna
acpp trans hexadecenoyl-[acp] malonyl-s-acp acetyl-coa ester phosphate coenzyme fatty beta-hydroxymyristoyl-[acp]
oxygen ketoglutarate acid flda thioredoxin vitamin acpp csgdefg_mrna csgdefg nitrite
thioredoxin udpg udp-galactose d-glucose flda 1-phosphate galactose-1-p disulfide amylose acetyl-coa
udpg udp-galactose thioredoxin d-glucose 1-phosphate oxygen galactose-1-p acetyl-coa alanine ketoglutarate
gluconate xylulose ribulose-5-p 3-keto-l-gulonate ribulose 5p xylulose-p kdgp thioredoxin lactaldehyde
alanine desulfurase] persulfide [l-cysteine l-cysteine cysteines protein unsulfurated donor] [sulfur
thioredoxin flda flhd-flhc flhdc_mrna flhdc rela-mazef_mrna rela-mazef mazf-maze 7,8-dihydrofolate disulfide
flhd-flhc flhdc_mrna flhdc flhc flhd csgdefg_mrna csgdefg gltbdf_mrna gltbdf thioredoxin
acetyl-coa acpp ketoglutarate tdcabcdefg_mrna tdcabcdefg trans coenzyme hyaabcdef_mrna hyaabcdef hexadecenoyl-[acp]
tdcabcdefg tdcabcdefg_mrna tdca serine serine_ext dehydroalanine propanoyl-p tdce threonine_ext tdcd
poly-beta-1,6-n-acetyl-d-glucosamine n-deacetylated partially poly-beta-1,6-n-acetyl-d-glucosamine_ext 7,8-dihydrofolate mazefg_mrna mazefg acetyl-coa biodex coenzyme
relbe-hokd_mrna relb-rele relbe-hokd relb rele hokd 7,8-dihydrofolate yefm-yoeb poly-beta-1,6-n-acetyl-d-glucosamine yoeb-yefm
poly-beta-1,6-n-acetyl-d-glucosamine 7,8-dihydrofolate guanosine-diphosphate mazefg_mrna mazefg n-deacetylated partially trna poly-beta-1,6-n-acetyl-d-glucosamine_ext glyceraldehyde-p
zinc zinc_ext zint poly-beta-1,6-n-acetyl-d-glucosamine zint_mrna acetyl-coa n-deacetylated partially poly-beta-1,6-n-acetyl-d-glucosamine_ext coenzyme
yefm-yoeb yefm-yoeb_mrna yoeb-yefm yefm yoeb zinc zinc_ext zint poly-beta-1,6-n-acetyl-d-glucosamine acetyl-coa
poly-beta-1,6-n-acetyl-d-glucosamine zinc partially n-deacetylated acetyl-coa poly-beta-1,6-n-acetyl-d-glucosamine_ext zinc_ext coenzyme fmn flda
glyceraldehyde-p dhap 1,6-diphosphate tagatofuranose phosphate fructose-6-p 3-keto-l-gulonate ribulose-5-p tagaaldol-rxn sedoheptulose-7-p
biodex lysine ketoglutarate 1-_ext cadba_mrna cadba thioredoxin acid lysine_ext cadc
dhap fructose-6-p lactaldehyde rhasr rhasr_mrna rhamnose rhar xylulose-p glyceraldehyde-p rhas
carbonate methionine oxygen succinate fumarate aminoacrylate glutamine hyaabcdef_mrna hyaabcdef aspartate
fructuronate uidabc_mrna uidabc uxur mannonate hyaabcdef_mrna hyaabcdef uxuab_mrna uxuab appcbxa
hyaabcdef_mrna hyaabcdef fructuronate appcbxa_mrna appcbxa uidabc_mrna uidabc flda uxur methionine
glycerate tartronate-s-ald ureidoglycolate glyoxylate allantoate 5-kdg allantoin alls methionine 2-p-d-glycerate
methionine ketoglutarate rela-mazef rela-mazef_mrna 7,8-dihydrofolate biodex amylose thioredoxin hipb-hipa lysine
oxygen methionine ketoglutarate 2,3-dihydroxycinnamic acetyl-coa biodex 2,3-dhp fmn gaba lysine
7,8-dihydrofolate amylose rela-mazef_mrna rela-mazef trna hipb-hipa methylene-h4pteglu(n) hipba hipba_mrna ppgpp
fmn flavin amylose sulfite mononucleotide sulfide thioredoxin d-glucose flda maltohexaose
amylose fmn acid flavin d-glucose 1-phosphate 2,3-dihydroxycinnamic carbonate maltohexaose fumarate
fructose-6-p 1,6-diphosphate tagatofuranose rhasr rhasr_mrna rhar rhamnose fumarate rhas tagaaldol-rxn
puta flda glutamate-semialdehyde 5-carboxylate pyrroline thioredoxin proline qsebc_mrna qsebc fumarate
hyfabcdefghijr-focb_mrna hyfabcdefghijr-focb hydrogenase hydrogen electron hyfg-hyfi-hyfa-hyfb-hyfc-hyfd-hyfe-hyff-hyfh mvhmeth-rxn 3 aidb vitamin
carbonate fumarate puta glutamate-semialdehyde 5-carboxylate pyrroline flda succinate_ext vitamin malate
flda lactate with peptidoglycan glycolate sorbitol lactate_ext nascent srlaebd-gutm-srlr-gutq srlaebd-gutm-srlr-gutq_mrna
ion cobalt nickel nickel_ext arsenite nikabcder_mrna nikabcder melibiose nikd-nike-nikb-nikc-nika ion_ext
aidb carbonate marrab_mrna marrab co2 hyfabcdefghijr-focb_mrna hyfabcdefghijr-focb aidb_mrna dna hydrogenase
lactose melibiose marrab marrab_mrna lactose_ext melibiose_ext flda sgrst-seta sgrst-seta_mrna lactate
aidb sorbitol flda srlaebd-gutm-srlr-gutq_mrna srlaebd-gutm-srlr-gutq tryptophan carbonate marrab_mrna marrab diacetylchitobiose
tryptophan indole flda serine fructose-6-p vitamin chorismate shikimate shikimate-5-p tryptophan_ext
lactose melibiose marrab_mrna marrab lactose_ext flda lactate melibiose_ext sgrst-seta sgrst-seta_mrna
aicar 5\'-p-ribosylglycinamide faicar thf polyglutamate 5-phosphoribosylamine glycine aidb lactate lactose
aidb lactate lipid glycolate aicar acetyl-coa 5\'-p-ribosylglycinamide i carbonate faicar
ribulose dhap 1,6-diphosphate tagatofuranose glycoaldehyde 3-keto-l-gulonate lactaldehyde rhasr_mrna rhasr rhar
ribulose beta-neu5ac n-acetylneuraminate short-chain nickel alpha-neu5ac 3-keto-l-gulonate fe+2 alcohol lactate
fructose-6-p cu(i) cu(i)_ext carbonate silver alpha-d-mannose-6-p silver_ext trans-rxn-90 cuscfba_mrna cuscfba
aidb (2s,3s)-2-methylcitrate carbonate cu(i) nemra-gloa nemra-gloa_mrna propanoyl-coa nemr cu(i)_ext tartrate
fliaz-tcyj_mrna fliaz-tcyj flifghijk_mrna flifghijk aidb flda glyoxylate vitamin i-cit coenzyme
ureidoglycolate allantoate glycerate glycerol-3-phosphate lactate arsenite fliaz-tcyj fliaz-tcyj_mrna fe+2 flifghijk_mrna
3-keto-l-gulonate 5p 7 p-enol-pyruvate aidb carbonate tartrate ureidoglycolate ribulose dhap
ureidoglycolate hyacinthin allantoate glycerate phenylacetate flda acetyl-coa 5-kdg coenzyme allantoin
fe+2 mn(ii) flda mn(ii)_ext 3-keto-l-gulonate ftsq acetyl-coa hns beta-neu5ac 5p
tartrate mn(ii) hyacinthin arsenite fe+2 nemra-gloa nemra-gloa_mrna nemr (2s,3s)-2-methylcitrate mn(ii)_ext
arsenite glycerol-3-phosphate hyacinthin tartrate melibiose alanine sgrst-seta sgrst-seta_mrna rcho alcohol
hyacinthin tartrate glycerol-3-phosphate arsenite phenylacetate aspartate mn(ii) 2,3-dihydroxycinnamic fe+2 rpos
uidabc uidabc_mrna hyacinthin exut ftsq tartrate ureidoglycolate rpos i-cit lyase
hyacinthin melibiose betaine glycerol-3-phosphate sgrst-seta_mrna sgrst-seta choline melibiose_ext mn(ii) lactate
rpos rpos_mrna rpob-rpoc-rpoa-rpos hns dna dinj-yafq hns_mrna ureidoglycolate double hyacinthin
ftsq ftsq-ftsb-ftsl rna tartrate dinj-yafq norr gadw lactate norvw_mrna norvw
norr norvw norvw_mrna hyacinthin ureidoglycolate dinj-yafq rna norr_mrna flrd betaine
ftsq dinj-yafq ftsq-ftsb-ftsl gadw yafq-dinj dinj-yafq_mrna rna ftsz ftsa gadw_mrna
lysa rna lysa_mrna dinj-yafq yafq-dinj dinj-yafq_mrna rpos yafq dinj hns
bola rcho alcohol rna lysa arsenite poly-beta-1,6-n-acetyl-d-glucosamine lysa_mrna rpos melibiose
maly bola ftsq lysa rna rcho nemra-gloa_mrna nemra-gloa nemr ftsq-ftsb-ftsl
rna lysa rcho alcohol lysa_mrna bola rpos hns dinj-yafq guanylate
rcho lysa alcohol betaine lysa_mrna choline rna aldehyde nemr nemra-gloa
rcho alcohol arsenite rna hns betaine rpos choline aldehyde dna
hns dna lysa rpos hns_mrna double aidb rcho betaine stranded
tartrate betaine hyacinthin nemr nemra-gloa_mrna nemra-gloa choline norr aidb aldehyde
uidabc uidabc_mrna arsenite melibiose hyacinthin sgrst-seta sgrst-seta_mrna dinj-yafq 7 uxur
tartrate hyacinthin serine dsdc (2s,3s)-2-methylcitrate propanoyl-coa aspartate acetyl-coa melibiose succinate_ext
serine dsdc mn(ii) arsenite serine_ext mn(ii)_ext mdtabcd-baesr mdtabcd-baesr_mrna dehydroalanine uidabc
mn(ii) mdtabcd-baesr_mrna mdtabcd-baesr mn(ii)_ext tartrate fe+2 dps mntp mannitol-1-p marrab
arsenite moaabcde_mrna moaabcde cpmp fe+2 mn(ii) synthase small molybdopterin citcdefxg_mrna
serine dsdc vitamin tartrate serine_ext (2s,3s)-2-methylcitrate mn(ii) propanoyl-coa fe+2 methylisocitrate
mannitol-1-p mtladr mtladr_mrna 6-phosphate mtlr alpha-d-mannose-6-p moaabcde_mrna moaabcde cpmp mannitol_ext
pimelyl-[acp] melibiose ester arsenite alanine malonyl-s-acp mdtabcd-baesr_mrna mdtabcd-baesr methyl dinj-yafq
glycerol-3-phosphate arsenite vitamin mannitol-1-p mdtabcd-baesr mdtabcd-baesr_mrna mtladr_mrna mtladr gltbdf_mrna gltbdf
alanine vitamin dinj-yafq lipid aspartate tyrosine flda i citcdefxg_mrna citcdefxg
aspartate soxr glycerol-3-phosphate fumc fumhydr-rxn fumac_mrna fumac soxs thf polyglutamate
qsebc qsebc_mrna gltbdf_mrna gltbdf glycerol-3-phosphate qseb gltd-gltb aspartate puta ftsq
pimelyl-[acp] ester methyl co2 ferredoxin [2fe-2s] vitamin flda gltbdf_mrna gltbdf
glrr glycerol-3-phosphate lactaldehyde ribulose glycoaldehyde xylopyranose xylab_mrna xylab aspartate tyrosine
arsenite aspartate glycerol-3-phosphate short-chain rcho thf polyglutamate p-enol-pyruvate n5-methyl-tetrahydrofolate hyacinthin
mdtabcd-baesr mdtabcd-baesr_mrna malonyl-coa vitamin glrr acetyl-coa-carboxyltransfer-rxn accd-acca-accc-accb accbc_mrna accbc accb
glrr vitamin aspartate alanine short-chain (2s,3s)-2-methylcitrate arsenite thf polyglutamate soxr
glrr putrescine_ext putrescine 5-phosphate gaba vitamin glycerol-3-phosphate tyrosine phosphate acetyl-coa
p-enol-pyruvate glrr ribulose 7 2 lactaldehyde vitamin tyrosine glycoaldehyde beta-neu5ac
diacetylchitobiose beta-d-cellobiose-6\'-p ptsh lipid phosphorylated_ext hpr i isocitrate-dehydrogenase 6\'-phosphate 6-phosphate
putrescine_ext glrr putrescine n2-succinylornithine alanine n2-succinylarginine arginine gaba 2-acetamido-5-oxopentanoate tyrosine
alanine lactose vitamin tyrosine qsebc qsebc_mrna aminoacrylate melibiose pimelyl-[acp] d-glucose
alanine pimelyl-[acp] lactose melibiose ester putrescine_ext putrescine aminoacrylate tyrosine qsebc_mrna
nitrate_ext vitamin narghji narghji_mrna tyrosine menaquinol_ext te0 narg-narh-nari rxn0-7124 rxn-15119
kdgp eda 4-hydroxy-2-oxoglutarate 5-ketogluconate edd-eda_mrna edd-eda glyceraldehyde-p idonate 6-p sedoheptulose-7-p
d-glucose succinate aminoacrylate n2-succinylornithine ketoglutarate amylose tyrosine succ-s-ald aidb n2-succinylarginine
lactate glcdefgba glcdefgba_mrna glycolate lactate_ext 5-kdg vitamin kdgp isocitrate-dehydrogenase 6-phosphate
cobalt nikabcder nikabcder_mrna nikd-nike-nikb-nikc-nika abc-20-rxn ion arsenite rcnab rcnab_mrna malonyl-s-acp
putrescine_ext putrescine cobalt carbonate malonyl-s-acp fatty malonyl-coa carbamate tyrosine carbamoyl-p
carbonate carbamate putrescine_ext cobalt putrescine alanine carbamoyl-p glutamine malonyl-coa pimelyl-[acp]
6-phosphate beta-d-cellobiose-6\'-p salicin-6-p prpr glycerate glucose-6-p amylose lactose beta-d-cellobiose_ext arbutin-6p
lactose sgrst-seta sgrst-seta_mrna melibiose cobalt alanine glucopyranose beta-d-cellobiose-6\'-p lactose_ext nikabcder
malonyl-s-acp 5-phosphate fatty alpha-d-mannose-6-p beta-d-glucose-6-p co2 ribulose-5p ribose-5p ester chorismate
amylose lactose d-glucose alpha-glucose 1-phosphate succinate 6-phosphate beta-d-cellobiose-6\'-p maltohexaose aspartate
tyrosine amylose maltohexaose aminoacrylate qsebc_mrna qsebc flifghijk flifghijk_mrna d-glucose alpha-glucose
alpha-d-mannose-6-p ribulose-5p ribose-5p beta-d-glucose-6-p 5-phosphate malonyl-s-acp phenylacetate suc-coa hyacinthin fatty
aspartate tyrosine alpha-d-mannose-6-p 5-phosphate tartrate hydrogenase fumarate oxygen ribulose-5p acetyl-coa
hydrogenase 5-phosphate 3 tyrosine hyfabcdefghijr-focb hyfabcdefghijr-focb_mrna mvhmeth-rxn hyfg-hyfi-hyfa-hyfb-hyfc-hyfd-hyfe-hyff-hyfh electron formate
6-phosphate diacetylchitobiose 5-phosphate hydrogenase sorbitol n-monoacetylchitobiose trer alpha-d-glucosamine 6\'-phosphate chitobiose_ext
5-phosphate hydrogenase flic flifghijk flifghijk_mrna 6-phosphate fliaz-tcyj_mrna fliaz-tcyj gade-mdtef_mrna gade-mdtef
5-phosphate aspartate phenylacetate suc-coa coenzyme alpha-d-mannose-6-p succinate 6-phosphate vitamin hyacinthin
5-phosphate flic acetyl-coa csgbac_mrna csgbac aspartate fliaz-tcyj_mrna fliaz-tcyj flifghijk flifghijk_mrna
glycerate ptsh dehydrogenase e2 [pyruvate h [glycine-cleavage 5-kdg 6-phosphate complex
-------------------------------------------------