# power_law_plot.py

import csv

import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import numpy as np
import powerlaw

# Load the word-count file: rows are space-delimited and the first field of
# each row is parsed as an integer count.
with open("guil/GUs_word_count.csv", 'r') as f:
    # csv.reader yields an empty list for a blank line; skip those rows so
    # that row[0] cannot raise IndexError.
    words_array = np.array(
        [row[0] for row in csv.reader(f, delimiter=" ") if row]
    ).astype("int")

# Write the counts to disk, one fixed-width (10 characters) integer per line.
np.savetxt("words.txt", words_array, fmt='% 10d')

# Reload the frequencies from the file just written (presumably already
# sorted upstream -- nothing here sorts them). np.genfromtxt returns floats
# by default, so `data` is a float array.
data = np.genfromtxt("words.txt")

# Fit a power law to the frequencies.
# NOTE(review): the input is integer counts but the fit uses the library's
# default (continuous) treatment; powerlaw.Fit(data, discrete=True) may be
# more appropriate -- confirm, since it would change the values recorded below.
fit = powerlaw.Fit(data)

# Bind and print the fitted parameters; as bare expressions they were
# evaluated and silently discarded when the file ran as a script.
alpha = fit.power_law.alpha
print("alpha = %s" % alpha)
# \alpha = 1.8517470767126287 (GUs)
sigma = fit.power_law.sigma
print("sigma = %s" % sigma)
# \sigma = 0.011286616988085252

# Hypothesis test comparing the power-law fit against an exponential fit.
# distribution_compare returns (R, p): the log-likelihood ratio between the
# two fits and its p-value; a positive R favours the first-named distribution.
# Recorded result: the power law is strongly favoured
# (R=32012.27, p-value=3.56e-275).
# NOTE(review): the original comment described this as a Kolmogorov-Smirnov
# test; the powerlaw documentation describes a likelihood-ratio test -- confirm.
R, p = fit.distribution_compare('power_law', 'exponential')
print("power_law vs exponential: R = %s, p = %s" % (R, p))
# (32012.272128016586, 3.5658919192201475e-275)

# Empirical PDF of the data with the library's default binning, drawn in blue
# (labelled as log bins in the legend below). powerlaw.plot_pdf returns the
# matplotlib Axes it drew on, so despite its name `figPDF` is used as an Axes.
figPDF = powerlaw.plot_pdf(data, color='b')
# Overlay the PDF of the same data computed with linear bins, in red, on the
# same Axes.
powerlaw.plot_pdf(data, linear_bins=True, color='r', ax=figPDF)

figPDF.set_ylabel("p(BO)")
figPDF.set_xlabel(r"BO Frequency")
figPDF.set_title(r"Power Law in Biological Objects (BOs) of GENSOR units")

# The curves are plotted without labels, so build the legend from colour
# patches that match the two plot colours.
red_patch = mpatches.Patch(color='red', label='Distribution linear bins')
blue_patch = mpatches.Patch(color='blue', label='Distribution log bins')
figPDF.legend(handles=[red_patch, blue_patch])

# Render the figure; without this call nothing is displayed when the file is
# run as a plain script.
plt.show()