import warnings
warnings.filterwarnings('ignore')
import numpy as np
import matplotlib.pyplot as plt
def f(x): # The function we wish to integrate
    if x[0]**2 + x[1]**2 <= 1:
        return 1
    else:
        return 0
def Q(x,V): # Estimator of the integral I of f, i.e., V times sample mean of f
    m = len(x)
    return V * sum([f(x[mu]) for mu in range(m)])/m
def sigma2(x,V): # Sample variance of f
    m = len(x)
    return sum([f(x[mu])**2 for mu in range(m)])/m - (Q(x,V)/V)**2
def sample(m): # Generates a sample of m points uniformly distributed in the square
    X1 = np.random.uniform(-1,1,m)
    X2 = np.random.uniform(-1,1,m)
    return np.stack((X1, X2), axis=-1)
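# Hypothetical aside (not in the original script): the same estimate can be
# written in vectorized form, as the fraction of points inside the disc
# times the area V = 4 of the square.
pts = sample(10000)
inside = pts[:,0]**2 + pts[:,1]**2 <= 1
print("quick vectorized estimate of pi: {0}".format(4*np.mean(inside)))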
np.random.seed(1)
M = np.unique(np.logspace(1, 5, num=500).astype(int)) # log-spaced values of m, with duplicates removed
X = {m : sample(m) for m in M}
q = np.array([Q(X[m],4) for m in M])
error = abs(q - np.pi)/np.pi # relative error of the MC estimate
q_theo = np.array([np.pi for m in M])
error_theo = np.array([(4/np.pi)*np.sqrt(sigma2(X[m],4)/m) for m in M]) # relative std of Q_m: V*sqrt(sigma2/m)/I, with V = 4 and I = pi
# comparison between Qm and I as a function of m
plt.plot(M,q,'k',label='MC')
plt.plot(M,q_theo,'r',label=r'theoretical ($\pi$)')
plt.xscale('log')
plt.ylim(1.8,4.2)
plt.legend()
plt.xlabel('$m$')
plt.ylabel('$Q_m$')
plt.show()
# comparison of the relative error and its theoretical estimate V*sqrt(sigma2/m)/I
plt.plot(M,error,'k',label='MC')
plt.plot(M,error_theo,'r',label='theoretical')
plt.xscale('log')
plt.yscale('log')
plt.legend()
plt.xlabel('$m$')
plt.ylabel(r'$\delta Q_m$')
plt.show()
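# Hypothetical extra check (not in the original script): the Monte Carlo error
# is expected to scale as m^(-1/2), so a pure power law anchored at the first
# theoretical point should track both curves on the log-log plot.
plt.plot(M,error,'k',label='MC')
plt.plot(M,error_theo,'r',label='theoretical')
plt.plot(M,error_theo[0]*np.sqrt(M[0]/M),'b--',label=r'$\propto m^{-1/2}$')
plt.xscale('log')
plt.yscale('log')
plt.legend()
plt.xlabel('$m$')
plt.ylabel(r'$\delta Q_m$')
plt.show()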
def S(x): # S(x) = -log(L(x)), with L proportional to the distribution we wish to sample
    return x**2/2
def markov_chain(x0,c,m,S):
    # Return a sequence of m points following a pdf proportional to L(x) = exp(-S(x)) (starting point x0, step-size c)
    chain = [x0]
    for mu in range(1,m):
        x_old = chain[-1]
        x = np.random.normal(x_old,c) # symmetric Gaussian proposal around the current point
        alpha = np.exp(S(x_old)-S(x)) # Metropolis acceptance ratio
        u = np.random.uniform(0,1)
        if u <= alpha:
            chain.append(x)
        else:
            chain.append(x_old)
    return chain
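# A minimal variant (hypothetical, not part of the original code): the same
# Metropolis loop, additionally returning the acceptance rate, which helps
# in tuning the step size c (rates near 0 or 1 both mean slow exploration).
def markov_chain_with_rate(x0,c,m,S):
    chain = [x0]
    accepted = 0
    for mu in range(1,m):
        x_old = chain[-1]
        x = np.random.normal(x_old,c)
        alpha = np.exp(S(x_old)-S(x))
        if np.random.uniform(0,1) <= alpha:
            chain.append(x)
            accepted += 1
        else:
            chain.append(x_old)
    return chain, accepted/(m-1)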
np.random.seed(1)
m = 1000
M = np.arange(m)
X = np.array(markov_chain(100,1,m,S)) # start far in the tail (x0 = 100) to make the burn-in visible
# plot of X[mu] as a function of mu: the chain thermalizes after an initial burn-in period
plt.plot(M,X,'k')
plt.xlabel(r'$\mu$')
plt.ylabel(r'$x^{(\mu)}$')
plt.show()
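# A rough autocorrelation check (hypothetical addition): successive Metropolis
# points are correlated, so the chain carries fewer effective samples than its
# length suggests; the decay of this curve sets the correlation time.
def autocorr(chain,max_lag=50):
    y = np.asarray(chain) - np.mean(chain) # centered chain
    c0 = np.dot(y,y)/len(y) # lag-0 autocovariance (variance)
    return np.array([np.dot(y[:len(y)-k],y[k:])/(len(y)*c0) for k in range(max_lag)])
plt.plot(autocorr(X[300:]),'k')
plt.xlabel('lag')
plt.ylabel('autocorrelation')
plt.show()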
# histograms of X for two values of m: the theoretical pdf is approached as m grows
X = np.array(markov_chain(100,1,int(1e3),S))[300:] # m = 10^3; discard the first 300 points as burn-in
plt.subplot(1,2,1)
plt.hist(X,density=True,bins=500,range=(-4,4),color='k')
plt.ylim(0,0.5)
plt.ylabel('$p_X(x)$')
plt.xlabel('$x$')
plt.title('$m = 10^3$')
X = np.array(markov_chain(100,1,int(1e5),S))[300:] # m = 10^5
plt.subplot(1,2,2)
plt.hist(X,density=True,bins=500,range=(-4,4),color='k')
plt.ylim(0,0.5)
plt.xlabel('$x$')
plt.title('$m = 10^5$')
plt.show()
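# Hypothetical visual check: overlay the normalized target density
# exp(-x^2/2)/sqrt(2*pi) on the m = 10^5 histogram.
x_grid = np.linspace(-4,4,200)
plt.hist(X,density=True,bins=500,range=(-4,4),color='k')
plt.plot(x_grid,np.exp(-x_grid**2/2)/np.sqrt(2*np.pi),'r')
plt.ylim(0,0.5)
plt.xlabel('$x$')
plt.ylabel('$p_X(x)$')
plt.show()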
def expected_value(F,X,*args): # Expected value of function F (which may depend on variables *args) on the chain X
    m = len(X)
    return sum([F(x,*args) for x in X])/m
def F_power(X,n): # Power function (used as F in expected_value). Note that F_power(X,1) is the identity function
    return X**n
print("E[x] = {0}".format(expected_value(F_power,X,1)))
print("E[x^2] = {0}".format(expected_value(F_power,X,2)))
x_data = np.linspace(0,9,10)
y_data = np.array([0.22,0.42,6.67,6.66,8.01,15.52,12.67,17.10,18.15,21.74])
sigma0 = 1.5 # common standard deviation of all data points
plt.errorbar(x_data,y_data,yerr=sigma0,color='k')
plt.xlabel('$x$')
plt.ylabel('$y$')
plt.show()
def h(x,theta): # Hypothesis of the fit: a straight line y = a + b*x with theta = (a,b)
    return theta[0] + theta[1]*x
def S(theta): # Negative log-likelihood (up to an additive constant), i.e., chi-squared/2
    return sum((y_data - h(x_data,theta))**2/(2 * sigma0**2))
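# Note: this S is quadratic in theta, so the target exp(-S) is a Gaussian
# posterior in (a,b) and the marginal histograms of a and b should be Gaussian.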
np.random.seed(1)
m = int(1e5) # chain length
theta0 = np.array([-5,10]) # Starting point in parameter space: a = -5 and b = 10
theta_sequence = np.array(markov_chain(theta0,1,m,S))
# plot of values of a in the sequence
plt.plot(theta_sequence[:,0],'k')
plt.xlabel(r'$\mu$')
plt.ylabel('$a$')
plt.show()
# plot of values of b in the sequence
plt.plot(theta_sequence[:,1],'k')
plt.xlabel(r'$\mu$')
plt.ylabel('$b$')
plt.show()
# trajectory of the chain in the (a,b) parameter space
plt.plot(theta_sequence[:,0],theta_sequence[:,1],'k')
plt.xlabel('$a$')
plt.ylabel('$b$')
plt.show()
a = theta_sequence[1000:,0] # discard the first 1000 points as burn-in
b = theta_sequence[1000:,1]
plt.hist2d(a,b,bins=(50, 50))
plt.xlabel('$a$')
plt.ylabel('$b$')
plt.show()
mean_a = expected_value(F_power,a,1)
mean_b = expected_value(F_power,b,1)
sigma_a = np.sqrt(expected_value(F_power,a,2) - mean_a**2)
sigma_b = np.sqrt(expected_value(F_power,b,2) - mean_b**2)
print("a_* = {0}".format(mean_a))
print("b_* = {0}".format(mean_b))
print("sigma_a = {0}".format(sigma_a))
print("sigma_b = {0}".format(sigma_b))
# t-values (mean/sigma): how significantly each parameter differs from zero
print("t_a = {0}".format(mean_a/sigma_a))
print("t_b = {0}".format(mean_b/sigma_b))
# t_a is small, so a is compatible with zero and the fit is redrawn with a = 0
fit_list = [h(x,[0,mean_b]) for x in x_data]
# 2-sigma confidence band for b (95%)
fit_list_min = [h(x,[0,mean_b - 2*sigma_b]) for x in x_data]
fit_list_max = [h(x,[0,mean_b + 2*sigma_b]) for x in x_data]
plt.errorbar(x_data, y_data, yerr=sigma0,color='k',label='data')
plt.plot(x_data,fit_list,'r',label='MCMC best fit')
plt.fill_between(x_data, fit_list_min, fit_list_max,color='r',alpha=0.25)
plt.xlabel('$x$')
plt.ylabel('$y$')
plt.legend()
plt.show()