Hi,
I am trying to use a sympy expression to represent my unknown parameter: t.
However, when using emcee for parallel computing, this error occurred:
h5py/h5t.pyx in h5py.h5t.py_create()
h5py/h5t.pyx in h5py.h5t.py_create()
h5py/h5t.pyx in h5py.h5t.py_create()
h5py/h5t.pyx in h5py.h5t._c_array()
h5py/_objects.pyx in h5py._objects.with_phil.wrapper()
h5py/_objects.pyx in h5py._objects.with_phil.wrapper()
h5py/h5t.pyx in h5py.h5t.array_create()
ValueError: Zero-sized dimension specified (zero-sized dimension specified)
A minimal example:
import numpy as np
import os
from sympy import symbols, Matrix
from sympy import log,pi
from sympy.abc import t,s,c
from sympy import lambdify
from multiprocessing import Pool
import time
import emcee
import h5py
import multiprocessing
multiprocessing.set_start_method("fork")
def log_prior(x):
if x<=1000 and x>0:
return 0.0
return -np.inf
def log_likelihood(x):
ll=-0.5 *chi-0.5*log(sigma)-0.5*log(2*pi)
ll_t = lambdify(t,ll)
ll_x=ll_t(x)
return ll_x
def log_probability(x):
ll=log_likelihood(x)
lp = log_prior(x)
if not np.isfinite(lp):
return -np.inf
return lp + ll
t=symbols('t',positive=True)
sigma=log(t)+pi
chi=t**2-pi
t_all=[500]
n_dim = len(t_all)
n_walkers = int(4*n_dim)
step = 1000
var_values_init =float(t_all[0])#+np.random.rand(nwalkers, ndim)
filename = "test.h5"
values_2 = np.random.uniform(var_values_init-100, var_values_init+100,size = [n_walkers, n_dim] )
with Pool() as pool:
start = time.time()
if not os.path.exists(filename):
backend = emcee.backends.HDFBackend(filename)
sampler = emcee.EnsembleSampler(nwalkers = n_walkers, ndim = n_dim, log_prob_fn=log_probability, pool = pool, backend=backend)
print('First run success')
sampler.run_mcmc(values_2, nsteps = step, progress=True)
else:
os.remove(filename)
backend = emcee.backends.HDFBackend(filename)
sampler = emcee.EnsembleSampler(nwalkers = n_walkers, ndim = n_dim, log_prob_fn=log_probability, pool = pool, backend=backend)
print('Second run success')
sampler.run_mcmc(values_2, nsteps = step, progress=True)
end = time.time()
serial_time = end - start
print("1 nodes * 4 cores with multiprocess took {0:.1f} seconds".format(serial_time))
Then I tried to debug by running only one step, but got this weird error:
AttributeError: 'function' object has no attribute 'map'
Debug code:
filename1 = "test1.h5"
backend1 = emcee.backends.HDFBackend(filename1)
sampler = emcee.EnsembleSampler(nwalkers = n_walkers, ndim = n_dim, log_prob_fn=log_probability, pool = Pool, backend=backend)
sampler.run_mcmc(values_2 , 1, skip_initial_state_check=True)
Best regards!