Does it harm that the n_eff is rather small? The paper provides different estimates, however they use MLE for the hyperparameters.
HBCode <-"data {
int<lower=0> n_cust;
vector<lower=0>[n_cust] x;
vector<lower=0>[n_cust] time_of_last_txn;
vector<lower=0>[n_cust] observation_time;
}
transformed data {
vector<lower=0>[n_cust] x_one;
x_one = x + 1;
}
parameters {
vector[n_cust] lambda_raw;
vector[n_cust] mu_raw;
vector<lower=0>[2] Beta; // log_lambda_mean and log_mu_mean
cov_matrix[2] Sigma; // lambda_sd,cov(lambda,mu),mu_sd
}
transformed parameters {
vector<lower=0>[n_cust] lambda;
vector<lower=0>[n_cust] mu;
row_vector[2] elementswise;
for ( i in 1:n_cust) {
elementswise[1] = lambda_raw[i]*Sigma[1,1] + mu_raw[i]*Sigma[2,1];
elementswise[2] = lambda_raw[i]*Sigma[1,2] + mu_raw[i]*Sigma[2,2];
lambda[i] = exp(Beta[1] + elementswise[1] );
mu[i] = exp(Beta[2] + elementswise[2] );
}
}
model {
// local variables likelihood
vector[n_cust] ll1;
vector[n_cust] ll2;
vector[n_cust] mu_lambda;
vector[n_cust] log_lambda;
vector[n_cust] log_mu;
vector[n_cust] log_mu_lambda;
real v0;
//local variables hyper
vector[2] Beta0;
matrix[2,2] Sigma0;
matrix[n_cust,2] theta;
mu_lambda = lambda + mu;
log_mu_lambda = log(mu_lambda);
log_lambda = log(lambda);
log_mu = log(mu);
theta[,1] = lambda;
theta[,2] = mu;
Beta0[1] = 0;
Beta0[2] = 0;
Sigma0[1,1]= 1;
Sigma0[1,2]= 0;
Sigma0[2,1]= 0;
Sigma0[2,2]= 1;
v0 = 3;
// hyperprior
Beta ~ multi_normal(Beta0, Sigma0);
Sigma ~ inv_wishart(v0, v0*Sigma0);
//prior
for ( i in 1:n_cust){
theta[i,] ~ multi_normal(Beta0,Sigma0); // implies theta ~ lognormal(zero, identity);
}
//likelihood
ll1 = x .* log_lambda + log_mu - log_mu_lambda - time_of_last_txn .* (mu_lambda);
ll2 = (x_one) .* log_lambda - log_mu_lambda - observation_time .* (mu_lambda);
target+= log(exp(ll1)+exp(ll2));
}
"
The estimates of Sigma don't correspond to the estimates as reported in Abe(2006). Moreover, n_eff is even smaller