declare ordered parameter vector in TMB

81 views
Skip to first unread message

catar...@gmail.com

unread,
May 26, 2023, 5:36:09 PM5/26/23
to TMB Users
Hi! 

I have been working on building a hidden markov model in TMB. i.e., a model in which two or more states and transition probabilities

In order to get beyond relative convergence issues I need to order the states somehow. e.g., parameter a(1) < a(2).  I am wondering if there is a way to do this in TMB? similarly to this feature available in stan.

I am currently implementing my own version of the ordered vector based on the work of Tang et al 2021 . But I am wondering if there are other options out there or if you have any recommendations. 

here is my (heavily based on the Tang et al 2021) cpp code:

#include <TMB.hpp>
#include <iostream>

template<class Type>
vector<Type> segment_1(vector<Type> yt, vector<Type> st, matrix<Type> qij,vector<Type> pi1,vector<Type> alpha,vector<Type> beta,Type sigma,int t){
 
  int k_regime = beta.size();
  Type small = pow(10,-300);
  vector<Type> sr = log(pi1 + small);
 
  for(int j = 0;j < k_regime;++j){
    Type f_now = alpha(j) - beta(j)*st(0);
    sr(j) += dnorm(yt(0), f_now, sigma,true);
  }
 
  for(int i = 1;i <= t;++i){
    vector<Type> sr_new = sr;    
   
    for(int j = 0;j < k_regime;++j){
      sr_new(j) = sr(0) +qij(0,j);
   
      for(int jj = 1;jj < k_regime;++jj){
        Type temp = sr(jj) +qij(jj,j);
        sr_new(j) = logspace_add(sr_new(j),temp);
      }
    }
    sr = sr_new;
 
    for(int j = 0;j < k_regime;++j){
      Type f_now = alpha(j) - beta(j)*st(i);
      sr(j) += dnorm(yt(i), f_now, sigma,true);
    }
  }
  return sr;
}


template<class Type>
Type segment_2(vector<Type> yt, vector<Type> st, matrix<Type> qij,vector<Type>
pi1,vector<Type> alpha,vector<Type> beta,Type sigma,int rt,int t){
 
  int k_regime = beta.size();
  int n = yt.size();
  vector<Type> sr = qij.row(rt);
 
  for(int j = 0;j < k_regime;++j){
    Type f_now = alpha(j) - beta(j)*st(t+1);
    sr(j) += dnorm(yt(t+1), f_now, sigma,true);
  }
 
  for(int i = t+2;i < n;++i){
    vector<Type> sr_new = sr;
    for(int j = 0;j < k_regime;++j){
      sr_new(j) = sr(0) +qij(0,j);
      for(int jj = 1;jj < k_regime;++jj){
        Type temp = sr(jj) +qij(jj,j);
        sr_new(j) = logspace_add(sr_new(j),temp);
      }
    }
    sr = sr_new;
    for(int j = 0;j < k_regime;++j){
      Type f_now = alpha(j) - beta(j)*st(i);
      sr(j) += dnorm(yt(i), f_now, sigma,true);
    }
  }
  Type seg2 = sr(0);
  for(int j = 1;j < k_regime;++j){
    seg2 = logspace_add(seg2,sr(j));
  }
  return seg2;
}

//main //////////////////////////////////////////////
template<class Type>
Type objective_function<Type>::operator() ()
{

  DATA_VECTOR(yt);
  DATA_VECTOR(st);
  DATA_SCALAR(alpha_u); //upper bound for a
  DATA_SCALAR(alpha_l); //lower bound for a
  DATA_SCALAR(beta_u);  //upper bound for b
  DATA_SCALAR(beta_l);  //upper bound for b


 
  PARAMETER_VECTOR(lalpha);
  PARAMETER_VECTOR(lbeta);
  PARAMETER(logsigma);
  PARAMETER_VECTOR(pi1_tran); // initial state probabilities
  PARAMETER_MATRIX(qij_tran); // transition probabilities

  int k_regime = lbeta.size();
 
  vector<Type> beta = (beta_u-beta_l)/(1+exp(-lbeta))+beta_l;// when lbeta is negative infinity, beta=beta_l; when lbeta is positive infinity, beta=beta_u
 
  vector<Type> alpha(k_regime);


 //ordered vector
  alpha(0) = (alpha_u-alpha_l)/(1+exp(-lalpha(0)))+alpha_l;
 
  for(int i = 1;i < k_regime;++i){
    alpha(i) = alpha(i-1) + (alpha_u-alpha(i-1))/(1+exp(-lalpha(i)));

  } // alpha(1) from alpha(0) to alpha_u

 
  Type sigma = exp(logsigma);
  vector<Type> pi1(k_regime);
 
  for(int i = 0;i < k_regime-1;++i){
    pi1(i) = exp(pi1_tran(i));    
  }
  pi1(k_regime-1) = 1;
  pi1 = pi1/(pi1.sum());
 
  Type small = pow(10,-300);
  matrix<Type> qij(k_regime,k_regime);
  for(int i = 0;i < k_regime;++i){
    for(int j = 0;j < k_regime-1;++j){
      qij(i,j) = exp(qij_tran(i,j));
    }
    qij(i,k_regime-1) = 1;
    vector<Type> qij_row = qij.row(i);
    Type row_sum = qij_row.sum();
    for(int j = 0;j < k_regime;++j){
      qij(i,j) = qij(i,j)/row_sum;
      qij(i,j) = log(qij(i,j)+small);
    }
  }
  int n = yt.size();
  vector<Type> sr = segment_1(yt, st, qij,pi1,alpha,beta,sigma,n-1);
  Type nll = sr(0);
 
  for(int j = 1;j < k_regime;++j){
    nll = logspace_add(nll,sr(j));
  }
  nll = -nll;

 
 
// predict r_t ///////////////////////////////////
  matrix<Type> r_pred(k_regime,n);
  for(int i = 0;i < n-1;++i){
    sr = segment_1(yt, st, qij,pi1,alpha,beta,sigma,i);
    for(int j = 0;j < k_regime;++j){
      Type tempt = sr(j) + segment_2(yt, st, qij,pi1,alpha,beta,sigma,j,i) + nll;
      r_pred(j,i) = exp(tempt);
    }
  }
  sr = segment_1(yt, st, qij,pi1,alpha,beta,sigma,n-1);
 
 
 qij = exp(qij.array());

 



  Type ans= nll;

REPORT(beta);
REPORT(alpha);
REPORT(sigma);
REPORT(pi1);
REPORT(qij);
REPORT(r_pred);
REPORT(nll);

ADREPORT(alpha);
ADREPORT(beta);
ADREPORT(sigma);
ADREPORT(pi1);
ADREPORT(qij);

return ans;

}


 Thank you!


Catarina

Hans Skaug

unread,
May 29, 2023, 11:38:55 AM5/29/23
to TMB Users
One could argue that such constraints should be task of the function minimizer (if that is what you are doing).
One optimizer with such functionality is "nloptr", but there is a bit to read to get started.

A simple hack in the C++ file is to let b(1), and b(2) be unconstrained and then
a(1)=b(1)
a(2) = a(1)+exp(b(2))
Reply all
Reply to author
Forward
0 new messages