Below you can find a script that generates a data set with two normally distributed parameters and recovers them using fitting a Mixed Multinomial Logit. However, at the very bottom of the script, I am confused with how to specify what you called "numerator" and "denominator" in the example you provided using the MonteCarlo() function (i.e. line: numerator = MonteCarlo(B_x1_RND * B_x2_RND * logprob) ### ???).
Could you please help me out with this issue, or point me towards an example/documentation part that I might possible overlook?
Thank you a lot for your time.
# -*- coding: utf-8 -*-
#----------------------------------
#"""
Created on Tue Oct 12 10:24:06 2021
@author: Álvaro A. Gutiérrez-Vargas
"""
import pandas as pd
import numpy as np
import biogeme.database as db
import biogeme.biogeme as bio
from biogeme import models
import biogeme.messaging as msg
from biogeme.expressions import Beta, DefineVariable, bioDraws, log, MonteCarlo, PanelLikelihoodTrajectory
####----------------------------####
####---- SIMULATED DATA ----####
####---- IN ----####
####---- LONG FORMAT ----####
####----------------------------####
# Random seed for replicability
np.random.seed(777777)
N = 300 # Number of individuals
J = 3 # Number of alternatives
t = 20 # Choice sets per individual
# Individual id
id = np.repeat(range(1, N*t+1), J).reshape(N*t*J,1)
# Alternative identifier in long format
altern = np.tile(range(1,J+1),N*t).reshape(N*t*J,1)
# Attribute levels
x1 = np.random.normal(0,1,N*t*J).reshape(N*t*J,1)
x2 = np.random.normal(0,1,N*t*J).reshape(N*t*J,1)
# Random parameters
#beta_1: Normal distribution
b_1 = np.random.normal(1,0.5,N)
b_1 = np.repeat(b_1,t*J).reshape(N*t*J,1)#comformability
#beta_2: Normal distribution
b_2 = np.random.normal(1,0.5,N)
b_2 = np.repeat(b_2,t*J).reshape(N*t*J,1) #comformability
# Utility
U = b_1 * x1 + b_2 * x2
# Exp(U)
U_exp = np.exp(U)
# Denominator sum exp(U) of all alternatives
sum_exp = np.repeat(np.array([(U_exp[id==i]).sum() for i in np.unique(id)]), J).reshape(N*t*J,1)
# Probability
pbb = U_exp/sum_exp
# This generates (N*t x J) matrix with chosen alternatives using multinomial samples
choice = np.array([np.random.multinomial(1,(pbb[id==i])) for i in np.unique(id)])
# this identify the number of the selected alternative
num_choice = np.repeat(np.argmax(choice, axis=1)+1,J).reshape(N*t*J,1)
# this turn the last object into a dummy variable indicating the chosen altenative
Y = 1*(num_choice ==altern).reshape(N*t*J,1)
## long format data set
array_long = np.concatenate((id,altern,x1,x2), axis=1)
df_long = pd.DataFrame(data = array_long, columns = ['id','altern','x1','x2'])
# astype integer in order to get ride of names variables such as "x1_1.0"
df_long['altern'] = df_long['altern'].astype(int)
####----------------------------####
####---- LONG TO WIDE DATA ----####
####----------------------------####
# Reshape/Pivot long format data into Wide format required by Biogeme.
df_wide=df_long.pivot(index='id', columns='altern', values=['x1','x2'])
df_wide = df_wide.sort_index(axis=1, level=1)
df_wide.columns = [f'{x}_{y}' for x,y in df_wide.columns]
df_wide = df_wide.reset_index()
#CHOICE variable in wide format
CHOICE = (np.argmax(choice, axis=1)+1).reshape(N*t,1)
#id individuals in wide format
id_ind = np.repeat(range(1, N+1), t).reshape(N*t,1)
#id choice situation of individual n in wide format
id_choice_situation_of_individual_n = np.tile(range(1,t+1),N).reshape(N*t,1)
## long format data set
id_plus_choice_wide = pd.DataFrame(data =np.concatenate((id_ind,id_choice_situation_of_individual_n,CHOICE), axis=1),
columns = ['id_ind','id_choice_situation_of_individual_n','CHOICE'])
df = id_plus_choice_wide.join(df_wide)
####----------------------------####
####---- BIOGEME DATABASE ----####
####----------------------------####
database = db.Database('MMNL', df)
database.panel("id_ind")
# The following statement allows you to use the names of the variable
# as Python variable.
globals().update(database.variables)
# Parameters to be estimated
# Define two random parameters, normally distributed, designed to be used
# for Monte-Carlo simulation
## Beta 1
B_x1 = Beta('B_x1', 1, None, None, 0)
B_x1_sigma = Beta('B_x1_sigma', 1, None, None, 0)
B_x1_RND = B_x1 + B_x1_sigma * bioDraws('B_x1_RND', 'NORMAL_MLHS_ANTI')
## Beta 2
B_x2 = Beta('B_x2', 1, None, None, 0)
B_x2_sigma = Beta('B_x2_sigma', 1, None, None, 0)
B_x2_RND = B_x2 + B_x2_sigma * bioDraws('B_x2_RND', 'NORMAL_MLHS_ANTI')
# Definition of the utility functions
V1 = B_x1_RND * x1_1 + B_x2_RND * x2_1
V2 = B_x1_RND * x1_2 + B_x2_RND * x2_2
V3 = B_x1_RND * x1_3 + B_x2_RND * x2_3
# Associate utility functions with the numbering of alternatives
V = {1: V1, 2: V2, 3: V3}
# Associate the availability conditions with the alternatives
av = {1: 1, 2: 1, 3: 1}
# Conditional to the random parameters, the likelihood of one observation is
# given by the logit model (called the kernel)
obsprob = models.logit(V, av, CHOICE)
# Conditional to the random parameters, the likelihood of all observations for
# one individual (the trajectory) is the product of the likelihood of
# each observation.
condprobIndiv = PanelLikelihoodTrajectory(obsprob)
# We integrate over the random parameters using Monte-Carlo
logprob = log(MonteCarlo(condprobIndiv))
# Create the Biogeme object
biogeme = bio.BIOGEME(
database, logprob, numberOfDraws=500
)
biogeme.modelName = 'Two_Normally_Distributed_Parameters'
# Estimate the parameters
results = biogeme.estimate()
pandasResults = results.getEstimatedParameters()
print(pandasResults)
panda_get_BetaValues=results.getBetaValues
####---------------------------------------####
####---- INDIVIDUAL LEVEL PARAMETERS ----####
####---------------------------------------####
'''
# Estimate individual level parameters
numerator = MonteCarlo(B_x1_RND * B_x2_RND * logprob) ### ???
denominator = MonteCarlo(logprob)
simulate = {'Numerator': numerator, 'Denominator': denominator}
biosim = bio.BIOGEME(database,simulate,numberOfDraws=1000)
sim = biosim.simulate(results.data.betaValues)
sim['beta'] = sim['Numerator'] / sim['Denominator']
'''