This short tutorial gives an example of how one can statistically assess whether a market is in an equilibrium state. The tutorial assumes some familiarity with the concepts and the functionality of the package. The basic_usage vignette can be helpful in acquiring this familiarity.
Load the required libraries.
Prepare the data. Here, we simply simulate data using a data generating process for a market in equilibrium.
nobs <- 5000
tobs <- 5
alpha_d <- -1.9
beta_d0 <- 4.9
beta_d <- c(2.1, -0.7)
eta_d <- c(3.5, 6.25)
alpha_s <- 2.8
beta_s0 <- 1.2
beta_s <- c(0.65)
eta_s <- c(1.15, 4.2)
sigma_d <- 1
sigma_s <- 1
rho_ds <- 0.5
seed <- 443
eq_data <- simulate_model_data(
"eq_fiml", nobs, tobs,
alpha_d, beta_d0, beta_d, eta_d,
alpha_s, beta_s0, beta_s, eta_s,
NA, NA, c(NA),
sigma_d = sigma_d, sigma_s = sigma_s, rho_ds = rho_ds,
seed = seed
)
Prepare the basic parameters for model initialization.
key_columns <- c("id", "date")
time_column <- c("date")
quantity_column <- "Q"
price_column <- "P"
demand_specification <- paste0(price_column, " + Xd1 + Xd2 + X1 + X2")
supply_specification <- "Xs1 + X1 + X2"
price_specification <- "Xp1"
verbose <- 2
use_correlated_shocks <- TRUE
Using the above parameterization, construct the model objects. Here we construct two equilibrium models and four disequilibrium models. All the models are constructed using the simulated data from a model of market in equilibrium.
eq2sls <- new(
"eq_2sls",
key_columns,
quantity_column, price_column,
demand_specification, paste0(price_column, " + ", supply_specification),
eq_data[eq_data$date != 1, ],
verbose = verbose
)
#> Info: This is 'Equilibrium 2SLS with independent shocks' model
#> Warning: Problem with `mutate()` input `date`.
#> ℹ Removing unobserved '1' level(s).
#>
#> ℹ Input `date` is `(function (x) ...`.
#> Warning: Removing unobserved '1' level(s).
eqfiml <- new(
"eq_fiml",
key_columns,
quantity_column, price_column,
demand_specification, paste0(price_column, " + ", supply_specification),
eq_data[eq_data$date != 1, ],
use_correlated_shocks = use_correlated_shocks, verbose = verbose
)
#> Info: This is 'Equilibrium FIML with correlated shocks' model
#> Warning: Problem with `mutate()` input `date`.
#> ℹ Removing unobserved '1' level(s).
#>
#> ℹ Input `date` is `(function (x) ...`.
#> Warning: Removing unobserved '1' level(s).
bsmdl <- new(
"diseq_basic",
key_columns,
quantity_column, price_column,
demand_specification, paste0(price_column, " + ", supply_specification),
eq_data[eq_data$date != 1, ],
use_correlated_shocks = use_correlated_shocks, verbose = verbose
)
#> Info: This is 'Basic with correlated shocks' model
#> Warning: Problem with `mutate()` input `date`.
#> ℹ Removing unobserved '1' level(s).
#>
#> ℹ Input `date` is `(function (x) ...`.
#> Warning: Removing unobserved '1' level(s).
damdl <- new(
"diseq_deterministic_adjustment",
key_columns, time_column,
quantity_column, price_column,
demand_specification, paste0(price_column, " + ", supply_specification),
eq_data,
use_correlated_shocks = use_correlated_shocks, verbose = verbose
)
#> Info: This is 'Deterministic Adjustment with correlated shocks' model
#> Info: Dropping 5000 rows by generating 'LAGGED_P'.
#> Info: Sample separated with 10003 rows in excess supply and 9997 in excess demand regime.
Set the estimation parameters.
Estimate the models.
eq2sls <- estimate(eq2sls)
eqfiml_est <- estimate(eqfiml,
control = optimization_controls,
method = optimization_method
)
bsmdl_est <- estimate(bsmdl,
control = optimization_controls,
method = optimization_method
)
damdl_est <- estimate(damdl,
control = optimization_controls,
method = optimization_method
)
All the models provide estimates for the simulated data. Even with simulated data, it is difficult to assess which model performs better by examining only the summaries in separation or collectively.
summary(eq2sls@first_stage_model)
#>
#> Call:
#> lm(formula = first_stage_formula, data = object@model_tibble)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -0.85453 -0.14407 0.00004 0.14368 0.78798
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 0.761849 0.017106 44.54 <2e-16 ***
#> Xd1 0.448077 0.003055 146.68 <2e-16 ***
#> Xd2 -0.144864 0.003056 -47.41 <2e-16 ***
#> X1 0.505181 0.003026 166.92 <2e-16 ***
#> X2 0.437596 0.003072 142.44 <2e-16 ***
#> Xs1 -0.139496 0.003057 -45.63 <2e-16 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 0.215 on 19994 degrees of freedom
#> Multiple R-squared: 0.7873, Adjusted R-squared: 0.7873
#> F-statistic: 1.48e+04 on 5 and 19994 DF, p-value: < 2.2e-16
summary(eq2sls@system_model)
#>
#> systemfit results
#> method: 2SLS
#>
#> N DF SSR detRCov OLS-R2 McElroy-R2
#> system 40000 39989 30544.3 8.5e-05 0.923359 0.846725
#>
#> N DF SSR MSE RMSE R2 Adj R2
#> demand 20000 19994 15271.1 0.763782 0.873946 0.923364 0.923345
#> supply 20000 19995 15273.3 0.763855 0.873988 0.923353 0.923338
#>
#> The covariance matrix of the residuals
#> demand supply
#> demand 0.763782 0.763763
#> supply 0.763763 0.763855
#>
#> The correlations of the residuals
#> demand supply
#> demand 1.000000 0.999927
#> supply 0.999927 1.000000
#>
#>
#> 2SLS estimates for 'demand' (equation 1)
#> Model Formula: Q ~ P_FITTED + Xd1 + Xd2 + X1 + X2
#> <environment: 0x55cc6a3f7b28>
#> Instruments: ~Xd1 + Xd2 + X1 + X2 + Xs1
#> <environment: 0x55cc6a3f7b28>
#>
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 4.8762033 0.0722859 67.4571 < 2.22e-16 ***
#> P_FITTED -1.8442136 0.0890651 -20.7064 < 2.22e-16 ***
#> Xd1 2.0524139 0.0418253 49.0712 < 2.22e-16 ***
#> Xd2 -0.6858288 0.0179589 -38.1887 < 2.22e-16 ***
#> X1 3.5025023 0.0466818 75.0293 < 2.22e-16 ***
#> X2 6.2117062 0.0409079 151.8463 < 2.22e-16 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 0.873946 on 19994 degrees of freedom
#> Number of observations: 20000 Degrees of Freedom: 19994
#> SSR: 15271.060811 MSE: 0.763782 Root MSE: 0.873946
#> Multiple R-Squared: 0.923364 Adjusted R-Squared: 0.923345
#>
#>
#> 2SLS estimates for 'supply' (equation 2)
#> Model Formula: Q ~ P_FITTED + Xs1 + X1 + X2
#> <environment: 0x55cc6a3f7b28>
#> Instruments: ~Xd1 + Xd2 + X1 + X2 + Xs1
#> <environment: 0x55cc6a3f7b28>
#>
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 1.3095281 0.0673668 19.4388 < 2.22e-16 ***
#> P_FITTED 2.7509924 0.0263305 104.4792 < 2.22e-16 ***
#> Xs1 0.6409146 0.0129713 49.4101 < 2.22e-16 ***
#> X1 1.1808148 0.0181389 65.0984 < 2.22e-16 ***
#> X2 4.2008260 0.0170625 246.2027 < 2.22e-16 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 0.873988 on 19995 degrees of freedom
#> Number of observations: 20000 Degrees of Freedom: 19995
#> SSR: 15273.281424 MSE: 0.763855 Root MSE: 0.873988
#> Multiple R-Squared: 0.923353 Adjusted R-Squared: 0.923338
bbmle::summary(eqfiml_est)
#> Maximum likelihood estimation
#>
#> Call:
#> `bbmle::mle2`(list(control = list(maxit = 10000, reltol = 1e-08),
#> method = "BFGS", start = c(D_P = 0.248904000229706, D_CONST = 4.01094480918336,
#> D_Xd1 = 1.1137756892884, D_Xd2 = -0.380925520464488, D_X1 = 2.44419761908979,
#> D_X2 = 5.29620369779804, S_P = 1.70876661459514, S_CONST = 2.885697667298,
#> S_Xs1 = 0.493421828653425, S_X1 = 1.70854379946901, S_X2 = 4.66113245104969,
#> D_VARIANCE = 1, S_VARIANCE = 1, RHO = 0), minuslogl = function (...)
#> minus_log_likelihood(object, ...), gr = function (...)
#> gradient(object, ...)))
#>
#> Coefficients:
#> Estimate Std. Error z value Pr(z)
#> D_P -1.847620 0.102162 -18.085 < 2.2e-16 ***
#> D_CONST 4.845601 0.079952 60.606 < 2.2e-16 ***
#> D_Xd1 2.057105 0.047842 42.998 < 2.2e-16 ***
#> D_Xd2 -0.676054 0.019563 -34.558 < 2.2e-16 ***
#> D_X1 3.503609 0.053540 65.439 < 2.2e-16 ***
#> D_X2 6.213180 0.046915 132.435 < 2.2e-16 ***
#> S_P 2.750472 0.030038 91.567 < 2.2e-16 ***
#> S_CONST 1.311502 0.076848 17.066 < 2.2e-16 ***
#> S_Xs1 0.640863 0.014796 43.313 < 2.2e-16 ***
#> S_X1 1.180568 0.020695 57.045 < 2.2e-16 ***
#> S_X2 4.201078 0.019466 215.811 < 2.2e-16 ***
#> D_VARIANCE 1.001167 0.024052 41.625 < 2.2e-16 ***
#> S_VARIANCE 0.994439 0.011789 84.353 < 2.2e-16 ***
#> RHO 0.510389 0.017863 28.572 < 2.2e-16 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> -2 log L: 46374.23
bbmle::summary(bsmdl_est)
#> Maximum likelihood estimation
#>
#> Call:
#> `bbmle::mle2`(list(control = list(maxit = 10000, reltol = 1e-08),
#> method = "BFGS", skip.hessian = FALSE, start = c(D_P = 0.248904000229706,
#> D_CONST = 4.01094480918336, D_Xd1 = 1.1137756892884, D_Xd2 = -0.380925520464488,
#> D_X1 = 2.44419761908979, D_X2 = 5.29620369779804, S_P = 1.70876661459514,
#> S_CONST = 2.885697667298, S_Xs1 = 0.493421828653425, S_X1 = 1.70854379946901,
#> S_X2 = 4.66113245104969, D_VARIANCE = 1, S_VARIANCE = 1,
#> RHO = 0), minuslogl = function (...)
#> minus_log_likelihood(object, ...), gr = function (...)
#> gradient(object, ...)))
#>
#> Coefficients:
#> Estimate Std. Error z value Pr(z)
#> D_P 0.049812 0.053832 0.9253 0.3548000
#> D_CONST 4.014641 0.115685 34.7033 < 2.2e-16 ***
#> D_Xd1 1.322024 0.036993 35.7369 < 2.2e-16 ***
#> D_Xd2 -0.455326 0.019782 -23.0177 < 2.2e-16 ***
#> D_X1 2.577838 0.037536 68.6762 < 2.2e-16 ***
#> D_X2 5.390141 0.034642 155.5962 < 2.2e-16 ***
#> S_P 1.761937 0.105878 16.6412 < 2.2e-16 ***
#> S_CONST 2.836331 0.359832 7.8824 3.212e-15 ***
#> S_Xs1 1.119391 0.086554 12.9329 < 2.2e-16 ***
#> S_X1 1.575385 0.091235 17.2674 < 2.2e-16 ***
#> S_X2 4.613717 0.082291 56.0662 < 2.2e-16 ***
#> D_VARIANCE 0.822471 0.017428 47.1932 < 2.2e-16 ***
#> S_VARIANCE 1.372021 0.090718 15.1240 < 2.2e-16 ***
#> RHO 0.204999 0.056839 3.6066 0.0003102 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> -2 log L: 51245.47
bbmle::summary(damdl_est)
#> Maximum likelihood estimation
#>
#> Call:
#> `bbmle::mle2`(list(control = list(maxit = 10000, reltol = 1e-08),
#> method = "BFGS", skip.hessian = FALSE, start = c(D_P = 0.248904000229706,
#> D_CONST = 4.01094480918336, D_Xd1 = 1.1137756892884, D_Xd2 = -0.380925520464488,
#> D_X1 = 2.44419761908979, D_X2 = 5.29620369779804, S_P = 1.70876661459514,
#> S_CONST = 2.885697667298, S_Xs1 = 0.493421828653425, S_X1 = 1.70854379946901,
#> S_X2 = 4.66113245104969, P_DIFF = 1, D_VARIANCE = 1, S_VARIANCE = 1,
#> RHO = 0), minuslogl = function (...)
#> minus_log_likelihood(object, ...), gr = function (...)
#> gradient(object, ...)))
#>
#> Coefficients:
#> Estimate Std. Error z value Pr(z)
#> D_P -1.849202 0.102147 -18.1034 <2e-16 ***
#> D_CONST 4.860420 0.082592 58.8486 <2e-16 ***
#> D_Xd1 2.054982 0.047717 43.0664 <2e-16 ***
#> D_Xd2 -0.675406 0.019519 -34.6019 <2e-16 ***
#> D_X1 3.501842 0.053401 65.5767 <2e-16 ***
#> D_X2 6.211275 0.046794 132.7369 <2e-16 ***
#> S_P 2.757014 0.030859 89.3424 <2e-16 ***
#> S_CONST 1.284230 0.081551 15.7476 <2e-16 ***
#> S_Xs1 0.640862 0.014799 43.3037 <2e-16 ***
#> S_X1 1.181013 0.020696 57.0636 <2e-16 ***
#> S_X2 4.200912 0.019469 215.7692 <2e-16 ***
#> P_DIFF -0.013436 0.013574 -0.9898 0.3223
#> D_VARIANCE 0.999927 0.023931 41.7841 <2e-16 ***
#> S_VARIANCE 0.994434 0.011789 84.3560 <2e-16 ***
#> RHO 0.511222 0.017821 28.6869 <2e-16 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> -2 log L: 46373.32
The deterministic adjustment model has price dynamics that are analogous to excess demand and estimates one extra parameter. The directional model estimates one parameter less as the model does not have enough equations to identify prices in both demand and supply equations. The estimated parameters are summarized as follows.
sim_coef <- c(
alpha_d, beta_d0, beta_d, eta_d,
alpha_s, beta_s0, beta_s, eta_s,
NA,
sigma_d, sigma_s,
rho_ds
)
names(sim_coef) <- names(damdl_est@coef)
dm_inc <- eq2sls@system_model$coefficients[
grep(
"demand",
names(eq2sls@system_model$coefficients)
)
]
sp_inc <- eq2sls@system_model$coefficients[
grep(
"supply",
names(eq2sls@system_model$coefficients)
)
]
lm_coef <- c(
dm_inc[2], dm_inc[-2], sp_inc[2], sp_inc[-2],
NA,
NA, NA,
NA
)
eqfiml_coef <- append(
eqfiml_est@coef, c(NA),
after = which(names(eqfiml_est@coef) ==
get_prefixed_variance_variable(eqfiml@system@demand)) - 1
)
bsmdl_coef <- append(
bsmdl_est@coef, c(NA),
after = which(names(bsmdl_est@coef) ==
get_prefixed_variance_variable(bsmdl@system@demand)) - 1
)
damdl_coef <- damdl_est@coef
comp <- tibble::tibble(
parameter = names(sim_coef),
sim = sim_coef, lm = lm_coef, fi = eqfiml_coef,
bm = bsmdl_coef, da = damdl_coef,
lmerr = abs(lm_coef - sim_coef), fierr = abs(eqfiml_coef - sim_coef),
bmerr = abs(bsmdl_coef - sim_coef), daerr = abs(damdl_coef - sim_coef)
)
comp
#> # A tibble: 15 x 10
#> parameter sim lm fi bm da lmerr fierr bmerr
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 D_P -1.9 -1.84 -1.85 0.0498 -1.85 5.58e-2 0.0524 1.95
#> 2 D_CONST 4.9 4.88 4.85 4.01 4.86 2.38e-2 0.0544 0.885
#> 3 D_Xd1 2.1 2.05 2.06 1.32 2.05 4.76e-2 0.0429 0.778
#> 4 D_Xd2 -0.7 -0.686 -0.676 -0.455 -0.675 1.42e-2 0.0239 0.245
#> 5 D_X1 3.5 3.50 3.50 2.58 3.50 2.50e-3 0.00361 0.922
#> 6 D_X2 6.25 6.21 6.21 5.39 6.21 3.83e-2 0.0368 0.860
#> 7 S_P 2.8 2.75 2.75 1.76 2.76 4.90e-2 0.0495 1.04
#> 8 S_CONST 1.2 1.31 1.31 2.84 1.28 1.10e-1 0.112 1.64
#> 9 S_Xs1 0.65 0.641 0.641 1.12 0.641 9.09e-3 0.00914 0.469
#> 10 S_X1 1.15 1.18 1.18 1.58 1.18 3.08e-2 0.0306 0.425
#> 11 S_X2 4.2 4.20 4.20 4.61 4.20 8.26e-4 0.00108 0.414
#> 12 P_DIFF NA NA NA NA -0.0134 NA NA NA
#> 13 D_VARIAN… 1 NA 1.00 0.822 1.00 NA 0.00117 0.178
#> 14 S_VARIAN… 1 NA 0.994 1.37 0.994 NA 0.00556 0.372
#> 15 RHO 0.5 NA 0.510 0.205 0.511 NA 0.0104 0.295
#> # … with 1 more variable: daerr <dbl>
Since we have used simulated data, we can calculate the average absolute error of the parameter estimation for each of the models. In practice, the population values are unknown and this calculation is impossible.
comp_means <- colMeans(comp[, grep("err", colnames(comp))], na.rm = TRUE)
comp_means
#> lmerr fierr bmerr daerr
#> 0.03467257 0.03092702 0.74766291 0.02754986
Moreover, the average absolute error cannot provide an overall estimation assessment as the market models have different parameter spaces. To assess the overall model performance one can instead use an information criterion.
model_names <- c(
eqfiml@model_type_string,
bsmdl@model_type_string, damdl@model_type_string
)
model_obs <- c(
get_number_of_observations(eqfiml),
get_number_of_observations(bsmdl),
get_number_of_observations(damdl)
)
model_errors <- c(
comp_means["fierr"],
comp_means["bmerr"],
comp_means["daerr"]
)
seltbl <- AIC(eqfiml_est, bsmdl_est, damdl_est) %>%
tibble::add_column(Model = model_names, .before = 1) %>%
tibble::add_column(Obs. = model_obs, `Mean Error` = model_errors) %>%
dplyr::rename(D.F. = df) %>%
dplyr::arrange(AIC)
seltbl
#> Model D.F. AIC Obs. Mean Error
#> eqfiml_est Equilibrium FIML 14 46402.23 20000 0.03092702
#> damdl_est Deterministic Adjustment 15 46403.32 20000 0.02754986
#> bsmdl_est Basic 14 51273.47 20000 0.74766291