- Using STAN for MAP estimation
- Multivariate time series models
- DFA models
- Writing our own Stan code
11 Feb 2021
bayes_fit = fit_stan(y = SalmonSurvCUI$logit.s, model_name="dlm-intercept")
map_fit = fit_stan(y = SalmonSurvCUI$logit.s, model_name="dlm-intercept", map_estimation=TRUE)
print(map_fit)
## $par ## x0 beta[1] pro_dev[1] pro_dev[2] pro_dev[3] ## -3.460000e+00 -1.503290e+00 1.046740e-01 -1.943951e-01 4.112205e-01 ## pro_dev[4] pro_dev[5] pro_dev[6] pro_dev[7] pro_dev[8] ## -4.336506e-01 1.943952e-01 -4.336507e-01 -1.943952e-01 -4.710349e-01 ## pro_dev[9] pro_dev[10] pro_dev[11] pro_dev[12] pro_dev[13] ## -6.205692e-01 7.850578e-01 8.074876e-01 -1.659836e+00 4.186974e-01 ## pro_dev[14] pro_dev[15] pro_dev[16] pro_dev[17] pro_dev[18] ## 4.261744e-01 -2.542092e-01 -1.719652e-01 1.345815e-01 5.607551e-01 ## pro_dev[19] pro_dev[20] pro_dev[21] pro_dev[22] pro_dev[23] ## -2.317783e-01 -6.729076e-02 3.514065e-01 -8.074876e-01 8.972066e-02 ## pro_dev[24] pro_dev[25] pro_dev[26] pro_dev[27] pro_dev[28] ## 1.271049e-01 -1.869187e-01 6.729066e-01 -7.476740e-01 -6.729063e-01 ## pro_dev[29] pro_dev[30] pro_dev[31] pro_dev[32] pro_dev[33] ## 6.729059e-02 8.972085e-01 -5.233691e-02 -5.308487e-01 1.375720e+00 ## pro_dev[34] pro_dev[35] pro_dev[36] pro_dev[37] pro_dev[38] ## -8.859995e-08 4.261745e-01 -1.046745e-01 -2.841160e-01 -4.486054e-02 ## pro_dev[39] pro_dev[40] pro_dev[41] sigma_process sigma_obs ## -8.897319e-01 2.542093e-01 -5.607554e-01 1.337481e+00 5.537342e-07 ## pred[1] pred[2] pred[3] pred[4] pred[5] ## -3.460000e+00 -3.320000e+00 -3.580000e+00 -3.030000e+00 -3.610000e+00 ## pred[6] pred[7] pred[8] pred[9] pred[10] ## -3.350000e+00 -3.930000e+00 -4.190000e+00 -4.820000e+00 -5.650000e+00 ## pred[11] pred[12] pred[13] pred[14] pred[15] ## -4.600000e+00 -3.520000e+00 -5.740000e+00 -5.180000e+00 -4.610000e+00 ## pred[16] pred[17] pred[18] pred[19] pred[20] ## -4.950000e+00 -5.180000e+00 -5.000000e+00 -4.250000e+00 -4.560000e+00 ## pred[21] pred[22] pred[23] pred[24] pred[25] ## -4.650000e+00 -4.180000e+00 -5.260000e+00 -5.140000e+00 -4.970000e+00 ## pred[26] pred[27] pred[28] pred[29] pred[30] ## -5.220000e+00 -4.320000e+00 -5.320000e+00 -6.220000e+00 -6.130000e+00 ## pred[31] pred[32] pred[33] pred[34] pred[35] ## -4.930000e+00 -5.000000e+00 -5.710000e+00 -3.870000e+00 -3.870000e+00 ## pred[36] pred[37] pred[38] pred[39] pred[40] ## -3.300000e+00 -3.440000e+00 -3.820000e+00 -3.880000e+00 -5.070000e+00 ## pred[41] pred[42] intercept[1] intercept[2] intercept[3] ## -4.730000e+00 -5.480000e+00 -3.460000e+00 -3.320000e+00 -3.580000e+00 ## intercept[4] intercept[5] intercept[6] intercept[7] intercept[8] ## -3.030000e+00 -3.610000e+00 -3.350000e+00 -3.930000e+00 -4.190000e+00 ## intercept[9] intercept[10] intercept[11] intercept[12] intercept[13] ## -4.820000e+00 -5.650000e+00 -4.600000e+00 -3.520000e+00 -5.740000e+00 ## intercept[14] intercept[15] intercept[16] intercept[17] intercept[18] ## -5.180000e+00 -4.610000e+00 -4.950000e+00 -5.180000e+00 -5.000000e+00 ## intercept[19] intercept[20] intercept[21] intercept[22] intercept[23] ## -4.250000e+00 -4.560000e+00 -4.650000e+00 -4.180000e+00 -5.260000e+00 ## intercept[24] intercept[25] intercept[26] intercept[27] intercept[28] ## -5.140000e+00 -4.970000e+00 -5.220000e+00 -4.320000e+00 -5.320000e+00 ## intercept[29] intercept[30] intercept[31] intercept[32] intercept[33] ## -6.220000e+00 -6.130000e+00 -4.930000e+00 -5.000000e+00 -5.710000e+00 ## intercept[34] intercept[35] intercept[36] intercept[37] intercept[38] ## -3.870000e+00 -3.870000e+00 -3.300000e+00 -3.440000e+00 -3.820000e+00 ## intercept[39] intercept[40] intercept[41] intercept[42] log_lik[1] ## -3.880000e+00 -5.070000e+00 -4.730000e+00 -5.480000e+00 1.326919e+01 ## log_lik[2] log_lik[3] log_lik[4] log_lik[5] log_lik[6] ## 1.348558e+01 1.348143e+01 1.343850e+01 1.346689e+01 1.348198e+01 ## log_lik[7] log_lik[8] log_lik[9] log_lik[10] log_lik[11] ## 1.340442e+01 1.338355e+01 1.341164e+01 1.348663e+01 1.337676e+01 ## log_lik[12] log_lik[13] log_lik[14] log_lik[15] log_lik[16] ## 1.348383e+01 1.341941e+01 1.342276e+01 1.346445e+01 1.348422e+01 ## log_lik[17] log_lik[18] log_lik[19] log_lik[20] log_lik[21] ## 1.335915e+01 1.348345e+01 1.326469e+01 1.317618e+01 1.334588e+01 ## log_lik[22] log_lik[23] log_lik[24] log_lik[25] log_lik[26] ## 1.348756e+01 1.340756e+01 1.348373e+01 1.326981e+01 1.348561e+01 ## log_lik[27] log_lik[28] log_lik[29] log_lik[30] log_lik[31] ## 1.346174e+01 1.348731e+01 1.337548e+01 1.343007e+01 1.348366e+01 ## log_lik[32] log_lik[33] log_lik[34] log_lik[35] log_lik[36] ## 1.333669e+01 1.348752e+01 1.342055e+01 1.331926e+01 1.342071e+01 ## log_lik[37] log_lik[38] log_lik[39] log_lik[40] log_lik[41] ## 1.348268e+01 1.348710e+01 1.346313e+01 1.346517e+01 1.347789e+01 ## log_lik[42] ## 1.346525e+01 ## ## $value ## [1] 595.6098 ## ## $return_code ## [1] 0 ## ## $theta_tilde ## x0 beta[1] pro_dev[1] pro_dev[2] pro_dev[3] pro_dev[4] pro_dev[5] ## [1,] -3.46 -1.50329 0.104674 -0.1943951 0.4112205 -0.4336506 0.1943952 ## pro_dev[6] pro_dev[7] pro_dev[8] pro_dev[9] pro_dev[10] pro_dev[11] ## [1,] -0.4336507 -0.1943952 -0.4710349 -0.6205692 0.7850578 0.8074876 ## pro_dev[12] pro_dev[13] pro_dev[14] pro_dev[15] pro_dev[16] pro_dev[17] ## [1,] -1.659836 0.4186974 0.4261744 -0.2542092 -0.1719652 0.1345815 ## pro_dev[18] pro_dev[19] pro_dev[20] pro_dev[21] pro_dev[22] pro_dev[23] ## [1,] 0.5607551 -0.2317783 -0.06729076 0.3514065 -0.8074876 0.08972066 ## pro_dev[24] pro_dev[25] pro_dev[26] pro_dev[27] pro_dev[28] pro_dev[29] ## [1,] 0.1271049 -0.1869187 0.6729066 -0.747674 -0.6729063 0.06729059 ## pro_dev[30] pro_dev[31] pro_dev[32] pro_dev[33] pro_dev[34] pro_dev[35] ## [1,] 0.8972085 -0.05233691 -0.5308487 1.37572 -8.859995e-08 0.4261745 ## pro_dev[36] pro_dev[37] pro_dev[38] pro_dev[39] pro_dev[40] pro_dev[41] ## [1,] -0.1046745 -0.284116 -0.04486054 -0.8897319 0.2542093 -0.5607554 ## sigma_process sigma_obs pred[1] pred[2] pred[3] pred[4] pred[5] pred[6] ## [1,] 1.337481 5.537342e-07 -3.46 -3.32 -3.58 -3.03 -3.61 -3.35 ## pred[7] pred[8] pred[9] pred[10] pred[11] pred[12] pred[13] pred[14] ## [1,] -3.93 -4.19 -4.82 -5.65 -4.6 -3.52 -5.74 -5.18 ## pred[15] pred[16] pred[17] pred[18] pred[19] pred[20] pred[21] pred[22] ## [1,] -4.61 -4.95 -5.18 -5 -4.25 -4.56 -4.65 -4.18 ## pred[23] pred[24] pred[25] pred[26] pred[27] pred[28] pred[29] pred[30] ## [1,] -5.26 -5.14 -4.97 -5.22 -4.32 -5.32 -6.22 -6.13 ## pred[31] pred[32] pred[33] pred[34] pred[35] pred[36] pred[37] pred[38] ## [1,] -4.93 -5 -5.71 -3.87 -3.87 -3.3 -3.44 -3.82 ## pred[39] pred[40] pred[41] pred[42] intercept[1] intercept[2] intercept[3] ## [1,] -3.88 -5.07 -4.73 -5.48 -3.46 -3.32 -3.58 ## intercept[4] intercept[5] intercept[6] intercept[7] intercept[8] ## [1,] -3.03 -3.61 -3.35 -3.93 -4.19 ## intercept[9] intercept[10] intercept[11] intercept[12] intercept[13] ## [1,] -4.82 -5.65 -4.6 -3.52 -5.74 ## intercept[14] intercept[15] intercept[16] intercept[17] intercept[18] ## [1,] -5.18 -4.61 -4.95 -5.18 -5 ## intercept[19] intercept[20] intercept[21] intercept[22] intercept[23] ## [1,] -4.25 -4.56 -4.65 -4.18 -5.26 ## intercept[24] intercept[25] intercept[26] intercept[27] intercept[28] ## [1,] -5.14 -4.97 -5.22 -4.32 -5.32 ## intercept[29] intercept[30] intercept[31] intercept[32] intercept[33] ## [1,] -6.22 -6.13 -4.93 -5 -5.71 ## intercept[34] intercept[35] intercept[36] intercept[37] intercept[38] ## [1,] -3.87 -3.87 -3.3 -3.44 -3.82 ## intercept[39] intercept[40] intercept[41] intercept[42] log_lik[1] ## [1,] -3.88 -5.07 -4.73 -5.48 13.26919 ## log_lik[2] log_lik[3] log_lik[4] log_lik[5] log_lik[6] log_lik[7] ## [1,] 13.48558 13.48143 13.4385 13.46689 13.48198 13.40442 ## log_lik[8] log_lik[9] log_lik[10] log_lik[11] log_lik[12] log_lik[13] ## [1,] 13.38355 13.41164 13.48663 13.37676 13.48383 13.41941 ## log_lik[14] log_lik[15] log_lik[16] log_lik[17] log_lik[18] log_lik[19] ## [1,] 13.42276 13.46445 13.48422 13.35915 13.48345 13.26469 ## log_lik[20] log_lik[21] log_lik[22] log_lik[23] log_lik[24] log_lik[25] ## [1,] 13.17618 13.34588 13.48756 13.40756 13.48373 13.26981 ## log_lik[26] log_lik[27] log_lik[28] log_lik[29] log_lik[30] log_lik[31] ## [1,] 13.48561 13.46174 13.48731 13.37548 13.43007 13.48366 ## log_lik[32] log_lik[33] log_lik[34] log_lik[35] log_lik[36] log_lik[37] ## [1,] 13.33669 13.48752 13.42055 13.31926 13.42071 13.48268 ## log_lik[38] log_lik[39] log_lik[40] log_lik[41] log_lik[42] ## [1,] 13.4871 13.46313 13.46517 13.47789 13.46525
map_fit$return_code
## [1] 0
map_fit$value
## [1] 595.6098
grep("pred",names(map_fit$par))
## [1] 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 ## [26] 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87
pred = map_fit$par[grep("pred",names(map_fit$par))]
grep("pred",names(map_fit$par))
## [1] 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 ## [26] 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87
pred = map_fit$par[grep("pred",names(map_fit$par))]
grep("pred",names(map_fit$par))
## [1] 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 ## [26] 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87
pred = map_fit$par[grep("pred",names(map_fit$par))]
map_fit = fit_stan(y = SalmonSurvCUI$logit.s, model_name="dlm-intercept", map_estimation=TRUE, hessian=TRUE)
map_fit = fit_stan(y = SalmonSurvCUI$logit.s, model_name="dlm-intercept", map_estimation=TRUE, hessian=TRUE, algorithm="BFGS")
devtools::install_github("nwfsc-timeseries/tvvarss")
family
argument defaults to Gaussian, but can be many otherstvvarss::tvvarss(y = y, family = "poisson",...)
tvvarss::tvvarss(y = y, shared_r = R, shared_q = Q,...)
Z
in MARSS and maps time series to latent state processestvvarss::tvvarss(y = y, shared_r = R, shared_q = Q,...)
tvvarss::tvvarss(y = y, dynamicB = TRUE,...)
atsar
bundled with other DFA code we’ve developeddevtools::install_github("fate-ewi/bayesdfa")
Solution 1: priors for identifiability
Solution 2: post-hoc ‘chain flipping’ before convergence tests run
data("harborSealWA")
We’ll extract predictions from the best model,
fit = bayesdfa::fit_dfa(y = t(harborSealWA[,-1]), num_trends = 1)
And as an stanfit
object, we can extract summaries of states x
pars = extract(fit$model)
bayesdfa::plot_fitted(fit) + theme_bw()
\[x_{t}=x_{t-1}+\delta_{t-1}\] \[\delta_{t-1} \sim Normal(0,q)\]
\[\delta_{t-1} \sim Student-t(\nu,0,q)\]
coho = readRDS("coho_mean_length.rds") # rename variables coho=dplyr::rename(coho, obs=mean_length,time=recovery_year,ts=fishery) fit = fit_dfa(y = coho, data_shape="long",estimate_nu = TRUE)
pars = extract(fit$model)
(source: Wikipedia)
AR models (conventional DFA) may not be well suited
Alternative approach: use Gaussian Process models to model smooth trends
Feddern et al. 2021 (Global Change Biology)
What’s a Gaussian Process model?
Instead of modeling process \[x_{t} = x_{t-1}...\]
We model the covariance of the \(\textbf{x}\)
But there are a lot of \(\textbf{x}\)!!
For model with \(T\) timesteps
\(\Sigma\) has \(T*(T-1)/2\) parameters
GP models implement covariance functions, e.g. exponential, Gaussian, Matern
Model covariance as function of space or distance in time
\[\Sigma_{t,t+3} = \sigma^2exp(\frac{(t-(t+3))^2}{2\tau^2})\]
fit_dfa(y= y, num_trends = 2, ..., trend_model = c("gp"))
DFA extension 3: alternate constraints on loadings matrix
Conventional constraints on loadings matrix
## [,1] [,2] [,3] ## [1,] -0.56047565 0.0000000 0.0000000 ## [2,] -0.23017749 0.4609162 0.0000000 ## [3,] 1.55870831 -1.2650612 0.4007715 ## [4,] 0.07050839 -0.6868529 0.1106827 ## [5,] 0.12928774 -0.4456620 -0.5558411
## [,1] [,2] [,3] ## [1,] 0.6871757 0.24516978 0.06765452 ## [2,] 0.1491745 0.06014499 0.79068047 ## [3,] 0.3885419 0.59306319 0.01839489 ## [4,] 0.2919682 0.30545457 0.40257721 ## [5,] 0.3760087 0.33262301 0.29136825
\[\textbf{Y} = \textbf{Zx}\]
fit_dfa(y = y, num_trends = 2, ..., z_model = "proportion")
data { int<lower=0> N; int<lower=0> K; real y[N]; int P; int y_int[N]; matrix[N, K] x; }
transformed data { int zeros[N]; for(i in 1:N) { zeros[i] = 0; } }
parameters { real x0; vector[K] beta0; vector[K] pro_dev[N-1]; real<lower=0> sigma_process[K]; real<lower=0> sigma_obs; }
parameters { real x0; vector[K] beta0; vector[K] pro_dev[N-1]; real<lower=0> sigma_process[K]; real<lower=0> sigma_obs; }
transformed parameters { vector[N] pred; pred[1] = x0; for(i in 2:N) { pred[i] = phi*pred[i-1] + sigma_process*pro_dev[i-1]; } }
model { x0 ~ normal(0,10); phi ~ normal(0,1); sigma_process ~ student_t(3,0,2); sigma_obs ~ student_t(3,0,2); pro_dev ~ std_normal(); for(i in 1:N) { y[i] ~ normal(pred[i], sigma_obs); } }
generated quantities { vector[N] log_lik; for (n in 1:N) log_lik[n] = normal_lpdf(y[n] | pred[n], sigma_obs); }
Ok – let’s do this in practice
Problem # 1: let’s take our state space random walk model and modify it to better handle extreme events with the Student - t distribution
Mathematically
\[\delta_{t-1} \sim Normal(0,q)\] becomes
\[\delta_{t-1} \sim Student-t(\nu,0,q)\]
You could do this in RStudio (File -> New File -> Stan File)
Scripts for the atsar
package linked below
https://github.com/nwfsc-timeseries/atsar/tree/master/inst/stan
2 things need to change
Change process error deviations from Normal to Student-t distribution
Add \(\nu\) as a parameter, with constraints (> 2)
parameters { real<lower=2> nu; ... }
model{ ... nu ~ student_t(3, 2, 3); pro_dev ~ student_t(nu, 0, 1); //pro_dev ~ std_normal(); ... }
y = gme$Close N = length(y) n_pos = length(which(!is.na(y))) pos_indx = c(which(!is.na(y)),0,0) fit = stan(file = "ss_ar_t.stan", data = list(y = y, N = N, n_pos = n_pos, pos_indx = pos_indx))