# Retrieve VIX data from 2000-01-01 to current date and convert to a tsibble
<- tq_get("^VIX", from = "2000-01-01",
vix_data to = "2025-01-01",
get = "stock.prices") %>%
as_tsibble(index = date)
# Clean and interpolate missing values
<- vix_data %>%
vix_data ::fill_gaps() %>%
tsibblemutate(adjusted = zoo::na.approx(adjusted)) %>%
select(date, adjusted) %>%
::drop_na() tidyr
Activity12
Model Selection & Diagnostic Criteria
Today, we will walk through a complete model selection exercise using VIX data. Our goals are to:
- Perform Exploratory Data Analysis (EDA) with visual diagnostics.
- Fit built-in models (ARIMA and ETS) using the
Tidyverts
framework. - Evaluate models based on five metrics: AIC, AICc, BIC, Mean Squared Error (MSE), and Root Mean Squared Error (RMSE).
- Conclude on the optimal model by comparing these metrics along with residual diagnostics and forecast performance.
For a model with \(n\) observations and \(k\) parameters (including the intercept), the evaluation criteria are defined as:
\[ \begin{align} \text{AIC} &= n\ln\left(\frac{\text{RSS}}{n}\right) + 2k,\\[1mm] \text{BIC} &= n\ln\left(\frac{\text{RSS}}{n}\right) + k\ln(n),\\[1mm] \text{AICc} &= \text{AIC} + \frac{2k(k+1)}{n-k-1}, \end{align} \]
and the error metrics:
\[ \begin{align} \text{MSE} &= \frac{\text{RSS}}{n},\\[1mm] \text{RMSE} &= \sqrt{\text{MSE}}. \end{align} \]
Data Preparation
We retrieve the VIX data and format as a tsibble
, handling missing values while preserving the original adjusted price series.
Exploratory Data Analysis (EDA)
Time Series Plot
%>%
vix_data autoplot(adjusted) +
labs(title = "VIX Adjusted Prices", y = "Adjusted", x = "Date")
ACF and PACF Analysis
%>%
vix_data ACF(adjusted) %>%
autoplot() +
labs(title = "ACF of VIX Data")
%>%
vix_data PACF(adjusted) %>%
autoplot() +
labs(title = "PACF of VIX Data")
Observation:
The persistence in ACF suggests potential need for differencing, but we’ll let ARIMA automatically handle any required transformations through its \(d\) parameter.
Model Fitting Using Tidyverts
1. ARIMA Model (Original Series)
# Fit an ARIMA model on original series
<- vix_data %>%
model_arima model(ARIMA(adjusted))
# Display summary report
report(model_arima)
Series: adjusted
Model: ARIMA(3,1,1)(0,0,2)[7]
Coefficients:
ar1 ar2 ar3 ma1 sma1 sma2
0.9615 0.0082 -0.0179 -0.9811 0.0286 0.0553
s.e. 0.0168 0.0145 0.0122 0.0133 0.0115 0.0107
sigma^2 estimated as 1.728: log likelihood=-15446.52
AIC=30907.05 AICc=30907.06 BIC=30956.88
# Display accuracy metrics
%>% accuracy() model_arima
# A tibble: 1 × 10
.model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 ARIMA(adjusted) Traini… -0.00181 1.31 0.731 -0.220 3.42 0.356 0.403 -2.97e-4
2. ETS Model
# Fit ETS model on original series
<- vix_data %>%
model_ets model(ETS(adjusted))
# Display summary report
report(model_ets)
Series: adjusted
Model: ETS(M,Ad,M)
Smoothing parameters:
alpha = 0.9998863
beta = 0.0449181
gamma = 0.0001019892
phi = 0.8178318
Initial states:
l[0] b[0] s[0] s[-1] s[-2] s[-3] s[-4] s[-5]
25.49738 -0.7829361 1.00114 0.9990668 0.9956332 1.002539 0.9994978 0.9996608
s[-6]
1.002462
sigma^2: 0.003
AIC AICc BIC
83569.41 83569.45 83661.96
# Display accuracy metrics
%>% accuracy() model_ets
# A tibble: 1 × 10
.model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 ETS(adjusted) Training -0.000342 1.32 0.732 -0.120 3.43 0.356 0.405 -0.0392
Forecasting with the Optimal Model
# Forecast 12 months ahead using ARIMA
<- model_arima %>%
forecast_arima forecast(h = "12 months")
# Plot forecast in original units
%>%
forecast_arima autoplot(vix_data) +
labs(title = "12-Month VIX Forecast", y = "Adjusted VIX", x = "Date")
Lab Activity: Modeling Stock Returns with ARIMA
Model daily SPY returns using ARIMA, analyze residuals, and compare with ETS.
Tasks
- Retrieve SPY data (2015-01-01 to present) and compute daily log returns
- Perform EDA (time series plot, ACF/PACF)
- Fit ARIMA(2,0,1) and ETS models to returns
- Compare models using AIC and RMSE
- Forecast 1-month ahead returns
Solution
# Task 1: Data Preparation
<- tq_get("SPY", from = "2015-01-01") %>%
spy_data as_tsibble(index = date) %>%
::fill_gaps() %>%
tsibblemutate(adjusted = na.approx(adjusted),
log_return = difference(log(adjusted))) %>%
select(date, adjusted, log_return) %>%
::drop_na()
tidyr
# Task 2: EDA
%>% autoplot(log_return) +
spy_data labs(title = "SPY Daily Log Returns", y = "Return")
%>% ACF(log_return) %>% autoplot() spy_data
%>% PACF(log_return) %>% autoplot() spy_data
# Task 3: Model Fitting
<- spy_data %>%
fit_arima model(ARIMA(log_return ~ pdq(2,0,1)))
<- spy_data %>%
fit_ets model(ETS(log_return))
# Task 4: Model Comparison
glance(fit_arima) %>% select(AIC, AICc, BIC)
# A tibble: 1 × 3
AIC AICc BIC
<dbl> <dbl> <dbl>
1 -25033. -25033. -24983.
glance(fit_ets) %>% select(AIC, AICc, BIC)
# A tibble: 1 × 3
AIC AICc BIC
<dbl> <dbl> <dbl>
1 -4947. -4947. -4928.
accuracy(fit_arima)
# A tibble: 1 × 10
.model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 ARIMA(log_ret… Trai… -1.52e-6 0.00838 0.00504 NaN Inf 0.694 0.710 -4.44e-4
accuracy(fit_ets)
# A tibble: 1 × 10
.model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 ETS(log_return) Train… 3.35e-6 0.00843 0.00503 -Inf Inf 0.692 0.714 -0.0176
# Task 5: Forecasting
%>%
fit_arima forecast(h = 30) %>%
autoplot(spy_data) +
labs(title = "30-Day SPY Return Forecast", y = "Log Return")