pacman::p_load(
here, # file locator
tidyverse, # data management and ggplot2 graphics
skimr, # get overview of data
janitor, # produce and adorn tabulations and cross-tabulations
tsibble,
fable,
feasts,
fabletools,
lubridate
)
covid_data <- readRDS(here("data", "clean", "final_covid_data.rds"))Univariate prediction
Load packages:
For each period to study, we will train three different models:
- Attempt 1: using ARIMA function from fable package with stepwise (much faster)
- Attempt 2: using ARIMA function from fable package without stepwise
- Attempt 3: using Snaive function from fable package
Before sixth epidemiological period
On one hand, we are going to train the model with data previous the start of the sixth wave of coronavirus (14 oct 2021).
On the other hand, we are going to train the model with data as of June 14th which is considered the beginning of the second epidemiological period in Spain.
Asturias
data_asturias <- covid_data %>%
filter(provincia == "Asturias") %>%
filter(fecha > as.Date("2020-06-14", format = "%Y-%m-%d")) %>%
select(provincia, fecha, num_casos, num_hosp, tmed,mob_grocery_pharmacy, mob_parks,
mob_residential, mob_retail_recreation, mob_transit_stations, mob_workplaces, mob_flujo) %>%
drop_na(-mob_flujo) %>%
as_tsibble(key = provincia, index = fecha)
data_asturias# A tsibble: 653 x 12 [1D]
# Key: provincia [1]
provincia fecha num_casos num_hosp tmed mob_grocery_pharmacy mob_parks
<chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Asturias 2020-06-15 0 0 16 -6 39
2 Asturias 2020-06-16 1 0 15.6 -6 7
3 Asturias 2020-06-17 0 0 15.6 -7 20
4 Asturias 2020-06-18 0 0 15.1 -4 68
5 Asturias 2020-06-19 0 0 14.8 -4 52
6 Asturias 2020-06-20 0 0 16.8 -10 71
7 Asturias 2020-06-21 0 0 18.2 -5.5 46
8 Asturias 2020-06-22 0 0 18.2 -1 99
9 Asturias 2020-06-23 0 0 20 -2 129
10 Asturias 2020-06-24 0 0 18.2 -9 63
# … with 643 more rows, and 5 more variables: mob_residential <dbl>,
# mob_retail_recreation <dbl>, mob_transit_stations <dbl>,
# mob_workplaces <dbl>, mob_flujo <dbl>
data_asturias_train <- data_asturias %>%
filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d"))
summary(data_asturias_train$fecha) Min. 1st Qu. Median Mean 3rd Qu. Max.
"2020-06-15" "2020-10-14" "2021-02-13" "2021-02-13" "2021-06-14" "2021-10-14"
data_asturias_test <- data_asturias %>%
filter(fecha > as.Date("2021-10-14", format = "%Y-%m-%d"))
summary(data_asturias_test$fecha) Min. 1st Qu. Median Mean 3rd Qu. Max.
"2021-10-15" "2021-11-25" "2022-01-05" "2022-01-05" "2022-02-15" "2022-03-29"
lambda_asturias <- data_asturias %>%
features(num_casos, features = guerrero) %>%
pull(lambda_guerrero)
fit_model <- data_asturias_train %>%
model(
arima_at1 = ARIMA(box_cox(num_casos, lambda_asturias)),
arima_at2 = ARIMA(box_cox(num_casos, lambda_asturias),
stepwise = FALSE, approx = FALSE),
Snaive = SNAIVE(box_cox(num_casos, lambda_asturias))
)
# Show and report model
fit_model# A mable: 1 x 4
# Key: provincia [1]
provincia arima_at1 arima_at2 Snaive
<chr> <model> <model> <model>
1 Asturias <ARIMA(3,1,1)(2,0,0)[7]> <ARIMA(2,1,4)> <SNAIVE>
glance(fit_model)# A tibble: 3 × 9
provincia .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
1 Asturias arima_at1 0.944 -673. 1360. 1360. 1390. <cpl [17]> <cpl [1]>
2 Asturias arima_at2 0.910 -664. 1343. 1343. 1372. <cpl [2]> <cpl [4]>
3 Asturias Snaive 2.32 NA NA NA NA <NULL> <NULL>
fit_model %>%
pivot_longer(-provincia,
names_to = ".model",
values_to = "orders") %>%
left_join(glance(fit_model), by = c("provincia", ".model")) %>%
arrange(AICc) %>%
select(.model:BIC)# A mable: 3 x 7
# Key: .model [3]
.model orders sigma2 log_lik AIC AICc BIC
<chr> <model> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at2 <ARIMA(2,1,4)> 0.910 -664. 1343. 1343. 1372.
2 arima_at1 <ARIMA(3,1,1)(2,0,0)[7]> 0.944 -673. 1360. 1360. 1390.
3 Snaive <SNAIVE> 2.32 NA NA NA NA
# We use a Ljung-Box test >> large p-value, confirms residuals are similar / considered to white noise
fit_model %>%
select(Snaive) %>%
gg_tsresiduals()
fit_model %>%
select(arima_at1) %>%
gg_tsresiduals()
fit_model %>%
select(arima_at2) %>%
gg_tsresiduals()
augment(fit_model) %>%
features(.innov, ljung_box, lag=7)# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Asturias arima_at1 10.6 0.158
2 Asturias arima_at2 8.11 0.323
3 Asturias Snaive 629. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=14)# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Asturias arima_at1 33.1 0.00282
2 Asturias arima_at2 23.6 0.0509
3 Asturias Snaive 892. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=21)# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Asturias arima_at1 49.3 0.000460
2 Asturias arima_at2 33.3 0.0433
3 Asturias Snaive 927. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=90)# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Asturias arima_at1 118. 0.0242
2 Asturias arima_at2 86.5 0.584
3 Asturias Snaive 1195. 0
# Significant spikes out of 30 is still consistent with white noise
# To be sure, use a Ljung-Box test, which has a large p-value, confirming that
# the residuals are similar to white noise.
# Note that the alternative models also passed this test.
#
# Forecast
fc_h7 <- fabletools::forecast(fit_model, h = 7)
fc_h14 <- fabletools::forecast(fit_model, h = 14)
fc_h21 <- fabletools::forecast(fit_model, h = 21)
fc_h90 <- fabletools::forecast(fit_model, h = 90)7 days
# Plots
fc_h7 %>%
autoplot(
data_asturias_test %>%
filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(9))
) +
labs(y = "Nº cases", title = "Asturias - forecast h7")
fc_h7 %>%
autoplot(
data_asturias %>%
filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(9) &
fecha > as.Date("2021-07-01", format = "%Y-%m-%d"))
) +
labs(y = "Nº cases", title = "Asturias - forecast h7")
# Accuracy
fabletools::accuracy(fc_h7, data_asturias)# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Asturias Test 5.64 10.3 7.00 15.7 28.4 0.141 0.126 0.214
2 arima_at2 Asturias Test 6.06 9.97 7.28 18.4 30.5 0.147 0.121 0.385
3 Snaive Asturias Test 2.41 14.0 9.98 -10.1 49.0 0.201 0.171 0.00766
14 days
# Plots
fc_h14 %>%
autoplot(
data_asturias_test %>%
filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(16))
) +
labs(y = "Nº cases", title = "Asturias - forecast h14")
fc_h14 %>%
autoplot(
data_asturias %>%
filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(16) &
fecha > as.Date("2021-07-01", format = "%Y-%m-%d"))
) +
labs(y = "Nº cases", title = "Asturias - forecast h14")
# Accuracy
fabletools::accuracy(fc_h14, data_asturias)# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Asturias Test 10.5 14.3 11.1 29.6 35.9 0.224 0.174 0.172
2 arima_at2 Asturias Test 10.9 14.2 11.5 32.1 38.1 0.232 0.173 0.303
3 Snaive Asturias Test 6.69 16.0 11.4 8.17 41.5 0.229 0.195 -0.156
21 days
# Plots
fc_h21 %>%
autoplot(
data_asturias_test %>%
filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(23))
) +
labs(y = "Nº cases", title = "Asturias - forecast h21")
fc_h21 %>%
autoplot(
data_asturias %>%
filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(23) &
fecha > as.Date("2021-07-01", format = "%Y-%m-%d"))
) +
labs(y = "Nº cases", title = "Asturias - forecast h21")
# Accuracy
fabletools::accuracy(fc_h21, data_asturias)# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Asturias Test 9.79 13.5 10.5 27.5 32.9 0.211 0.164 0.0331
2 arima_at2 Asturias Test 10.6 13.7 11.0 31.0 35.1 0.222 0.167 0.126
3 Snaive Asturias Test 5.77 15.6 11.5 6.52 40.8 0.232 0.190 -0.223
90 days
# Plots
fc_h90 %>%
autoplot(
data_asturias_test %>%
filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(92))
) +
labs(y = "Nº cases", title = "Asturias - forecast h90")
fc_h90 %>%
autoplot(
data_asturias %>%
filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(92) &
fecha > as.Date("2021-07-01", format = "%Y-%m-%d"))
) +
labs(y = "Nº cases", title = "Asturias - forecast h90")
# Accuracy
fabletools::accuracy(fc_h90, data_asturias)# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Asturias Test 704. 1211. 704. 67.5 70.0 14.2 14.8 0.925
2 arima_at2 Asturias Test 713. 1221. 714. 71.1 72.9 14.4 14.9 0.926
3 Snaive Asturias Test 702. 1213. 704. 60.0 71.7 14.2 14.8 0.925
Barcelona
data_barcelona <- covid_data %>%
filter(provincia == "Barcelona") %>%
filter(fecha > as.Date("2020-06-14", format = "%Y-%m-%d")) %>%
select(provincia, fecha, num_casos, num_hosp, tmed,mob_grocery_pharmacy, mob_parks,
mob_residential, mob_retail_recreation, mob_transit_stations, mob_workplaces, mob_flujo) %>%
drop_na(-mob_flujo) %>%
as_tsibble(key = provincia, index = fecha)
data_barcelona# A tsibble: 653 x 12 [1D]
# Key: provincia [1]
provincia fecha num_casos num_hosp tmed mob_grocery_pharmacy mob_parks
<chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Barcelona 2020-06-15 62 3 21.2 -9 17
2 Barcelona 2020-06-16 66 3 20.8 -10 -16
3 Barcelona 2020-06-17 70 4 20.3 -4 14
4 Barcelona 2020-06-18 68 4 18.8 -8 -6
5 Barcelona 2020-06-19 65 4 20.4 -5 10
6 Barcelona 2020-06-20 53 4 21.8 -12 15
7 Barcelona 2020-06-21 38 1 22.8 -18 18
8 Barcelona 2020-06-22 69 5 23.8 5 24
9 Barcelona 2020-06-23 95 3 23.4 19 28
10 Barcelona 2020-06-24 44 4 23.6 -71 28
# … with 643 more rows, and 5 more variables: mob_residential <dbl>,
# mob_retail_recreation <dbl>, mob_transit_stations <dbl>,
# mob_workplaces <dbl>, mob_flujo <dbl>
data_barcelona_train <- data_barcelona %>%
filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d"))
summary(data_barcelona_train$fecha) Min. 1st Qu. Median Mean 3rd Qu. Max.
"2020-06-15" "2020-10-14" "2021-02-13" "2021-02-13" "2021-06-14" "2021-10-14"
data_barcelona_test <- data_barcelona %>%
filter(fecha > as.Date("2021-10-14", format = "%Y-%m-%d"))
summary(data_barcelona_test$fecha) Min. 1st Qu. Median Mean 3rd Qu. Max.
"2021-10-15" "2021-11-25" "2022-01-05" "2022-01-05" "2022-02-15" "2022-03-29"
lambda_Barcelona <- data_barcelona %>%
features(num_casos, features = guerrero) %>%
pull(lambda_guerrero)
fit_model <- data_barcelona_train %>%
model(
arima_at1 = ARIMA(box_cox(num_casos, lambda_Barcelona)),
arima_at2 = ARIMA(box_cox(num_casos, lambda_Barcelona),
stepwise = FALSE, approx = FALSE),
Snaive = SNAIVE(box_cox(num_casos, lambda_Barcelona))
)
# Show and report model
fit_model# A mable: 1 x 4
# Key: provincia [1]
provincia arima_at1 arima_at2 Snaive
<chr> <model> <model> <model>
1 Barcelona <ARIMA(0,1,3)(0,1,2)[7]> <ARIMA(1,1,2)(0,1,2)[7]> <SNAIVE>
glance(fit_model)# A tibble: 3 × 9
provincia .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
1 Barcelona arima_at1 0.196 -293. 597. 598. 622. <cpl [0]> <cpl [17]>
2 Barcelona arima_at2 0.188 -283. 579. 579. 604. <cpl [1]> <cpl [16]>
3 Barcelona Snaive 0.964 NA NA NA NA <NULL> <NULL>
fit_model %>%
pivot_longer(-provincia,
names_to = ".model",
values_to = "orders") %>%
left_join(glance(fit_model), by = c("provincia", ".model")) %>%
arrange(AICc) %>%
select(.model:BIC)# A mable: 3 x 7
# Key: .model [3]
.model orders sigma2 log_lik AIC AICc BIC
<chr> <model> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at2 <ARIMA(1,1,2)(0,1,2)[7]> 0.188 -283. 579. 579. 604.
2 arima_at1 <ARIMA(0,1,3)(0,1,2)[7]> 0.196 -293. 597. 598. 622.
3 Snaive <SNAIVE> 0.964 NA NA NA NA
# We use a Ljung-Box test >> large p-value, confirms residuals are similar / considered to white noise
fit_model %>%
select(Snaive) %>%
gg_tsresiduals()
fit_model %>%
select(arima_at1) %>%
gg_tsresiduals()
fit_model %>%
select(arima_at2) %>%
gg_tsresiduals()
augment(fit_model) %>%
features(.innov, ljung_box, lag=7)# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Barcelona arima_at1 11.7 0.110
2 Barcelona arima_at2 2.55 0.923
3 Barcelona Snaive 1610. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=14)# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Barcelona arima_at1 27.0 0.0191
2 Barcelona arima_at2 8.86 0.840
3 Barcelona Snaive 2198. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=21)# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Barcelona arima_at1 30.7 0.0788
2 Barcelona arima_at2 13.4 0.892
3 Barcelona Snaive 2259. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=90)# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Barcelona arima_at1 131. 0.00328
2 Barcelona arima_at2 88.9 0.513
3 Barcelona Snaive 3453. 0
# Significant spikes out of 30 is still consistent with white noise
# To be sure, use a Ljung-Box test, which has a large p-value, confirming that
# the residuals are similar to white noise.
# Note that the alternative models also passed this test.
#
# Forecast
fc_h7 <- fabletools::forecast(fit_model, h = 7)
fc_h14 <- fabletools::forecast(fit_model, h = 14)
fc_h21 <- fabletools::forecast(fit_model, h = 21)
fc_h90 <- fabletools::forecast(fit_model, h = 90)7 days
# Plots
fc_h7 %>%
autoplot(
data_barcelona_test %>%
filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(9))
) +
labs(y = "Nº cases", title = "Barcelona - forecast h7")
fc_h7 %>%
autoplot(
data_barcelona %>%
filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(9) &
fecha > as.Date("2021-07-01", format = "%Y-%m-%d"))
) +
labs(y = "Nº cases", title = "Barcelona - forecast h7")
# Accuracy
fabletools::accuracy(fc_h7, data_barcelona)# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Barcelona Test 18.9 64.7 54.8 1.87 28.9 0.146 0.0976 0.273
2 arima_at2 Barcelona Test 23.0 68.1 55.2 3.74 28.5 0.147 0.103 0.160
3 Snaive Barcelona Test -21.0 79.0 67.8 -15.5 40.3 0.180 0.119 0.184
14 days
# Plots
fc_h14 %>%
autoplot(
data_barcelona_test %>%
filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(16))
) +
labs(y = "Nº cases", title = "Barcelona - forecast h14")
fc_h14 %>%
autoplot(
data_barcelona %>%
filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(16) &
fecha > as.Date("2021-07-01", format = "%Y-%m-%d"))
) +
labs(y = "Nº cases", title = "Barcelona - forecast h14")
# Accuracy
fabletools::accuracy(fc_h14, data_barcelona)# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Barcelona Test 35.3 73.8 57.1 10.1 26.0 0.152 0.111 0.279
2 arima_at2 Barcelona Test 38.4 76.8 58.4 11.4 26.3 0.155 0.116 0.216
3 Snaive Barcelona Test -15.3 87.1 76.0 -12.2 40.4 0.202 0.131 0.0610
21 days
# Plots
fc_h21 %>%
autoplot(
data_barcelona_test %>%
filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(23))
) +
labs(y = "Nº cases", title = "Barcelona - forecast h21")
fc_h21 %>%
autoplot(
data_barcelona %>%
filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(23) &
fecha > as.Date("2021-07-01", format = "%Y-%m-%d"))
) +
labs(y = "Nº cases", title = "Barcelona - forecast h21")
# Accuracy
fabletools::accuracy(fc_h21, data_barcelona)# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Barcelona Test 58.9 107. 75.5 15.8 28.6 0.201 0.161 0.191
2 arima_at2 Barcelona Test 56.9 103. 73.6 15.2 28.1 0.196 0.156 0.104
3 Snaive Barcelona Test -2.10 119. 90.4 -10.9 41.3 0.240 0.179 -0.239
90 days
# Plots
fc_h90 %>%
autoplot(
data_barcelona_test %>%
filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(92))
) +
labs(y = "Nº cases", title = "Barcelona - forecast h90")
fc_h90 %>%
autoplot(
data_barcelona %>%
filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(92) &
fecha > as.Date("2021-07-01", format = "%Y-%m-%d"))
) +
labs(y = "Nº cases", title = "Barcelona - forecast h90")
# Accuracy
fabletools::accuracy(fc_h90, data_barcelona)# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Barcelona Test 5068. 9027. 5072. 71.8 74.8 13.5 13.6 0.868
2 arima_at2 Barcelona Test 4960. 8907. 4964. 67.8 70.8 13.2 13.4 0.868
3 Snaive Barcelona Test 4916. 8890. 4939. 58.7 71.4 13.1 13.4 0.870
Madrid
data_madrid <- covid_data %>%
filter(provincia == "Madrid") %>%
filter(fecha > as.Date("2020-06-14", format = "%Y-%m-%d")) %>%
select(provincia, fecha, num_casos, num_hosp, tmed,mob_grocery_pharmacy, mob_parks,
mob_residential, mob_retail_recreation, mob_transit_stations, mob_workplaces, mob_flujo) %>%
drop_na(-mob_flujo) %>%
as_tsibble(key = provincia, index = fecha)
data_madrid# A tsibble: 653 x 12 [1D]
# Key: provincia [1]
provincia fecha num_casos num_hosp tmed mob_grocery_pharmacy mob_parks
<chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Madrid 2020-06-15 153 10 20.7 -8 34
2 Madrid 2020-06-16 91 16 21 -8 21
3 Madrid 2020-06-17 93 14 20.2 -5 34
4 Madrid 2020-06-18 85 17 21.9 -7 30
5 Madrid 2020-06-19 78 20 22.6 -9 22
6 Madrid 2020-06-20 67 20 23.4 -12 25
7 Madrid 2020-06-21 42 3 25 -13 14
8 Madrid 2020-06-22 60 16 27.3 -8 29
9 Madrid 2020-06-23 68 11 28.4 -9 23
10 Madrid 2020-06-24 49 15 28 -7 22
# … with 643 more rows, and 5 more variables: mob_residential <dbl>,
# mob_retail_recreation <dbl>, mob_transit_stations <dbl>,
# mob_workplaces <dbl>, mob_flujo <dbl>
data_madrid_train <- data_madrid %>%
filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d"))
summary(data_madrid_train$fecha) Min. 1st Qu. Median Mean 3rd Qu. Max.
"2020-06-15" "2020-10-14" "2021-02-13" "2021-02-13" "2021-06-14" "2021-10-14"
data_madrid_test <- data_madrid %>%
filter(fecha > as.Date("2021-10-14", format = "%Y-%m-%d"))
summary(data_madrid_test$fecha) Min. 1st Qu. Median Mean 3rd Qu. Max.
"2021-10-15" "2021-11-25" "2022-01-05" "2022-01-05" "2022-02-15" "2022-03-29"
lambda_Madrid <- data_madrid %>%
features(num_casos, features = guerrero) %>%
pull(lambda_guerrero)
fit_model <- data_madrid_train %>%
model(
arima_at1 = ARIMA(box_cox(num_casos, lambda_Madrid)),
arima_at2 = ARIMA(box_cox(num_casos, lambda_Madrid),
stepwise = FALSE, approx = FALSE),
Snaive = SNAIVE(box_cox(num_casos, lambda_Madrid))
)
# Show and report model
fit_model# A mable: 1 x 4
# Key: provincia [1]
provincia arima_at1 arima_at2 Snaive
<chr> <model> <model> <model>
1 Madrid <ARIMA(2,1,1)(1,1,1)[7]> <ARIMA(2,1,1)(1,1,1)[7]> <SNAIVE>
glance(fit_model)# A tibble: 3 × 9
provincia .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
1 Madrid arima_at1 0.0667 -33.4 78.8 79.0 104. <cpl [9]> <cpl [8]>
2 Madrid arima_at2 0.0667 -33.4 78.8 79.0 104. <cpl [9]> <cpl [8]>
3 Madrid Snaive 0.291 NA NA NA NA <NULL> <NULL>
fit_model %>%
pivot_longer(-provincia,
names_to = ".model",
values_to = "orders") %>%
left_join(glance(fit_model), by = c("provincia", ".model")) %>%
arrange(AICc) %>%
select(.model:BIC)# A mable: 3 x 7
# Key: .model [3]
.model orders sigma2 log_lik AIC AICc BIC
<chr> <model> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 <ARIMA(2,1,1)(1,1,1)[7]> 0.0667 -33.4 78.8 79.0 104.
2 arima_at2 <ARIMA(2,1,1)(1,1,1)[7]> 0.0667 -33.4 78.8 79.0 104.
3 Snaive <SNAIVE> 0.291 NA NA NA NA
# We use a Ljung-Box test >> large p-value, confirms residuals are similar / considered to white noise
fit_model %>%
select(Snaive) %>%
gg_tsresiduals()
fit_model %>%
select(arima_at1) %>%
gg_tsresiduals()
fit_model %>%
select(arima_at2) %>%
gg_tsresiduals()
augment(fit_model) %>%
features(.innov, ljung_box, lag=7)# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Madrid arima_at1 10.9 0.143
2 Madrid arima_at2 10.9 0.143
3 Madrid Snaive 1678. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=14)# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Madrid arima_at1 39.6 0.000292
2 Madrid arima_at2 39.6 0.000292
3 Madrid Snaive 2594. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=21)# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Madrid arima_at1 48.0 0.000679
2 Madrid arima_at2 48.0 0.000679
3 Madrid Snaive 2854. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=90)# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Madrid arima_at1 105. 0.130
2 Madrid arima_at2 105. 0.130
3 Madrid Snaive 3856. 0
# Significant spikes out of 30 is still consistent with white noise
# To be sure, use a Ljung-Box test, which has a large p-value, confirming that
# the residuals are similar to white noise.
# Note that the alternative models also passed this test.
#
# Forecast
fc_h7 <- fabletools::forecast(fit_model, h = 7)
fc_h14 <- fabletools::forecast(fit_model, h = 14)
fc_h21 <- fabletools::forecast(fit_model, h = 21)
fc_h90 <- fabletools::forecast(fit_model, h = 90)7 days
# Plots
fc_h7 %>%
autoplot(
data_madrid_test %>%
filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(9))
) +
labs(y = "Nº cases", title = "Madrid - forecast h7")
fc_h7 %>%
autoplot(
data_madrid %>%
filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(9) &
fecha > as.Date("2021-07-01", format = "%Y-%m-%d"))
) +
labs(y = "Nº cases", title = "Madrid - forecast h7")
# Accuracy
fabletools::accuracy(fc_h7, data_madrid)# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Madrid Test 135. 163. 144. 39.5 47.6 0.339 0.243 -0.0355
2 arima_at2 Madrid Test 135. 163. 144. 39.5 47.6 0.339 0.243 -0.0355
3 Snaive Madrid Test 32.1 124. 105. -7.03 48.2 0.247 0.185 -0.109
14 days
# Plots
fc_h14 %>%
autoplot(
data_madrid_test %>%
filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(16))
) +
labs(y = "Nº cases", title = "Madrid - forecast h14")
fc_h14 %>%
autoplot(
data_madrid %>%
filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(16) &
fecha > as.Date("2021-07-01", format = "%Y-%m-%d"))
) +
labs(y = "Nº cases", title = "Madrid - forecast h14")
# Accuracy
fabletools::accuracy(fc_h14, data_madrid)# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Madrid Test 150. 178. 155. 45.1 49.4 0.364 0.265 0.0892
2 arima_at2 Madrid Test 150. 178. 155. 45.1 49.4 0.364 0.265 0.0892
3 Snaive Madrid Test 26.3 126. 105. -8.58 46.6 0.246 0.188 -0.0472
21 days
# Plots
fc_h21 %>%
autoplot(
data_madrid_test %>%
filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(23))
) +
labs(y = "Nº cases", title = "Madrid - forecast h21")
fc_h21 %>%
autoplot(
data_madrid %>%
filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(23) &
fecha > as.Date("2021-07-01", format = "%Y-%m-%d"))
) +
labs(y = "Nº cases", title = "Madrid - forecast h21")
# Accuracy
fabletools::accuracy(fc_h21, data_madrid)# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Madrid Test 174. 210. 177. 50.0 52.8 0.418 0.313 0.287
2 arima_at2 Madrid Test 174. 210. 177. 50.0 52.8 0.418 0.313 0.287
3 Snaive Madrid Test 34.8 140. 117. -8.01 48.6 0.275 0.209 0.131
90 days
# Plots
fc_h90 %>%
autoplot(
data_madrid_test %>%
filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(92))
) +
labs(y = "Nº cases", title = "Madrid - forecast h90")
fc_h90 %>%
autoplot(
data_madrid %>%
filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(92) &
fecha > as.Date("2021-07-01", format = "%Y-%m-%d"))
) +
labs(y = "Nº cases", title = "Madrid - forecast h90")
# Accuracy
fabletools::accuracy(fc_h90, data_madrid)# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Madrid Test 5007. 8744. 5008. 81.7 82.4 11.8 13.0 0.840
2 arima_at2 Madrid Test 5007. 8744. 5008. 81.7 82.4 11.8 13.0 0.840
3 Snaive Madrid Test 4763. 8564. 4792. 51.8 68.9 11.3 12.7 0.840
Málaga
data_malaga <- covid_data %>%
filter(provincia == "Málaga") %>%
filter(fecha > as.Date("2020-06-14", format = "%Y-%m-%d")) %>%
select(provincia, fecha, num_casos, num_hosp, tmed,mob_grocery_pharmacy, mob_parks,
mob_residential, mob_retail_recreation, mob_transit_stations, mob_workplaces, mob_flujo) %>%
drop_na(-mob_flujo) %>%
as_tsibble(key = provincia, index = fecha)
data_malaga# A tsibble: 653 x 12 [1D]
# Key: provincia [1]
provincia fecha num_casos num_hosp tmed mob_grocery_pharmacy mob_parks
<chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Málaga 2020-06-15 1 0 27.2 -16 -9
2 Málaga 2020-06-16 1 0 27.8 -13 -4
3 Málaga 2020-06-17 2 0 26.9 -13 1
4 Málaga 2020-06-18 2 1 21.6 -12 3
5 Málaga 2020-06-19 1 0 24.7 -12 2
6 Málaga 2020-06-20 4 0 22.6 -14 8
7 Málaga 2020-06-21 4 0 22.7 -16 -3
8 Málaga 2020-06-22 6 0 25 -13 1
9 Málaga 2020-06-23 8 0 25.6 -10 18
10 Málaga 2020-06-24 65 2 24.5 -13 12
# … with 643 more rows, and 5 more variables: mob_residential <dbl>,
# mob_retail_recreation <dbl>, mob_transit_stations <dbl>,
# mob_workplaces <dbl>, mob_flujo <dbl>
data_malaga_train <- data_malaga %>%
filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d"))
summary(data_malaga_train$fecha) Min. 1st Qu. Median Mean 3rd Qu. Max.
"2020-06-15" "2020-10-14" "2021-02-13" "2021-02-13" "2021-06-14" "2021-10-14"
data_malaga_test <- data_malaga %>%
filter(fecha > as.Date("2021-10-14", format = "%Y-%m-%d"))
summary(data_malaga_test$fecha) Min. 1st Qu. Median Mean 3rd Qu. Max.
"2021-10-15" "2021-11-25" "2022-01-05" "2022-01-05" "2022-02-15" "2022-03-29"
lambda_Malaga <- data_malaga %>%
features(num_casos, features = guerrero) %>%
pull(lambda_guerrero)
fit_model <- data_malaga_train %>%
model(
arima_at1 = ARIMA(box_cox(num_casos, lambda_Malaga)),
arima_at2 = ARIMA(box_cox(num_casos, lambda_Malaga),
stepwise = FALSE, approx = FALSE),
Snaive = SNAIVE(box_cox(num_casos, lambda_Malaga))
)
# Show and report model
fit_model# A mable: 1 x 4
# Key: provincia [1]
provincia arima_at1 arima_at2 Snaive
<chr> <model> <model> <model>
1 Málaga <ARIMA(4,1,0)(1,1,1)[7]> <ARIMA(1,1,3)(0,1,1)[7]> <SNAIVE>
glance(fit_model)# A tibble: 3 × 9
provincia .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
1 Málaga arima_at1 0.518 -525. 1063. 1064. 1093. <cpl [11]> <cpl [7]>
2 Málaga arima_at2 0.504 -518. 1049. 1049. 1074. <cpl [1]> <cpl [10]>
3 Málaga Snaive 2.15 NA NA NA NA <NULL> <NULL>
fit_model %>%
pivot_longer(-provincia,
names_to = ".model",
values_to = "orders") %>%
left_join(glance(fit_model), by = c("provincia", ".model")) %>%
arrange(AICc) %>%
select(.model:BIC)# A mable: 3 x 7
# Key: .model [3]
.model orders sigma2 log_lik AIC AICc BIC
<chr> <model> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at2 <ARIMA(1,1,3)(0,1,1)[7]> 0.504 -518. 1049. 1049. 1074.
2 arima_at1 <ARIMA(4,1,0)(1,1,1)[7]> 0.518 -525. 1063. 1064. 1093.
3 Snaive <SNAIVE> 2.15 NA NA NA NA
# We use a Ljung-Box test >> large p-value, confirms residuals are similar / considered to white noise
fit_model %>%
select(Snaive) %>%
gg_tsresiduals()
fit_model %>%
select(arima_at1) %>%
gg_tsresiduals()
fit_model %>%
select(arima_at2) %>%
gg_tsresiduals()
augment(fit_model) %>%
features(.innov, ljung_box, lag=7)# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Málaga arima_at1 12.3 0.0912
2 Málaga arima_at2 8.13 0.321
3 Málaga Snaive 1361. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=14)# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Málaga arima_at1 30.5 0.00654
2 Málaga arima_at2 18.3 0.195
3 Málaga Snaive 1956. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=21)# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Málaga arima_at1 35.2 0.0270
2 Málaga arima_at2 24.2 0.282
3 Málaga Snaive 2100. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=90)# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Málaga arima_at1 130. 0.00387
2 Málaga arima_at2 85.1 0.627
3 Málaga Snaive 3195. 0
# Significant spikes out of 30 is still consistent with white noise
# To be sure, use a Ljung-Box test, which has a large p-value, confirming that
# the residuals are similar to white noise.
# Note that the alternative models also passed this test.
#
# Forecast
fc_h7 <- fabletools::forecast(fit_model, h = 7)
fc_h14 <- fabletools::forecast(fit_model, h = 14)
fc_h21 <- fabletools::forecast(fit_model, h = 21)
fc_h90 <- fabletools::forecast(fit_model, h = 90)7 days
# Plots
fc_h7 %>%
autoplot(
data_malaga_test %>%
filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(9))
) +
labs(y = "Nº cases", title = "Malaga - forecast h7")
fc_h7 %>%
autoplot(
data_malaga %>%
filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(9) &
fecha > as.Date("2021-07-01", format = "%Y-%m-%d"))
) +
labs(y = "Nº cases", title = "Malaga - forecast h7")
# Accuracy
fabletools::accuracy(fc_h7, data_malaga)# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Málaga Test 15.6 22.6 21.8 18.9 46.0 0.221 0.134 -0.133
2 arima_at2 Málaga Test 15.4 22.9 22.3 18.1 47.8 0.226 0.135 -0.0302
3 Snaive Málaga Test -4.00 22.9 19.8 -27.0 52.2 0.201 0.135 0.0123
14 days
# Plots
fc_h14 %>%
autoplot(
data_malaga_test %>%
filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(16))
) +
labs(y = "Nº cases", title = "Malaga - forecast h14")
fc_h14 %>%
autoplot(
data_malaga %>%
filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(16) &
fecha > as.Date("2021-07-01", format = "%Y-%m-%d"))
) +
labs(y = "Nº cases", title = "Malaga - forecast h14")
# Accuracy
fabletools::accuracy(fc_h14, data_malaga)# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Málaga Test 20.1 27.7 24.0 26.8 42.4 0.243 0.163 0.199
2 arima_at2 Málaga Test 21.4 29.0 25.2 28.8 44.9 0.256 0.172 0.261
3 Snaive Málaga Test -3.86 22.7 18.7 -22.4 43.6 0.190 0.134 -0.0973
21 days
# Plots
fc_h21 %>%
autoplot(
data_malaga_test %>%
filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(23))
) +
labs(y = "Nº cases", title = "Malaga - forecast h21")
fc_h21 %>%
autoplot(
data_malaga %>%
filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(23) &
fecha > as.Date("2021-07-01", format = "%Y-%m-%d"))
) +
labs(y = "Nº cases", title = "Malaga - forecast h21")
# Accuracy
fabletools::accuracy(fc_h21, data_malaga)# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Málaga Test 26.7 33.3 29.2 35.0 45.4 0.297 0.197 0.270
2 arima_at2 Málaga Test 28.1 34.8 30.7 37.2 47.9 0.311 0.205 0.303
3 Snaive Málaga Test -0.951 22.0 18.6 -14.8 37.9 0.188 0.130 0.0886
90 days
# Plots
fc_h90 %>%
autoplot(
data_malaga_test %>%
filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(92))
) +
labs(y = "Nº cases", title = "Malaga - forecast h90")
fc_h90 %>%
autoplot(
data_malaga %>%
filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(92) &
fecha > as.Date("2021-07-01", format = "%Y-%m-%d"))
) +
labs(y = "Nº cases", title = "Malaga - forecast h90")
# Accuracy
fabletools::accuracy(fc_h90, data_malaga)# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Málaga Test 680. 1100. 680. 73.4 75.9 6.91 6.50 0.934
2 arima_at2 Málaga Test 676. 1099. 677. 72.2 74.7 6.87 6.49 0.935
3 Snaive Málaga Test 620. 1049. 628. 44.9 62.3 6.37 6.20 0.929
Sevilla
data_sevilla <- covid_data %>%
filter(provincia == "Sevilla") %>%
filter(fecha > as.Date("2020-06-14", format = "%Y-%m-%d")) %>%
select(provincia, fecha, num_casos, num_hosp, tmed,mob_grocery_pharmacy, mob_parks,
mob_residential, mob_retail_recreation, mob_transit_stations, mob_workplaces, mob_flujo) %>%
drop_na(-mob_flujo) %>%
as_tsibble(key = provincia, index = fecha)
data_sevilla# A tsibble: 653 x 12 [1D]
# Key: provincia [1]
provincia fecha num_casos num_hosp tmed mob_grocery_pharmacy mob_parks
<chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Sevilla 2020-06-15 2 1 23.1 -14 -13
2 Sevilla 2020-06-16 1 0 24.0 -10 -4
3 Sevilla 2020-06-17 0 0 24.9 -10 -7
4 Sevilla 2020-06-18 0 0 25.8 -12 -13
5 Sevilla 2020-06-19 0 0 26.6 -13 -20
6 Sevilla 2020-06-20 0 0 27.5 -20 -34
7 Sevilla 2020-06-21 0 0 28.4 -27 -47
8 Sevilla 2020-06-22 1 0 29.3 -17 -19
9 Sevilla 2020-06-23 0 0 30.2 -12 -13
10 Sevilla 2020-06-24 1 0 29.4 -12 -12
# … with 643 more rows, and 5 more variables: mob_residential <dbl>,
# mob_retail_recreation <dbl>, mob_transit_stations <dbl>,
# mob_workplaces <dbl>, mob_flujo <dbl>
data_sevilla_train <- data_sevilla %>%
filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d"))
summary(data_sevilla_train$fecha) Min. 1st Qu. Median Mean 3rd Qu. Max.
"2020-06-15" "2020-10-14" "2021-02-13" "2021-02-13" "2021-06-14" "2021-10-14"
data_sevilla_test <- data_sevilla %>%
filter(fecha > as.Date("2021-10-14", format = "%Y-%m-%d"))
summary(data_sevilla_test$fecha) Min. 1st Qu. Median Mean 3rd Qu. Max.
"2021-10-15" "2021-11-25" "2022-01-05" "2022-01-05" "2022-02-15" "2022-03-29"
lambda_Sevilla <- data_sevilla %>%
features(num_casos, features = guerrero) %>%
pull(lambda_guerrero)
fit_model <- data_sevilla_train %>%
model(
arima_at1 = ARIMA(box_cox(num_casos, lambda_Sevilla)),
arima_at2 = ARIMA(box_cox(num_casos, lambda_Sevilla),
stepwise = FALSE, approx = FALSE),
Snaive = SNAIVE(box_cox(num_casos, lambda_Sevilla))
)
# Show and report model
fit_model# A mable: 1 x 4
# Key: provincia [1]
provincia arima_at1 arima_at2 Snaive
<chr> <model> <model> <model>
1 Sevilla <ARIMA(3,1,1)(2,0,0)[7]> <ARIMA(4,1,0)(2,0,0)[7]> <SNAIVE>
glance(fit_model)# A tibble: 3 × 9
provincia .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
1 Sevilla arima_at1 0.924 -670. 1353. 1354. 1383. <cpl [17]> <cpl [1]>
2 Sevilla arima_at2 0.922 -669. 1353. 1353. 1382. <cpl [18]> <cpl [0]>
3 Sevilla Snaive 2.02 NA NA NA NA <NULL> <NULL>
fit_model %>%
pivot_longer(-provincia,
names_to = ".model",
values_to = "orders") %>%
left_join(glance(fit_model), by = c("provincia", ".model")) %>%
arrange(AICc) %>%
select(.model:BIC)# A mable: 3 x 7
# Key: .model [3]
.model orders sigma2 log_lik AIC AICc BIC
<chr> <model> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at2 <ARIMA(4,1,0)(2,0,0)[7]> 0.922 -669. 1353. 1353. 1382.
2 arima_at1 <ARIMA(3,1,1)(2,0,0)[7]> 0.924 -670. 1353. 1354. 1383.
3 Snaive <SNAIVE> 2.02 NA NA NA NA
# We use a Ljung-Box test >> large p-value, confirms residuals are similar / considered to white noise
fit_model %>%
select(Snaive) %>%
gg_tsresiduals()
fit_model %>%
select(arima_at1) %>%
gg_tsresiduals()
fit_model %>%
select(arima_at2) %>%
gg_tsresiduals()
augment(fit_model) %>%
features(.innov, ljung_box, lag=7)# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Sevilla arima_at1 8.72 0.274
2 Sevilla arima_at2 7.65 0.365
3 Sevilla Snaive 757. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=14)# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Sevilla arima_at1 24.5 0.0402
2 Sevilla arima_at2 23.6 0.0511
3 Sevilla Snaive 1110. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=21)# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Sevilla arima_at1 37.4 0.0151
2 Sevilla arima_at2 36.6 0.0189
3 Sevilla Snaive 1235. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=90)# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Sevilla arima_at1 103. 0.161
2 Sevilla arima_at2 102. 0.179
3 Sevilla Snaive 1692. 0
# Significant spikes out of 30 is still consistent with white noise
# To be sure, use a Ljung-Box test, which has a large p-value, confirming that
# the residuals are similar to white noise.
# Note that the alternative models also passed this test.
#
# Forecast
fc_h7 <- fabletools::forecast(fit_model, h = 7)
fc_h14 <- fabletools::forecast(fit_model, h = 14)
fc_h21 <- fabletools::forecast(fit_model, h = 21)
fc_h90 <- fabletools::forecast(fit_model, h = 90)7 days
# Plots
fc_h7 %>%
autoplot(
data_sevilla_test %>%
filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(9))
) +
labs(y = "Nº cases", title = "Sevilla - forecast h7")
fc_h7 %>%
autoplot(
data_sevilla %>%
filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(9) &
fecha > as.Date("2021-07-01", format = "%Y-%m-%d"))
) +
labs(y = "Nº cases", title = "Sevilla - forecast h7")
# Accuracy
fabletools::accuracy(fc_h7, data_sevilla)# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Sevilla Test 7.30 11.4 8.80 19.1 24.7 0.0870 0.0721 -0.0998
2 arima_at2 Sevilla Test 7.38 11.4 8.93 19.3 25.1 0.0883 0.0725 -0.0852
3 Snaive Sevilla Test -5.85 16.0 13.7 -22.0 40.6 0.135 0.102 0.0310
14 days
# Plots
fc_h14 %>%
autoplot(
data_sevilla_test %>%
filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(16))
) +
labs(y = "Nº cases", title = "Sevilla - forecast h14")
fc_h14 %>%
autoplot(
data_sevilla %>%
filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(16) &
fecha > as.Date("2021-07-01", format = "%Y-%m-%d"))
) +
labs(y = "Nº cases", title = "Sevilla - forecast h14")
# Accuracy
fabletools::accuracy(fc_h14, data_sevilla)# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_a… Sevilla Test 4.45 9.82 7.83 6.10 27.4 0.0774 0.0623 -0.0579
2 arima_a… Sevilla Test 4.58 9.88 7.91 6.63 27.5 0.0782 0.0626 -0.0509
3 Snaive Sevilla Test -12.5 19.3 16.4 -57.6 66.9 0.162 0.122 0.275
21 days
# Plots
fc_h21 %>%
autoplot(
data_sevilla_test %>%
filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(23))
) +
labs(y = "Nº cases", title = "Sevilla - forecast h21")
fc_h21 %>%
autoplot(
data_sevilla %>%
filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(23) &
fecha > as.Date("2021-07-01", format = "%Y-%m-%d"))
) +
labs(y = "Nº cases", title = "Sevilla - forecast h21")
# Accuracy
fabletools::accuracy(fc_h21, data_sevilla)# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Sevilla Test 5.70 11.3 8.54 8.79 26.3 0.0844 0.0716 0.183
2 arima_at2 Sevilla Test 5.84 11.4 8.61 9.32 26.3 0.0850 0.0720 0.187
3 Snaive Sevilla Test -13.9 22.1 19.1 -60.8 72.0 0.189 0.140 0.327
90 days
# Plots
fc_h90 %>%
autoplot(
data_sevilla_test %>%
filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(92))
) +
labs(y = "Nº cases", title = "Sevilla - forecast h90")
fc_h90 %>%
autoplot(
data_sevilla %>%
filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(92) &
fecha > as.Date("2021-07-01", format = "%Y-%m-%d"))
) +
labs(y = "Nº cases", title = "Sevilla - forecast h90")
# Accuracy
fabletools::accuracy(fc_h90, data_sevilla)# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Sevilla Test 681. 1161. 682. 59.5 64.8 6.74 7.36 0.891
2 arima_at2 Sevilla Test 681. 1161. 682. 59.7 64.8 6.74 7.36 0.891
3 Snaive Sevilla Test 665. 1161. 677. 31.7 74.6 6.69 7.36 0.892
End of sixth epidemiological period
We are going to keep train the model with data as of June 14th which is considered the beginning of the second epidemiological period in Spain. There are two reasons.
But, instead of predict the last wave, we are going to predict the deleration of infections as to 31 jan 2022.
Asturias
data_asturias <- covid_data %>%
filter(provincia == "Asturias") %>%
filter(fecha > as.Date("2020-06-14", format = "%Y-%m-%d")) %>%
select(provincia, fecha, num_casos, num_hosp, tmed,mob_grocery_pharmacy, mob_parks,
mob_residential, mob_retail_recreation, mob_transit_stations, mob_workplaces, mob_flujo) %>%
drop_na(-mob_flujo) %>%
as_tsibble(key = provincia, index = fecha)
data_asturias# A tsibble: 653 x 12 [1D]
# Key: provincia [1]
provincia fecha num_casos num_hosp tmed mob_grocery_pharmacy mob_parks
<chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Asturias 2020-06-15 0 0 16 -6 39
2 Asturias 2020-06-16 1 0 15.6 -6 7
3 Asturias 2020-06-17 0 0 15.6 -7 20
4 Asturias 2020-06-18 0 0 15.1 -4 68
5 Asturias 2020-06-19 0 0 14.8 -4 52
6 Asturias 2020-06-20 0 0 16.8 -10 71
7 Asturias 2020-06-21 0 0 18.2 -5.5 46
8 Asturias 2020-06-22 0 0 18.2 -1 99
9 Asturias 2020-06-23 0 0 20 -2 129
10 Asturias 2020-06-24 0 0 18.2 -9 63
# … with 643 more rows, and 5 more variables: mob_residential <dbl>,
# mob_retail_recreation <dbl>, mob_transit_stations <dbl>,
# mob_workplaces <dbl>, mob_flujo <dbl>
data_asturias_train <- data_asturias %>%
filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d"))
summary(data_asturias_train$fecha) Min. 1st Qu. Median Mean 3rd Qu. Max.
"2020-06-15" "2020-11-10" "2021-04-08" "2021-04-08" "2021-09-04" "2022-01-31"
data_asturias_test <- data_asturias %>%
filter(fecha > as.Date("2022-01-31", format = "%Y-%m-%d"))
summary(data_asturias_test$fecha) Min. 1st Qu. Median Mean 3rd Qu. Max.
"2022-02-01" "2022-02-15" "2022-03-01" "2022-03-01" "2022-03-15" "2022-03-29"
lambda_asturias <- data_asturias %>%
features(num_casos, features = guerrero) %>%
pull(lambda_guerrero)
fit_model <- data_asturias_train %>%
model(
arima_at1 = ARIMA(box_cox(num_casos, lambda_asturias)),
arima_at2 = ARIMA(box_cox(num_casos, lambda_asturias),
stepwise = FALSE, approx = FALSE),
Snaive = SNAIVE(box_cox(num_casos, lambda_asturias))
)
# Show and report model
fit_model# A mable: 1 x 4
# Key: provincia [1]
provincia arima_at1 arima_at2 Snaive
<chr> <model> <model> <model>
1 Asturias <ARIMA(3,1,1)(2,0,0)[7]> <ARIMA(0,1,1)(1,0,1)[7]> <SNAIVE>
glance(fit_model)# A tibble: 3 × 9
provincia .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
1 Asturias arima_at1 0.917 -816. 1646. 1646. 1677. <cpl [17]> <cpl [1]>
2 Asturias arima_at2 0.894 -811. 1629. 1629. 1647. <cpl [7]> <cpl [8]>
3 Asturias Snaive 2.53 NA NA NA NA <NULL> <NULL>
fit_model %>%
pivot_longer(-provincia,
names_to = ".model",
values_to = "orders") %>%
left_join(glance(fit_model), by = c("provincia", ".model")) %>%
arrange(AICc) %>%
select(.model:BIC)# A mable: 3 x 7
# Key: .model [3]
.model orders sigma2 log_lik AIC AICc BIC
<chr> <model> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at2 <ARIMA(0,1,1)(1,0,1)[7]> 0.894 -811. 1629. 1629. 1647.
2 arima_at1 <ARIMA(3,1,1)(2,0,0)[7]> 0.917 -816. 1646. 1646. 1677.
3 Snaive <SNAIVE> 2.53 NA NA NA NA
# We use a Ljung-Box test >> large p-value, confirms residuals are similar / considered to white noise
fit_model %>%
select(Snaive) %>%
gg_tsresiduals()
fit_model %>%
select(arima_at1) %>%
gg_tsresiduals()
fit_model %>%
select(arima_at2) %>%
gg_tsresiduals()
augment(fit_model) %>%
features(.innov, ljung_box, lag=7)# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Asturias arima_at1 13.8 0.0551
2 Asturias arima_at2 12.9 0.0743
3 Asturias Snaive 981. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=14)# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Asturias arima_at1 38.9 0.000378
2 Asturias arima_at2 35.2 0.00137
3 Asturias Snaive 1393. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=21)# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Asturias arima_at1 60.0 0.0000128
2 Asturias arima_at2 49.0 0.000509
3 Asturias Snaive 1470. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=57)# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Asturias arima_at1 101. 0.000286
2 Asturias arima_at2 78.6 0.0304
3 Asturias Snaive 1527. 0
# Significant spikes out of 30 is still consistent with white noise
# To be sure, use a Ljung-Box test, which has a large p-value, confirming that
# the residuals are similar to white noise.
# Note that the alternative models also passed this test.
#
# Forecast
fc_h7 <- fabletools::forecast(fit_model, h = 7)
fc_h14 <- fabletools::forecast(fit_model, h = 14)
fc_h21 <- fabletools::forecast(fit_model, h = 21)
fc_h90 <- fabletools::forecast(fit_model, h = 57)7 days
# Plots
fc_h7 %>%
autoplot(
data_asturias_test %>%
filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(9))
) +
labs(y = "Nº cases", title = "Asturias - forecast h7")
fc_h7 %>%
autoplot(
data_asturias %>%
filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(9) &
fecha > as.Date("2021-12-01", format = "%Y-%m-%d"))
) +
labs(y = "Nº cases", title = "Asturias - forecast h7")
# Accuracy
fabletools::accuracy(fc_h7, data_asturias)# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Asturias Test -173. 272. 251. -25.1 30.1 2.60 1.24 0.192
2 arima_at2 Asturias Test -254. 334. 317. -33.7 37.7 3.27 1.52 0.162
3 Snaive Asturias Test -570. 586. 570. -62.8 62.8 5.89 2.66 -0.550
14 days
# Plots
fc_h14 %>%
autoplot(
data_asturias_test %>%
filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(16))
) +
labs(y = "Nº cases", title = "Asturias - forecast h14")
fc_h14 %>%
autoplot(
data_asturias %>%
filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(16) &
fecha > as.Date("2021-12-01", format = "%Y-%m-%d"))
) +
labs(y = "Nº cases", title = "Asturias - forecast h14")
# Accuracy
fabletools::accuracy(fc_h14, data_asturias)# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Asturias Test -271. 332. 310. -44.0 46.5 3.20 1.51 0.372
2 arima_at2 Asturias Test -393. 447. 424. -60.8 62.8 4.38 2.03 0.444
3 Snaive Asturias Test -769. 812. 769. -107. 107. 7.94 3.69 0.299
21 days
# Plots
fc_h21 %>%
autoplot(
data_asturias_test %>%
filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(23))
) +
labs(y = "Nº cases", title = "Asturias - forecast h21")
fc_h21 %>%
autoplot(
data_asturias %>%
filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(23) &
fecha > as.Date("2021-12-01", format = "%Y-%m-%d"))
) +
labs(y = "Nº cases", title = "Asturias - forecast h21")
# Accuracy
fabletools::accuracy(fc_h21, data_asturias)# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Asturias Test -355. 411. 381. -74.5 76.1 3.94 1.87 0.532
2 arima_at2 Asturias Test -493. 543. 514. -98.0 99.3 5.31 2.47 0.597
3 Snaive Asturias Test -920. 980. 920. -168. 168. 9.50 4.46 0.437
57 days
# Plots
fc_h90 %>%
autoplot(
data_asturias_test %>%
filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(59))
) +
labs(y = "Nº cases", title = "Asturias - forecast h90")
fc_h90 %>%
autoplot(
data_asturias %>%
filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(59) &
fecha > as.Date("2021-12-01", format = "%Y-%m-%d"))
) +
labs(y = "Nº cases", title = "Asturias - forecast h90")
# Accuracy
fabletools::accuracy(fc_h90, data_asturias)# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Asturias Test -579. 631. 589. -Inf Inf 6.09 2.87 0.719
2 arima_at2 Asturias Test -694. 735. 702. -Inf Inf 7.25 3.34 0.714
3 Snaive Asturias Test -1282. 1359. 1282. -Inf Inf 13.3 6.18 0.471
Barcelona
data_barcelona <- covid_data %>%
filter(provincia == "Barcelona") %>%
filter(fecha > as.Date("2020-06-14", format = "%Y-%m-%d")) %>%
select(provincia, fecha, num_casos, num_hosp, tmed,mob_grocery_pharmacy, mob_parks,
mob_residential, mob_retail_recreation, mob_transit_stations, mob_workplaces, mob_flujo) %>%
drop_na(-mob_flujo) %>%
as_tsibble(key = provincia, index = fecha)
data_barcelona# A tsibble: 653 x 12 [1D]
# Key: provincia [1]
provincia fecha num_casos num_hosp tmed mob_grocery_pharmacy mob_parks
<chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Barcelona 2020-06-15 62 3 21.2 -9 17
2 Barcelona 2020-06-16 66 3 20.8 -10 -16
3 Barcelona 2020-06-17 70 4 20.3 -4 14
4 Barcelona 2020-06-18 68 4 18.8 -8 -6
5 Barcelona 2020-06-19 65 4 20.4 -5 10
6 Barcelona 2020-06-20 53 4 21.8 -12 15
7 Barcelona 2020-06-21 38 1 22.8 -18 18
8 Barcelona 2020-06-22 69 5 23.8 5 24
9 Barcelona 2020-06-23 95 3 23.4 19 28
10 Barcelona 2020-06-24 44 4 23.6 -71 28
# … with 643 more rows, and 5 more variables: mob_residential <dbl>,
# mob_retail_recreation <dbl>, mob_transit_stations <dbl>,
# mob_workplaces <dbl>, mob_flujo <dbl>
data_barcelona_train <- data_barcelona %>%
filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d"))
summary(data_barcelona_train$fecha) Min. 1st Qu. Median Mean 3rd Qu. Max.
"2020-06-15" "2020-11-10" "2021-04-08" "2021-04-08" "2021-09-04" "2022-01-31"
data_barcelona_test <- data_barcelona %>%
filter(fecha > as.Date("2022-01-31", format = "%Y-%m-%d"))
summary(data_barcelona_test$fecha) Min. 1st Qu. Median Mean 3rd Qu. Max.
"2022-02-01" "2022-02-15" "2022-03-01" "2022-03-01" "2022-03-15" "2022-03-29"
lambda_Barcelona <- data_barcelona %>%
features(num_casos, features = guerrero) %>%
pull(lambda_guerrero)
fit_model <- data_barcelona_train %>%
model(
arima_at1 = ARIMA(box_cox(num_casos, lambda_Barcelona)),
arima_at2 = ARIMA(box_cox(num_casos, lambda_Barcelona),
stepwise = FALSE, approx = FALSE),
Snaive = SNAIVE(box_cox(num_casos, lambda_Barcelona))
)
# Show and report model
fit_model# A mable: 1 x 4
# Key: provincia [1]
provincia arima_at1 arima_at2 Snaive
<chr> <model> <model> <model>
1 Barcelona <ARIMA(1,0,2)(0,1,1)[7]> <ARIMA(1,0,4)(0,1,1)[7]> <SNAIVE>
glance(fit_model)# A tibble: 3 × 9
provincia .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
1 Barcelona arima_at1 0.344 -523. 1055. 1055. 1077. <cpl [1]> <cpl [9]>
2 Barcelona arima_at2 0.341 -520. 1053. 1053. 1084. <cpl [1]> <cpl [11]>
3 Barcelona Snaive 1.26 NA NA NA NA <NULL> <NULL>
fit_model %>%
pivot_longer(-provincia,
names_to = ".model",
values_to = "orders") %>%
left_join(glance(fit_model), by = c("provincia", ".model")) %>%
arrange(AICc) %>%
select(.model:BIC)# A mable: 3 x 7
# Key: .model [3]
.model orders sigma2 log_lik AIC AICc BIC
<chr> <model> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at2 <ARIMA(1,0,4)(0,1,1)[7]> 0.341 -520. 1053. 1053. 1084.
2 arima_at1 <ARIMA(1,0,2)(0,1,1)[7]> 0.344 -523. 1055. 1055. 1077.
3 Snaive <SNAIVE> 1.26 NA NA NA NA
# We use a Ljung-Box test >> large p-value, confirms residuals are similar / considered to white noise
fit_model %>%
select(Snaive) %>%
gg_tsresiduals()
fit_model %>%
select(arima_at1) %>%
gg_tsresiduals()
fit_model %>%
select(arima_at2) %>%
gg_tsresiduals()
augment(fit_model) %>%
features(.innov, ljung_box, lag=7)# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Barcelona arima_at1 33.4 0.0000220
2 Barcelona arima_at2 26.1 0.000480
3 Barcelona Snaive 1654. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=14)# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Barcelona arima_at1 51.3 0.00000363
2 Barcelona arima_at2 43.2 0.0000793
3 Barcelona Snaive 2253. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=21)# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Barcelona arima_at1 57.6 0.0000289
2 Barcelona arima_at2 49.3 0.000455
3 Barcelona Snaive 2362. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=57)# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Barcelona arima_at1 162. 4.59e-12
2 Barcelona arima_at2 153. 9.83e-11
3 Barcelona Snaive 2651. 0
# Significant spikes out of 30 is still consistent with white noise
# To be sure, use a Ljung-Box test, which has a large p-value, confirming that
# the residuals are similar to white noise.
# Note that the alternative models also passed this test.
#
# Forecast
fc_h7 <- fabletools::forecast(fit_model, h = 7)
fc_h14 <- fabletools::forecast(fit_model, h = 14)
fc_h21 <- fabletools::forecast(fit_model, h = 21)
fc_h90 <- fabletools::forecast(fit_model, h = 57)7 days
# Plots
fc_h7 %>%
autoplot(
data_barcelona_test %>%
filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(9))
) +
labs(y = "Nº cases", title = "Barcelona - forecast h7")
fc_h7 %>%
autoplot(
data_barcelona %>%
filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(9) &
fecha > as.Date("2021-12-01", format = "%Y-%m-%d"))
) +
labs(y = "Nº cases", title = "Barcelona - forecast h7")
# Accuracy
fabletools::accuracy(fc_h7, data_barcelona)# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Barcelona Test -4569. 4824. 4569. -62.2 62.2 5.74 2.46 -0.0331
2 arima_at2 Barcelona Test -4145. 4383. 4145. -56.2 56.2 5.21 2.24 -0.0435
3 Snaive Barcelona Test -7870. 8139. 7870. -99.0 99.0 9.88 4.16 0.551
14 days
# Plots
fc_h14 %>%
autoplot(
data_barcelona_test %>%
filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(16))
) +
labs(y = "Nº cases", title = "Barcelona - forecast h14")
fc_h14 %>%
autoplot(
data_barcelona %>%
filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(16) &
fecha > as.Date("2021-12-01", format = "%Y-%m-%d"))
) +
labs(y = "Nº cases", title = "Barcelona - forecast h14")
# Accuracy
fabletools::accuracy(fc_h14, data_barcelona)# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Barcelona Test -6582. 7100. 6582. -132. 132. 8.27 3.63 0.471
2 arima_at2 Barcelona Test -6038. 6534. 6038. -122. 122. 7.58 3.34 0.471
3 Snaive Barcelona Test -9985. 10606. 9985. -188. 188. 12.5 5.42 0.548
21 days
# Plots
fc_h21 %>%
autoplot(
data_barcelona_test %>%
filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(23))
) +
labs(y = "Nº cases", title = "Barcelona - forecast h21")
fc_h21 %>%
autoplot(
data_barcelona %>%
filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(23) &
fecha > as.Date("2021-12-01", format = "%Y-%m-%d"))
) +
labs(y = "Nº cases", title = "Barcelona - forecast h21")
# Accuracy
fabletools::accuracy(fc_h21, data_barcelona)# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Barcelona Test -7823. 8400. 7823. -199. 199. 9.83 4.29 0.573
2 arima_at2 Barcelona Test -7199. 7750. 7199. -184. 184. 9.04 3.96 0.575
3 Snaive Barcelona Test -11275. 12018. 11275. -272. 272. 14.2 6.14 0.602
57 days
# Plots
fc_h90 %>%
autoplot(
data_barcelona_test %>%
filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(59))
) +
labs(y = "Nº cases", title = "Barcelona - forecast h90")
fc_h90 %>%
autoplot(
data_barcelona %>%
filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(59) &
fecha > as.Date("2021-12-01", format = "%Y-%m-%d"))
) +
labs(y = "Nº cases", title = "Barcelona - forecast h90")
# Accuracy
fabletools::accuracy(fc_h90, data_barcelona)# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Barcelona Test -11642. 12454. 11642. -Inf Inf 14.6 6.36 0.695
2 arima_at2 Barcelona Test -10693. 11449. 10693. -Inf Inf 13.4 5.85 0.691
3 Snaive Barcelona Test -14809. 15796. 14809. -Inf Inf 18.6 8.06 0.592
Madrid
data_madrid <- covid_data %>%
filter(provincia == "Madrid") %>%
filter(fecha > as.Date("2020-06-14", format = "%Y-%m-%d")) %>%
select(provincia, fecha, num_casos, num_hosp, tmed,mob_grocery_pharmacy, mob_parks,
mob_residential, mob_retail_recreation, mob_transit_stations, mob_workplaces, mob_flujo) %>%
drop_na(-mob_flujo) %>%
as_tsibble(key = provincia, index = fecha)
data_madrid# A tsibble: 653 x 12 [1D]
# Key: provincia [1]
provincia fecha num_casos num_hosp tmed mob_grocery_pharmacy mob_parks
<chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Madrid 2020-06-15 153 10 20.7 -8 34
2 Madrid 2020-06-16 91 16 21 -8 21
3 Madrid 2020-06-17 93 14 20.2 -5 34
4 Madrid 2020-06-18 85 17 21.9 -7 30
5 Madrid 2020-06-19 78 20 22.6 -9 22
6 Madrid 2020-06-20 67 20 23.4 -12 25
7 Madrid 2020-06-21 42 3 25 -13 14
8 Madrid 2020-06-22 60 16 27.3 -8 29
9 Madrid 2020-06-23 68 11 28.4 -9 23
10 Madrid 2020-06-24 49 15 28 -7 22
# … with 643 more rows, and 5 more variables: mob_residential <dbl>,
# mob_retail_recreation <dbl>, mob_transit_stations <dbl>,
# mob_workplaces <dbl>, mob_flujo <dbl>
data_madrid_train <- data_madrid %>%
filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d"))
summary(data_madrid_train$fecha) Min. 1st Qu. Median Mean 3rd Qu. Max.
"2020-06-15" "2020-11-10" "2021-04-08" "2021-04-08" "2021-09-04" "2022-01-31"
data_madrid_test <- data_madrid %>%
filter(fecha > as.Date("2022-01-31", format = "%Y-%m-%d"))
summary(data_madrid_test$fecha) Min. 1st Qu. Median Mean 3rd Qu. Max.
"2022-02-01" "2022-02-15" "2022-03-01" "2022-03-01" "2022-03-15" "2022-03-29"
lambda_Madrid <- data_madrid %>%
features(num_casos, features = guerrero) %>%
pull(lambda_guerrero)
fit_model <- data_madrid_train %>%
model(
arima_at1 = ARIMA(box_cox(num_casos, lambda_Madrid)),
arima_at2 = ARIMA(box_cox(num_casos, lambda_Madrid),
stepwise = FALSE, approx = FALSE),
Snaive = SNAIVE(box_cox(num_casos, lambda_Madrid))
)
# Show and report model
fit_model# A mable: 1 x 4
# Key: provincia [1]
provincia arima_at1 arima_at2 Snaive
<chr> <model> <model> <model>
1 Madrid <ARIMA(2,0,2)(0,1,2)[7]> <ARIMA(2,0,2)(0,1,2)[7]> <SNAIVE>
glance(fit_model)# A tibble: 3 × 9
provincia .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
1 Madrid arima_at1 0.132 -240. 494. 495. 525. <cpl [2]> <cpl [16]>
2 Madrid arima_at2 0.132 -240. 494. 495. 525. <cpl [2]> <cpl [16]>
3 Madrid Snaive 0.412 NA NA NA NA <NULL> <NULL>
fit_model %>%
pivot_longer(-provincia,
names_to = ".model",
values_to = "orders") %>%
left_join(glance(fit_model), by = c("provincia", ".model")) %>%
arrange(AICc) %>%
select(.model:BIC)# A mable: 3 x 7
# Key: .model [3]
.model orders sigma2 log_lik AIC AICc BIC
<chr> <model> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 <ARIMA(2,0,2)(0,1,2)[7]> 0.132 -240. 494. 495. 525.
2 arima_at2 <ARIMA(2,0,2)(0,1,2)[7]> 0.132 -240. 494. 495. 525.
3 Snaive <SNAIVE> 0.412 NA NA NA NA
# We use a Ljung-Box test >> large p-value, confirms residuals are similar / considered to white noise
fit_model %>%
select(Snaive) %>%
gg_tsresiduals()
fit_model %>%
select(arima_at1) %>%
gg_tsresiduals()
fit_model %>%
select(arima_at2) %>%
gg_tsresiduals()
augment(fit_model) %>%
features(.innov, ljung_box, lag=7)# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Madrid arima_at1 24.8 0.000838
2 Madrid arima_at2 24.8 0.000838
3 Madrid Snaive 1617. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=14)# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Madrid arima_at1 38.5 0.000443
2 Madrid arima_at2 38.5 0.000443
3 Madrid Snaive 2282. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=21)# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Madrid arima_at1 47.9 0.000718
2 Madrid arima_at2 47.9 0.000718
3 Madrid Snaive 2422. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=57)# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Madrid arima_at1 123. 0.000000995
2 Madrid arima_at2 123. 0.000000995
3 Madrid Snaive 2762. 0
# Significant spikes out of 30 is still consistent with white noise
# To be sure, use a Ljung-Box test, which has a large p-value, confirming that
# the residuals are similar to white noise.
# Note that the alternative models also passed this test.
#
# Forecast
fc_h7 <- fabletools::forecast(fit_model, h = 7)
fc_h14 <- fabletools::forecast(fit_model, h = 14)
fc_h21 <- fabletools::forecast(fit_model, h = 21)
fc_h90 <- fabletools::forecast(fit_model, h = 57)7 days
# Plots
fc_h7 %>%
autoplot(
data_madrid_test %>%
filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(9))
) +
labs(y = "Nº cases", title = "Madrid - forecast h7")
fc_h7 %>%
autoplot(
data_madrid %>%
filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(9) &
fecha > as.Date("2021-12-01", format = "%Y-%m-%d"))
) +
labs(y = "Nº cases", title = "Madrid - forecast h7")
# Accuracy
fabletools::accuracy(fc_h7, data_madrid)# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Madrid Test 471. 788. 629. 8.70 11.7 0.843 0.441 0.0572
2 arima_at2 Madrid Test 471. 788. 629. 8.70 11.7 0.843 0.441 0.0572
3 Snaive Madrid Test -1836. 2070. 1836. -36.2 36.2 2.46 1.16 0.541
14 days
# Plots
fc_h14 %>%
autoplot(
data_madrid_test %>%
filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(16))
) +
labs(y = "Nº cases", title = "Madrid - forecast h14")
fc_h14 %>%
autoplot(
data_madrid %>%
filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(16) &
fecha > as.Date("2021-12-01", format = "%Y-%m-%d"))
) +
labs(y = "Nº cases", title = "Madrid - forecast h14")
# Accuracy
fabletools::accuracy(fc_h14, data_madrid)# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Madrid Test 174. 612. 467. 2.98 10.9 0.625 0.343 0.215
2 arima_at2 Madrid Test 174. 612. 467. 2.98 10.9 0.625 0.343 0.215
3 Snaive Madrid Test -2914. 3386. 2914. -77.1 77.1 3.90 1.90 0.466
21 days
# Plots
fc_h21 %>%
autoplot(
data_madrid_test %>%
filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(23))
) +
labs(y = "Nº cases", title = "Madrid - forecast h21")
fc_h21 %>%
autoplot(
data_madrid %>%
filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(23) &
fecha > as.Date("2021-12-01", format = "%Y-%m-%d"))
) +
labs(y = "Nº cases", title = "Madrid - forecast h21")
# Accuracy
fabletools::accuracy(fc_h21, data_madrid)# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Madrid Test -228. 851. 664. -16.7 26.5 0.888 0.477 0.512
2 arima_at2 Madrid Test -228. 851. 664. -16.7 26.5 0.888 0.477 0.512
3 Snaive Madrid Test -3903. 4584. 3903. -158. 158. 5.23 2.57 0.574
57 days
# Plots
fc_h90 %>%
autoplot(
data_madrid_test %>%
filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(59))
) +
labs(y = "Nº cases", title = "Madrid - forecast h90")
fc_h90 %>%
autoplot(
data_madrid %>%
filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(59) &
fecha > as.Date("2021-12-01", format = "%Y-%m-%d"))
) +
labs(y = "Nº cases", title = "Madrid - forecast h90")
# Accuracy
fabletools::accuracy(fc_h90, data_madrid)# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Madrid Test -1540. 2217. 1701. -2124. 2127. 2.28 1.24 0.699
2 arima_at2 Madrid Test -1540. 2217. 1701. -2124. 2127. 2.28 1.24 0.699
3 Snaive Madrid Test -6507. 7446. 6507. -4965. 4965. 8.71 4.17 0.627
Málaga
data_malaga <- covid_data %>%
filter(provincia == "Málaga") %>%
filter(fecha > as.Date("2020-06-14", format = "%Y-%m-%d")) %>%
select(provincia, fecha, num_casos, num_hosp, tmed,mob_grocery_pharmacy, mob_parks,
mob_residential, mob_retail_recreation, mob_transit_stations, mob_workplaces, mob_flujo) %>%
drop_na(-mob_flujo) %>%
as_tsibble(key = provincia, index = fecha)
data_malaga# A tsibble: 653 x 12 [1D]
# Key: provincia [1]
provincia fecha num_casos num_hosp tmed mob_grocery_pharmacy mob_parks
<chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Málaga 2020-06-15 1 0 27.2 -16 -9
2 Málaga 2020-06-16 1 0 27.8 -13 -4
3 Málaga 2020-06-17 2 0 26.9 -13 1
4 Málaga 2020-06-18 2 1 21.6 -12 3
5 Málaga 2020-06-19 1 0 24.7 -12 2
6 Málaga 2020-06-20 4 0 22.6 -14 8
7 Málaga 2020-06-21 4 0 22.7 -16 -3
8 Málaga 2020-06-22 6 0 25 -13 1
9 Málaga 2020-06-23 8 0 25.6 -10 18
10 Málaga 2020-06-24 65 2 24.5 -13 12
# … with 643 more rows, and 5 more variables: mob_residential <dbl>,
# mob_retail_recreation <dbl>, mob_transit_stations <dbl>,
# mob_workplaces <dbl>, mob_flujo <dbl>
data_malaga_train <- data_malaga %>%
filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d"))
summary(data_malaga_train$fecha) Min. 1st Qu. Median Mean 3rd Qu. Max.
"2020-06-15" "2020-11-10" "2021-04-08" "2021-04-08" "2021-09-04" "2022-01-31"
data_malaga_test <- data_malaga %>%
filter(fecha > as.Date("2022-01-31", format = "%Y-%m-%d"))
summary(data_malaga_test$fecha) Min. 1st Qu. Median Mean 3rd Qu. Max.
"2022-02-01" "2022-02-15" "2022-03-01" "2022-03-01" "2022-03-15" "2022-03-29"
lambda_Malaga <- data_malaga %>%
features(num_casos, features = guerrero) %>%
pull(lambda_guerrero)
fit_model <- data_malaga_train %>%
model(
arima_at1 = ARIMA(box_cox(num_casos, lambda_Malaga)),
arima_at2 = ARIMA(box_cox(num_casos, lambda_Malaga),
stepwise = FALSE, approx = FALSE),
Snaive = SNAIVE(box_cox(num_casos, lambda_Malaga))
)
# Show and report model
fit_model# A mable: 1 x 4
# Key: provincia [1]
provincia arima_at1 arima_at2 Snaive
<chr> <model> <model> <model>
1 Málaga <ARIMA(2,0,3)(0,1,1)[7]> <ARIMA(2,0,3)(0,1,1)[7]> <SNAIVE>
glance(fit_model)# A tibble: 3 × 9
provincia .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
1 Málaga arima_at1 0.584 -679. 1372. 1372. 1402. <cpl [2]> <cpl [10]>
2 Málaga arima_at2 0.584 -679. 1372. 1372. 1402. <cpl [2]> <cpl [10]>
3 Málaga Snaive 2.41 NA NA NA NA <NULL> <NULL>
fit_model %>%
pivot_longer(-provincia,
names_to = ".model",
values_to = "orders") %>%
left_join(glance(fit_model), by = c("provincia", ".model")) %>%
arrange(AICc) %>%
select(.model:BIC)# A mable: 3 x 7
# Key: .model [3]
.model orders sigma2 log_lik AIC AICc BIC
<chr> <model> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 <ARIMA(2,0,3)(0,1,1)[7]> 0.584 -679. 1372. 1372. 1402.
2 arima_at2 <ARIMA(2,0,3)(0,1,1)[7]> 0.584 -679. 1372. 1372. 1402.
3 Snaive <SNAIVE> 2.41 NA NA NA NA
# We use a Ljung-Box test >> large p-value, confirms residuals are similar / considered to white noise
fit_model %>%
select(Snaive) %>%
gg_tsresiduals()
fit_model %>%
select(arima_at1) %>%
gg_tsresiduals()
fit_model %>%
select(arima_at2) %>%
gg_tsresiduals()
augment(fit_model) %>%
features(.innov, ljung_box, lag=7)# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Málaga arima_at1 3.28 0.858
2 Málaga arima_at2 3.28 0.858
3 Málaga Snaive 1621. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=14)# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Málaga arima_at1 15.4 0.350
2 Málaga arima_at2 15.4 0.350
3 Málaga Snaive 2376. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=21)# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Málaga arima_at1 29.2 0.109
2 Málaga arima_at2 29.2 0.109
3 Málaga Snaive 2555. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=57)# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Málaga arima_at1 58.4 0.423
2 Málaga arima_at2 58.4 0.423
3 Málaga Snaive 3422. 0
# Significant spikes out of 30 is still consistent with white noise
# To be sure, use a Ljung-Box test, which has a large p-value, confirming that
# the residuals are similar to white noise.
# Note that the alternative models also passed this test.
#
# Forecast
fc_h7 <- fabletools::forecast(fit_model, h = 7)
fc_h14 <- fabletools::forecast(fit_model, h = 14)
fc_h21 <- fabletools::forecast(fit_model, h = 21)
fc_h90 <- fabletools::forecast(fit_model, h = 57)7 days
# Plots
fc_h7 %>%
autoplot(
data_malaga_test %>%
filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(9))
) +
labs(y = "Nº cases", title = "Malaga - forecast h7")
fc_h7 %>%
autoplot(
data_malaga %>%
filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(9) &
fecha > as.Date("2021-12-01", format = "%Y-%m-%d"))
) +
labs(y = "Nº cases", title = "Malaga - forecast h7")
# Accuracy
fabletools::accuracy(fc_h7, data_malaga)# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Málaga Test 74.3 190. 152. 3.78 24.3 1.15 0.769 0.249
2 arima_at2 Málaga Test 74.3 190. 152. 3.78 24.3 1.15 0.769 0.249
3 Snaive Málaga Test -170. 240. 191. -33.7 35.9 1.45 0.968 0.246
14 days
# Plots
fc_h14 %>%
autoplot(
data_malaga_test %>%
filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(16))
) +
labs(y = "Nº cases", title = "Malaga - forecast h14")
fc_h14 %>%
autoplot(
data_malaga %>%
filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(16) &
fecha > as.Date("2021-12-01", format = "%Y-%m-%d"))
) +
labs(y = "Nº cases", title = "Malaga - forecast h14")
# Accuracy
fabletools::accuracy(fc_h14, data_malaga)# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Málaga Test 185. 290. 229. 17.9 29.6 1.73 1.17 0.340
2 arima_at2 Málaga Test 185. 290. 229. 17.9 29.6 1.73 1.17 0.340
3 Snaive Málaga Test -141. 223. 170. -28.9 31.8 1.28 0.899 0.0239
21 days
# Plots
fc_h21 %>%
autoplot(
data_malaga_test %>%
filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(23))
) +
labs(y = "Nº cases", title = "Malaga - forecast h21")
fc_h21 %>%
autoplot(
data_malaga %>%
filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(23) &
fecha > as.Date("2021-12-01", format = "%Y-%m-%d"))
) +
labs(y = "Nº cases", title = "Malaga - forecast h21")
# Accuracy
fabletools::accuracy(fc_h21, data_malaga)# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Málaga Test 173. 267. 203. 19.1 26.9 1.53 1.08 0.422
2 arima_at2 Málaga Test 173. 267. 203. 19.1 26.9 1.53 1.08 0.422
3 Snaive Málaga Test -208. 279. 227. -41.4 43.4 1.72 1.13 0.172
57 days
# Plots
fc_h90 %>%
autoplot(
data_malaga_test %>%
filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(59))
) +
labs(y = "Nº cases", title = "Malaga - forecast h90")
fc_h90 %>%
autoplot(
data_malaga %>%
filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(59) &
fecha > as.Date("2021-12-01", format = "%Y-%m-%d"))
) +
labs(y = "Nº cases", title = "Malaga - forecast h90")
# Accuracy
fabletools::accuracy(fc_h90, data_malaga)# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Málaga Test -6.39 278. 190. -Inf Inf 1.44 1.12 0.656
2 arima_at2 Málaga Test -6.39 278. 190. -Inf Inf 1.44 1.12 0.656
3 Snaive Málaga Test -436. 525. 443. -Inf Inf 3.35 2.12 0.513
Sevilla
data_sevilla <- covid_data %>%
filter(provincia == "Sevilla") %>%
filter(fecha > as.Date("2020-06-14", format = "%Y-%m-%d")) %>%
select(provincia, fecha, num_casos, num_hosp, tmed,mob_grocery_pharmacy, mob_parks,
mob_residential, mob_retail_recreation, mob_transit_stations, mob_workplaces, mob_flujo) %>%
drop_na(-mob_flujo) %>%
as_tsibble(key = provincia, index = fecha)
data_sevilla# A tsibble: 653 x 12 [1D]
# Key: provincia [1]
provincia fecha num_casos num_hosp tmed mob_grocery_pharmacy mob_parks
<chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Sevilla 2020-06-15 2 1 23.1 -14 -13
2 Sevilla 2020-06-16 1 0 24.0 -10 -4
3 Sevilla 2020-06-17 0 0 24.9 -10 -7
4 Sevilla 2020-06-18 0 0 25.8 -12 -13
5 Sevilla 2020-06-19 0 0 26.6 -13 -20
6 Sevilla 2020-06-20 0 0 27.5 -20 -34
7 Sevilla 2020-06-21 0 0 28.4 -27 -47
8 Sevilla 2020-06-22 1 0 29.3 -17 -19
9 Sevilla 2020-06-23 0 0 30.2 -12 -13
10 Sevilla 2020-06-24 1 0 29.4 -12 -12
# … with 643 more rows, and 5 more variables: mob_residential <dbl>,
# mob_retail_recreation <dbl>, mob_transit_stations <dbl>,
# mob_workplaces <dbl>, mob_flujo <dbl>
data_sevilla_train <- data_sevilla %>%
filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d"))
summary(data_sevilla_train$fecha) Min. 1st Qu. Median Mean 3rd Qu. Max.
"2020-06-15" "2020-11-10" "2021-04-08" "2021-04-08" "2021-09-04" "2022-01-31"
data_sevilla_test <- data_sevilla %>%
filter(fecha > as.Date("2022-01-31", format = "%Y-%m-%d"))
summary(data_sevilla_test$fecha) Min. 1st Qu. Median Mean 3rd Qu. Max.
"2022-02-01" "2022-02-15" "2022-03-01" "2022-03-01" "2022-03-15" "2022-03-29"
lambda_Sevilla <- data_sevilla %>%
features(num_casos, features = guerrero) %>%
pull(lambda_guerrero)
fit_model <- data_sevilla_train %>%
model(
arima_at1 = ARIMA(box_cox(num_casos, lambda_Sevilla)),
arima_at2 = ARIMA(box_cox(num_casos, lambda_Sevilla),
stepwise = FALSE, approx = FALSE),
Snaive = SNAIVE(box_cox(num_casos, lambda_Sevilla))
)
# Show and report model
fit_model# A mable: 1 x 4
# Key: provincia [1]
provincia arima_at1 arima_at2 Snaive
<chr> <model> <model> <model>
1 Sevilla <ARIMA(3,1,1)(2,0,0)[7]> <ARIMA(4,1,0)(2,0,0)[7]> <SNAIVE>
glance(fit_model)# A tibble: 3 × 9
provincia .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
1 Sevilla arima_at1 0.962 -832. 1679. 1679. 1709. <cpl [17]> <cpl [1]>
2 Sevilla arima_at2 0.962 -832. 1678. 1679. 1709. <cpl [18]> <cpl [0]>
3 Sevilla Snaive 2.23 NA NA NA NA <NULL> <NULL>
fit_model %>%
pivot_longer(-provincia,
names_to = ".model",
values_to = "orders") %>%
left_join(glance(fit_model), by = c("provincia", ".model")) %>%
arrange(AICc) %>%
select(.model:BIC)# A mable: 3 x 7
# Key: .model [3]
.model orders sigma2 log_lik AIC AICc BIC
<chr> <model> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at2 <ARIMA(4,1,0)(2,0,0)[7]> 0.962 -832. 1678. 1679. 1709.
2 arima_at1 <ARIMA(3,1,1)(2,0,0)[7]> 0.962 -832. 1679. 1679. 1709.
3 Snaive <SNAIVE> 2.23 NA NA NA NA
# We use a Ljung-Box test >> large p-value, confirms residuals are similar / considered to white noise
fit_model %>%
select(Snaive) %>%
gg_tsresiduals()
fit_model %>%
select(arima_at1) %>%
gg_tsresiduals()
fit_model %>%
select(arima_at2) %>%
gg_tsresiduals()
augment(fit_model) %>%
features(.innov, ljung_box, lag=7)# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Sevilla arima_at1 7.13 0.416
2 Sevilla arima_at2 6.24 0.512
3 Sevilla Snaive 965. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=14)# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Sevilla arima_at1 27.9 0.0146
2 Sevilla arima_at2 27.1 0.0185
3 Sevilla Snaive 1403. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=21)# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Sevilla arima_at1 38.6 0.0108
2 Sevilla arima_at2 37.9 0.0132
3 Sevilla Snaive 1510. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=57)# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Sevilla arima_at1 78.0 0.0335
2 Sevilla arima_at2 77.4 0.0372
3 Sevilla Snaive 1813. 0
# Significant spikes out of 30 is still consistent with white noise
# To be sure, use a Ljung-Box test, which has a large p-value, confirming that
# the residuals are similar to white noise.
# Note that the alternative models also passed this test.
#
# Forecast
fc_h7 <- fabletools::forecast(fit_model, h = 7)
fc_h14 <- fabletools::forecast(fit_model, h = 14)
fc_h21 <- fabletools::forecast(fit_model, h = 21)
fc_h90 <- fabletools::forecast(fit_model, h = 57)7 days
# Plots
fc_h7 %>%
autoplot(
data_sevilla_test %>%
filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(9))
) +
labs(y = "Nº cases", title = "Sevilla - forecast h7")
fc_h7 %>%
autoplot(
data_sevilla %>%
filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(9) &
fecha > as.Date("2021-12-01", format = "%Y-%m-%d"))
) +
labs(y = "Nº cases", title = "Sevilla - forecast h7")
# Accuracy
fabletools::accuracy(fc_h7, data_sevilla)# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Sevilla Test 27.5 78.6 56.6 3.30 10.0 0.396 0.288 0.143
2 arima_at2 Sevilla Test 29.1 78.8 55.9 3.60 9.91 0.391 0.289 0.142
3 Snaive Sevilla Test -305. 368. 311. -48.0 49.4 2.17 1.35 0.523
14 days
# Plots
fc_h14 %>%
autoplot(
data_sevilla_test %>%
filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(16))
) +
labs(y = "Nº cases", title = "Sevilla - forecast h14")
fc_h14 %>%
autoplot(
data_sevilla %>%
filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(16) &
fecha > as.Date("2021-12-01", format = "%Y-%m-%d"))
) +
labs(y = "Nº cases", title = "Sevilla - forecast h14")
# Accuracy
fabletools::accuracy(fc_h14, data_sevilla)# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Sevilla Test 131. 223. 156. 13.5 21.5 1.09 0.817 0.433
2 arima_at2 Sevilla Test 133. 224. 157. 13.9 21.4 1.09 0.822 0.432
3 Snaive Sevilla Test -297. 338. 299. -50.4 51.1 2.09 1.24 0.411
21 days
# Plots
fc_h21 %>%
autoplot(
data_sevilla_test %>%
filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(23))
) +
labs(y = "Nº cases", title = "Sevilla - forecast h21")
fc_h21 %>%
autoplot(
data_sevilla %>%
filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(23) &
fecha > as.Date("2021-12-01", format = "%Y-%m-%d"))
) +
labs(y = "Nº cases", title = "Sevilla - forecast h21")
# Accuracy
fabletools::accuracy(fc_h21, data_sevilla)# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Sevilla Test 118. 208. 154. 9.96 26.1 1.08 0.762 0.499
2 arima_at2 Sevilla Test 121. 209. 155. 10.4 26.0 1.08 0.767 0.498
3 Snaive Sevilla Test -380. 430. 381. -76.5 77.0 2.67 1.58 0.507
57 days
# Plots
fc_h90 %>%
autoplot(
data_sevilla_test %>%
filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(59))
) +
labs(y = "Nº cases", title = "Sevilla - forecast h90")
fc_h90 %>%
autoplot(
data_sevilla %>%
filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(59) &
fecha > as.Date("2021-12-01", format = "%Y-%m-%d"))
) +
labs(y = "Nº cases", title = "Sevilla - forecast h90")
# Accuracy
fabletools::accuracy(fc_h90, data_sevilla)# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Sevilla Test -49.7 208. 157. -Inf Inf 1.10 0.761 0.718
2 arima_at2 Sevilla Test -47.4 207. 156. -Inf Inf 1.09 0.759 0.717
3 Snaive Sevilla Test -737. 850. 738. -Inf Inf 5.16 3.11 0.642