::p_load(
pacman# file locator
here, # data management and ggplot2 graphics
tidyverse, # get overview of data
skimr, # produce and adorn tabulations and cross-tabulations
janitor,
tsibble,
fable,
feasts,
fabletools,
lubridate
)
<- readRDS(here("data", "clean", "final_covid_data.rds")) covid_data
Univariate prediction
Load packages:
For each period to study, we will train three different models:
- Attempt 1: using ARIMA function from fable package with stepwise (much faster)
- Attempt 2: using ARIMA function from fable package without stepwise
- Attempt 3: using Snaive function from fable package
Before sixth epidemiological period
On one hand, we are going to train the model with data previous the start of the sixth wave of coronavirus (14 oct 2021).
On the other hand, we are going to train the model with data as of June 14th which is considered the beginning of the second epidemiological period in Spain.
Asturias
<- covid_data %>%
data_asturias filter(provincia == "Asturias") %>%
filter(fecha > as.Date("2020-06-14", format = "%Y-%m-%d")) %>%
select(provincia, fecha, num_casos, num_hosp, tmed,mob_grocery_pharmacy, mob_parks,
%>%
mob_residential, mob_retail_recreation, mob_transit_stations, mob_workplaces, mob_flujo) drop_na(-mob_flujo) %>%
as_tsibble(key = provincia, index = fecha)
data_asturias
# A tsibble: 653 x 12 [1D]
# Key: provincia [1]
provincia fecha num_casos num_hosp tmed mob_grocery_pharmacy mob_parks
<chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Asturias 2020-06-15 0 0 16 -6 39
2 Asturias 2020-06-16 1 0 15.6 -6 7
3 Asturias 2020-06-17 0 0 15.6 -7 20
4 Asturias 2020-06-18 0 0 15.1 -4 68
5 Asturias 2020-06-19 0 0 14.8 -4 52
6 Asturias 2020-06-20 0 0 16.8 -10 71
7 Asturias 2020-06-21 0 0 18.2 -5.5 46
8 Asturias 2020-06-22 0 0 18.2 -1 99
9 Asturias 2020-06-23 0 0 20 -2 129
10 Asturias 2020-06-24 0 0 18.2 -9 63
# … with 643 more rows, and 5 more variables: mob_residential <dbl>,
# mob_retail_recreation <dbl>, mob_transit_stations <dbl>,
# mob_workplaces <dbl>, mob_flujo <dbl>
<- data_asturias %>%
data_asturias_train filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d"))
summary(data_asturias_train$fecha)
Min. 1st Qu. Median Mean 3rd Qu. Max.
"2020-06-15" "2020-10-14" "2021-02-13" "2021-02-13" "2021-06-14" "2021-10-14"
<- data_asturias %>%
data_asturias_test filter(fecha > as.Date("2021-10-14", format = "%Y-%m-%d"))
summary(data_asturias_test$fecha)
Min. 1st Qu. Median Mean 3rd Qu. Max.
"2021-10-15" "2021-11-25" "2022-01-05" "2022-01-05" "2022-02-15" "2022-03-29"
<- data_asturias %>%
lambda_asturias features(num_casos, features = guerrero) %>%
pull(lambda_guerrero)
<- data_asturias_train %>%
fit_model model(
arima_at1 = ARIMA(box_cox(num_casos, lambda_asturias)),
arima_at2 = ARIMA(box_cox(num_casos, lambda_asturias),
stepwise = FALSE, approx = FALSE),
Snaive = SNAIVE(box_cox(num_casos, lambda_asturias))
)
# Show and report model
fit_model
# A mable: 1 x 4
# Key: provincia [1]
provincia arima_at1 arima_at2 Snaive
<chr> <model> <model> <model>
1 Asturias <ARIMA(3,1,1)(2,0,0)[7]> <ARIMA(2,1,4)> <SNAIVE>
glance(fit_model)
# A tibble: 3 × 9
provincia .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
1 Asturias arima_at1 0.944 -673. 1360. 1360. 1390. <cpl [17]> <cpl [1]>
2 Asturias arima_at2 0.910 -664. 1343. 1343. 1372. <cpl [2]> <cpl [4]>
3 Asturias Snaive 2.32 NA NA NA NA <NULL> <NULL>
%>%
fit_model pivot_longer(-provincia,
names_to = ".model",
values_to = "orders") %>%
left_join(glance(fit_model), by = c("provincia", ".model")) %>%
arrange(AICc) %>%
select(.model:BIC)
# A mable: 3 x 7
# Key: .model [3]
.model orders sigma2 log_lik AIC AICc BIC
<chr> <model> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at2 <ARIMA(2,1,4)> 0.910 -664. 1343. 1343. 1372.
2 arima_at1 <ARIMA(3,1,1)(2,0,0)[7]> 0.944 -673. 1360. 1360. 1390.
3 Snaive <SNAIVE> 2.32 NA NA NA NA
# We use a Ljung-Box test >> large p-value, confirms residuals are similar / considered to white noise
%>%
fit_model select(Snaive) %>%
gg_tsresiduals()
%>%
fit_model select(arima_at1) %>%
gg_tsresiduals()
%>%
fit_model select(arima_at2) %>%
gg_tsresiduals()
augment(fit_model) %>%
features(.innov, ljung_box, lag=7)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Asturias arima_at1 10.6 0.158
2 Asturias arima_at2 8.11 0.323
3 Asturias Snaive 629. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=14)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Asturias arima_at1 33.1 0.00282
2 Asturias arima_at2 23.6 0.0509
3 Asturias Snaive 892. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=21)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Asturias arima_at1 49.3 0.000460
2 Asturias arima_at2 33.3 0.0433
3 Asturias Snaive 927. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=90)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Asturias arima_at1 118. 0.0242
2 Asturias arima_at2 86.5 0.584
3 Asturias Snaive 1195. 0
# Significant spikes out of 30 is still consistent with white noise
# To be sure, use a Ljung-Box test, which has a large p-value, confirming that
# the residuals are similar to white noise.
# Note that the alternative models also passed this test.
#
# Forecast
<- fabletools::forecast(fit_model, h = 7)
fc_h7 <- fabletools::forecast(fit_model, h = 14)
fc_h14 <- fabletools::forecast(fit_model, h = 21)
fc_h21 <- fabletools::forecast(fit_model, h = 90) fc_h90
7 days
# Plots
%>%
fc_h7 autoplot(
%>%
data_asturias_test filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(9))
+
) labs(y = "Nº cases", title = "Asturias - forecast h7")
%>%
fc_h7 autoplot(
%>%
data_asturias filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(9) &
> as.Date("2021-07-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Asturias - forecast h7")
# Accuracy
::accuracy(fc_h7, data_asturias) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Asturias Test 5.64 10.3 7.00 15.7 28.4 0.141 0.126 0.214
2 arima_at2 Asturias Test 6.06 9.97 7.28 18.4 30.5 0.147 0.121 0.385
3 Snaive Asturias Test 2.41 14.0 9.98 -10.1 49.0 0.201 0.171 0.00766
14 days
# Plots
%>%
fc_h14 autoplot(
%>%
data_asturias_test filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(16))
+
) labs(y = "Nº cases", title = "Asturias - forecast h14")
%>%
fc_h14 autoplot(
%>%
data_asturias filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(16) &
> as.Date("2021-07-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Asturias - forecast h14")
# Accuracy
::accuracy(fc_h14, data_asturias) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Asturias Test 10.5 14.3 11.1 29.6 35.9 0.224 0.174 0.172
2 arima_at2 Asturias Test 10.9 14.2 11.5 32.1 38.1 0.232 0.173 0.303
3 Snaive Asturias Test 6.69 16.0 11.4 8.17 41.5 0.229 0.195 -0.156
21 days
# Plots
%>%
fc_h21 autoplot(
%>%
data_asturias_test filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(23))
+
) labs(y = "Nº cases", title = "Asturias - forecast h21")
%>%
fc_h21 autoplot(
%>%
data_asturias filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(23) &
> as.Date("2021-07-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Asturias - forecast h21")
# Accuracy
::accuracy(fc_h21, data_asturias) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Asturias Test 9.79 13.5 10.5 27.5 32.9 0.211 0.164 0.0331
2 arima_at2 Asturias Test 10.6 13.7 11.0 31.0 35.1 0.222 0.167 0.126
3 Snaive Asturias Test 5.77 15.6 11.5 6.52 40.8 0.232 0.190 -0.223
90 days
# Plots
%>%
fc_h90 autoplot(
%>%
data_asturias_test filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(92))
+
) labs(y = "Nº cases", title = "Asturias - forecast h90")
%>%
fc_h90 autoplot(
%>%
data_asturias filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(92) &
> as.Date("2021-07-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Asturias - forecast h90")
# Accuracy
::accuracy(fc_h90, data_asturias) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Asturias Test 704. 1211. 704. 67.5 70.0 14.2 14.8 0.925
2 arima_at2 Asturias Test 713. 1221. 714. 71.1 72.9 14.4 14.9 0.926
3 Snaive Asturias Test 702. 1213. 704. 60.0 71.7 14.2 14.8 0.925
Barcelona
<- covid_data %>%
data_barcelona filter(provincia == "Barcelona") %>%
filter(fecha > as.Date("2020-06-14", format = "%Y-%m-%d")) %>%
select(provincia, fecha, num_casos, num_hosp, tmed,mob_grocery_pharmacy, mob_parks,
%>%
mob_residential, mob_retail_recreation, mob_transit_stations, mob_workplaces, mob_flujo) drop_na(-mob_flujo) %>%
as_tsibble(key = provincia, index = fecha)
data_barcelona
# A tsibble: 653 x 12 [1D]
# Key: provincia [1]
provincia fecha num_casos num_hosp tmed mob_grocery_pharmacy mob_parks
<chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Barcelona 2020-06-15 62 3 21.2 -9 17
2 Barcelona 2020-06-16 66 3 20.8 -10 -16
3 Barcelona 2020-06-17 70 4 20.3 -4 14
4 Barcelona 2020-06-18 68 4 18.8 -8 -6
5 Barcelona 2020-06-19 65 4 20.4 -5 10
6 Barcelona 2020-06-20 53 4 21.8 -12 15
7 Barcelona 2020-06-21 38 1 22.8 -18 18
8 Barcelona 2020-06-22 69 5 23.8 5 24
9 Barcelona 2020-06-23 95 3 23.4 19 28
10 Barcelona 2020-06-24 44 4 23.6 -71 28
# … with 643 more rows, and 5 more variables: mob_residential <dbl>,
# mob_retail_recreation <dbl>, mob_transit_stations <dbl>,
# mob_workplaces <dbl>, mob_flujo <dbl>
<- data_barcelona %>%
data_barcelona_train filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d"))
summary(data_barcelona_train$fecha)
Min. 1st Qu. Median Mean 3rd Qu. Max.
"2020-06-15" "2020-10-14" "2021-02-13" "2021-02-13" "2021-06-14" "2021-10-14"
<- data_barcelona %>%
data_barcelona_test filter(fecha > as.Date("2021-10-14", format = "%Y-%m-%d"))
summary(data_barcelona_test$fecha)
Min. 1st Qu. Median Mean 3rd Qu. Max.
"2021-10-15" "2021-11-25" "2022-01-05" "2022-01-05" "2022-02-15" "2022-03-29"
<- data_barcelona %>%
lambda_Barcelona features(num_casos, features = guerrero) %>%
pull(lambda_guerrero)
<- data_barcelona_train %>%
fit_model model(
arima_at1 = ARIMA(box_cox(num_casos, lambda_Barcelona)),
arima_at2 = ARIMA(box_cox(num_casos, lambda_Barcelona),
stepwise = FALSE, approx = FALSE),
Snaive = SNAIVE(box_cox(num_casos, lambda_Barcelona))
)
# Show and report model
fit_model
# A mable: 1 x 4
# Key: provincia [1]
provincia arima_at1 arima_at2 Snaive
<chr> <model> <model> <model>
1 Barcelona <ARIMA(0,1,3)(0,1,2)[7]> <ARIMA(1,1,2)(0,1,2)[7]> <SNAIVE>
glance(fit_model)
# A tibble: 3 × 9
provincia .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
1 Barcelona arima_at1 0.196 -293. 597. 598. 622. <cpl [0]> <cpl [17]>
2 Barcelona arima_at2 0.188 -283. 579. 579. 604. <cpl [1]> <cpl [16]>
3 Barcelona Snaive 0.964 NA NA NA NA <NULL> <NULL>
%>%
fit_model pivot_longer(-provincia,
names_to = ".model",
values_to = "orders") %>%
left_join(glance(fit_model), by = c("provincia", ".model")) %>%
arrange(AICc) %>%
select(.model:BIC)
# A mable: 3 x 7
# Key: .model [3]
.model orders sigma2 log_lik AIC AICc BIC
<chr> <model> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at2 <ARIMA(1,1,2)(0,1,2)[7]> 0.188 -283. 579. 579. 604.
2 arima_at1 <ARIMA(0,1,3)(0,1,2)[7]> 0.196 -293. 597. 598. 622.
3 Snaive <SNAIVE> 0.964 NA NA NA NA
# We use a Ljung-Box test >> large p-value, confirms residuals are similar / considered to white noise
%>%
fit_model select(Snaive) %>%
gg_tsresiduals()
%>%
fit_model select(arima_at1) %>%
gg_tsresiduals()
%>%
fit_model select(arima_at2) %>%
gg_tsresiduals()
augment(fit_model) %>%
features(.innov, ljung_box, lag=7)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Barcelona arima_at1 11.7 0.110
2 Barcelona arima_at2 2.55 0.923
3 Barcelona Snaive 1610. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=14)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Barcelona arima_at1 27.0 0.0191
2 Barcelona arima_at2 8.86 0.840
3 Barcelona Snaive 2198. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=21)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Barcelona arima_at1 30.7 0.0788
2 Barcelona arima_at2 13.4 0.892
3 Barcelona Snaive 2259. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=90)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Barcelona arima_at1 131. 0.00328
2 Barcelona arima_at2 88.9 0.513
3 Barcelona Snaive 3453. 0
# Significant spikes out of 30 is still consistent with white noise
# To be sure, use a Ljung-Box test, which has a large p-value, confirming that
# the residuals are similar to white noise.
# Note that the alternative models also passed this test.
#
# Forecast
<- fabletools::forecast(fit_model, h = 7)
fc_h7 <- fabletools::forecast(fit_model, h = 14)
fc_h14 <- fabletools::forecast(fit_model, h = 21)
fc_h21 <- fabletools::forecast(fit_model, h = 90) fc_h90
7 days
# Plots
%>%
fc_h7 autoplot(
%>%
data_barcelona_test filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(9))
+
) labs(y = "Nº cases", title = "Barcelona - forecast h7")
%>%
fc_h7 autoplot(
%>%
data_barcelona filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(9) &
> as.Date("2021-07-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Barcelona - forecast h7")
# Accuracy
::accuracy(fc_h7, data_barcelona) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Barcelona Test 18.9 64.7 54.8 1.87 28.9 0.146 0.0976 0.273
2 arima_at2 Barcelona Test 23.0 68.1 55.2 3.74 28.5 0.147 0.103 0.160
3 Snaive Barcelona Test -21.0 79.0 67.8 -15.5 40.3 0.180 0.119 0.184
14 days
# Plots
%>%
fc_h14 autoplot(
%>%
data_barcelona_test filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(16))
+
) labs(y = "Nº cases", title = "Barcelona - forecast h14")
%>%
fc_h14 autoplot(
%>%
data_barcelona filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(16) &
> as.Date("2021-07-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Barcelona - forecast h14")
# Accuracy
::accuracy(fc_h14, data_barcelona) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Barcelona Test 35.3 73.8 57.1 10.1 26.0 0.152 0.111 0.279
2 arima_at2 Barcelona Test 38.4 76.8 58.4 11.4 26.3 0.155 0.116 0.216
3 Snaive Barcelona Test -15.3 87.1 76.0 -12.2 40.4 0.202 0.131 0.0610
21 days
# Plots
%>%
fc_h21 autoplot(
%>%
data_barcelona_test filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(23))
+
) labs(y = "Nº cases", title = "Barcelona - forecast h21")
%>%
fc_h21 autoplot(
%>%
data_barcelona filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(23) &
> as.Date("2021-07-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Barcelona - forecast h21")
# Accuracy
::accuracy(fc_h21, data_barcelona) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Barcelona Test 58.9 107. 75.5 15.8 28.6 0.201 0.161 0.191
2 arima_at2 Barcelona Test 56.9 103. 73.6 15.2 28.1 0.196 0.156 0.104
3 Snaive Barcelona Test -2.10 119. 90.4 -10.9 41.3 0.240 0.179 -0.239
90 days
# Plots
%>%
fc_h90 autoplot(
%>%
data_barcelona_test filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(92))
+
) labs(y = "Nº cases", title = "Barcelona - forecast h90")
%>%
fc_h90 autoplot(
%>%
data_barcelona filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(92) &
> as.Date("2021-07-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Barcelona - forecast h90")
# Accuracy
::accuracy(fc_h90, data_barcelona) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Barcelona Test 5068. 9027. 5072. 71.8 74.8 13.5 13.6 0.868
2 arima_at2 Barcelona Test 4960. 8907. 4964. 67.8 70.8 13.2 13.4 0.868
3 Snaive Barcelona Test 4916. 8890. 4939. 58.7 71.4 13.1 13.4 0.870
Madrid
<- covid_data %>%
data_madrid filter(provincia == "Madrid") %>%
filter(fecha > as.Date("2020-06-14", format = "%Y-%m-%d")) %>%
select(provincia, fecha, num_casos, num_hosp, tmed,mob_grocery_pharmacy, mob_parks,
%>%
mob_residential, mob_retail_recreation, mob_transit_stations, mob_workplaces, mob_flujo) drop_na(-mob_flujo) %>%
as_tsibble(key = provincia, index = fecha)
data_madrid
# A tsibble: 653 x 12 [1D]
# Key: provincia [1]
provincia fecha num_casos num_hosp tmed mob_grocery_pharmacy mob_parks
<chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Madrid 2020-06-15 153 10 20.7 -8 34
2 Madrid 2020-06-16 91 16 21 -8 21
3 Madrid 2020-06-17 93 14 20.2 -5 34
4 Madrid 2020-06-18 85 17 21.9 -7 30
5 Madrid 2020-06-19 78 20 22.6 -9 22
6 Madrid 2020-06-20 67 20 23.4 -12 25
7 Madrid 2020-06-21 42 3 25 -13 14
8 Madrid 2020-06-22 60 16 27.3 -8 29
9 Madrid 2020-06-23 68 11 28.4 -9 23
10 Madrid 2020-06-24 49 15 28 -7 22
# … with 643 more rows, and 5 more variables: mob_residential <dbl>,
# mob_retail_recreation <dbl>, mob_transit_stations <dbl>,
# mob_workplaces <dbl>, mob_flujo <dbl>
<- data_madrid %>%
data_madrid_train filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d"))
summary(data_madrid_train$fecha)
Min. 1st Qu. Median Mean 3rd Qu. Max.
"2020-06-15" "2020-10-14" "2021-02-13" "2021-02-13" "2021-06-14" "2021-10-14"
<- data_madrid %>%
data_madrid_test filter(fecha > as.Date("2021-10-14", format = "%Y-%m-%d"))
summary(data_madrid_test$fecha)
Min. 1st Qu. Median Mean 3rd Qu. Max.
"2021-10-15" "2021-11-25" "2022-01-05" "2022-01-05" "2022-02-15" "2022-03-29"
<- data_madrid %>%
lambda_Madrid features(num_casos, features = guerrero) %>%
pull(lambda_guerrero)
<- data_madrid_train %>%
fit_model model(
arima_at1 = ARIMA(box_cox(num_casos, lambda_Madrid)),
arima_at2 = ARIMA(box_cox(num_casos, lambda_Madrid),
stepwise = FALSE, approx = FALSE),
Snaive = SNAIVE(box_cox(num_casos, lambda_Madrid))
)
# Show and report model
fit_model
# A mable: 1 x 4
# Key: provincia [1]
provincia arima_at1 arima_at2 Snaive
<chr> <model> <model> <model>
1 Madrid <ARIMA(2,1,1)(1,1,1)[7]> <ARIMA(2,1,1)(1,1,1)[7]> <SNAIVE>
glance(fit_model)
# A tibble: 3 × 9
provincia .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
1 Madrid arima_at1 0.0667 -33.4 78.8 79.0 104. <cpl [9]> <cpl [8]>
2 Madrid arima_at2 0.0667 -33.4 78.8 79.0 104. <cpl [9]> <cpl [8]>
3 Madrid Snaive 0.291 NA NA NA NA <NULL> <NULL>
%>%
fit_model pivot_longer(-provincia,
names_to = ".model",
values_to = "orders") %>%
left_join(glance(fit_model), by = c("provincia", ".model")) %>%
arrange(AICc) %>%
select(.model:BIC)
# A mable: 3 x 7
# Key: .model [3]
.model orders sigma2 log_lik AIC AICc BIC
<chr> <model> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 <ARIMA(2,1,1)(1,1,1)[7]> 0.0667 -33.4 78.8 79.0 104.
2 arima_at2 <ARIMA(2,1,1)(1,1,1)[7]> 0.0667 -33.4 78.8 79.0 104.
3 Snaive <SNAIVE> 0.291 NA NA NA NA
# We use a Ljung-Box test >> large p-value, confirms residuals are similar / considered to white noise
%>%
fit_model select(Snaive) %>%
gg_tsresiduals()
%>%
fit_model select(arima_at1) %>%
gg_tsresiduals()
%>%
fit_model select(arima_at2) %>%
gg_tsresiduals()
augment(fit_model) %>%
features(.innov, ljung_box, lag=7)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Madrid arima_at1 10.9 0.143
2 Madrid arima_at2 10.9 0.143
3 Madrid Snaive 1678. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=14)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Madrid arima_at1 39.6 0.000292
2 Madrid arima_at2 39.6 0.000292
3 Madrid Snaive 2594. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=21)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Madrid arima_at1 48.0 0.000679
2 Madrid arima_at2 48.0 0.000679
3 Madrid Snaive 2854. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=90)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Madrid arima_at1 105. 0.130
2 Madrid arima_at2 105. 0.130
3 Madrid Snaive 3856. 0
# Significant spikes out of 30 is still consistent with white noise
# To be sure, use a Ljung-Box test, which has a large p-value, confirming that
# the residuals are similar to white noise.
# Note that the alternative models also passed this test.
#
# Forecast
<- fabletools::forecast(fit_model, h = 7)
fc_h7 <- fabletools::forecast(fit_model, h = 14)
fc_h14 <- fabletools::forecast(fit_model, h = 21)
fc_h21 <- fabletools::forecast(fit_model, h = 90) fc_h90
7 days
# Plots
%>%
fc_h7 autoplot(
%>%
data_madrid_test filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(9))
+
) labs(y = "Nº cases", title = "Madrid - forecast h7")
%>%
fc_h7 autoplot(
%>%
data_madrid filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(9) &
> as.Date("2021-07-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Madrid - forecast h7")
# Accuracy
::accuracy(fc_h7, data_madrid) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Madrid Test 135. 163. 144. 39.5 47.6 0.339 0.243 -0.0355
2 arima_at2 Madrid Test 135. 163. 144. 39.5 47.6 0.339 0.243 -0.0355
3 Snaive Madrid Test 32.1 124. 105. -7.03 48.2 0.247 0.185 -0.109
14 days
# Plots
%>%
fc_h14 autoplot(
%>%
data_madrid_test filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(16))
+
) labs(y = "Nº cases", title = "Madrid - forecast h14")
%>%
fc_h14 autoplot(
%>%
data_madrid filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(16) &
> as.Date("2021-07-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Madrid - forecast h14")
# Accuracy
::accuracy(fc_h14, data_madrid) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Madrid Test 150. 178. 155. 45.1 49.4 0.364 0.265 0.0892
2 arima_at2 Madrid Test 150. 178. 155. 45.1 49.4 0.364 0.265 0.0892
3 Snaive Madrid Test 26.3 126. 105. -8.58 46.6 0.246 0.188 -0.0472
21 days
# Plots
%>%
fc_h21 autoplot(
%>%
data_madrid_test filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(23))
+
) labs(y = "Nº cases", title = "Madrid - forecast h21")
%>%
fc_h21 autoplot(
%>%
data_madrid filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(23) &
> as.Date("2021-07-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Madrid - forecast h21")
# Accuracy
::accuracy(fc_h21, data_madrid) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Madrid Test 174. 210. 177. 50.0 52.8 0.418 0.313 0.287
2 arima_at2 Madrid Test 174. 210. 177. 50.0 52.8 0.418 0.313 0.287
3 Snaive Madrid Test 34.8 140. 117. -8.01 48.6 0.275 0.209 0.131
90 days
# Plots
%>%
fc_h90 autoplot(
%>%
data_madrid_test filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(92))
+
) labs(y = "Nº cases", title = "Madrid - forecast h90")
%>%
fc_h90 autoplot(
%>%
data_madrid filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(92) &
> as.Date("2021-07-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Madrid - forecast h90")
# Accuracy
::accuracy(fc_h90, data_madrid) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Madrid Test 5007. 8744. 5008. 81.7 82.4 11.8 13.0 0.840
2 arima_at2 Madrid Test 5007. 8744. 5008. 81.7 82.4 11.8 13.0 0.840
3 Snaive Madrid Test 4763. 8564. 4792. 51.8 68.9 11.3 12.7 0.840
Málaga
<- covid_data %>%
data_malaga filter(provincia == "Málaga") %>%
filter(fecha > as.Date("2020-06-14", format = "%Y-%m-%d")) %>%
select(provincia, fecha, num_casos, num_hosp, tmed,mob_grocery_pharmacy, mob_parks,
%>%
mob_residential, mob_retail_recreation, mob_transit_stations, mob_workplaces, mob_flujo) drop_na(-mob_flujo) %>%
as_tsibble(key = provincia, index = fecha)
data_malaga
# A tsibble: 653 x 12 [1D]
# Key: provincia [1]
provincia fecha num_casos num_hosp tmed mob_grocery_pharmacy mob_parks
<chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Málaga 2020-06-15 1 0 27.2 -16 -9
2 Málaga 2020-06-16 1 0 27.8 -13 -4
3 Málaga 2020-06-17 2 0 26.9 -13 1
4 Málaga 2020-06-18 2 1 21.6 -12 3
5 Málaga 2020-06-19 1 0 24.7 -12 2
6 Málaga 2020-06-20 4 0 22.6 -14 8
7 Málaga 2020-06-21 4 0 22.7 -16 -3
8 Málaga 2020-06-22 6 0 25 -13 1
9 Málaga 2020-06-23 8 0 25.6 -10 18
10 Málaga 2020-06-24 65 2 24.5 -13 12
# … with 643 more rows, and 5 more variables: mob_residential <dbl>,
# mob_retail_recreation <dbl>, mob_transit_stations <dbl>,
# mob_workplaces <dbl>, mob_flujo <dbl>
<- data_malaga %>%
data_malaga_train filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d"))
summary(data_malaga_train$fecha)
Min. 1st Qu. Median Mean 3rd Qu. Max.
"2020-06-15" "2020-10-14" "2021-02-13" "2021-02-13" "2021-06-14" "2021-10-14"
<- data_malaga %>%
data_malaga_test filter(fecha > as.Date("2021-10-14", format = "%Y-%m-%d"))
summary(data_malaga_test$fecha)
Min. 1st Qu. Median Mean 3rd Qu. Max.
"2021-10-15" "2021-11-25" "2022-01-05" "2022-01-05" "2022-02-15" "2022-03-29"
<- data_malaga %>%
lambda_Malaga features(num_casos, features = guerrero) %>%
pull(lambda_guerrero)
<- data_malaga_train %>%
fit_model model(
arima_at1 = ARIMA(box_cox(num_casos, lambda_Malaga)),
arima_at2 = ARIMA(box_cox(num_casos, lambda_Malaga),
stepwise = FALSE, approx = FALSE),
Snaive = SNAIVE(box_cox(num_casos, lambda_Malaga))
)
# Show and report model
fit_model
# A mable: 1 x 4
# Key: provincia [1]
provincia arima_at1 arima_at2 Snaive
<chr> <model> <model> <model>
1 Málaga <ARIMA(4,1,0)(1,1,1)[7]> <ARIMA(1,1,3)(0,1,1)[7]> <SNAIVE>
glance(fit_model)
# A tibble: 3 × 9
provincia .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
1 Málaga arima_at1 0.518 -525. 1063. 1064. 1093. <cpl [11]> <cpl [7]>
2 Málaga arima_at2 0.504 -518. 1049. 1049. 1074. <cpl [1]> <cpl [10]>
3 Málaga Snaive 2.15 NA NA NA NA <NULL> <NULL>
%>%
fit_model pivot_longer(-provincia,
names_to = ".model",
values_to = "orders") %>%
left_join(glance(fit_model), by = c("provincia", ".model")) %>%
arrange(AICc) %>%
select(.model:BIC)
# A mable: 3 x 7
# Key: .model [3]
.model orders sigma2 log_lik AIC AICc BIC
<chr> <model> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at2 <ARIMA(1,1,3)(0,1,1)[7]> 0.504 -518. 1049. 1049. 1074.
2 arima_at1 <ARIMA(4,1,0)(1,1,1)[7]> 0.518 -525. 1063. 1064. 1093.
3 Snaive <SNAIVE> 2.15 NA NA NA NA
# We use a Ljung-Box test >> large p-value, confirms residuals are similar / considered to white noise
%>%
fit_model select(Snaive) %>%
gg_tsresiduals()
%>%
fit_model select(arima_at1) %>%
gg_tsresiduals()
%>%
fit_model select(arima_at2) %>%
gg_tsresiduals()
augment(fit_model) %>%
features(.innov, ljung_box, lag=7)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Málaga arima_at1 12.3 0.0912
2 Málaga arima_at2 8.13 0.321
3 Málaga Snaive 1361. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=14)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Málaga arima_at1 30.5 0.00654
2 Málaga arima_at2 18.3 0.195
3 Málaga Snaive 1956. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=21)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Málaga arima_at1 35.2 0.0270
2 Málaga arima_at2 24.2 0.282
3 Málaga Snaive 2100. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=90)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Málaga arima_at1 130. 0.00387
2 Málaga arima_at2 85.1 0.627
3 Málaga Snaive 3195. 0
# Significant spikes out of 30 is still consistent with white noise
# To be sure, use a Ljung-Box test, which has a large p-value, confirming that
# the residuals are similar to white noise.
# Note that the alternative models also passed this test.
#
# Forecast
<- fabletools::forecast(fit_model, h = 7)
fc_h7 <- fabletools::forecast(fit_model, h = 14)
fc_h14 <- fabletools::forecast(fit_model, h = 21)
fc_h21 <- fabletools::forecast(fit_model, h = 90) fc_h90
7 days
# Plots
%>%
fc_h7 autoplot(
%>%
data_malaga_test filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(9))
+
) labs(y = "Nº cases", title = "Malaga - forecast h7")
%>%
fc_h7 autoplot(
%>%
data_malaga filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(9) &
> as.Date("2021-07-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Malaga - forecast h7")
# Accuracy
::accuracy(fc_h7, data_malaga) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Málaga Test 15.6 22.6 21.8 18.9 46.0 0.221 0.134 -0.133
2 arima_at2 Málaga Test 15.4 22.9 22.3 18.1 47.8 0.226 0.135 -0.0302
3 Snaive Málaga Test -4.00 22.9 19.8 -27.0 52.2 0.201 0.135 0.0123
14 days
# Plots
%>%
fc_h14 autoplot(
%>%
data_malaga_test filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(16))
+
) labs(y = "Nº cases", title = "Malaga - forecast h14")
%>%
fc_h14 autoplot(
%>%
data_malaga filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(16) &
> as.Date("2021-07-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Malaga - forecast h14")
# Accuracy
::accuracy(fc_h14, data_malaga) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Málaga Test 20.1 27.7 24.0 26.8 42.4 0.243 0.163 0.199
2 arima_at2 Málaga Test 21.4 29.0 25.2 28.8 44.9 0.256 0.172 0.261
3 Snaive Málaga Test -3.86 22.7 18.7 -22.4 43.6 0.190 0.134 -0.0973
21 days
# Plots
%>%
fc_h21 autoplot(
%>%
data_malaga_test filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(23))
+
) labs(y = "Nº cases", title = "Malaga - forecast h21")
%>%
fc_h21 autoplot(
%>%
data_malaga filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(23) &
> as.Date("2021-07-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Malaga - forecast h21")
# Accuracy
::accuracy(fc_h21, data_malaga) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Málaga Test 26.7 33.3 29.2 35.0 45.4 0.297 0.197 0.270
2 arima_at2 Málaga Test 28.1 34.8 30.7 37.2 47.9 0.311 0.205 0.303
3 Snaive Málaga Test -0.951 22.0 18.6 -14.8 37.9 0.188 0.130 0.0886
90 days
# Plots
%>%
fc_h90 autoplot(
%>%
data_malaga_test filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(92))
+
) labs(y = "Nº cases", title = "Malaga - forecast h90")
%>%
fc_h90 autoplot(
%>%
data_malaga filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(92) &
> as.Date("2021-07-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Malaga - forecast h90")
# Accuracy
::accuracy(fc_h90, data_malaga) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Málaga Test 680. 1100. 680. 73.4 75.9 6.91 6.50 0.934
2 arima_at2 Málaga Test 676. 1099. 677. 72.2 74.7 6.87 6.49 0.935
3 Snaive Málaga Test 620. 1049. 628. 44.9 62.3 6.37 6.20 0.929
Sevilla
<- covid_data %>%
data_sevilla filter(provincia == "Sevilla") %>%
filter(fecha > as.Date("2020-06-14", format = "%Y-%m-%d")) %>%
select(provincia, fecha, num_casos, num_hosp, tmed,mob_grocery_pharmacy, mob_parks,
%>%
mob_residential, mob_retail_recreation, mob_transit_stations, mob_workplaces, mob_flujo) drop_na(-mob_flujo) %>%
as_tsibble(key = provincia, index = fecha)
data_sevilla
# A tsibble: 653 x 12 [1D]
# Key: provincia [1]
provincia fecha num_casos num_hosp tmed mob_grocery_pharmacy mob_parks
<chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Sevilla 2020-06-15 2 1 23.1 -14 -13
2 Sevilla 2020-06-16 1 0 24.0 -10 -4
3 Sevilla 2020-06-17 0 0 24.9 -10 -7
4 Sevilla 2020-06-18 0 0 25.8 -12 -13
5 Sevilla 2020-06-19 0 0 26.6 -13 -20
6 Sevilla 2020-06-20 0 0 27.5 -20 -34
7 Sevilla 2020-06-21 0 0 28.4 -27 -47
8 Sevilla 2020-06-22 1 0 29.3 -17 -19
9 Sevilla 2020-06-23 0 0 30.2 -12 -13
10 Sevilla 2020-06-24 1 0 29.4 -12 -12
# … with 643 more rows, and 5 more variables: mob_residential <dbl>,
# mob_retail_recreation <dbl>, mob_transit_stations <dbl>,
# mob_workplaces <dbl>, mob_flujo <dbl>
<- data_sevilla %>%
data_sevilla_train filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d"))
summary(data_sevilla_train$fecha)
Min. 1st Qu. Median Mean 3rd Qu. Max.
"2020-06-15" "2020-10-14" "2021-02-13" "2021-02-13" "2021-06-14" "2021-10-14"
<- data_sevilla %>%
data_sevilla_test filter(fecha > as.Date("2021-10-14", format = "%Y-%m-%d"))
summary(data_sevilla_test$fecha)
Min. 1st Qu. Median Mean 3rd Qu. Max.
"2021-10-15" "2021-11-25" "2022-01-05" "2022-01-05" "2022-02-15" "2022-03-29"
<- data_sevilla %>%
lambda_Sevilla features(num_casos, features = guerrero) %>%
pull(lambda_guerrero)
<- data_sevilla_train %>%
fit_model model(
arima_at1 = ARIMA(box_cox(num_casos, lambda_Sevilla)),
arima_at2 = ARIMA(box_cox(num_casos, lambda_Sevilla),
stepwise = FALSE, approx = FALSE),
Snaive = SNAIVE(box_cox(num_casos, lambda_Sevilla))
)
# Show and report model
fit_model
# A mable: 1 x 4
# Key: provincia [1]
provincia arima_at1 arima_at2 Snaive
<chr> <model> <model> <model>
1 Sevilla <ARIMA(3,1,1)(2,0,0)[7]> <ARIMA(4,1,0)(2,0,0)[7]> <SNAIVE>
glance(fit_model)
# A tibble: 3 × 9
provincia .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
1 Sevilla arima_at1 0.924 -670. 1353. 1354. 1383. <cpl [17]> <cpl [1]>
2 Sevilla arima_at2 0.922 -669. 1353. 1353. 1382. <cpl [18]> <cpl [0]>
3 Sevilla Snaive 2.02 NA NA NA NA <NULL> <NULL>
%>%
fit_model pivot_longer(-provincia,
names_to = ".model",
values_to = "orders") %>%
left_join(glance(fit_model), by = c("provincia", ".model")) %>%
arrange(AICc) %>%
select(.model:BIC)
# A mable: 3 x 7
# Key: .model [3]
.model orders sigma2 log_lik AIC AICc BIC
<chr> <model> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at2 <ARIMA(4,1,0)(2,0,0)[7]> 0.922 -669. 1353. 1353. 1382.
2 arima_at1 <ARIMA(3,1,1)(2,0,0)[7]> 0.924 -670. 1353. 1354. 1383.
3 Snaive <SNAIVE> 2.02 NA NA NA NA
# We use a Ljung-Box test >> large p-value, confirms residuals are similar / considered to white noise
%>%
fit_model select(Snaive) %>%
gg_tsresiduals()
%>%
fit_model select(arima_at1) %>%
gg_tsresiduals()
%>%
fit_model select(arima_at2) %>%
gg_tsresiduals()
augment(fit_model) %>%
features(.innov, ljung_box, lag=7)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Sevilla arima_at1 8.72 0.274
2 Sevilla arima_at2 7.65 0.365
3 Sevilla Snaive 757. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=14)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Sevilla arima_at1 24.5 0.0402
2 Sevilla arima_at2 23.6 0.0511
3 Sevilla Snaive 1110. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=21)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Sevilla arima_at1 37.4 0.0151
2 Sevilla arima_at2 36.6 0.0189
3 Sevilla Snaive 1235. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=90)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Sevilla arima_at1 103. 0.161
2 Sevilla arima_at2 102. 0.179
3 Sevilla Snaive 1692. 0
# Significant spikes out of 30 is still consistent with white noise
# To be sure, use a Ljung-Box test, which has a large p-value, confirming that
# the residuals are similar to white noise.
# Note that the alternative models also passed this test.
#
# Forecast
<- fabletools::forecast(fit_model, h = 7)
fc_h7 <- fabletools::forecast(fit_model, h = 14)
fc_h14 <- fabletools::forecast(fit_model, h = 21)
fc_h21 <- fabletools::forecast(fit_model, h = 90) fc_h90
7 days
# Plots
%>%
fc_h7 autoplot(
%>%
data_sevilla_test filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(9))
+
) labs(y = "Nº cases", title = "Sevilla - forecast h7")
%>%
fc_h7 autoplot(
%>%
data_sevilla filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(9) &
> as.Date("2021-07-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Sevilla - forecast h7")
# Accuracy
::accuracy(fc_h7, data_sevilla) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Sevilla Test 7.30 11.4 8.80 19.1 24.7 0.0870 0.0721 -0.0998
2 arima_at2 Sevilla Test 7.38 11.4 8.93 19.3 25.1 0.0883 0.0725 -0.0852
3 Snaive Sevilla Test -5.85 16.0 13.7 -22.0 40.6 0.135 0.102 0.0310
14 days
# Plots
%>%
fc_h14 autoplot(
%>%
data_sevilla_test filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(16))
+
) labs(y = "Nº cases", title = "Sevilla - forecast h14")
%>%
fc_h14 autoplot(
%>%
data_sevilla filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(16) &
> as.Date("2021-07-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Sevilla - forecast h14")
# Accuracy
::accuracy(fc_h14, data_sevilla) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_a… Sevilla Test 4.45 9.82 7.83 6.10 27.4 0.0774 0.0623 -0.0579
2 arima_a… Sevilla Test 4.58 9.88 7.91 6.63 27.5 0.0782 0.0626 -0.0509
3 Snaive Sevilla Test -12.5 19.3 16.4 -57.6 66.9 0.162 0.122 0.275
21 days
# Plots
%>%
fc_h21 autoplot(
%>%
data_sevilla_test filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(23))
+
) labs(y = "Nº cases", title = "Sevilla - forecast h21")
%>%
fc_h21 autoplot(
%>%
data_sevilla filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(23) &
> as.Date("2021-07-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Sevilla - forecast h21")
# Accuracy
::accuracy(fc_h21, data_sevilla) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Sevilla Test 5.70 11.3 8.54 8.79 26.3 0.0844 0.0716 0.183
2 arima_at2 Sevilla Test 5.84 11.4 8.61 9.32 26.3 0.0850 0.0720 0.187
3 Snaive Sevilla Test -13.9 22.1 19.1 -60.8 72.0 0.189 0.140 0.327
90 days
# Plots
%>%
fc_h90 autoplot(
%>%
data_sevilla_test filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(92))
+
) labs(y = "Nº cases", title = "Sevilla - forecast h90")
%>%
fc_h90 autoplot(
%>%
data_sevilla filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(92) &
> as.Date("2021-07-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Sevilla - forecast h90")
# Accuracy
::accuracy(fc_h90, data_sevilla) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Sevilla Test 681. 1161. 682. 59.5 64.8 6.74 7.36 0.891
2 arima_at2 Sevilla Test 681. 1161. 682. 59.7 64.8 6.74 7.36 0.891
3 Snaive Sevilla Test 665. 1161. 677. 31.7 74.6 6.69 7.36 0.892
End of sixth epidemiological period
We are going to keep train the model with data as of June 14th which is considered the beginning of the second epidemiological period in Spain. There are two reasons.
But, instead of predict the last wave, we are going to predict the deleration of infections as to 31 jan 2022.
Asturias
<- covid_data %>%
data_asturias filter(provincia == "Asturias") %>%
filter(fecha > as.Date("2020-06-14", format = "%Y-%m-%d")) %>%
select(provincia, fecha, num_casos, num_hosp, tmed,mob_grocery_pharmacy, mob_parks,
%>%
mob_residential, mob_retail_recreation, mob_transit_stations, mob_workplaces, mob_flujo) drop_na(-mob_flujo) %>%
as_tsibble(key = provincia, index = fecha)
data_asturias
# A tsibble: 653 x 12 [1D]
# Key: provincia [1]
provincia fecha num_casos num_hosp tmed mob_grocery_pharmacy mob_parks
<chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Asturias 2020-06-15 0 0 16 -6 39
2 Asturias 2020-06-16 1 0 15.6 -6 7
3 Asturias 2020-06-17 0 0 15.6 -7 20
4 Asturias 2020-06-18 0 0 15.1 -4 68
5 Asturias 2020-06-19 0 0 14.8 -4 52
6 Asturias 2020-06-20 0 0 16.8 -10 71
7 Asturias 2020-06-21 0 0 18.2 -5.5 46
8 Asturias 2020-06-22 0 0 18.2 -1 99
9 Asturias 2020-06-23 0 0 20 -2 129
10 Asturias 2020-06-24 0 0 18.2 -9 63
# … with 643 more rows, and 5 more variables: mob_residential <dbl>,
# mob_retail_recreation <dbl>, mob_transit_stations <dbl>,
# mob_workplaces <dbl>, mob_flujo <dbl>
<- data_asturias %>%
data_asturias_train filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d"))
summary(data_asturias_train$fecha)
Min. 1st Qu. Median Mean 3rd Qu. Max.
"2020-06-15" "2020-11-10" "2021-04-08" "2021-04-08" "2021-09-04" "2022-01-31"
<- data_asturias %>%
data_asturias_test filter(fecha > as.Date("2022-01-31", format = "%Y-%m-%d"))
summary(data_asturias_test$fecha)
Min. 1st Qu. Median Mean 3rd Qu. Max.
"2022-02-01" "2022-02-15" "2022-03-01" "2022-03-01" "2022-03-15" "2022-03-29"
<- data_asturias %>%
lambda_asturias features(num_casos, features = guerrero) %>%
pull(lambda_guerrero)
<- data_asturias_train %>%
fit_model model(
arima_at1 = ARIMA(box_cox(num_casos, lambda_asturias)),
arima_at2 = ARIMA(box_cox(num_casos, lambda_asturias),
stepwise = FALSE, approx = FALSE),
Snaive = SNAIVE(box_cox(num_casos, lambda_asturias))
)
# Show and report model
fit_model
# A mable: 1 x 4
# Key: provincia [1]
provincia arima_at1 arima_at2 Snaive
<chr> <model> <model> <model>
1 Asturias <ARIMA(3,1,1)(2,0,0)[7]> <ARIMA(0,1,1)(1,0,1)[7]> <SNAIVE>
glance(fit_model)
# A tibble: 3 × 9
provincia .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
1 Asturias arima_at1 0.917 -816. 1646. 1646. 1677. <cpl [17]> <cpl [1]>
2 Asturias arima_at2 0.894 -811. 1629. 1629. 1647. <cpl [7]> <cpl [8]>
3 Asturias Snaive 2.53 NA NA NA NA <NULL> <NULL>
%>%
fit_model pivot_longer(-provincia,
names_to = ".model",
values_to = "orders") %>%
left_join(glance(fit_model), by = c("provincia", ".model")) %>%
arrange(AICc) %>%
select(.model:BIC)
# A mable: 3 x 7
# Key: .model [3]
.model orders sigma2 log_lik AIC AICc BIC
<chr> <model> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at2 <ARIMA(0,1,1)(1,0,1)[7]> 0.894 -811. 1629. 1629. 1647.
2 arima_at1 <ARIMA(3,1,1)(2,0,0)[7]> 0.917 -816. 1646. 1646. 1677.
3 Snaive <SNAIVE> 2.53 NA NA NA NA
# We use a Ljung-Box test >> large p-value, confirms residuals are similar / considered to white noise
%>%
fit_model select(Snaive) %>%
gg_tsresiduals()
%>%
fit_model select(arima_at1) %>%
gg_tsresiduals()
%>%
fit_model select(arima_at2) %>%
gg_tsresiduals()
augment(fit_model) %>%
features(.innov, ljung_box, lag=7)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Asturias arima_at1 13.8 0.0551
2 Asturias arima_at2 12.9 0.0743
3 Asturias Snaive 981. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=14)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Asturias arima_at1 38.9 0.000378
2 Asturias arima_at2 35.2 0.00137
3 Asturias Snaive 1393. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=21)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Asturias arima_at1 60.0 0.0000128
2 Asturias arima_at2 49.0 0.000509
3 Asturias Snaive 1470. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=57)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Asturias arima_at1 101. 0.000286
2 Asturias arima_at2 78.6 0.0304
3 Asturias Snaive 1527. 0
# Significant spikes out of 30 is still consistent with white noise
# To be sure, use a Ljung-Box test, which has a large p-value, confirming that
# the residuals are similar to white noise.
# Note that the alternative models also passed this test.
#
# Forecast
<- fabletools::forecast(fit_model, h = 7)
fc_h7 <- fabletools::forecast(fit_model, h = 14)
fc_h14 <- fabletools::forecast(fit_model, h = 21)
fc_h21 <- fabletools::forecast(fit_model, h = 57) fc_h90
7 days
# Plots
%>%
fc_h7 autoplot(
%>%
data_asturias_test filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(9))
+
) labs(y = "Nº cases", title = "Asturias - forecast h7")
%>%
fc_h7 autoplot(
%>%
data_asturias filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(9) &
> as.Date("2021-12-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Asturias - forecast h7")
# Accuracy
::accuracy(fc_h7, data_asturias) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Asturias Test -173. 272. 251. -25.1 30.1 2.60 1.24 0.192
2 arima_at2 Asturias Test -254. 334. 317. -33.7 37.7 3.27 1.52 0.162
3 Snaive Asturias Test -570. 586. 570. -62.8 62.8 5.89 2.66 -0.550
14 days
# Plots
%>%
fc_h14 autoplot(
%>%
data_asturias_test filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(16))
+
) labs(y = "Nº cases", title = "Asturias - forecast h14")
%>%
fc_h14 autoplot(
%>%
data_asturias filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(16) &
> as.Date("2021-12-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Asturias - forecast h14")
# Accuracy
::accuracy(fc_h14, data_asturias) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Asturias Test -271. 332. 310. -44.0 46.5 3.20 1.51 0.372
2 arima_at2 Asturias Test -393. 447. 424. -60.8 62.8 4.38 2.03 0.444
3 Snaive Asturias Test -769. 812. 769. -107. 107. 7.94 3.69 0.299
21 days
# Plots
%>%
fc_h21 autoplot(
%>%
data_asturias_test filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(23))
+
) labs(y = "Nº cases", title = "Asturias - forecast h21")
%>%
fc_h21 autoplot(
%>%
data_asturias filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(23) &
> as.Date("2021-12-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Asturias - forecast h21")
# Accuracy
::accuracy(fc_h21, data_asturias) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Asturias Test -355. 411. 381. -74.5 76.1 3.94 1.87 0.532
2 arima_at2 Asturias Test -493. 543. 514. -98.0 99.3 5.31 2.47 0.597
3 Snaive Asturias Test -920. 980. 920. -168. 168. 9.50 4.46 0.437
57 days
# Plots
%>%
fc_h90 autoplot(
%>%
data_asturias_test filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(59))
+
) labs(y = "Nº cases", title = "Asturias - forecast h90")
%>%
fc_h90 autoplot(
%>%
data_asturias filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(59) &
> as.Date("2021-12-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Asturias - forecast h90")
# Accuracy
::accuracy(fc_h90, data_asturias) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Asturias Test -579. 631. 589. -Inf Inf 6.09 2.87 0.719
2 arima_at2 Asturias Test -694. 735. 702. -Inf Inf 7.25 3.34 0.714
3 Snaive Asturias Test -1282. 1359. 1282. -Inf Inf 13.3 6.18 0.471
Barcelona
<- covid_data %>%
data_barcelona filter(provincia == "Barcelona") %>%
filter(fecha > as.Date("2020-06-14", format = "%Y-%m-%d")) %>%
select(provincia, fecha, num_casos, num_hosp, tmed,mob_grocery_pharmacy, mob_parks,
%>%
mob_residential, mob_retail_recreation, mob_transit_stations, mob_workplaces, mob_flujo) drop_na(-mob_flujo) %>%
as_tsibble(key = provincia, index = fecha)
data_barcelona
# A tsibble: 653 x 12 [1D]
# Key: provincia [1]
provincia fecha num_casos num_hosp tmed mob_grocery_pharmacy mob_parks
<chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Barcelona 2020-06-15 62 3 21.2 -9 17
2 Barcelona 2020-06-16 66 3 20.8 -10 -16
3 Barcelona 2020-06-17 70 4 20.3 -4 14
4 Barcelona 2020-06-18 68 4 18.8 -8 -6
5 Barcelona 2020-06-19 65 4 20.4 -5 10
6 Barcelona 2020-06-20 53 4 21.8 -12 15
7 Barcelona 2020-06-21 38 1 22.8 -18 18
8 Barcelona 2020-06-22 69 5 23.8 5 24
9 Barcelona 2020-06-23 95 3 23.4 19 28
10 Barcelona 2020-06-24 44 4 23.6 -71 28
# … with 643 more rows, and 5 more variables: mob_residential <dbl>,
# mob_retail_recreation <dbl>, mob_transit_stations <dbl>,
# mob_workplaces <dbl>, mob_flujo <dbl>
<- data_barcelona %>%
data_barcelona_train filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d"))
summary(data_barcelona_train$fecha)
Min. 1st Qu. Median Mean 3rd Qu. Max.
"2020-06-15" "2020-11-10" "2021-04-08" "2021-04-08" "2021-09-04" "2022-01-31"
<- data_barcelona %>%
data_barcelona_test filter(fecha > as.Date("2022-01-31", format = "%Y-%m-%d"))
summary(data_barcelona_test$fecha)
Min. 1st Qu. Median Mean 3rd Qu. Max.
"2022-02-01" "2022-02-15" "2022-03-01" "2022-03-01" "2022-03-15" "2022-03-29"
<- data_barcelona %>%
lambda_Barcelona features(num_casos, features = guerrero) %>%
pull(lambda_guerrero)
<- data_barcelona_train %>%
fit_model model(
arima_at1 = ARIMA(box_cox(num_casos, lambda_Barcelona)),
arima_at2 = ARIMA(box_cox(num_casos, lambda_Barcelona),
stepwise = FALSE, approx = FALSE),
Snaive = SNAIVE(box_cox(num_casos, lambda_Barcelona))
)
# Show and report model
fit_model
# A mable: 1 x 4
# Key: provincia [1]
provincia arima_at1 arima_at2 Snaive
<chr> <model> <model> <model>
1 Barcelona <ARIMA(1,0,2)(0,1,1)[7]> <ARIMA(1,0,4)(0,1,1)[7]> <SNAIVE>
glance(fit_model)
# A tibble: 3 × 9
provincia .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
1 Barcelona arima_at1 0.344 -523. 1055. 1055. 1077. <cpl [1]> <cpl [9]>
2 Barcelona arima_at2 0.341 -520. 1053. 1053. 1084. <cpl [1]> <cpl [11]>
3 Barcelona Snaive 1.26 NA NA NA NA <NULL> <NULL>
%>%
fit_model pivot_longer(-provincia,
names_to = ".model",
values_to = "orders") %>%
left_join(glance(fit_model), by = c("provincia", ".model")) %>%
arrange(AICc) %>%
select(.model:BIC)
# A mable: 3 x 7
# Key: .model [3]
.model orders sigma2 log_lik AIC AICc BIC
<chr> <model> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at2 <ARIMA(1,0,4)(0,1,1)[7]> 0.341 -520. 1053. 1053. 1084.
2 arima_at1 <ARIMA(1,0,2)(0,1,1)[7]> 0.344 -523. 1055. 1055. 1077.
3 Snaive <SNAIVE> 1.26 NA NA NA NA
# We use a Ljung-Box test >> large p-value, confirms residuals are similar / considered to white noise
%>%
fit_model select(Snaive) %>%
gg_tsresiduals()
%>%
fit_model select(arima_at1) %>%
gg_tsresiduals()
%>%
fit_model select(arima_at2) %>%
gg_tsresiduals()
augment(fit_model) %>%
features(.innov, ljung_box, lag=7)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Barcelona arima_at1 33.4 0.0000220
2 Barcelona arima_at2 26.1 0.000480
3 Barcelona Snaive 1654. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=14)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Barcelona arima_at1 51.3 0.00000363
2 Barcelona arima_at2 43.2 0.0000793
3 Barcelona Snaive 2253. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=21)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Barcelona arima_at1 57.6 0.0000289
2 Barcelona arima_at2 49.3 0.000455
3 Barcelona Snaive 2362. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=57)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Barcelona arima_at1 162. 4.59e-12
2 Barcelona arima_at2 153. 9.83e-11
3 Barcelona Snaive 2651. 0
# Significant spikes out of 30 is still consistent with white noise
# To be sure, use a Ljung-Box test, which has a large p-value, confirming that
# the residuals are similar to white noise.
# Note that the alternative models also passed this test.
#
# Forecast
<- fabletools::forecast(fit_model, h = 7)
fc_h7 <- fabletools::forecast(fit_model, h = 14)
fc_h14 <- fabletools::forecast(fit_model, h = 21)
fc_h21 <- fabletools::forecast(fit_model, h = 57) fc_h90
7 days
# Plots
%>%
fc_h7 autoplot(
%>%
data_barcelona_test filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(9))
+
) labs(y = "Nº cases", title = "Barcelona - forecast h7")
%>%
fc_h7 autoplot(
%>%
data_barcelona filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(9) &
> as.Date("2021-12-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Barcelona - forecast h7")
# Accuracy
::accuracy(fc_h7, data_barcelona) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Barcelona Test -4569. 4824. 4569. -62.2 62.2 5.74 2.46 -0.0331
2 arima_at2 Barcelona Test -4145. 4383. 4145. -56.2 56.2 5.21 2.24 -0.0435
3 Snaive Barcelona Test -7870. 8139. 7870. -99.0 99.0 9.88 4.16 0.551
14 days
# Plots
%>%
fc_h14 autoplot(
%>%
data_barcelona_test filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(16))
+
) labs(y = "Nº cases", title = "Barcelona - forecast h14")
%>%
fc_h14 autoplot(
%>%
data_barcelona filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(16) &
> as.Date("2021-12-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Barcelona - forecast h14")
# Accuracy
::accuracy(fc_h14, data_barcelona) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Barcelona Test -6582. 7100. 6582. -132. 132. 8.27 3.63 0.471
2 arima_at2 Barcelona Test -6038. 6534. 6038. -122. 122. 7.58 3.34 0.471
3 Snaive Barcelona Test -9985. 10606. 9985. -188. 188. 12.5 5.42 0.548
21 days
# Plots
%>%
fc_h21 autoplot(
%>%
data_barcelona_test filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(23))
+
) labs(y = "Nº cases", title = "Barcelona - forecast h21")
%>%
fc_h21 autoplot(
%>%
data_barcelona filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(23) &
> as.Date("2021-12-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Barcelona - forecast h21")
# Accuracy
::accuracy(fc_h21, data_barcelona) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Barcelona Test -7823. 8400. 7823. -199. 199. 9.83 4.29 0.573
2 arima_at2 Barcelona Test -7199. 7750. 7199. -184. 184. 9.04 3.96 0.575
3 Snaive Barcelona Test -11275. 12018. 11275. -272. 272. 14.2 6.14 0.602
57 days
# Plots
%>%
fc_h90 autoplot(
%>%
data_barcelona_test filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(59))
+
) labs(y = "Nº cases", title = "Barcelona - forecast h90")
%>%
fc_h90 autoplot(
%>%
data_barcelona filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(59) &
> as.Date("2021-12-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Barcelona - forecast h90")
# Accuracy
::accuracy(fc_h90, data_barcelona) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Barcelona Test -11642. 12454. 11642. -Inf Inf 14.6 6.36 0.695
2 arima_at2 Barcelona Test -10693. 11449. 10693. -Inf Inf 13.4 5.85 0.691
3 Snaive Barcelona Test -14809. 15796. 14809. -Inf Inf 18.6 8.06 0.592
Madrid
<- covid_data %>%
data_madrid filter(provincia == "Madrid") %>%
filter(fecha > as.Date("2020-06-14", format = "%Y-%m-%d")) %>%
select(provincia, fecha, num_casos, num_hosp, tmed,mob_grocery_pharmacy, mob_parks,
%>%
mob_residential, mob_retail_recreation, mob_transit_stations, mob_workplaces, mob_flujo) drop_na(-mob_flujo) %>%
as_tsibble(key = provincia, index = fecha)
data_madrid
# A tsibble: 653 x 12 [1D]
# Key: provincia [1]
provincia fecha num_casos num_hosp tmed mob_grocery_pharmacy mob_parks
<chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Madrid 2020-06-15 153 10 20.7 -8 34
2 Madrid 2020-06-16 91 16 21 -8 21
3 Madrid 2020-06-17 93 14 20.2 -5 34
4 Madrid 2020-06-18 85 17 21.9 -7 30
5 Madrid 2020-06-19 78 20 22.6 -9 22
6 Madrid 2020-06-20 67 20 23.4 -12 25
7 Madrid 2020-06-21 42 3 25 -13 14
8 Madrid 2020-06-22 60 16 27.3 -8 29
9 Madrid 2020-06-23 68 11 28.4 -9 23
10 Madrid 2020-06-24 49 15 28 -7 22
# … with 643 more rows, and 5 more variables: mob_residential <dbl>,
# mob_retail_recreation <dbl>, mob_transit_stations <dbl>,
# mob_workplaces <dbl>, mob_flujo <dbl>
<- data_madrid %>%
data_madrid_train filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d"))
summary(data_madrid_train$fecha)
Min. 1st Qu. Median Mean 3rd Qu. Max.
"2020-06-15" "2020-11-10" "2021-04-08" "2021-04-08" "2021-09-04" "2022-01-31"
<- data_madrid %>%
data_madrid_test filter(fecha > as.Date("2022-01-31", format = "%Y-%m-%d"))
summary(data_madrid_test$fecha)
Min. 1st Qu. Median Mean 3rd Qu. Max.
"2022-02-01" "2022-02-15" "2022-03-01" "2022-03-01" "2022-03-15" "2022-03-29"
<- data_madrid %>%
lambda_Madrid features(num_casos, features = guerrero) %>%
pull(lambda_guerrero)
<- data_madrid_train %>%
fit_model model(
arima_at1 = ARIMA(box_cox(num_casos, lambda_Madrid)),
arima_at2 = ARIMA(box_cox(num_casos, lambda_Madrid),
stepwise = FALSE, approx = FALSE),
Snaive = SNAIVE(box_cox(num_casos, lambda_Madrid))
)
# Show and report model
fit_model
# A mable: 1 x 4
# Key: provincia [1]
provincia arima_at1 arima_at2 Snaive
<chr> <model> <model> <model>
1 Madrid <ARIMA(2,0,2)(0,1,2)[7]> <ARIMA(2,0,2)(0,1,2)[7]> <SNAIVE>
glance(fit_model)
# A tibble: 3 × 9
provincia .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
1 Madrid arima_at1 0.132 -240. 494. 495. 525. <cpl [2]> <cpl [16]>
2 Madrid arima_at2 0.132 -240. 494. 495. 525. <cpl [2]> <cpl [16]>
3 Madrid Snaive 0.412 NA NA NA NA <NULL> <NULL>
%>%
fit_model pivot_longer(-provincia,
names_to = ".model",
values_to = "orders") %>%
left_join(glance(fit_model), by = c("provincia", ".model")) %>%
arrange(AICc) %>%
select(.model:BIC)
# A mable: 3 x 7
# Key: .model [3]
.model orders sigma2 log_lik AIC AICc BIC
<chr> <model> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 <ARIMA(2,0,2)(0,1,2)[7]> 0.132 -240. 494. 495. 525.
2 arima_at2 <ARIMA(2,0,2)(0,1,2)[7]> 0.132 -240. 494. 495. 525.
3 Snaive <SNAIVE> 0.412 NA NA NA NA
# We use a Ljung-Box test >> large p-value, confirms residuals are similar / considered to white noise
%>%
fit_model select(Snaive) %>%
gg_tsresiduals()
%>%
fit_model select(arima_at1) %>%
gg_tsresiduals()
%>%
fit_model select(arima_at2) %>%
gg_tsresiduals()
augment(fit_model) %>%
features(.innov, ljung_box, lag=7)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Madrid arima_at1 24.8 0.000838
2 Madrid arima_at2 24.8 0.000838
3 Madrid Snaive 1617. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=14)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Madrid arima_at1 38.5 0.000443
2 Madrid arima_at2 38.5 0.000443
3 Madrid Snaive 2282. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=21)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Madrid arima_at1 47.9 0.000718
2 Madrid arima_at2 47.9 0.000718
3 Madrid Snaive 2422. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=57)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Madrid arima_at1 123. 0.000000995
2 Madrid arima_at2 123. 0.000000995
3 Madrid Snaive 2762. 0
# Significant spikes out of 30 is still consistent with white noise
# To be sure, use a Ljung-Box test, which has a large p-value, confirming that
# the residuals are similar to white noise.
# Note that the alternative models also passed this test.
#
# Forecast
<- fabletools::forecast(fit_model, h = 7)
fc_h7 <- fabletools::forecast(fit_model, h = 14)
fc_h14 <- fabletools::forecast(fit_model, h = 21)
fc_h21 <- fabletools::forecast(fit_model, h = 57) fc_h90
7 days
# Plots
%>%
fc_h7 autoplot(
%>%
data_madrid_test filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(9))
+
) labs(y = "Nº cases", title = "Madrid - forecast h7")
%>%
fc_h7 autoplot(
%>%
data_madrid filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(9) &
> as.Date("2021-12-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Madrid - forecast h7")
# Accuracy
::accuracy(fc_h7, data_madrid) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Madrid Test 471. 788. 629. 8.70 11.7 0.843 0.441 0.0572
2 arima_at2 Madrid Test 471. 788. 629. 8.70 11.7 0.843 0.441 0.0572
3 Snaive Madrid Test -1836. 2070. 1836. -36.2 36.2 2.46 1.16 0.541
14 days
# Plots
%>%
fc_h14 autoplot(
%>%
data_madrid_test filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(16))
+
) labs(y = "Nº cases", title = "Madrid - forecast h14")
%>%
fc_h14 autoplot(
%>%
data_madrid filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(16) &
> as.Date("2021-12-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Madrid - forecast h14")
# Accuracy
::accuracy(fc_h14, data_madrid) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Madrid Test 174. 612. 467. 2.98 10.9 0.625 0.343 0.215
2 arima_at2 Madrid Test 174. 612. 467. 2.98 10.9 0.625 0.343 0.215
3 Snaive Madrid Test -2914. 3386. 2914. -77.1 77.1 3.90 1.90 0.466
21 days
# Plots
%>%
fc_h21 autoplot(
%>%
data_madrid_test filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(23))
+
) labs(y = "Nº cases", title = "Madrid - forecast h21")
%>%
fc_h21 autoplot(
%>%
data_madrid filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(23) &
> as.Date("2021-12-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Madrid - forecast h21")
# Accuracy
::accuracy(fc_h21, data_madrid) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Madrid Test -228. 851. 664. -16.7 26.5 0.888 0.477 0.512
2 arima_at2 Madrid Test -228. 851. 664. -16.7 26.5 0.888 0.477 0.512
3 Snaive Madrid Test -3903. 4584. 3903. -158. 158. 5.23 2.57 0.574
57 days
# Plots
%>%
fc_h90 autoplot(
%>%
data_madrid_test filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(59))
+
) labs(y = "Nº cases", title = "Madrid - forecast h90")
%>%
fc_h90 autoplot(
%>%
data_madrid filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(59) &
> as.Date("2021-12-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Madrid - forecast h90")
# Accuracy
::accuracy(fc_h90, data_madrid) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Madrid Test -1540. 2217. 1701. -2124. 2127. 2.28 1.24 0.699
2 arima_at2 Madrid Test -1540. 2217. 1701. -2124. 2127. 2.28 1.24 0.699
3 Snaive Madrid Test -6507. 7446. 6507. -4965. 4965. 8.71 4.17 0.627
Málaga
<- covid_data %>%
data_malaga filter(provincia == "Málaga") %>%
filter(fecha > as.Date("2020-06-14", format = "%Y-%m-%d")) %>%
select(provincia, fecha, num_casos, num_hosp, tmed,mob_grocery_pharmacy, mob_parks,
%>%
mob_residential, mob_retail_recreation, mob_transit_stations, mob_workplaces, mob_flujo) drop_na(-mob_flujo) %>%
as_tsibble(key = provincia, index = fecha)
data_malaga
# A tsibble: 653 x 12 [1D]
# Key: provincia [1]
provincia fecha num_casos num_hosp tmed mob_grocery_pharmacy mob_parks
<chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Málaga 2020-06-15 1 0 27.2 -16 -9
2 Málaga 2020-06-16 1 0 27.8 -13 -4
3 Málaga 2020-06-17 2 0 26.9 -13 1
4 Málaga 2020-06-18 2 1 21.6 -12 3
5 Málaga 2020-06-19 1 0 24.7 -12 2
6 Málaga 2020-06-20 4 0 22.6 -14 8
7 Málaga 2020-06-21 4 0 22.7 -16 -3
8 Málaga 2020-06-22 6 0 25 -13 1
9 Málaga 2020-06-23 8 0 25.6 -10 18
10 Málaga 2020-06-24 65 2 24.5 -13 12
# … with 643 more rows, and 5 more variables: mob_residential <dbl>,
# mob_retail_recreation <dbl>, mob_transit_stations <dbl>,
# mob_workplaces <dbl>, mob_flujo <dbl>
<- data_malaga %>%
data_malaga_train filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d"))
summary(data_malaga_train$fecha)
Min. 1st Qu. Median Mean 3rd Qu. Max.
"2020-06-15" "2020-11-10" "2021-04-08" "2021-04-08" "2021-09-04" "2022-01-31"
<- data_malaga %>%
data_malaga_test filter(fecha > as.Date("2022-01-31", format = "%Y-%m-%d"))
summary(data_malaga_test$fecha)
Min. 1st Qu. Median Mean 3rd Qu. Max.
"2022-02-01" "2022-02-15" "2022-03-01" "2022-03-01" "2022-03-15" "2022-03-29"
<- data_malaga %>%
lambda_Malaga features(num_casos, features = guerrero) %>%
pull(lambda_guerrero)
<- data_malaga_train %>%
fit_model model(
arima_at1 = ARIMA(box_cox(num_casos, lambda_Malaga)),
arima_at2 = ARIMA(box_cox(num_casos, lambda_Malaga),
stepwise = FALSE, approx = FALSE),
Snaive = SNAIVE(box_cox(num_casos, lambda_Malaga))
)
# Show and report model
fit_model
# A mable: 1 x 4
# Key: provincia [1]
provincia arima_at1 arima_at2 Snaive
<chr> <model> <model> <model>
1 Málaga <ARIMA(2,0,3)(0,1,1)[7]> <ARIMA(2,0,3)(0,1,1)[7]> <SNAIVE>
glance(fit_model)
# A tibble: 3 × 9
provincia .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
1 Málaga arima_at1 0.584 -679. 1372. 1372. 1402. <cpl [2]> <cpl [10]>
2 Málaga arima_at2 0.584 -679. 1372. 1372. 1402. <cpl [2]> <cpl [10]>
3 Málaga Snaive 2.41 NA NA NA NA <NULL> <NULL>
%>%
fit_model pivot_longer(-provincia,
names_to = ".model",
values_to = "orders") %>%
left_join(glance(fit_model), by = c("provincia", ".model")) %>%
arrange(AICc) %>%
select(.model:BIC)
# A mable: 3 x 7
# Key: .model [3]
.model orders sigma2 log_lik AIC AICc BIC
<chr> <model> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 <ARIMA(2,0,3)(0,1,1)[7]> 0.584 -679. 1372. 1372. 1402.
2 arima_at2 <ARIMA(2,0,3)(0,1,1)[7]> 0.584 -679. 1372. 1372. 1402.
3 Snaive <SNAIVE> 2.41 NA NA NA NA
# We use a Ljung-Box test >> large p-value, confirms residuals are similar / considered to white noise
%>%
fit_model select(Snaive) %>%
gg_tsresiduals()
%>%
fit_model select(arima_at1) %>%
gg_tsresiduals()
%>%
fit_model select(arima_at2) %>%
gg_tsresiduals()
augment(fit_model) %>%
features(.innov, ljung_box, lag=7)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Málaga arima_at1 3.28 0.858
2 Málaga arima_at2 3.28 0.858
3 Málaga Snaive 1621. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=14)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Málaga arima_at1 15.4 0.350
2 Málaga arima_at2 15.4 0.350
3 Málaga Snaive 2376. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=21)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Málaga arima_at1 29.2 0.109
2 Málaga arima_at2 29.2 0.109
3 Málaga Snaive 2555. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=57)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Málaga arima_at1 58.4 0.423
2 Málaga arima_at2 58.4 0.423
3 Málaga Snaive 3422. 0
# Significant spikes out of 30 is still consistent with white noise
# To be sure, use a Ljung-Box test, which has a large p-value, confirming that
# the residuals are similar to white noise.
# Note that the alternative models also passed this test.
#
# Forecast
<- fabletools::forecast(fit_model, h = 7)
fc_h7 <- fabletools::forecast(fit_model, h = 14)
fc_h14 <- fabletools::forecast(fit_model, h = 21)
fc_h21 <- fabletools::forecast(fit_model, h = 57) fc_h90
7 days
# Plots
%>%
fc_h7 autoplot(
%>%
data_malaga_test filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(9))
+
) labs(y = "Nº cases", title = "Malaga - forecast h7")
%>%
fc_h7 autoplot(
%>%
data_malaga filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(9) &
> as.Date("2021-12-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Malaga - forecast h7")
# Accuracy
::accuracy(fc_h7, data_malaga) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Málaga Test 74.3 190. 152. 3.78 24.3 1.15 0.769 0.249
2 arima_at2 Málaga Test 74.3 190. 152. 3.78 24.3 1.15 0.769 0.249
3 Snaive Málaga Test -170. 240. 191. -33.7 35.9 1.45 0.968 0.246
14 days
# Plots
%>%
fc_h14 autoplot(
%>%
data_malaga_test filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(16))
+
) labs(y = "Nº cases", title = "Malaga - forecast h14")
%>%
fc_h14 autoplot(
%>%
data_malaga filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(16) &
> as.Date("2021-12-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Malaga - forecast h14")
# Accuracy
::accuracy(fc_h14, data_malaga) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Málaga Test 185. 290. 229. 17.9 29.6 1.73 1.17 0.340
2 arima_at2 Málaga Test 185. 290. 229. 17.9 29.6 1.73 1.17 0.340
3 Snaive Málaga Test -141. 223. 170. -28.9 31.8 1.28 0.899 0.0239
21 days
# Plots
%>%
fc_h21 autoplot(
%>%
data_malaga_test filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(23))
+
) labs(y = "Nº cases", title = "Malaga - forecast h21")
%>%
fc_h21 autoplot(
%>%
data_malaga filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(23) &
> as.Date("2021-12-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Malaga - forecast h21")
# Accuracy
::accuracy(fc_h21, data_malaga) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Málaga Test 173. 267. 203. 19.1 26.9 1.53 1.08 0.422
2 arima_at2 Málaga Test 173. 267. 203. 19.1 26.9 1.53 1.08 0.422
3 Snaive Málaga Test -208. 279. 227. -41.4 43.4 1.72 1.13 0.172
57 days
# Plots
%>%
fc_h90 autoplot(
%>%
data_malaga_test filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(59))
+
) labs(y = "Nº cases", title = "Malaga - forecast h90")
%>%
fc_h90 autoplot(
%>%
data_malaga filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(59) &
> as.Date("2021-12-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Malaga - forecast h90")
# Accuracy
::accuracy(fc_h90, data_malaga) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Málaga Test -6.39 278. 190. -Inf Inf 1.44 1.12 0.656
2 arima_at2 Málaga Test -6.39 278. 190. -Inf Inf 1.44 1.12 0.656
3 Snaive Málaga Test -436. 525. 443. -Inf Inf 3.35 2.12 0.513
Sevilla
<- covid_data %>%
data_sevilla filter(provincia == "Sevilla") %>%
filter(fecha > as.Date("2020-06-14", format = "%Y-%m-%d")) %>%
select(provincia, fecha, num_casos, num_hosp, tmed,mob_grocery_pharmacy, mob_parks,
%>%
mob_residential, mob_retail_recreation, mob_transit_stations, mob_workplaces, mob_flujo) drop_na(-mob_flujo) %>%
as_tsibble(key = provincia, index = fecha)
data_sevilla
# A tsibble: 653 x 12 [1D]
# Key: provincia [1]
provincia fecha num_casos num_hosp tmed mob_grocery_pharmacy mob_parks
<chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Sevilla 2020-06-15 2 1 23.1 -14 -13
2 Sevilla 2020-06-16 1 0 24.0 -10 -4
3 Sevilla 2020-06-17 0 0 24.9 -10 -7
4 Sevilla 2020-06-18 0 0 25.8 -12 -13
5 Sevilla 2020-06-19 0 0 26.6 -13 -20
6 Sevilla 2020-06-20 0 0 27.5 -20 -34
7 Sevilla 2020-06-21 0 0 28.4 -27 -47
8 Sevilla 2020-06-22 1 0 29.3 -17 -19
9 Sevilla 2020-06-23 0 0 30.2 -12 -13
10 Sevilla 2020-06-24 1 0 29.4 -12 -12
# … with 643 more rows, and 5 more variables: mob_residential <dbl>,
# mob_retail_recreation <dbl>, mob_transit_stations <dbl>,
# mob_workplaces <dbl>, mob_flujo <dbl>
<- data_sevilla %>%
data_sevilla_train filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d"))
summary(data_sevilla_train$fecha)
Min. 1st Qu. Median Mean 3rd Qu. Max.
"2020-06-15" "2020-11-10" "2021-04-08" "2021-04-08" "2021-09-04" "2022-01-31"
<- data_sevilla %>%
data_sevilla_test filter(fecha > as.Date("2022-01-31", format = "%Y-%m-%d"))
summary(data_sevilla_test$fecha)
Min. 1st Qu. Median Mean 3rd Qu. Max.
"2022-02-01" "2022-02-15" "2022-03-01" "2022-03-01" "2022-03-15" "2022-03-29"
<- data_sevilla %>%
lambda_Sevilla features(num_casos, features = guerrero) %>%
pull(lambda_guerrero)
<- data_sevilla_train %>%
fit_model model(
arima_at1 = ARIMA(box_cox(num_casos, lambda_Sevilla)),
arima_at2 = ARIMA(box_cox(num_casos, lambda_Sevilla),
stepwise = FALSE, approx = FALSE),
Snaive = SNAIVE(box_cox(num_casos, lambda_Sevilla))
)
# Show and report model
fit_model
# A mable: 1 x 4
# Key: provincia [1]
provincia arima_at1 arima_at2 Snaive
<chr> <model> <model> <model>
1 Sevilla <ARIMA(3,1,1)(2,0,0)[7]> <ARIMA(4,1,0)(2,0,0)[7]> <SNAIVE>
glance(fit_model)
# A tibble: 3 × 9
provincia .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
1 Sevilla arima_at1 0.962 -832. 1679. 1679. 1709. <cpl [17]> <cpl [1]>
2 Sevilla arima_at2 0.962 -832. 1678. 1679. 1709. <cpl [18]> <cpl [0]>
3 Sevilla Snaive 2.23 NA NA NA NA <NULL> <NULL>
%>%
fit_model pivot_longer(-provincia,
names_to = ".model",
values_to = "orders") %>%
left_join(glance(fit_model), by = c("provincia", ".model")) %>%
arrange(AICc) %>%
select(.model:BIC)
# A mable: 3 x 7
# Key: .model [3]
.model orders sigma2 log_lik AIC AICc BIC
<chr> <model> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at2 <ARIMA(4,1,0)(2,0,0)[7]> 0.962 -832. 1678. 1679. 1709.
2 arima_at1 <ARIMA(3,1,1)(2,0,0)[7]> 0.962 -832. 1679. 1679. 1709.
3 Snaive <SNAIVE> 2.23 NA NA NA NA
# We use a Ljung-Box test >> large p-value, confirms residuals are similar / considered to white noise
%>%
fit_model select(Snaive) %>%
gg_tsresiduals()
%>%
fit_model select(arima_at1) %>%
gg_tsresiduals()
%>%
fit_model select(arima_at2) %>%
gg_tsresiduals()
augment(fit_model) %>%
features(.innov, ljung_box, lag=7)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Sevilla arima_at1 7.13 0.416
2 Sevilla arima_at2 6.24 0.512
3 Sevilla Snaive 965. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=14)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Sevilla arima_at1 27.9 0.0146
2 Sevilla arima_at2 27.1 0.0185
3 Sevilla Snaive 1403. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=21)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Sevilla arima_at1 38.6 0.0108
2 Sevilla arima_at2 37.9 0.0132
3 Sevilla Snaive 1510. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=57)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Sevilla arima_at1 78.0 0.0335
2 Sevilla arima_at2 77.4 0.0372
3 Sevilla Snaive 1813. 0
# Significant spikes out of 30 is still consistent with white noise
# To be sure, use a Ljung-Box test, which has a large p-value, confirming that
# the residuals are similar to white noise.
# Note that the alternative models also passed this test.
#
# Forecast
<- fabletools::forecast(fit_model, h = 7)
fc_h7 <- fabletools::forecast(fit_model, h = 14)
fc_h14 <- fabletools::forecast(fit_model, h = 21)
fc_h21 <- fabletools::forecast(fit_model, h = 57) fc_h90
7 days
# Plots
%>%
fc_h7 autoplot(
%>%
data_sevilla_test filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(9))
+
) labs(y = "Nº cases", title = "Sevilla - forecast h7")
%>%
fc_h7 autoplot(
%>%
data_sevilla filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(9) &
> as.Date("2021-12-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Sevilla - forecast h7")
# Accuracy
::accuracy(fc_h7, data_sevilla) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Sevilla Test 27.5 78.6 56.6 3.30 10.0 0.396 0.288 0.143
2 arima_at2 Sevilla Test 29.1 78.8 55.9 3.60 9.91 0.391 0.289 0.142
3 Snaive Sevilla Test -305. 368. 311. -48.0 49.4 2.17 1.35 0.523
14 days
# Plots
%>%
fc_h14 autoplot(
%>%
data_sevilla_test filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(16))
+
) labs(y = "Nº cases", title = "Sevilla - forecast h14")
%>%
fc_h14 autoplot(
%>%
data_sevilla filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(16) &
> as.Date("2021-12-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Sevilla - forecast h14")
# Accuracy
::accuracy(fc_h14, data_sevilla) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Sevilla Test 131. 223. 156. 13.5 21.5 1.09 0.817 0.433
2 arima_at2 Sevilla Test 133. 224. 157. 13.9 21.4 1.09 0.822 0.432
3 Snaive Sevilla Test -297. 338. 299. -50.4 51.1 2.09 1.24 0.411
21 days
# Plots
%>%
fc_h21 autoplot(
%>%
data_sevilla_test filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(23))
+
) labs(y = "Nº cases", title = "Sevilla - forecast h21")
%>%
fc_h21 autoplot(
%>%
data_sevilla filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(23) &
> as.Date("2021-12-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Sevilla - forecast h21")
# Accuracy
::accuracy(fc_h21, data_sevilla) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Sevilla Test 118. 208. 154. 9.96 26.1 1.08 0.762 0.499
2 arima_at2 Sevilla Test 121. 209. 155. 10.4 26.0 1.08 0.767 0.498
3 Snaive Sevilla Test -380. 430. 381. -76.5 77.0 2.67 1.58 0.507
57 days
# Plots
%>%
fc_h90 autoplot(
%>%
data_sevilla_test filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(59))
+
) labs(y = "Nº cases", title = "Sevilla - forecast h90")
%>%
fc_h90 autoplot(
%>%
data_sevilla filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(59) &
> as.Date("2021-12-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Sevilla - forecast h90")
# Accuracy
::accuracy(fc_h90, data_sevilla) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Sevilla Test -49.7 208. 157. -Inf Inf 1.10 0.761 0.718
2 arima_at2 Sevilla Test -47.4 207. 156. -Inf Inf 1.09 0.759 0.717
3 Snaive Sevilla Test -737. 850. 738. -Inf Inf 5.16 3.11 0.642