::p_load(
pacman# file locator
here, # data management and ggplot2 graphics
tidyverse, # get overview of data
skimr, # produce and adorn tabulations and cross-tabulations
janitor,
tsibble,
fable,
feasts,
fabletools,
lubridate
)
<- readRDS(here("data", "clean", "final_covid_data.rds")) covid_data
Multivariate (temperature) prediction
Load packages:
The temporal limits of our prediction will be:
- Data as of June 14th 2020 which is considered the beginning of the second epidemiological period in Spain
- In the first case scenario, data previous the start of the sixth wave of covid-19 (14 oct 2021).
- In the second case scenario, all the available data (limited by mobility information 29 dec 2021)
Before sixth epidemiological period
Asturias
<- covid_data %>%
data_asturias filter(provincia == "Asturias") %>%
filter(fecha > as.Date("2020-06-14", format = "%Y-%m-%d")) %>%
select(provincia, fecha, num_casos, tmed) %>%
drop_na() %>%
as_tsibble(key = provincia, index = fecha)
data_asturias
# A tsibble: 653 x 4 [1D]
# Key: provincia [1]
provincia fecha num_casos tmed
<chr> <date> <dbl> <dbl>
1 Asturias 2020-06-15 0 16
2 Asturias 2020-06-16 1 15.6
3 Asturias 2020-06-17 0 15.6
4 Asturias 2020-06-18 0 15.1
5 Asturias 2020-06-19 0 14.8
6 Asturias 2020-06-20 0 16.8
7 Asturias 2020-06-21 0 18.2
8 Asturias 2020-06-22 0 18.2
9 Asturias 2020-06-23 0 20
10 Asturias 2020-06-24 0 18.2
# … with 643 more rows
<- data_asturias %>%
data_asturias_train filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d"))
summary(data_asturias_train$fecha)
Min. 1st Qu. Median Mean 3rd Qu. Max.
"2020-06-15" "2020-10-14" "2021-02-13" "2021-02-13" "2021-06-14" "2021-10-14"
<- data_asturias %>%
data_asturias_test filter(fecha > as.Date("2021-10-14", format = "%Y-%m-%d"))
summary(data_asturias_test$fecha)
Min. 1st Qu. Median Mean 3rd Qu. Max.
"2021-10-15" "2021-11-25" "2022-01-05" "2022-01-05" "2022-02-15" "2022-03-29"
# Lamba for num_casos
<- data_asturias %>%
lambda_asturias_casos features(num_casos, features = guerrero) %>%
pull(lambda_guerrero)
lambda_asturias_casos
[1] 0.2278229
# Lamba for average temp
<- data_asturias %>%
lambda_asturias_tmed features(tmed, features = guerrero) %>%
pull(lambda_guerrero)
lambda_asturias_tmed
[1] 1.056074
<- data_asturias_train %>%
fit_model model(
arima_at1 = ARIMA(box_cox(num_casos, lambda_asturias_casos) ~
box_cox(tmed, lambda_asturias_tmed)),
arima_at2 = ARIMA(box_cox(num_casos, lambda_asturias_casos) ~
box_cox(tmed, lambda_asturias_tmed),
stepwise = FALSE, approx = FALSE),
Snaive = SNAIVE(box_cox(num_casos, lambda_asturias_casos))
)
# Show and report model
fit_model
# A mable: 1 x 4
# Key: provincia [1]
provincia arima_at1 arima_at2
<chr> <model> <model>
1 Asturias <LM w/ ARIMA(3,1,1)(2,0,0)[7] errors> <LM w/ ARIMA(2,1,4) errors>
# … with 1 more variable: Snaive <model>
glance(fit_model)
# A tibble: 3 × 9
provincia .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
1 Asturias arima_at1 0.946 -673. 1362. 1362. 1396. <cpl [17]> <cpl [1]>
2 Asturias arima_at2 0.912 -664. 1345. 1345. 1378. <cpl [2]> <cpl [4]>
3 Asturias Snaive 2.32 NA NA NA NA <NULL> <NULL>
%>%
fit_model pivot_longer(-provincia,
names_to = ".model",
values_to = "orders") %>%
left_join(glance(fit_model), by = c("provincia", ".model")) %>%
arrange(AICc) %>%
select(.model:BIC)
# A mable: 3 x 7
# Key: .model [3]
.model orders sigma2 log_lik AIC AICc BIC
<chr> <model> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_… <LM w/ ARIMA(2,1,4) errors> 0.912 -664. 1345. 1345. 1378.
2 arima_… <LM w/ ARIMA(3,1,1)(2,0,0)[7] errors> 0.946 -673. 1362. 1362. 1396.
3 Snaive <SNAIVE> 2.32 NA NA NA NA
# We use a Ljung-Box test >> large p-value, confirms residuals are similar / considered to white noise
%>%
fit_model select(Snaive) %>%
gg_tsresiduals()
%>%
fit_model select(arima_at1) %>%
gg_tsresiduals()
%>%
fit_model select(arima_at2) %>%
gg_tsresiduals()
augment(fit_model) %>%
features(.innov, ljung_box, lag=7)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Asturias arima_at1 10.6 0.159
2 Asturias arima_at2 8.12 0.322
3 Asturias Snaive 629. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=14)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Asturias arima_at1 32.8 0.00309
2 Asturias arima_at2 23.6 0.0509
3 Asturias Snaive 892. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=21)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Asturias arima_at1 49.1 0.000485
2 Asturias arima_at2 33.3 0.0432
3 Asturias Snaive 927. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=90)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Asturias arima_at1 118. 0.0237
2 Asturias arima_at2 86.6 0.582
3 Asturias Snaive 1195. 0
# To predict future values of infections, we need to pass the model future/prediction values of the complementary variables tmed and mobility
# We are going to select those from the test dataset
<- data_asturias_test %>%
data_fc_h7 select(-num_casos) %>%
slice(1:7)
<- data_asturias_test %>%
data_fc_h14 select(-num_casos) %>%
slice(1:14)
<- data_asturias_test %>%
data_fc_h21 select(-num_casos) %>%
slice(1:21)
<- data_asturias_test %>%
data_fc_h90 select(-num_casos) %>%
slice(1:90)
# Significant spikes out of 30 is still consistent with white noise
# To be sure, use a Ljung-Box test, which has a large p-value, confirming that
# the residuals are similar to white noise.
# Note that the alternative models also passed this test.
#
# Forecast
<- fabletools::forecast(fit_model, new_data = data_fc_h7)
fc_h7 <- fabletools::forecast(fit_model, new_data = data_fc_h14)
fc_h14 <- fabletools::forecast(fit_model, new_data = data_fc_h21)
fc_h21 <- fabletools::forecast(fit_model, new_data = data_fc_h90) fc_h90
7 days
# Plots
%>%
fc_h7 autoplot(
%>%
data_asturias_test filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(9))
+
) labs(y = "Nº cases", title = "Asturias - forecast h7")
%>%
fc_h7 autoplot(
%>%
data_asturias filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(9) &
> as.Date("2021-07-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Asturias - forecast h7")
# Accuracy
::accuracy(fc_h7, data_asturias) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Asturias Test 5.43 10.1 6.80 14.8 27.6 0.137 0.123 0.198
2 arima_at2 Asturias Test 6.02 9.93 7.26 18.2 30.5 0.146 0.121 0.384
3 Snaive Asturias Test 2.41 14.0 9.98 -10.1 49.0 0.201 0.171 0.00766
14 days
# Plots
%>%
fc_h14 autoplot(
%>%
data_asturias_test filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(16))
+
) labs(y = "Nº cases", title = "Asturias - forecast h14")
%>%
fc_h14 autoplot(
%>%
data_asturias filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(16) &
> as.Date("2021-07-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Asturias - forecast h14")
# Accuracy
::accuracy(fc_h14, data_asturias) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Asturias Test 10.3 14.2 11.0 29.1 35.5 0.222 0.173 0.171
2 arima_at2 Asturias Test 10.9 14.1 11.5 32.0 38.1 0.232 0.172 0.303
3 Snaive Asturias Test 6.69 16.0 11.4 8.17 41.5 0.229 0.195 -0.156
21 days
# Plots
%>%
fc_h21 autoplot(
%>%
data_asturias_test filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(23))
+
) labs(y = "Nº cases", title = "Asturias - forecast h21")
%>%
fc_h21 autoplot(
%>%
data_asturias filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(23) &
> as.Date("2021-07-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Asturias - forecast h21")
# Accuracy
::accuracy(fc_h21, data_asturias) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Asturias Test 9.72 13.4 10.4 27.1 32.5 0.209 0.164 0.0341
2 arima_at2 Asturias Test 10.6 13.7 11.0 31.0 35.1 0.222 0.167 0.127
3 Snaive Asturias Test 5.77 15.6 11.5 6.52 40.8 0.232 0.190 -0.223
90 days
# Plots
%>%
fc_h90 autoplot(
%>%
data_asturias_test filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(92))
+
) labs(y = "Nº cases", title = "Asturias - forecast h90")
%>%
fc_h90 autoplot(
%>%
data_asturias filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(92) &
> as.Date("2021-07-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Asturias - forecast h90")
# Accuracy
::accuracy(fc_h90, data_asturias) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Asturias Test 704. 1211. 704. 67.5 70.0 14.2 14.8 0.925
2 arima_at2 Asturias Test 713. 1221. 714. 71.1 72.9 14.4 14.9 0.926
3 Snaive Asturias Test 702. 1213. 704. 60.0 71.7 14.2 14.8 0.925
Barcelona
<- covid_data %>%
data_Barcelona filter(provincia == "Barcelona") %>%
filter(fecha > as.Date("2020-06-14", format = "%Y-%m-%d")) %>%
select(provincia, fecha, num_casos, tmed) %>%
drop_na() %>%
as_tsibble(key = provincia, index = fecha)
data_Barcelona
# A tsibble: 653 x 4 [1D]
# Key: provincia [1]
provincia fecha num_casos tmed
<chr> <date> <dbl> <dbl>
1 Barcelona 2020-06-15 62 21.2
2 Barcelona 2020-06-16 66 20.8
3 Barcelona 2020-06-17 70 20.3
4 Barcelona 2020-06-18 68 18.8
5 Barcelona 2020-06-19 65 20.4
6 Barcelona 2020-06-20 53 21.8
7 Barcelona 2020-06-21 38 22.8
8 Barcelona 2020-06-22 69 23.8
9 Barcelona 2020-06-23 95 23.4
10 Barcelona 2020-06-24 44 23.6
# … with 643 more rows
<- data_Barcelona %>%
data_Barcelona_train filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d"))
summary(data_Barcelona_train$fecha)
Min. 1st Qu. Median Mean 3rd Qu. Max.
"2020-06-15" "2020-10-14" "2021-02-13" "2021-02-13" "2021-06-14" "2021-10-14"
<- data_Barcelona %>%
data_Barcelona_test filter(fecha > as.Date("2021-10-14", format = "%Y-%m-%d"))
summary(data_Barcelona_test$fecha)
Min. 1st Qu. Median Mean 3rd Qu. Max.
"2021-10-15" "2021-11-25" "2022-01-05" "2022-01-05" "2022-02-15" "2022-03-29"
# Lamba for num_casos
<- data_Barcelona %>%
lambda_Barcelona_casos features(num_casos, features = guerrero) %>%
pull(lambda_guerrero)
lambda_Barcelona_casos
[1] 0.1434101
# Lamba for average temp
<- data_Barcelona %>%
lambda_Barcelona_tmed features(tmed, features = guerrero) %>%
pull(lambda_guerrero)
lambda_Barcelona_tmed
[1] 1.098957
<- data_Barcelona_train %>%
fit_model model(
arima_at1 = ARIMA(box_cox(num_casos, lambda_Barcelona_casos) ~
box_cox(tmed, lambda_Barcelona_tmed)),
arima_at2 = ARIMA(box_cox(num_casos, lambda_Barcelona_casos) ~
box_cox(tmed, lambda_Barcelona_tmed),
stepwise = FALSE, approx = FALSE),
Snaive = SNAIVE(box_cox(num_casos, lambda_Barcelona_casos))
)
# Show and report model
fit_model
# A mable: 1 x 4
# Key: provincia [1]
provincia arima_at1
<chr> <model>
1 Barcelona <LM w/ ARIMA(0,1,3)(0,1,2)[7] errors>
# … with 2 more variables: arima_at2 <model>, Snaive <model>
glance(fit_model)
# A tibble: 3 × 9
provincia .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
1 Barcelona arima_at1 0.195 -291. 595. 595. 624. <cpl [0]> <cpl [17]>
2 Barcelona arima_at2 0.187 -280. 575. 575. 604. <cpl [1]> <cpl [16]>
3 Barcelona Snaive 0.964 NA NA NA NA <NULL> <NULL>
%>%
fit_model pivot_longer(-provincia,
names_to = ".model",
values_to = "orders") %>%
left_join(glance(fit_model), by = c("provincia", ".model")) %>%
arrange(AICc) %>%
select(.model:BIC)
# A mable: 3 x 7
# Key: .model [3]
.model orders sigma2 log_lik AIC AICc BIC
<chr> <model> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_… <LM w/ ARIMA(1,1,2)(0,1,2)[7] errors> 0.187 -280. 575. 575. 604.
2 arima_… <LM w/ ARIMA(0,1,3)(0,1,2)[7] errors> 0.195 -291. 595. 595. 624.
3 Snaive <SNAIVE> 0.964 NA NA NA NA
# We use a Ljung-Box test >> large p-value, confirms residuals are similar / considered to white noise
%>%
fit_model select(Snaive) %>%
gg_tsresiduals()
%>%
fit_model select(arima_at1) %>%
gg_tsresiduals()
%>%
fit_model select(arima_at2) %>%
gg_tsresiduals()
augment(fit_model) %>%
features(.innov, ljung_box, lag=7)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Barcelona arima_at1 13.5 0.0615
2 Barcelona arima_at2 3.11 0.874
3 Barcelona Snaive 1610. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=14)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Barcelona arima_at1 29.3 0.00944
2 Barcelona arima_at2 9.65 0.787
3 Barcelona Snaive 2198. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=21)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Barcelona arima_at1 32.8 0.0484
2 Barcelona arima_at2 14.3 0.856
3 Barcelona Snaive 2259. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=90)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Barcelona arima_at1 133. 0.00214
2 Barcelona arima_at2 86.8 0.576
3 Barcelona Snaive 3453. 0
# To predict future values of infections, we need to pass the model future/prediction values of the complementary variables tmed and mobility
# We are going to select those from the test dataset
<- data_Barcelona_test %>%
data_fc_h7 select(-num_casos) %>%
slice(1:7)
<- data_Barcelona_test %>%
data_fc_h14 select(-num_casos) %>%
slice(1:14)
<- data_Barcelona_test %>%
data_fc_h21 select(-num_casos) %>%
slice(1:21)
<- data_Barcelona_test %>%
data_fc_h90 select(-num_casos) %>%
slice(1:90)
# Significant spikes out of 30 is still consistent with white noise
# To be sure, use a Ljung-Box test, which has a large p-value, confirming that
# the residuals are similar to white noise.
# Note that the alternative models also passed this test.
#
# Forecast
<- fabletools::forecast(fit_model, new_data = data_fc_h7)
fc_h7 <- fabletools::forecast(fit_model, new_data = data_fc_h14)
fc_h14 <- fabletools::forecast(fit_model, new_data = data_fc_h21)
fc_h21 <- fabletools::forecast(fit_model, new_data = data_fc_h90) fc_h90
7 days
# Plots
%>%
fc_h7 autoplot(
%>%
data_Barcelona_test filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(9))
+
) labs(y = "Nº cases", title = "Barcelona - forecast h7")
%>%
fc_h7 autoplot(
%>%
data_Barcelona filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(9) &
> as.Date("2021-07-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Barcelona - forecast h7")
# Accuracy
::accuracy(fc_h7, data_Barcelona) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Barcelona Test 16.6 64.7 56.2 0.729 30.0 0.149 0.0975 0.327
2 arima_at2 Barcelona Test 19.8 67.5 56.3 2.11 29.6 0.150 0.102 0.211
3 Snaive Barcelona Test -21.0 79.0 67.8 -15.5 40.3 0.180 0.119 0.184
14 days
# Plots
%>%
fc_h14 autoplot(
%>%
data_Barcelona_test filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(16))
+
) labs(y = "Nº cases", title = "Barcelona - forecast h14")
%>%
fc_h14 autoplot(
%>%
data_Barcelona filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(16) &
> as.Date("2021-07-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Barcelona - forecast h14")
# Accuracy
::accuracy(fc_h14, data_Barcelona) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Barcelona Test 36.6 75.7 59.2 10.6 27.1 0.157 0.114 0.308
2 arima_at2 Barcelona Test 39.1 78.5 60.5 11.7 27.4 0.161 0.118 0.245
3 Snaive Barcelona Test -15.3 87.1 76.0 -12.2 40.4 0.202 0.131 0.0610
21 days
# Plots
%>%
fc_h21 autoplot(
%>%
data_Barcelona_test filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(23))
+
) labs(y = "Nº cases", title = "Barcelona - forecast h21")
%>%
fc_h21 autoplot(
%>%
data_Barcelona filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(23) &
> as.Date("2021-07-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Barcelona - forecast h21")
# Accuracy
::accuracy(fc_h21, data_Barcelona) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Barcelona Test 60.6 109. 78.0 16.3 29.7 0.207 0.165 0.206
2 arima_at2 Barcelona Test 58.3 106. 76.7 15.5 29.5 0.204 0.160 0.122
3 Snaive Barcelona Test -2.10 119. 90.4 -10.9 41.3 0.240 0.179 -0.239
90 days
# Plots
%>%
fc_h90 autoplot(
%>%
data_Barcelona_test filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(92))
+
) labs(y = "Nº cases", title = "Barcelona - forecast h90")
%>%
fc_h90 autoplot(
%>%
data_Barcelona filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(92) &
> as.Date("2021-07-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Barcelona - forecast h90")
# Accuracy
::accuracy(fc_h90, data_Barcelona) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Barcelona Test 5077. 9032. 5081. 72.5 75.6 13.5 13.6 0.868
2 arima_at2 Barcelona Test 4972. 8914. 4977. 68.6 71.8 13.2 13.4 0.868
3 Snaive Barcelona Test 4916. 8890. 4939. 58.7 71.4 13.1 13.4 0.870
Madrid
<- covid_data %>%
data_Madrid filter(provincia == "Madrid") %>%
filter(fecha > as.Date("2020-06-14", format = "%Y-%m-%d")) %>%
select(provincia, fecha, num_casos, tmed) %>%
drop_na() %>%
as_tsibble(key = provincia, index = fecha)
data_Madrid
# A tsibble: 653 x 4 [1D]
# Key: provincia [1]
provincia fecha num_casos tmed
<chr> <date> <dbl> <dbl>
1 Madrid 2020-06-15 153 20.7
2 Madrid 2020-06-16 91 21
3 Madrid 2020-06-17 93 20.2
4 Madrid 2020-06-18 85 21.9
5 Madrid 2020-06-19 78 22.6
6 Madrid 2020-06-20 67 23.4
7 Madrid 2020-06-21 42 25
8 Madrid 2020-06-22 60 27.3
9 Madrid 2020-06-23 68 28.4
10 Madrid 2020-06-24 49 28
# … with 643 more rows
<- data_Madrid %>%
data_Madrid_train filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d"))
summary(data_Madrid_train$fecha)
Min. 1st Qu. Median Mean 3rd Qu. Max.
"2020-06-15" "2020-10-14" "2021-02-13" "2021-02-13" "2021-06-14" "2021-10-14"
<- data_Madrid %>%
data_Madrid_test filter(fecha > as.Date("2021-10-14", format = "%Y-%m-%d"))
summary(data_Madrid_test$fecha)
Min. 1st Qu. Median Mean 3rd Qu. Max.
"2021-10-15" "2021-11-25" "2022-01-05" "2022-01-05" "2022-02-15" "2022-03-29"
# Lamba for num_casos
<- data_Madrid %>%
lambda_Madrid_casos features(num_casos, features = guerrero) %>%
pull(lambda_guerrero)
lambda_Madrid_casos
[1] 0.06260614
# Lamba for average temp
<- data_Madrid %>%
lambda_Madrid_tmed features(tmed, features = guerrero) %>%
pull(lambda_guerrero)
lambda_Madrid_tmed
[1] 0.936519
<- data_Madrid_train %>%
fit_model model(
arima_at1 = ARIMA(box_cox(num_casos, lambda_Madrid_casos) ~
box_cox(tmed, lambda_Madrid_tmed)),
arima_at2 = ARIMA(box_cox(num_casos, lambda_Madrid_casos) ~
box_cox(tmed, lambda_Madrid_tmed),
stepwise = FALSE, approx = FALSE),
Snaive = SNAIVE(box_cox(num_casos, lambda_Madrid_casos))
)
# Show and report model
fit_model
# A mable: 1 x 4
# Key: provincia [1]
provincia arima_at1
<chr> <model>
1 Madrid <LM w/ ARIMA(2,1,1)(1,1,1)[7] errors>
# … with 2 more variables: arima_at2 <model>, Snaive <model>
glance(fit_model)
# A tibble: 3 × 9
provincia .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
1 Madrid arima_at1 0.0669 -33.4 80.8 81.1 110. <cpl [9]> <cpl [8]>
2 Madrid arima_at2 0.0669 -33.4 80.8 81.1 110. <cpl [9]> <cpl [8]>
3 Madrid Snaive 0.291 NA NA NA NA <NULL> <NULL>
%>%
fit_model pivot_longer(-provincia,
names_to = ".model",
values_to = "orders") %>%
left_join(glance(fit_model), by = c("provincia", ".model")) %>%
arrange(AICc) %>%
select(.model:BIC)
# A mable: 3 x 7
# Key: .model [3]
.model orders sigma2 log_lik AIC AICc BIC
<chr> <model> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_… <LM w/ ARIMA(2,1,1)(1,1,1)[7] errors> 0.0669 -33.4 80.8 81.1 110.
2 arima_… <LM w/ ARIMA(2,1,1)(1,1,1)[7] errors> 0.0669 -33.4 80.8 81.1 110.
3 Snaive <SNAIVE> 0.291 NA NA NA NA
# We use a Ljung-Box test >> large p-value, confirms residuals are similar / considered to white noise
%>%
fit_model select(Snaive) %>%
gg_tsresiduals()
%>%
fit_model select(arima_at1) %>%
gg_tsresiduals()
%>%
fit_model select(arima_at2) %>%
gg_tsresiduals()
augment(fit_model) %>%
features(.innov, ljung_box, lag=7)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Madrid arima_at1 10.8 0.146
2 Madrid arima_at2 10.8 0.146
3 Madrid Snaive 1678. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=14)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Madrid arima_at1 39.8 0.000272
2 Madrid arima_at2 39.8 0.000272
3 Madrid Snaive 2594. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=21)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Madrid arima_at1 48.4 0.000605
2 Madrid arima_at2 48.4 0.000605
3 Madrid Snaive 2854. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=90)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Madrid arima_at1 106. 0.126
2 Madrid arima_at2 106. 0.126
3 Madrid Snaive 3856. 0
# To predict future values of infections, we need to pass the model future/prediction values of the complementary variables tmed and mobility
# We are going to select those from the test dataset
<- data_Madrid_test %>%
data_fc_h7 select(-num_casos) %>%
slice(1:7)
<- data_Madrid_test %>%
data_fc_h14 select(-num_casos) %>%
slice(1:14)
<- data_Madrid_test %>%
data_fc_h21 select(-num_casos) %>%
slice(1:21)
<- data_Madrid_test %>%
data_fc_h90 select(-num_casos) %>%
slice(1:90)
# Significant spikes out of 30 is still consistent with white noise
# To be sure, use a Ljung-Box test, which has a large p-value, confirming that
# the residuals are similar to white noise.
# Note that the alternative models also passed this test.
#
# Forecast
<- fabletools::forecast(fit_model, new_data = data_fc_h7)
fc_h7 <- fabletools::forecast(fit_model, new_data = data_fc_h14)
fc_h14 <- fabletools::forecast(fit_model, new_data = data_fc_h21)
fc_h21 <- fabletools::forecast(fit_model, new_data = data_fc_h90) fc_h90
7 days
# Plots
%>%
fc_h7 autoplot(
%>%
data_Madrid_test filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(9))
+
) labs(y = "Nº cases", title = "Madrid - forecast h7")
%>%
fc_h7 autoplot(
%>%
data_Madrid filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(9) &
> as.Date("2021-07-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Madrid - forecast h7")
# Accuracy
::accuracy(fc_h7, data_Madrid) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Madrid Test 135. 163. 144. 39.4 47.5 0.339 0.242 -0.0363
2 arima_at2 Madrid Test 135. 163. 144. 39.4 47.5 0.339 0.242 -0.0363
3 Snaive Madrid Test 32.1 124. 105. -7.03 48.2 0.247 0.185 -0.109
14 days
# Plots
%>%
fc_h14 autoplot(
%>%
data_Madrid_test filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(16))
+
) labs(y = "Nº cases", title = "Madrid - forecast h14")
%>%
fc_h14 autoplot(
%>%
data_Madrid filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(16) &
> as.Date("2021-07-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Madrid - forecast h14")
# Accuracy
::accuracy(fc_h14, data_Madrid) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Madrid Test 150. 178. 154. 45.1 49.4 0.364 0.265 0.0893
2 arima_at2 Madrid Test 150. 178. 154. 45.1 49.4 0.364 0.265 0.0893
3 Snaive Madrid Test 26.3 126. 105. -8.58 46.6 0.246 0.188 -0.0472
21 days
# Plots
%>%
fc_h21 autoplot(
%>%
data_Madrid_test filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(23))
+
) labs(y = "Nº cases", title = "Madrid - forecast h21")
%>%
fc_h21 autoplot(
%>%
data_Madrid filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(23) &
> as.Date("2021-07-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Madrid - forecast h21")
# Accuracy
::accuracy(fc_h21, data_Madrid) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Madrid Test 174. 210. 177. 50.0 52.8 0.418 0.313 0.287
2 arima_at2 Madrid Test 174. 210. 177. 50.0 52.8 0.418 0.313 0.287
3 Snaive Madrid Test 34.8 140. 117. -8.01 48.6 0.275 0.209 0.131
90 days
# Plots
%>%
fc_h90 autoplot(
%>%
data_Madrid_test filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(92))
+
) labs(y = "Nº cases", title = "Madrid - forecast h90")
%>%
fc_h90 autoplot(
%>%
data_Madrid filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(92) &
> as.Date("2021-07-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Madrid - forecast h90")
# Accuracy
::accuracy(fc_h90, data_Madrid) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Madrid Test 5007. 8744. 5008. 81.8 82.4 11.8 13.0 0.840
2 arima_at2 Madrid Test 5007. 8744. 5008. 81.8 82.4 11.8 13.0 0.840
3 Snaive Madrid Test 4763. 8564. 4792. 51.8 68.9 11.3 12.7 0.840
Malaga
<- covid_data %>%
data_Malaga filter(provincia == "Málaga") %>%
filter(fecha > as.Date("2020-06-14", format = "%Y-%m-%d")) %>%
select(provincia, fecha, num_casos, tmed) %>%
drop_na() %>%
as_tsibble(key = provincia, index = fecha)
data_Malaga
# A tsibble: 653 x 4 [1D]
# Key: provincia [1]
provincia fecha num_casos tmed
<chr> <date> <dbl> <dbl>
1 Málaga 2020-06-15 1 27.2
2 Málaga 2020-06-16 1 27.8
3 Málaga 2020-06-17 2 26.9
4 Málaga 2020-06-18 2 21.6
5 Málaga 2020-06-19 1 24.7
6 Málaga 2020-06-20 4 22.6
7 Málaga 2020-06-21 4 22.7
8 Málaga 2020-06-22 6 25
9 Málaga 2020-06-23 8 25.6
10 Málaga 2020-06-24 65 24.5
# … with 643 more rows
<- data_Malaga %>%
data_Malaga_train filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d"))
summary(data_Malaga_train$fecha)
Min. 1st Qu. Median Mean 3rd Qu. Max.
"2020-06-15" "2020-10-14" "2021-02-13" "2021-02-13" "2021-06-14" "2021-10-14"
<- data_Malaga %>%
data_Malaga_test filter(fecha > as.Date("2021-10-14", format = "%Y-%m-%d"))
summary(data_Malaga_test$fecha)
Min. 1st Qu. Median Mean 3rd Qu. Max.
"2021-10-15" "2021-11-25" "2022-01-05" "2022-01-05" "2022-02-15" "2022-03-29"
# Lamba for num_casos
<- data_Malaga %>%
lambda_Malaga_casos features(num_casos, features = guerrero) %>%
pull(lambda_guerrero)
lambda_Malaga_casos
[1] 0.2312255
# Lamba for average temp
<- data_Malaga %>%
lambda_Malaga_tmed features(tmed, features = guerrero) %>%
pull(lambda_guerrero)
lambda_Malaga_tmed
[1] 0.6288527
<- data_Malaga_train %>%
fit_model model(
arima_at1 = ARIMA(box_cox(num_casos, lambda_Malaga_casos) ~
box_cox(tmed, lambda_Malaga_tmed)),
arima_at2 = ARIMA(box_cox(num_casos, lambda_Malaga_casos) ~
box_cox(tmed, lambda_Malaga_tmed),
stepwise = FALSE, approx = FALSE),
Snaive = SNAIVE(box_cox(num_casos, lambda_Malaga_casos))
)
# Show and report model
fit_model
# A mable: 1 x 4
# Key: provincia [1]
provincia arima_at1
<chr> <model>
1 Málaga <LM w/ ARIMA(4,1,0)(2,0,0)[7] errors>
# … with 2 more variables: arima_at2 <model>, Snaive <model>
glance(fit_model)
# A tibble: 3 × 9
provincia .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
1 Málaga arima_at1 0.553 -545. 1105. 1105. 1139. <cpl [18]> <cpl [0]>
2 Málaga arima_at2 0.550 -543. 1102. 1103. 1136. <cpl [14]> <cpl [4]>
3 Málaga Snaive 2.15 NA NA NA NA <NULL> <NULL>
%>%
fit_model pivot_longer(-provincia,
names_to = ".model",
values_to = "orders") %>%
left_join(glance(fit_model), by = c("provincia", ".model")) %>%
arrange(AICc) %>%
select(.model:BIC)
# A mable: 3 x 7
# Key: .model [3]
.model orders sigma2 log_lik AIC AICc BIC
<chr> <model> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_… <LM w/ ARIMA(0,1,4)(2,0,0)[7] errors> 0.550 -543. 1102. 1103. 1136.
2 arima_… <LM w/ ARIMA(4,1,0)(2,0,0)[7] errors> 0.553 -545. 1105. 1105. 1139.
3 Snaive <SNAIVE> 2.15 NA NA NA NA
# We use a Ljung-Box test >> large p-value, confirms residuals are similar / considered to white noise
%>%
fit_model select(Snaive) %>%
gg_tsresiduals()
%>%
fit_model select(arima_at1) %>%
gg_tsresiduals()
%>%
fit_model select(arima_at2) %>%
gg_tsresiduals()
augment(fit_model) %>%
features(.innov, ljung_box, lag=7)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Málaga arima_at1 6.37 0.497
2 Málaga arima_at2 2.44 0.932
3 Málaga Snaive 1361. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=14)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Málaga arima_at1 20.2 0.125
2 Málaga arima_at2 17.1 0.252
3 Málaga Snaive 1956. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=21)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Málaga arima_at1 26.7 0.179
2 Málaga arima_at2 23.1 0.336
3 Málaga Snaive 2100. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=90)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Málaga arima_at1 131. 0.00301
2 Málaga arima_at2 119. 0.0227
3 Málaga Snaive 3195. 0
# To predict future values of infections, we need to pass the model future/prediction values of the complementary variables tmed and mobility
# We are going to select those from the test dataset
<- data_Malaga_test %>%
data_fc_h7 select(-num_casos) %>%
slice(1:7)
<- data_Malaga_test %>%
data_fc_h14 select(-num_casos) %>%
slice(1:14)
<- data_Malaga_test %>%
data_fc_h21 select(-num_casos) %>%
slice(1:21)
<- data_Malaga_test %>%
data_fc_h90 select(-num_casos) %>%
slice(1:90)
# Significant spikes out of 30 is still consistent with white noise
# To be sure, use a Ljung-Box test, which has a large p-value, confirming that
# the residuals are similar to white noise.
# Note that the alternative models also passed this test.
#
# Forecast
<- fabletools::forecast(fit_model, new_data = data_fc_h7)
fc_h7 <- fabletools::forecast(fit_model, new_data = data_fc_h14)
fc_h14 <- fabletools::forecast(fit_model, new_data = data_fc_h21)
fc_h21 <- fabletools::forecast(fit_model, new_data = data_fc_h90) fc_h90
7 days
# Plots
%>%
fc_h7 autoplot(
%>%
data_Malaga_test filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(9))
+
) labs(y = "Nº cases", title = "Malaga - forecast h7")
%>%
fc_h7 autoplot(
%>%
data_Malaga filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(9) &
> as.Date("2021-07-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Malaga - forecast h7")
# Accuracy
::accuracy(fc_h7, data_Malaga) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Málaga Test 10.2 19.3 16.7 7.44 35.8 0.170 0.114 0.0661
2 arima_at2 Málaga Test 10.7 19.8 17.2 8.32 36.8 0.175 0.117 0.0950
3 Snaive Málaga Test -4.00 22.9 19.8 -27.0 52.2 0.201 0.135 0.0123
14 days
# Plots
%>%
fc_h14 autoplot(
%>%
data_Malaga_test filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(16))
+
) labs(y = "Nº cases", title = "Malaga - forecast h14")
%>%
fc_h14 autoplot(
%>%
data_Malaga filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(16) &
> as.Date("2021-07-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Malaga - forecast h14")
# Accuracy
::accuracy(fc_h14, data_Malaga) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Málaga Test 13.4 22.2 18.0 14.1 32.1 0.183 0.131 0.179
2 arima_at2 Málaga Test 13.9 22.6 18.4 14.9 32.7 0.187 0.134 0.190
3 Snaive Málaga Test -3.86 22.7 18.7 -22.4 43.6 0.190 0.134 -0.0973
21 days
# Plots
%>%
fc_h21 autoplot(
%>%
data_Malaga_test filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(23))
+
) labs(y = "Nº cases", title = "Malaga - forecast h21")
%>%
fc_h21 autoplot(
%>%
data_Malaga filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(23) &
> as.Date("2021-07-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Malaga - forecast h21")
# Accuracy
::accuracy(fc_h21, data_Malaga) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Málaga Test 18.4 25.9 21.7 20.9 33.3 0.220 0.153 0.249
2 arima_at2 Málaga Test 18.6 26.0 21.9 21.2 33.7 0.222 0.154 0.244
3 Snaive Málaga Test -0.951 22.0 18.6 -14.8 37.9 0.188 0.130 0.0886
90 days
# Plots
%>%
fc_h90 autoplot(
%>%
data_Malaga_test filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(92))
+
) labs(y = "Nº cases", title = "Malaga - forecast h90")
%>%
fc_h90 autoplot(
%>%
data_Malaga filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(92) &
> as.Date("2021-07-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Malaga - forecast h90")
# Accuracy
::accuracy(fc_h90, data_Malaga) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Málaga Test 606. 1013. 607. 55.8 59.8 6.17 5.99 0.927
2 arima_at2 Málaga Test 596. 1001. 597. 54.1 58.3 6.07 5.91 0.926
3 Snaive Málaga Test 620. 1049. 628. 44.9 62.3 6.37 6.20 0.929
Sevilla
<- covid_data %>%
data_Sevilla filter(provincia == "Sevilla") %>%
filter(fecha > as.Date("2020-06-14", format = "%Y-%m-%d")) %>%
select(provincia, fecha, num_casos, tmed) %>%
drop_na() %>%
as_tsibble(key = provincia, index = fecha)
data_Sevilla
# A tsibble: 653 x 4 [1D]
# Key: provincia [1]
provincia fecha num_casos tmed
<chr> <date> <dbl> <dbl>
1 Sevilla 2020-06-15 2 23.1
2 Sevilla 2020-06-16 1 24.0
3 Sevilla 2020-06-17 0 24.9
4 Sevilla 2020-06-18 0 25.8
5 Sevilla 2020-06-19 0 26.6
6 Sevilla 2020-06-20 0 27.5
7 Sevilla 2020-06-21 0 28.4
8 Sevilla 2020-06-22 1 29.3
9 Sevilla 2020-06-23 0 30.2
10 Sevilla 2020-06-24 1 29.4
# … with 643 more rows
<- data_Sevilla %>%
data_Sevilla_train filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d"))
summary(data_Sevilla_train$fecha)
Min. 1st Qu. Median Mean 3rd Qu. Max.
"2020-06-15" "2020-10-14" "2021-02-13" "2021-02-13" "2021-06-14" "2021-10-14"
<- data_Sevilla %>%
data_Sevilla_test filter(fecha > as.Date("2021-10-14", format = "%Y-%m-%d"))
summary(data_Sevilla_test$fecha)
Min. 1st Qu. Median Mean 3rd Qu. Max.
"2021-10-15" "2021-11-25" "2022-01-05" "2022-01-05" "2022-02-15" "2022-03-29"
# Lamba for num_casos
<- data_Sevilla %>%
lambda_Sevilla_casos features(num_casos, features = guerrero) %>%
pull(lambda_guerrero)
lambda_Sevilla_casos
[1] 0.1933674
# Lamba for average temp
<- data_Sevilla %>%
lambda_Sevilla_tmed features(tmed, features = guerrero) %>%
pull(lambda_guerrero)
lambda_Sevilla_tmed
[1] 0.9333023
<- data_Sevilla_train %>%
fit_model model(
arima_at1 = ARIMA(box_cox(num_casos, lambda_Sevilla_casos) ~
box_cox(tmed, lambda_Sevilla_tmed)),
arima_at2 = ARIMA(box_cox(num_casos, lambda_Sevilla_casos) ~
box_cox(tmed, lambda_Sevilla_tmed),
stepwise = FALSE, approx = FALSE),
Snaive = SNAIVE(box_cox(num_casos, lambda_Sevilla_casos))
)
# Show and report model
fit_model
# A mable: 1 x 4
# Key: provincia [1]
provincia arima_at1
<chr> <model>
1 Sevilla <LM w/ ARIMA(3,1,1)(2,0,0)[7] errors>
# … with 2 more variables: arima_at2 <model>, Snaive <model>
glance(fit_model)
# A tibble: 3 × 9
provincia .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
1 Sevilla arima_at1 0.924 -669. 1355. 1355. 1388. <cpl [17]> <cpl [1]>
2 Sevilla arima_at2 0.922 -669. 1354. 1354. 1387. <cpl [18]> <cpl [0]>
3 Sevilla Snaive 2.02 NA NA NA NA <NULL> <NULL>
%>%
fit_model pivot_longer(-provincia,
names_to = ".model",
values_to = "orders") %>%
left_join(glance(fit_model), by = c("provincia", ".model")) %>%
arrange(AICc) %>%
select(.model:BIC)
# A mable: 3 x 7
# Key: .model [3]
.model orders sigma2 log_lik AIC AICc BIC
<chr> <model> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_… <LM w/ ARIMA(4,1,0)(2,0,0)[7] errors> 0.922 -669. 1354. 1354. 1387.
2 arima_… <LM w/ ARIMA(3,1,1)(2,0,0)[7] errors> 0.924 -669. 1355. 1355. 1388.
3 Snaive <SNAIVE> 2.02 NA NA NA NA
# We use a Ljung-Box test >> large p-value, confirms residuals are similar / considered to white noise
%>%
fit_model select(Snaive) %>%
gg_tsresiduals()
%>%
fit_model select(arima_at1) %>%
gg_tsresiduals()
%>%
fit_model select(arima_at2) %>%
gg_tsresiduals()
augment(fit_model) %>%
features(.innov, ljung_box, lag=7)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Sevilla arima_at1 8.68 0.276
2 Sevilla arima_at2 7.66 0.363
3 Sevilla Snaive 757. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=14)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Sevilla arima_at1 24.1 0.0446
2 Sevilla arima_at2 23.3 0.0555
3 Sevilla Snaive 1110. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=21)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Sevilla arima_at1 37.5 0.0147
2 Sevilla arima_at2 36.7 0.0181
3 Sevilla Snaive 1235. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=90)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Sevilla arima_at1 103. 0.166
2 Sevilla arima_at2 102. 0.182
3 Sevilla Snaive 1692. 0
# To predict future values of infections, we need to pass the model future/prediction values of the complementary variables tmed and mobility
# We are going to select those from the test dataset
<- data_Sevilla_test %>%
data_fc_h7 select(-num_casos) %>%
slice(1:7)
<- data_Sevilla_test %>%
data_fc_h14 select(-num_casos) %>%
slice(1:14)
<- data_Sevilla_test %>%
data_fc_h21 select(-num_casos) %>%
slice(1:21)
<- data_Sevilla_test %>%
data_fc_h90 select(-num_casos) %>%
slice(1:90)
# Significant spikes out of 30 is still consistent with white noise
# To be sure, use a Ljung-Box test, which has a large p-value, confirming that
# the residuals are similar to white noise.
# Note that the alternative models also passed this test.
#
# Forecast
<- fabletools::forecast(fit_model, new_data = data_fc_h7)
fc_h7 <- fabletools::forecast(fit_model, new_data = data_fc_h14)
fc_h14 <- fabletools::forecast(fit_model, new_data = data_fc_h21)
fc_h21 <- fabletools::forecast(fit_model, new_data = data_fc_h90) fc_h90
7 days
# Plots
%>%
fc_h7 autoplot(
%>%
data_Sevilla_test filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(9))
+
) labs(y = "Nº cases", title = "Sevilla - forecast h7")
%>%
fc_h7 autoplot(
%>%
data_Sevilla filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(9) &
> as.Date("2021-07-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Sevilla - forecast h7")
# Accuracy
::accuracy(fc_h7, data_Sevilla) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Sevilla Test 7.42 11.3 8.79 19.6 24.7 0.0869 0.0717 -0.137
2 arima_at2 Sevilla Test 7.51 11.4 8.93 19.8 25.1 0.0882 0.0722 -0.123
3 Snaive Sevilla Test -5.85 16.0 13.7 -22.0 40.6 0.135 0.102 0.0310
14 days
# Plots
%>%
fc_h14 autoplot(
%>%
data_Sevilla_test filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(16))
+
) labs(y = "Nº cases", title = "Sevilla - forecast h14")
%>%
fc_h14 autoplot(
%>%
data_Sevilla filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(16) &
> as.Date("2021-07-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Sevilla - forecast h14")
# Accuracy
::accuracy(fc_h14, data_Sevilla) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Sevilla Test 5.00 9.90 8.02 8.47 27.5 0.0793 0.0628 -0.116
2 arima_at2 Sevilla Test 5.13 9.96 8.10 8.97 27.6 0.0801 0.0631 -0.108
3 Snaive Sevilla Test -12.5 19.3 16.4 -57.6 66.9 0.162 0.122 0.275
21 days
# Plots
%>%
fc_h21 autoplot(
%>%
data_Sevilla_test filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(23))
+
) labs(y = "Nº cases", title = "Sevilla - forecast h21")
%>%
fc_h21 autoplot(
%>%
data_Sevilla filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(23) &
> as.Date("2021-07-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Sevilla - forecast h21")
# Accuracy
::accuracy(fc_h21, data_Sevilla) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Sevilla Test 6.47 11.8 8.94 11.5 26.9 0.0883 0.0745 0.191
2 arima_at2 Sevilla Test 6.59 11.8 9.01 12.0 26.9 0.0890 0.0749 0.194
3 Snaive Sevilla Test -13.9 22.1 19.1 -60.8 72.0 0.189 0.140 0.327
90 days
# Plots
%>%
fc_h90 autoplot(
%>%
data_Sevilla_test filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(92))
+
) labs(y = "Nº cases", title = "Sevilla - forecast h90")
%>%
fc_h90 autoplot(
%>%
data_Sevilla filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(92) &
> as.Date("2021-07-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Sevilla - forecast h90")
# Accuracy
::accuracy(fc_h90, data_Sevilla) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Sevilla Test 686. 1166. 687. 61.8 66.2 6.79 7.39 0.892
2 arima_at2 Sevilla Test 686. 1165. 687. 61.9 66.2 6.78 7.39 0.892
3 Snaive Sevilla Test 665. 1161. 677. 31.7 74.6 6.69 7.36 0.892
Middel of sixth epidemiological period
Asturias
<- covid_data %>%
data_asturias filter(provincia == "Asturias") %>%
filter(fecha > as.Date("2020-06-14", format = "%Y-%m-%d")) %>%
select(provincia, fecha, num_casos, tmed) %>%
drop_na() %>%
as_tsibble(key = provincia, index = fecha)
data_asturias
# A tsibble: 653 x 4 [1D]
# Key: provincia [1]
provincia fecha num_casos tmed
<chr> <date> <dbl> <dbl>
1 Asturias 2020-06-15 0 16
2 Asturias 2020-06-16 1 15.6
3 Asturias 2020-06-17 0 15.6
4 Asturias 2020-06-18 0 15.1
5 Asturias 2020-06-19 0 14.8
6 Asturias 2020-06-20 0 16.8
7 Asturias 2020-06-21 0 18.2
8 Asturias 2020-06-22 0 18.2
9 Asturias 2020-06-23 0 20
10 Asturias 2020-06-24 0 18.2
# … with 643 more rows
<- data_asturias %>%
data_asturias_train filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d"))
summary(data_asturias_train$fecha)
Min. 1st Qu. Median Mean 3rd Qu. Max.
"2020-06-15" "2020-11-10" "2021-04-08" "2021-04-08" "2021-09-04" "2022-01-31"
<- data_asturias %>%
data_asturias_test filter(fecha > as.Date("2022-01-31", format = "%Y-%m-%d"))
summary(data_asturias_test$fecha)
Min. 1st Qu. Median Mean 3rd Qu. Max.
"2022-02-01" "2022-02-15" "2022-03-01" "2022-03-01" "2022-03-15" "2022-03-29"
# Lamba for num_casos
<- data_asturias %>%
lambda_asturias_casos features(num_casos, features = guerrero) %>%
pull(lambda_guerrero)
lambda_asturias_casos
[1] 0.2278229
# Lamba for average temp
<- data_asturias %>%
lambda_asturias_tmed features(tmed, features = guerrero) %>%
pull(lambda_guerrero)
lambda_asturias_tmed
[1] 1.056074
<- data_asturias_train %>%
fit_model model(
arima_at1 = ARIMA(box_cox(num_casos, lambda_asturias_casos) ~
box_cox(tmed, lambda_asturias_tmed)),
arima_at2 = ARIMA(box_cox(num_casos, lambda_asturias_casos) ~
box_cox(tmed, lambda_asturias_tmed),
stepwise = FALSE, approx = FALSE),
Snaive = SNAIVE(box_cox(num_casos, lambda_asturias_casos))
)
# Show and report model
fit_model
# A mable: 1 x 4
# Key: provincia [1]
provincia arima_at1
<chr> <model>
1 Asturias <LM w/ ARIMA(3,1,1)(2,0,0)[7] errors>
# … with 2 more variables: arima_at2 <model>, Snaive <model>
glance(fit_model)
# A tibble: 3 × 9
provincia .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
1 Asturias arima_at1 0.917 -816. 1648. 1648. 1683. <cpl [17]> <cpl [1]>
2 Asturias arima_at2 0.895 -810. 1631. 1631. 1653. <cpl [7]> <cpl [8]>
3 Asturias Snaive 2.53 NA NA NA NA <NULL> <NULL>
%>%
fit_model pivot_longer(-provincia,
names_to = ".model",
values_to = "orders") %>%
left_join(glance(fit_model), by = c("provincia", ".model")) %>%
arrange(AICc) %>%
select(.model:BIC)
# A mable: 3 x 7
# Key: .model [3]
.model orders sigma2 log_lik AIC AICc BIC
<chr> <model> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_… <LM w/ ARIMA(0,1,1)(1,0,1)[7] errors> 0.895 -810. 1631. 1631. 1653.
2 arima_… <LM w/ ARIMA(3,1,1)(2,0,0)[7] errors> 0.917 -816. 1648. 1648. 1683.
3 Snaive <SNAIVE> 2.53 NA NA NA NA
# We use a Ljung-Box test >> large p-value, confirms residuals are similar / considered to white noise
%>%
fit_model select(Snaive) %>%
gg_tsresiduals()
%>%
fit_model select(arima_at1) %>%
gg_tsresiduals()
%>%
fit_model select(arima_at2) %>%
gg_tsresiduals()
augment(fit_model) %>%
features(.innov, ljung_box, lag=7)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Asturias arima_at1 13.8 0.0544
2 Asturias arima_at2 13.0 0.0717
3 Asturias Snaive 981. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=14)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Asturias arima_at1 38.1 0.000503
2 Asturias arima_at2 34.9 0.00150
3 Asturias Snaive 1393. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=21)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Asturias arima_at1 59.1 0.0000173
2 Asturias arima_at2 48.9 0.000522
3 Asturias Snaive 1470. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=57)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Asturias arima_at1 100. 0.000364
2 Asturias arima_at2 78.5 0.0308
3 Asturias Snaive 1527. 0
# To predict future values of infections, we need to pass the model future/prediction values of the complementary variables tmed and mobility
# We are going to select those from the test dataset
<- data_asturias_test %>%
data_fc_h7 select(-num_casos) %>%
slice(1:7)
<- data_asturias_test %>%
data_fc_h14 select(-num_casos) %>%
slice(1:14)
<- data_asturias_test %>%
data_fc_h21 select(-num_casos) %>%
slice(1:21)
<- data_asturias_test %>%
data_fc_h57 select(-num_casos) %>%
slice(1:57)
# Significant spikes out of 30 is still consistent with white noise
# To be sure, use a Ljung-Box test, which has a large p-value, confirming that
# the residuals are similar to white noise.
# Note that the alternative models also passed this test.
#
# Forecast
<- fabletools::forecast(fit_model, new_data = data_fc_h7)
fc_h7 <- fabletools::forecast(fit_model, new_data = data_fc_h14)
fc_h14 <- fabletools::forecast(fit_model, new_data = data_fc_h21)
fc_h21 <- fabletools::forecast(fit_model, new_data = data_fc_h57) fc_h57
7 days
# Plots
%>%
fc_h7 autoplot(
%>%
data_asturias_test filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(7))
+
) labs(y = "Nº cases", title = "Asturias - forecast h7")
%>%
fc_h7 autoplot(
%>%
data_asturias filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(7) &
> as.Date("2022-01-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Asturias - forecast h7")
# Accuracy
::accuracy(fc_h7, data_asturias) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Asturias Test -174. 274. 253. -25.3 30.3 2.61 1.25 0.192
2 arima_at2 Asturias Test -256. 335. 318. -33.9 37.9 3.29 1.52 0.159
3 Snaive Asturias Test -570. 586. 570. -62.8 62.8 5.89 2.66 -0.550
14 days
# Plots
%>%
fc_h14 autoplot(
%>%
data_asturias_test filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(14))
+
) labs(y = "Nº cases", title = "Asturias - forecast h14")
%>%
fc_h14 autoplot(
%>%
data_asturias filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(14) &
> as.Date("2022-01-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Asturias - forecast h14")
# Accuracy
::accuracy(fc_h14, data_asturias) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Asturias Test -273. 334. 313. -44.4 46.9 3.23 1.52 0.375
2 arima_at2 Asturias Test -395. 449. 426. -61.1 63.1 4.40 2.04 0.443
3 Snaive Asturias Test -769. 812. 769. -107. 107. 7.94 3.69 0.299
21 days
# Plots
%>%
fc_h21 autoplot(
%>%
data_asturias_test filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(21))
+
) labs(y = "Nº cases", title = "Asturias - forecast h21")
%>%
fc_h21 autoplot(
%>%
data_asturias filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(21) &
> as.Date("2022-01-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Asturias - forecast h21")
# Accuracy
::accuracy(fc_h21, data_asturias) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Asturias Test -359. 414. 385. -75.2 76.8 3.98 1.88 0.534
2 arima_at2 Asturias Test -495. 545. 516. -98.4 99.8 5.34 2.48 0.595
3 Snaive Asturias Test -920. 980. 920. -168. 168. 9.50 4.46 0.437
57 days
# Plots
%>%
fc_h57 autoplot(
%>%
data_asturias_test filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(57))
+
) labs(y = "Nº cases", title = "Asturias - forecast h57")
%>%
fc_h57 autoplot(
%>%
data_asturias filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(57) &
> as.Date("2022-01-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Asturias - forecast h57")
# Accuracy
::accuracy(fc_h57, data_asturias) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Asturias Test -583. 634. 592. -Inf Inf 6.12 2.88 0.720
2 arima_at2 Asturias Test -694. 735. 702. -Inf Inf 7.25 3.34 0.712
3 Snaive Asturias Test -1282. 1359. 1282. -Inf Inf 13.3 6.18 0.471
Barcelona
<- covid_data %>%
data_Barcelona filter(provincia == "Barcelona") %>%
filter(fecha > as.Date("2020-06-14", format = "%Y-%m-%d")) %>%
select(provincia, fecha, num_casos, tmed) %>%
drop_na() %>%
as_tsibble(key = provincia, index = fecha)
data_Barcelona
# A tsibble: 653 x 4 [1D]
# Key: provincia [1]
provincia fecha num_casos tmed
<chr> <date> <dbl> <dbl>
1 Barcelona 2020-06-15 62 21.2
2 Barcelona 2020-06-16 66 20.8
3 Barcelona 2020-06-17 70 20.3
4 Barcelona 2020-06-18 68 18.8
5 Barcelona 2020-06-19 65 20.4
6 Barcelona 2020-06-20 53 21.8
7 Barcelona 2020-06-21 38 22.8
8 Barcelona 2020-06-22 69 23.8
9 Barcelona 2020-06-23 95 23.4
10 Barcelona 2020-06-24 44 23.6
# … with 643 more rows
<- data_Barcelona %>%
data_Barcelona_train filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d"))
summary(data_Barcelona_train$fecha)
Min. 1st Qu. Median Mean 3rd Qu. Max.
"2020-06-15" "2020-11-10" "2021-04-08" "2021-04-08" "2021-09-04" "2022-01-31"
<- data_Barcelona %>%
data_Barcelona_test filter(fecha > as.Date("2022-01-31", format = "%Y-%m-%d"))
summary(data_Barcelona_test$fecha)
Min. 1st Qu. Median Mean 3rd Qu. Max.
"2022-02-01" "2022-02-15" "2022-03-01" "2022-03-01" "2022-03-15" "2022-03-29"
# Lamba for num_casos
<- data_Barcelona %>%
lambda_Barcelona_casos features(num_casos, features = guerrero) %>%
pull(lambda_guerrero)
lambda_Barcelona_casos
[1] 0.1434101
# Lamba for average temp
<- data_Barcelona %>%
lambda_Barcelona_tmed features(tmed, features = guerrero) %>%
pull(lambda_guerrero)
lambda_Barcelona_tmed
[1] 1.098957
<- data_Barcelona_train %>%
fit_model model(
arima_at1 = ARIMA(box_cox(num_casos, lambda_Barcelona_casos) ~
box_cox(tmed, lambda_Barcelona_tmed)),
arima_at2 = ARIMA(box_cox(num_casos, lambda_Barcelona_casos) ~
box_cox(tmed, lambda_Barcelona_tmed),
stepwise = FALSE, approx = FALSE),
Snaive = SNAIVE(box_cox(num_casos, lambda_Barcelona_casos))
)
# Show and report model
fit_model
# A mable: 1 x 4
# Key: provincia [1]
provincia arima_at1
<chr> <model>
1 Barcelona <LM w/ ARIMA(1,0,2)(0,1,1)[7] errors>
# … with 2 more variables: arima_at2 <model>, Snaive <model>
glance(fit_model)
# A tibble: 3 × 9
provincia .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
1 Barcelona arima_at1 0.342 -521. 1054. 1054. 1080. <cpl [1]> <cpl [9]>
2 Barcelona arima_at2 0.328 -510. 1035. 1035. 1070. <cpl [4]> <cpl [8]>
3 Barcelona Snaive 1.26 NA NA NA NA <NULL> <NULL>
%>%
fit_model pivot_longer(-provincia,
names_to = ".model",
values_to = "orders") %>%
left_join(glance(fit_model), by = c("provincia", ".model")) %>%
arrange(AICc) %>%
select(.model:BIC)
# A mable: 3 x 7
# Key: .model [3]
.model orders sigma2 log_lik AIC AICc BIC
<chr> <model> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_… <LM w/ ARIMA(4,0,1)(0,1,1)[7] errors> 0.328 -510. 1035. 1035. 1070.
2 arima_… <LM w/ ARIMA(1,0,2)(0,1,1)[7] errors> 0.342 -521. 1054. 1054. 1080.
3 Snaive <SNAIVE> 1.26 NA NA NA NA
# We use a Ljung-Box test >> large p-value, confirms residuals are similar / considered to white noise
%>%
fit_model select(Snaive) %>%
gg_tsresiduals()
%>%
fit_model select(arima_at1) %>%
gg_tsresiduals()
%>%
fit_model select(arima_at2) %>%
gg_tsresiduals()
augment(fit_model) %>%
features(.innov, ljung_box, lag=7)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Barcelona arima_at1 35.9 0.00000742
2 Barcelona arima_at2 43.1 0.000000313
3 Barcelona Snaive 1654. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=14)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Barcelona arima_at1 54.6 0.00000102
2 Barcelona arima_at2 59.0 0.000000174
3 Barcelona Snaive 2253. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=21)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Barcelona arima_at1 60.9 0.00000917
2 Barcelona arima_at2 70.9 0.000000253
3 Barcelona Snaive 2362. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=57)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Barcelona arima_at1 164. 2.71e-12
2 Barcelona arima_at2 162. 4.88e-12
3 Barcelona Snaive 2651. 0
# To predict future values of infections, we need to pass the model future/prediction values of the complementary variables tmed and mobility
# We are going to select those from the test dataset
<- data_Barcelona_test %>%
data_fc_h7 select(-num_casos) %>%
slice(1:7)
<- data_Barcelona_test %>%
data_fc_h14 select(-num_casos) %>%
slice(1:14)
<- data_Barcelona_test %>%
data_fc_h21 select(-num_casos) %>%
slice(1:21)
<- data_Barcelona_test %>%
data_fc_h57 select(-num_casos) %>%
slice(1:57)
# Significant spikes out of 30 is still consistent with white noise
# To be sure, use a Ljung-Box test, which has a large p-value, confirming that
# the residuals are similar to white noise.
# Note that the alternative models also passed this test.
#
# Forecast
<- fabletools::forecast(fit_model, new_data = data_fc_h7)
fc_h7 <- fabletools::forecast(fit_model, new_data = data_fc_h14)
fc_h14 <- fabletools::forecast(fit_model, new_data = data_fc_h21)
fc_h21 <- fabletools::forecast(fit_model, new_data = data_fc_h57) fc_h57
7 days
# Plots
%>%
fc_h7 autoplot(
%>%
data_Barcelona_test filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(7))
+
) labs(y = "Nº cases", title = "Barcelona - forecast h7")
%>%
fc_h7 autoplot(
%>%
data_Barcelona filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(7) &
> as.Date("2022-01-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Barcelona - forecast h7")
# Accuracy
::accuracy(fc_h7, data_Barcelona) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Barcelona Test -4666. 4908. 4666. -63.7 63.7 5.86 2.51 -0.0314
2 arima_at2 Barcelona Test -1704. 1754. 1704. -22.3 22.3 2.14 0.896 -0.328
3 Snaive Barcelona Test -7870. 8139. 7870. -99.0 99.0 9.88 4.16 0.551
14 days
# Plots
%>%
fc_h14 autoplot(
%>%
data_Barcelona_test filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(14))
+
) labs(y = "Nº cases", title = "Barcelona - forecast h14")
%>%
fc_h14 autoplot(
%>%
data_Barcelona filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(14) &
> as.Date("2022-01-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Barcelona - forecast h14")
# Accuracy
::accuracy(fc_h14, data_Barcelona) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Barcelona Test -6738. 7266. 6738. -136. 136. 8.46 3.71 0.459
2 arima_at2 Barcelona Test -1843. 1947. 1843. -33.4 33.4 2.31 0.994 0.180
3 Snaive Barcelona Test -9985. 10606. 9985. -188. 188. 12.5 5.42 0.548
21 days
# Plots
%>%
fc_h21 autoplot(
%>%
data_Barcelona_test filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(21))
+
) labs(y = "Nº cases", title = "Barcelona - forecast h21")
%>%
fc_h21 autoplot(
%>%
data_Barcelona filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(21) &
> as.Date("2022-01-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Barcelona - forecast h21")
# Accuracy
::accuracy(fc_h21, data_Barcelona) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Barcelona Test -8063. 8669. 8063. -206. 206. 10.1 4.43 0.569
2 arima_at2 Barcelona Test -1589. 1743. 1589. -33.3 33.3 2.00 0.890 0.446
3 Snaive Barcelona Test -11275. 12018. 11275. -272. 272. 14.2 6.14 0.602
57 days
# Plots
%>%
fc_h57 autoplot(
%>%
data_Barcelona_test filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(57))
+
) labs(y = "Nº cases", title = "Barcelona - forecast h57")
%>%
fc_h57 autoplot(
%>%
data_Barcelona filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(57) &
> as.Date("2022-01-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Barcelona - forecast h57")
# Accuracy
::accuracy(fc_h57, data_Barcelona) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Barcelona Test -12180. 13056. 12180. -Inf Inf 15.3 6.67 0.704
2 arima_at2 Barcelona Test -582. 1198. 942. -Inf Inf 1.18 0.612 0.806
3 Snaive Barcelona Test -14809. 15796. 14809. -Inf Inf 18.6 8.06 0.592
Madrid
<- covid_data %>%
data_Madrid filter(provincia == "Madrid") %>%
filter(fecha > as.Date("2020-06-14", format = "%Y-%m-%d")) %>%
select(provincia, fecha, num_casos, tmed) %>%
drop_na() %>%
as_tsibble(key = provincia, index = fecha)
data_Madrid
# A tsibble: 653 x 4 [1D]
# Key: provincia [1]
provincia fecha num_casos tmed
<chr> <date> <dbl> <dbl>
1 Madrid 2020-06-15 153 20.7
2 Madrid 2020-06-16 91 21
3 Madrid 2020-06-17 93 20.2
4 Madrid 2020-06-18 85 21.9
5 Madrid 2020-06-19 78 22.6
6 Madrid 2020-06-20 67 23.4
7 Madrid 2020-06-21 42 25
8 Madrid 2020-06-22 60 27.3
9 Madrid 2020-06-23 68 28.4
10 Madrid 2020-06-24 49 28
# … with 643 more rows
<- data_Madrid %>%
data_Madrid_train filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d"))
summary(data_Madrid_train$fecha)
Min. 1st Qu. Median Mean 3rd Qu. Max.
"2020-06-15" "2020-11-10" "2021-04-08" "2021-04-08" "2021-09-04" "2022-01-31"
<- data_Madrid %>%
data_Madrid_test filter(fecha > as.Date("2022-01-31", format = "%Y-%m-%d"))
summary(data_Madrid_test$fecha)
Min. 1st Qu. Median Mean 3rd Qu. Max.
"2022-02-01" "2022-02-15" "2022-03-01" "2022-03-01" "2022-03-15" "2022-03-29"
# Lamba for num_casos
<- data_Madrid %>%
lambda_Madrid_casos features(num_casos, features = guerrero) %>%
pull(lambda_guerrero)
lambda_Madrid_casos
[1] 0.06260614
# Lamba for average temp
<- data_Madrid %>%
lambda_Madrid_tmed features(tmed, features = guerrero) %>%
pull(lambda_guerrero)
lambda_Madrid_tmed
[1] 0.936519
<- data_Madrid_train %>%
fit_model model(
arima_at1 = ARIMA(box_cox(num_casos, lambda_Madrid_casos) ~
box_cox(tmed, lambda_Madrid_tmed)),
arima_at2 = ARIMA(box_cox(num_casos, lambda_Madrid_casos) ~
box_cox(tmed, lambda_Madrid_tmed),
stepwise = FALSE, approx = FALSE),
Snaive = SNAIVE(box_cox(num_casos, lambda_Madrid_casos))
)
# Show and report model
fit_model
# A mable: 1 x 4
# Key: provincia [1]
provincia arima_at1
<chr> <model>
1 Madrid <LM w/ ARIMA(2,0,2)(0,1,2)[7] errors>
# … with 2 more variables: arima_at2 <model>, Snaive <model>
glance(fit_model)
# A tibble: 3 × 9
provincia .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
1 Madrid arima_at1 0.133 -240. 496. 496. 531. <cpl [2]> <cpl [16]>
2 Madrid arima_at2 0.133 -240. 496. 496. 531. <cpl [2]> <cpl [16]>
3 Madrid Snaive 0.412 NA NA NA NA <NULL> <NULL>
%>%
fit_model pivot_longer(-provincia,
names_to = ".model",
values_to = "orders") %>%
left_join(glance(fit_model), by = c("provincia", ".model")) %>%
arrange(AICc) %>%
select(.model:BIC)
# A mable: 3 x 7
# Key: .model [3]
.model orders sigma2 log_lik AIC AICc BIC
<chr> <model> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_… <LM w/ ARIMA(2,0,2)(0,1,2)[7] errors> 0.133 -240. 496. 496. 531.
2 arima_… <LM w/ ARIMA(2,0,2)(0,1,2)[7] errors> 0.133 -240. 496. 496. 531.
3 Snaive <SNAIVE> 0.412 NA NA NA NA
# We use a Ljung-Box test >> large p-value, confirms residuals are similar / considered to white noise
%>%
fit_model select(Snaive) %>%
gg_tsresiduals()
%>%
fit_model select(arima_at1) %>%
gg_tsresiduals()
%>%
fit_model select(arima_at2) %>%
gg_tsresiduals()
augment(fit_model) %>%
features(.innov, ljung_box, lag=7)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Madrid arima_at1 24.2 0.00104
2 Madrid arima_at2 24.2 0.00104
3 Madrid Snaive 1617. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=14)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Madrid arima_at1 37.9 0.000530
2 Madrid arima_at2 37.9 0.000530
3 Madrid Snaive 2282. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=21)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Madrid arima_at1 47.4 0.000825
2 Madrid arima_at2 47.4 0.000825
3 Madrid Snaive 2422. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=57)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Madrid arima_at1 122. 0.00000139
2 Madrid arima_at2 122. 0.00000139
3 Madrid Snaive 2762. 0
# To predict future values of infections, we need to pass the model future/prediction values of the complementary variables tmed and mobility
# We are going to select those from the test dataset
<- data_Madrid_test %>%
data_fc_h7 select(-num_casos) %>%
slice(1:7)
<- data_Madrid_test %>%
data_fc_h14 select(-num_casos) %>%
slice(1:14)
<- data_Madrid_test %>%
data_fc_h21 select(-num_casos) %>%
slice(1:21)
<- data_Madrid_test %>%
data_fc_h57 select(-num_casos) %>%
slice(1:57)
# Significant spikes out of 30 is still consistent with white noise
# To be sure, use a Ljung-Box test, which has a large p-value, confirming that
# the residuals are similar to white noise.
# Note that the alternative models also passed this test.
#
# Forecast
<- fabletools::forecast(fit_model, new_data = data_fc_h7)
fc_h7 <- fabletools::forecast(fit_model, new_data = data_fc_h14)
fc_h14 <- fabletools::forecast(fit_model, new_data = data_fc_h21)
fc_h21 <- fabletools::forecast(fit_model, new_data = data_fc_h57) fc_h57
7 days
# Plots
%>%
fc_h7 autoplot(
%>%
data_Madrid_test filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(7))
+
) labs(y = "Nº cases", title = "Madrid - forecast h7")
%>%
fc_h7 autoplot(
%>%
data_Madrid filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(7) &
> as.Date("2022-01-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Madrid - forecast h7")
# Accuracy
::accuracy(fc_h7, data_Madrid) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Madrid Test 464. 796. 632. 8.60 11.7 0.846 0.446 0.0623
2 arima_at2 Madrid Test 464. 796. 632. 8.60 11.7 0.846 0.446 0.0623
3 Snaive Madrid Test -1836. 2070. 1836. -36.2 36.2 2.46 1.16 0.541
14 days
# Plots
%>%
fc_h14 autoplot(
%>%
data_Madrid_test filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(14))
+
) labs(y = "Nº cases", title = "Madrid - forecast h14")
%>%
fc_h14 autoplot(
%>%
data_Madrid filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(14) &
> as.Date("2022-01-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Madrid - forecast h14")
# Accuracy
::accuracy(fc_h14, data_Madrid) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Madrid Test 179. 613. 463. 3.20 10.8 0.620 0.343 0.206
2 arima_at2 Madrid Test 179. 613. 463. 3.20 10.8 0.620 0.343 0.206
3 Snaive Madrid Test -2914. 3386. 2914. -77.1 77.1 3.90 1.90 0.466
21 days
# Plots
%>%
fc_h21 autoplot(
%>%
data_Madrid_test filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(21))
+
) labs(y = "Nº cases", title = "Madrid - forecast h21")
%>%
fc_h21 autoplot(
%>%
data_Madrid filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(21) &
> as.Date("2022-01-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Madrid - forecast h21")
# Accuracy
::accuracy(fc_h21, data_Madrid) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Madrid Test -216. 839. 654. -16.1 26.0 0.875 0.470 0.503
2 arima_at2 Madrid Test -216. 839. 654. -16.1 26.0 0.875 0.470 0.503
3 Snaive Madrid Test -3903. 4584. 3903. -158. 158. 5.23 2.57 0.574
57 days
# Plots
%>%
fc_h57 autoplot(
%>%
data_Madrid_test filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(57))
+
) labs(y = "Nº cases", title = "Madrid - forecast h57")
%>%
fc_h57 autoplot(
%>%
data_Madrid filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(57) &
> as.Date("2022-01-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Madrid - forecast h57")
# Accuracy
::accuracy(fc_h57, data_Madrid) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Madrid Test -1509. 2183. 1670. -2106. 2110. 2.24 1.22 0.698
2 arima_at2 Madrid Test -1509. 2183. 1670. -2106. 2110. 2.24 1.22 0.698
3 Snaive Madrid Test -6507. 7446. 6507. -4965. 4965. 8.71 4.17 0.627
Malaga
<- covid_data %>%
data_Malaga filter(provincia == "Málaga") %>%
filter(fecha > as.Date("2020-06-14", format = "%Y-%m-%d")) %>%
select(provincia, fecha, num_casos, tmed) %>%
drop_na() %>%
as_tsibble(key = provincia, index = fecha)
data_Malaga
# A tsibble: 653 x 4 [1D]
# Key: provincia [1]
provincia fecha num_casos tmed
<chr> <date> <dbl> <dbl>
1 Málaga 2020-06-15 1 27.2
2 Málaga 2020-06-16 1 27.8
3 Málaga 2020-06-17 2 26.9
4 Málaga 2020-06-18 2 21.6
5 Málaga 2020-06-19 1 24.7
6 Málaga 2020-06-20 4 22.6
7 Málaga 2020-06-21 4 22.7
8 Málaga 2020-06-22 6 25
9 Málaga 2020-06-23 8 25.6
10 Málaga 2020-06-24 65 24.5
# … with 643 more rows
<- data_Malaga %>%
data_Malaga_train filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d"))
summary(data_Malaga_train$fecha)
Min. 1st Qu. Median Mean 3rd Qu. Max.
"2020-06-15" "2020-11-10" "2021-04-08" "2021-04-08" "2021-09-04" "2022-01-31"
<- data_Malaga %>%
data_Malaga_test filter(fecha > as.Date("2022-01-31", format = "%Y-%m-%d"))
summary(data_Malaga_test$fecha)
Min. 1st Qu. Median Mean 3rd Qu. Max.
"2022-02-01" "2022-02-15" "2022-03-01" "2022-03-01" "2022-03-15" "2022-03-29"
# Lamba for num_casos
<- data_Malaga %>%
lambda_Malaga_casos features(num_casos, features = guerrero) %>%
pull(lambda_guerrero)
lambda_Malaga_casos
[1] 0.2312255
# Lamba for average temp
<- data_Malaga %>%
lambda_Malaga_tmed features(tmed, features = guerrero) %>%
pull(lambda_guerrero)
lambda_Malaga_tmed
[1] 0.6288527
<- data_Malaga_train %>%
fit_model model(
arima_at1 = ARIMA(box_cox(num_casos, lambda_Malaga_casos) ~
box_cox(tmed, lambda_Malaga_tmed)),
arima_at2 = ARIMA(box_cox(num_casos, lambda_Malaga_casos) ~
box_cox(tmed, lambda_Malaga_tmed),
stepwise = FALSE, approx = FALSE),
Snaive = SNAIVE(box_cox(num_casos, lambda_Malaga_casos))
)
# Show and report model
fit_model
# A mable: 1 x 4
# Key: provincia [1]
provincia arima_at1
<chr> <model>
1 Málaga <LM w/ ARIMA(4,1,0)(2,0,0)[7] errors>
# … with 2 more variables: arima_at2 <model>, Snaive <model>
glance(fit_model)
# A tibble: 3 × 9
provincia .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
1 Málaga arima_at1 0.650 -715. 1446. 1446. 1481. <cpl [18]> <cpl [0]>
2 Málaga arima_at2 0.628 -706. 1425. 1425. 1456. <cpl [10]> <cpl [7]>
3 Málaga Snaive 2.41 NA NA NA NA <NULL> <NULL>
%>%
fit_model pivot_longer(-provincia,
names_to = ".model",
values_to = "orders") %>%
left_join(glance(fit_model), by = c("provincia", ".model")) %>%
arrange(AICc) %>%
select(.model:BIC)
# A mable: 3 x 7
# Key: .model [3]
.model orders sigma2 log_lik AIC AICc BIC
<chr> <model> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_… <LM w/ ARIMA(3,1,0)(1,0,1)[7] errors> 0.628 -706. 1425. 1425. 1456.
2 arima_… <LM w/ ARIMA(4,1,0)(2,0,0)[7] errors> 0.650 -715. 1446. 1446. 1481.
3 Snaive <SNAIVE> 2.41 NA NA NA NA
# We use a Ljung-Box test >> large p-value, confirms residuals are similar / considered to white noise
%>%
fit_model select(Snaive) %>%
gg_tsresiduals()
%>%
fit_model select(arima_at1) %>%
gg_tsresiduals()
%>%
fit_model select(arima_at2) %>%
gg_tsresiduals()
augment(fit_model) %>%
features(.innov, ljung_box, lag=7)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Málaga arima_at1 8.29 0.308
2 Málaga arima_at2 15.2 0.0335
3 Málaga Snaive 1621. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=14)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Málaga arima_at1 28.4 0.0124
2 Málaga arima_at2 35.7 0.00115
3 Málaga Snaive 2376. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=21)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Málaga arima_at1 47.6 0.000771
2 Málaga arima_at2 48.3 0.000621
3 Málaga Snaive 2555. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=57)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Málaga arima_at1 100. 0.000338
2 Málaga arima_at2 112. 0.0000197
3 Málaga Snaive 3422. 0
# To predict future values of infections, we need to pass the model future/prediction values of the complementary variables tmed and mobility
# We are going to select those from the test dataset
<- data_Malaga_test %>%
data_fc_h7 select(-num_casos) %>%
slice(1:7)
<- data_Malaga_test %>%
data_fc_h14 select(-num_casos) %>%
slice(1:14)
<- data_Malaga_test %>%
data_fc_h21 select(-num_casos) %>%
slice(1:21)
<- data_Malaga_test %>%
data_fc_h57 select(-num_casos) %>%
slice(1:57)
# Significant spikes out of 30 is still consistent with white noise
# To be sure, use a Ljung-Box test, which has a large p-value, confirming that
# the residuals are similar to white noise.
# Note that the alternative models also passed this test.
#
# Forecast
<- fabletools::forecast(fit_model, new_data = data_fc_h7)
fc_h7 <- fabletools::forecast(fit_model, new_data = data_fc_h14)
fc_h14 <- fabletools::forecast(fit_model, new_data = data_fc_h21)
fc_h21 <- fabletools::forecast(fit_model, new_data = data_fc_h57) fc_h57
7 days
# Plots
%>%
fc_h7 autoplot(
%>%
data_Malaga_test filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(7))
+
) labs(y = "Nº cases", title = "Malaga - forecast h7")
%>%
fc_h7 autoplot(
%>%
data_Malaga filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(7) &
> as.Date("2022-01-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Malaga - forecast h7")
# Accuracy
::accuracy(fc_h7, data_Malaga) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Málaga Test 45.0 168. 128. -1.35 21.2 0.965 0.680 0.0875
2 arima_at2 Málaga Test -17.6 158. 112. -11.6 21.6 0.851 0.640 0.209
3 Snaive Málaga Test -170. 240. 191. -33.7 35.9 1.45 0.968 0.246
14 days
# Plots
%>%
fc_h14 autoplot(
%>%
data_Malaga_test filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(14))
+
) labs(y = "Nº cases", title = "Malaga - forecast h14")
%>%
fc_h14 autoplot(
%>%
data_Malaga filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(14) &
> as.Date("2022-01-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Malaga - forecast h14")
# Accuracy
::accuracy(fc_h14, data_Malaga) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Málaga Test 149. 266. 204. 11.6 26.7 1.54 1.08 0.282
2 arima_at2 Málaga Test 66.4 214. 169. -0.972 25.7 1.28 0.866 0.260
3 Snaive Málaga Test -141. 223. 170. -28.9 31.8 1.28 0.899 0.0239
21 days
# Plots
%>%
fc_h21 autoplot(
%>%
data_Malaga_test filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(21))
+
) labs(y = "Nº cases", title = "Malaga - forecast h21")
%>%
fc_h21 autoplot(
%>%
data_Malaga filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(21) &
> as.Date("2022-01-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Malaga - forecast h21")
# Accuracy
::accuracy(fc_h21, data_Malaga) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Málaga Test 137. 249. 183. 12.1 24.5 1.38 1.01 0.388
2 arima_at2 Málaga Test 48.9 195. 148. -2.29 23.1 1.12 0.786 0.354
3 Snaive Málaga Test -208. 279. 227. -41.4 43.4 1.72 1.13 0.172
57 days
# Plots
%>%
fc_h57 autoplot(
%>%
data_Malaga_test filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(57))
+
) labs(y = "Nº cases", title = "Malaga - forecast h57")
%>%
fc_h57 autoplot(
%>%
data_Malaga filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(57) &
> as.Date("2022-01-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Malaga - forecast h57")
# Accuracy
::accuracy(fc_h57, data_Malaga) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Málaga Test 36.7 237. 175. -Inf Inf 1.33 0.957 0.556
2 arima_at2 Málaga Test -24.7 208. 158. -Inf Inf 1.19 0.842 0.501
3 Snaive Málaga Test -436. 525. 443. -Inf Inf 3.35 2.12 0.513
Sevilla
<- covid_data %>%
data_Sevilla filter(provincia == "Sevilla") %>%
filter(fecha > as.Date("2020-06-14", format = "%Y-%m-%d")) %>%
select(provincia, fecha, num_casos, tmed) %>%
drop_na() %>%
as_tsibble(key = provincia, index = fecha)
data_Sevilla
# A tsibble: 653 x 4 [1D]
# Key: provincia [1]
provincia fecha num_casos tmed
<chr> <date> <dbl> <dbl>
1 Sevilla 2020-06-15 2 23.1
2 Sevilla 2020-06-16 1 24.0
3 Sevilla 2020-06-17 0 24.9
4 Sevilla 2020-06-18 0 25.8
5 Sevilla 2020-06-19 0 26.6
6 Sevilla 2020-06-20 0 27.5
7 Sevilla 2020-06-21 0 28.4
8 Sevilla 2020-06-22 1 29.3
9 Sevilla 2020-06-23 0 30.2
10 Sevilla 2020-06-24 1 29.4
# … with 643 more rows
<- data_Sevilla %>%
data_Sevilla_train filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d"))
summary(data_Sevilla_train$fecha)
Min. 1st Qu. Median Mean 3rd Qu. Max.
"2020-06-15" "2020-11-10" "2021-04-08" "2021-04-08" "2021-09-04" "2022-01-31"
<- data_Sevilla %>%
data_Sevilla_test filter(fecha > as.Date("2022-01-31", format = "%Y-%m-%d"))
summary(data_Sevilla_test$fecha)
Min. 1st Qu. Median Mean 3rd Qu. Max.
"2022-02-01" "2022-02-15" "2022-03-01" "2022-03-01" "2022-03-15" "2022-03-29"
# Lamba for num_casos
<- data_Sevilla %>%
lambda_Sevilla_casos features(num_casos, features = guerrero) %>%
pull(lambda_guerrero)
lambda_Sevilla_casos
[1] 0.1933674
# Lamba for average temp
<- data_Sevilla %>%
lambda_Sevilla_tmed features(tmed, features = guerrero) %>%
pull(lambda_guerrero)
lambda_Sevilla_tmed
[1] 0.9333023
<- data_Sevilla_train %>%
fit_model model(
arima_at1 = ARIMA(box_cox(num_casos, lambda_Sevilla_casos) ~
box_cox(tmed, lambda_Sevilla_tmed)),
arima_at2 = ARIMA(box_cox(num_casos, lambda_Sevilla_casos) ~
box_cox(tmed, lambda_Sevilla_tmed),
stepwise = FALSE, approx = FALSE),
Snaive = SNAIVE(box_cox(num_casos, lambda_Sevilla_casos))
)
# Show and report model
fit_model
# A mable: 1 x 4
# Key: provincia [1]
provincia arima_at1
<chr> <model>
1 Sevilla <LM w/ ARIMA(3,1,1)(2,0,0)[7] errors>
# … with 2 more variables: arima_at2 <model>, Snaive <model>
glance(fit_model)
# A tibble: 3 × 9
provincia .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
1 Sevilla arima_at1 0.961 -832. 1679. 1679. 1714. <cpl [17]> <cpl [1]>
2 Sevilla arima_at2 0.961 -831. 1679. 1679. 1714. <cpl [18]> <cpl [0]>
3 Sevilla Snaive 2.23 NA NA NA NA <NULL> <NULL>
%>%
fit_model pivot_longer(-provincia,
names_to = ".model",
values_to = "orders") %>%
left_join(glance(fit_model), by = c("provincia", ".model")) %>%
arrange(AICc) %>%
select(.model:BIC)
# A mable: 3 x 7
# Key: .model [3]
.model orders sigma2 log_lik AIC AICc BIC
<chr> <model> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_… <LM w/ ARIMA(4,1,0)(2,0,0)[7] errors> 0.961 -831. 1679. 1679. 1714.
2 arima_… <LM w/ ARIMA(3,1,1)(2,0,0)[7] errors> 0.961 -832. 1679. 1679. 1714.
3 Snaive <SNAIVE> 2.23 NA NA NA NA
# We use a Ljung-Box test >> large p-value, confirms residuals are similar / considered to white noise
%>%
fit_model select(Snaive) %>%
gg_tsresiduals()
%>%
fit_model select(arima_at1) %>%
gg_tsresiduals()
%>%
fit_model select(arima_at2) %>%
gg_tsresiduals()
augment(fit_model) %>%
features(.innov, ljung_box, lag=7)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Sevilla arima_at1 6.88 0.441
2 Sevilla arima_at2 6.06 0.533
3 Sevilla Snaive 965. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=14)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Sevilla arima_at1 27.0 0.0190
2 Sevilla arima_at2 26.4 0.0232
3 Sevilla Snaive 1403. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=21)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Sevilla arima_at1 37.9 0.0132
2 Sevilla arima_at2 37.3 0.0155
3 Sevilla Snaive 1510. 0
augment(fit_model) %>%
features(.innov, ljung_box, lag=57)
# A tibble: 3 × 4
provincia .model lb_stat lb_pvalue
<chr> <chr> <dbl> <dbl>
1 Sevilla arima_at1 76.9 0.0406
2 Sevilla arima_at2 76.4 0.0439
3 Sevilla Snaive 1813. 0
# To predict future values of infections, we need to pass the model future/prediction values of the complementary variables tmed and mobility
# We are going to select those from the test dataset
<- data_Sevilla_test %>%
data_fc_h7 select(-num_casos) %>%
slice(1:7)
<- data_Sevilla_test %>%
data_fc_h14 select(-num_casos) %>%
slice(1:14)
<- data_Sevilla_test %>%
data_fc_h21 select(-num_casos) %>%
slice(1:21)
<- data_Sevilla_test %>%
data_fc_h57 select(-num_casos) %>%
slice(1:57)
# Significant spikes out of 30 is still consistent with white noise
# To be sure, use a Ljung-Box test, which has a large p-value, confirming that
# the residuals are similar to white noise.
# Note that the alternative models also passed this test.
#
# Forecast
<- fabletools::forecast(fit_model, new_data = data_fc_h7)
fc_h7 <- fabletools::forecast(fit_model, new_data = data_fc_h14)
fc_h14 <- fabletools::forecast(fit_model, new_data = data_fc_h21)
fc_h21 <- fabletools::forecast(fit_model, new_data = data_fc_h57) fc_h57
7 days
# Plots
%>%
fc_h7 autoplot(
%>%
data_Sevilla_test filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(7))
+
) labs(y = "Nº cases", title = "Sevilla - forecast h7")
%>%
fc_h7 autoplot(
%>%
data_Sevilla filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(7) &
> as.Date("2022-01-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Sevilla - forecast h7")
# Accuracy
::accuracy(fc_h7, data_Sevilla) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Sevilla Test 35.3 79.3 53.4 4.72 9.49 0.373 0.291 0.152
2 arima_at2 Sevilla Test 37.0 79.8 53.3 5.02 9.48 0.372 0.292 0.149
3 Snaive Sevilla Test -305. 368. 311. -48.0 49.4 2.17 1.35 0.523
14 days
# Plots
%>%
fc_h14 autoplot(
%>%
data_Sevilla_test filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(14))
+
) labs(y = "Nº cases", title = "Sevilla - forecast h14")
%>%
fc_h14 autoplot(
%>%
data_Sevilla filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(14) &
> as.Date("2022-01-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Sevilla - forecast h14")
# Accuracy
::accuracy(fc_h14, data_Sevilla) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Sevilla Test 139. 228. 158. 15.1 21.4 1.10 0.834 0.436
2 arima_at2 Sevilla Test 141. 229. 158. 15.5 21.5 1.11 0.839 0.435
3 Snaive Sevilla Test -297. 338. 299. -50.4 51.1 2.09 1.24 0.411
21 days
# Plots
%>%
fc_h21 autoplot(
%>%
data_Sevilla_test filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(21))
+
) labs(y = "Nº cases", title = "Sevilla - forecast h21")
%>%
fc_h21 autoplot(
%>%
data_Sevilla filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(21) &
> as.Date("2022-01-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Sevilla - forecast h21")
# Accuracy
::accuracy(fc_h21, data_Sevilla) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Sevilla Test 128. 214. 157. 11.9 26.2 1.10 0.784 0.502
2 arima_at2 Sevilla Test 130. 215. 158. 12.3 26.3 1.11 0.789 0.501
3 Snaive Sevilla Test -380. 430. 381. -76.5 77.0 2.67 1.58 0.507
57 days
# Plots
%>%
fc_h57 autoplot(
%>%
data_Sevilla_test filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(57))
+
) labs(y = "Nº cases", title = "Sevilla - forecast h57")
%>%
fc_h57 autoplot(
%>%
data_Sevilla filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(57) &
> as.Date("2022-01-01", format = "%Y-%m-%d"))
fecha +
) labs(y = "Nº cases", title = "Sevilla - forecast h57")
# Accuracy
::accuracy(fc_h57, data_Sevilla) fabletools
# A tibble: 3 × 11
.model provincia .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Sevilla Test -36.1 203. 151. -Inf Inf 1.06 0.744 0.710
2 arima_at2 Sevilla Test -34.1 203. 151. -Inf Inf 1.05 0.743 0.709
3 Snaive Sevilla Test -737. 850. 738. -Inf Inf 5.16 3.11 0.642