Multivariate (temperature) prediction

Load packages:

pacman::p_load(
      here,      # file locator
      tidyverse, # data management and ggplot2 graphics
      skimr,     # get overview of data
      janitor,   # produce and adorn tabulations and cross-tabulations
      tsibble,
      fable,
      feasts,
      fabletools,
      lubridate
)

covid_data <- readRDS(here("data", "clean", "final_covid_data.rds"))

The temporal limits of our prediction will be:

Data as of June 14th 2020 which is considered the beginning of the second epidemiological period in Spain
In the first case scenario, data previous the start of the sixth wave of covid-19 (14 oct 2021).
In the second case scenario, all the available data (limited by mobility information 29 dec 2021)

Before sixth epidemiological period

Asturias

data_asturias <- covid_data %>% 
      filter(provincia == "Asturias") %>% 
      filter(fecha > as.Date("2020-06-14", format = "%Y-%m-%d")) %>% 
      select(provincia, fecha, num_casos, tmed) %>% 
      drop_na() %>% 
      as_tsibble(key = provincia, index = fecha)
data_asturias

# A tsibble: 653 x 4 [1D]
# Key:       provincia [1]
   provincia fecha      num_casos  tmed
   <chr>     <date>         <dbl> <dbl>
 1 Asturias  2020-06-15         0  16  
 2 Asturias  2020-06-16         1  15.6
 3 Asturias  2020-06-17         0  15.6
 4 Asturias  2020-06-18         0  15.1
 5 Asturias  2020-06-19         0  14.8
 6 Asturias  2020-06-20         0  16.8
 7 Asturias  2020-06-21         0  18.2
 8 Asturias  2020-06-22         0  18.2
 9 Asturias  2020-06-23         0  20  
10 Asturias  2020-06-24         0  18.2
# … with 643 more rows

data_asturias_train <- data_asturias %>% 
      filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d"))
summary(data_asturias_train$fecha)

        Min.      1st Qu.       Median         Mean      3rd Qu.         Max. 
"2020-06-15" "2020-10-14" "2021-02-13" "2021-02-13" "2021-06-14" "2021-10-14"

data_asturias_test <- data_asturias %>% 
      filter(fecha > as.Date("2021-10-14", format = "%Y-%m-%d"))
summary(data_asturias_test$fecha)

        Min.      1st Qu.       Median         Mean      3rd Qu.         Max. 
"2021-10-15" "2021-11-25" "2022-01-05" "2022-01-05" "2022-02-15" "2022-03-29"

# Lamba for num_casos
lambda_asturias_casos <- data_asturias %>%
      features(num_casos, features = guerrero) %>%
      pull(lambda_guerrero)
lambda_asturias_casos

[1] 0.2278229

# Lamba for average temp
lambda_asturias_tmed <- data_asturias %>%
      features(tmed, features = guerrero) %>%
      pull(lambda_guerrero)
lambda_asturias_tmed

[1] 1.056074

fit_model <- data_asturias_train %>%
      model(
            arima_at1 = ARIMA(box_cox(num_casos, lambda_asturias_casos) ~ 
                                    box_cox(tmed, lambda_asturias_tmed)),
            arima_at2 = ARIMA(box_cox(num_casos, lambda_asturias_casos) ~ 
                                    box_cox(tmed, lambda_asturias_tmed),
                              stepwise = FALSE, approx = FALSE),
            Snaive = SNAIVE(box_cox(num_casos, lambda_asturias_casos))
      )

# Show and report model
fit_model

# A mable: 1 x 4
# Key:     provincia [1]
  provincia                             arima_at1                   arima_at2
  <chr>                                   <model>                     <model>
1 Asturias  <LM w/ ARIMA(3,1,1)(2,0,0)[7] errors> <LM w/ ARIMA(2,1,4) errors>
# … with 1 more variable: Snaive <model>

glance(fit_model)

# A tibble: 3 × 9
  provincia .model    sigma2 log_lik   AIC  AICc   BIC ar_roots   ma_roots 
  <chr>     <chr>      <dbl>   <dbl> <dbl> <dbl> <dbl> <list>     <list>   
1 Asturias  arima_at1  0.946   -673. 1362. 1362. 1396. <cpl [17]> <cpl [1]>
2 Asturias  arima_at2  0.912   -664. 1345. 1345. 1378. <cpl [2]>  <cpl [4]>
3 Asturias  Snaive     2.32      NA    NA    NA    NA  <NULL>     <NULL>

fit_model %>% 
      pivot_longer(-provincia,
                   names_to = ".model",
                   values_to = "orders") %>% 
      left_join(glance(fit_model), by = c("provincia", ".model")) %>% 
      arrange(AICc) %>% 
      select(.model:BIC)

# A mable: 3 x 7
# Key:     .model [3]
  .model                                 orders sigma2 log_lik   AIC  AICc   BIC
  <chr>                                 <model>  <dbl>   <dbl> <dbl> <dbl> <dbl>
1 arima_…           <LM w/ ARIMA(2,1,4) errors>  0.912   -664. 1345. 1345. 1378.
2 arima_… <LM w/ ARIMA(3,1,1)(2,0,0)[7] errors>  0.946   -673. 1362. 1362. 1396.
3 Snaive                               <SNAIVE>  2.32      NA    NA    NA    NA

# We use a Ljung-Box test >> large p-value, confirms residuals are similar / considered to white noise
fit_model %>% 
      select(Snaive) %>% 
      gg_tsresiduals()

fit_model %>% 
      select(arima_at1) %>% 
      gg_tsresiduals()

fit_model %>% 
      select(arima_at2) %>% 
      gg_tsresiduals()

augment(fit_model) %>%
      features(.innov, ljung_box, lag=7)

# A tibble: 3 × 4
  provincia .model    lb_stat lb_pvalue
  <chr>     <chr>       <dbl>     <dbl>
1 Asturias  arima_at1   10.6      0.159
2 Asturias  arima_at2    8.12     0.322
3 Asturias  Snaive     629.       0

augment(fit_model) %>%
      features(.innov, ljung_box, lag=14)

# A tibble: 3 × 4
  provincia .model    lb_stat lb_pvalue
  <chr>     <chr>       <dbl>     <dbl>
1 Asturias  arima_at1    32.8   0.00309
2 Asturias  arima_at2    23.6   0.0509 
3 Asturias  Snaive      892.    0

augment(fit_model) %>%
      features(.innov, ljung_box, lag=21)

# A tibble: 3 × 4
  provincia .model    lb_stat lb_pvalue
  <chr>     <chr>       <dbl>     <dbl>
1 Asturias  arima_at1    49.1  0.000485
2 Asturias  arima_at2    33.3  0.0432  
3 Asturias  Snaive      927.   0

augment(fit_model) %>%
      features(.innov, ljung_box, lag=90)

# A tibble: 3 × 4
  provincia .model    lb_stat lb_pvalue
  <chr>     <chr>       <dbl>     <dbl>
1 Asturias  arima_at1   118.     0.0237
2 Asturias  arima_at2    86.6    0.582 
3 Asturias  Snaive     1195.     0

# To predict future values of infections, we need to pass the model future/prediction values of the complementary variables tmed and mobility
# We are going to select those from the test dataset
data_fc_h7 <- data_asturias_test %>%
      select(-num_casos) %>%
      slice(1:7)
data_fc_h14 <- data_asturias_test %>%
      select(-num_casos) %>%
      slice(1:14)
data_fc_h21 <- data_asturias_test %>%
      select(-num_casos) %>%
      slice(1:21)
data_fc_h90 <- data_asturias_test %>%
      select(-num_casos) %>%
      slice(1:90)

# Significant spikes out of 30 is still consistent with white noise
# To be sure, use a Ljung-Box test, which has a large p-value, confirming that
# the residuals are similar to white noise.
# Note that the alternative models also passed this test.
# 
# Forecast
fc_h7  <- fabletools::forecast(fit_model, new_data = data_fc_h7)
fc_h14 <- fabletools::forecast(fit_model, new_data = data_fc_h14)
fc_h21 <- fabletools::forecast(fit_model, new_data = data_fc_h21)
fc_h90 <- fabletools::forecast(fit_model, new_data = data_fc_h90)

7 days

# Plots
fc_h7 %>%
      autoplot(
            data_asturias_test %>% 
                  filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(9))
      ) +
      labs(y = "Nº cases", title = "Asturias - forecast h7")

fc_h7 %>%
      autoplot(
            data_asturias %>% 
                  filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(9) &
                               fecha > as.Date("2021-07-01", format = "%Y-%m-%d"))
      ) +
      labs(y = "Nº cases", title = "Asturias - forecast h7")

# Accuracy
fabletools::accuracy(fc_h7, data_asturias)

# A tibble: 3 × 11
  .model    provincia .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE    ACF1
  <chr>     <chr>     <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
1 arima_at1 Asturias  Test   5.43 10.1   6.80  14.8  27.6 0.137 0.123 0.198  
2 arima_at2 Asturias  Test   6.02  9.93  7.26  18.2  30.5 0.146 0.121 0.384  
3 Snaive    Asturias  Test   2.41 14.0   9.98 -10.1  49.0 0.201 0.171 0.00766

14 days

# Plots
fc_h14 %>%
      autoplot(
            data_asturias_test %>% 
                  filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(16))
      ) +
      labs(y = "Nº cases", title = "Asturias - forecast h14")

fc_h14 %>%
      autoplot(
            data_asturias %>% 
                  filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(16) &
                               fecha > as.Date("2021-07-01", format = "%Y-%m-%d"))
      ) +
      labs(y = "Nº cases", title = "Asturias - forecast h14")

# Accuracy
fabletools::accuracy(fc_h14, data_asturias)

# A tibble: 3 × 11
  .model    provincia .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE   ACF1
  <chr>     <chr>     <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
1 arima_at1 Asturias  Test  10.3   14.2  11.0 29.1   35.5 0.222 0.173  0.171
2 arima_at2 Asturias  Test  10.9   14.1  11.5 32.0   38.1 0.232 0.172  0.303
3 Snaive    Asturias  Test   6.69  16.0  11.4  8.17  41.5 0.229 0.195 -0.156

21 days

# Plots
fc_h21 %>%
      autoplot(
            data_asturias_test %>% 
                  filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(23))
      ) +
      labs(y = "Nº cases", title = "Asturias - forecast h21")

fc_h21 %>%
      autoplot(
            data_asturias %>% 
                  filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(23) &
                               fecha > as.Date("2021-07-01", format = "%Y-%m-%d"))
      ) +
      labs(y = "Nº cases", title = "Asturias - forecast h21")

# Accuracy
fabletools::accuracy(fc_h21, data_asturias)

# A tibble: 3 × 11
  .model    provincia .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE    ACF1
  <chr>     <chr>     <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
1 arima_at1 Asturias  Test   9.72  13.4  10.4 27.1   32.5 0.209 0.164  0.0341
2 arima_at2 Asturias  Test  10.6   13.7  11.0 31.0   35.1 0.222 0.167  0.127 
3 Snaive    Asturias  Test   5.77  15.6  11.5  6.52  40.8 0.232 0.190 -0.223

90 days

# Plots
fc_h90 %>%
      autoplot(
            data_asturias_test %>% 
                  filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(92))
      ) +
      labs(y = "Nº cases", title = "Asturias - forecast h90")

fc_h90 %>%
      autoplot(
            data_asturias %>% 
                  filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(92) &
                               fecha > as.Date("2021-07-01", format = "%Y-%m-%d"))
      ) +
      labs(y = "Nº cases", title = "Asturias - forecast h90")

# Accuracy
fabletools::accuracy(fc_h90, data_asturias)

# A tibble: 3 × 11
  .model    provincia .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
  <chr>     <chr>     <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Asturias  Test   704. 1211.  704.  67.5  70.0  14.2  14.8 0.925
2 arima_at2 Asturias  Test   713. 1221.  714.  71.1  72.9  14.4  14.9 0.926
3 Snaive    Asturias  Test   702. 1213.  704.  60.0  71.7  14.2  14.8 0.925

Barcelona

data_Barcelona <- covid_data %>% 
      filter(provincia == "Barcelona") %>% 
      filter(fecha > as.Date("2020-06-14", format = "%Y-%m-%d")) %>% 
      select(provincia, fecha, num_casos, tmed) %>% 
      drop_na() %>% 
      as_tsibble(key = provincia, index = fecha)
data_Barcelona

# A tsibble: 653 x 4 [1D]
# Key:       provincia [1]
   provincia fecha      num_casos  tmed
   <chr>     <date>         <dbl> <dbl>
 1 Barcelona 2020-06-15        62  21.2
 2 Barcelona 2020-06-16        66  20.8
 3 Barcelona 2020-06-17        70  20.3
 4 Barcelona 2020-06-18        68  18.8
 5 Barcelona 2020-06-19        65  20.4
 6 Barcelona 2020-06-20        53  21.8
 7 Barcelona 2020-06-21        38  22.8
 8 Barcelona 2020-06-22        69  23.8
 9 Barcelona 2020-06-23        95  23.4
10 Barcelona 2020-06-24        44  23.6
# … with 643 more rows

data_Barcelona_train <- data_Barcelona %>% 
      filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d"))
summary(data_Barcelona_train$fecha)

        Min.      1st Qu.       Median         Mean      3rd Qu.         Max. 
"2020-06-15" "2020-10-14" "2021-02-13" "2021-02-13" "2021-06-14" "2021-10-14"

data_Barcelona_test <- data_Barcelona %>% 
      filter(fecha > as.Date("2021-10-14", format = "%Y-%m-%d"))
summary(data_Barcelona_test$fecha)

        Min.      1st Qu.       Median         Mean      3rd Qu.         Max. 
"2021-10-15" "2021-11-25" "2022-01-05" "2022-01-05" "2022-02-15" "2022-03-29"

# Lamba for num_casos
lambda_Barcelona_casos <- data_Barcelona %>%
      features(num_casos, features = guerrero) %>%
      pull(lambda_guerrero)
lambda_Barcelona_casos

[1] 0.1434101

# Lamba for average temp
lambda_Barcelona_tmed <- data_Barcelona %>%
      features(tmed, features = guerrero) %>%
      pull(lambda_guerrero)
lambda_Barcelona_tmed

[1] 1.098957

fit_model <- data_Barcelona_train %>%
      model(
            arima_at1 = ARIMA(box_cox(num_casos, lambda_Barcelona_casos) ~ 
                                    box_cox(tmed, lambda_Barcelona_tmed)),
            arima_at2 = ARIMA(box_cox(num_casos, lambda_Barcelona_casos) ~ 
                                    box_cox(tmed, lambda_Barcelona_tmed),
                              stepwise = FALSE, approx = FALSE),
            Snaive = SNAIVE(box_cox(num_casos, lambda_Barcelona_casos))
      )

# Show and report model
fit_model

# A mable: 1 x 4
# Key:     provincia [1]
  provincia                             arima_at1
  <chr>                                   <model>
1 Barcelona <LM w/ ARIMA(0,1,3)(0,1,2)[7] errors>
# … with 2 more variables: arima_at2 <model>, Snaive <model>

glance(fit_model)

# A tibble: 3 × 9
  provincia .model    sigma2 log_lik   AIC  AICc   BIC ar_roots  ma_roots  
  <chr>     <chr>      <dbl>   <dbl> <dbl> <dbl> <dbl> <list>    <list>    
1 Barcelona arima_at1  0.195   -291.  595.  595.  624. <cpl [0]> <cpl [17]>
2 Barcelona arima_at2  0.187   -280.  575.  575.  604. <cpl [1]> <cpl [16]>
3 Barcelona Snaive     0.964     NA    NA    NA    NA  <NULL>    <NULL>

fit_model %>% 
      pivot_longer(-provincia,
                   names_to = ".model",
                   values_to = "orders") %>% 
      left_join(glance(fit_model), by = c("provincia", ".model")) %>% 
      arrange(AICc) %>% 
      select(.model:BIC)

# A mable: 3 x 7
# Key:     .model [3]
  .model                                 orders sigma2 log_lik   AIC  AICc   BIC
  <chr>                                 <model>  <dbl>   <dbl> <dbl> <dbl> <dbl>
1 arima_… <LM w/ ARIMA(1,1,2)(0,1,2)[7] errors>  0.187   -280.  575.  575.  604.
2 arima_… <LM w/ ARIMA(0,1,3)(0,1,2)[7] errors>  0.195   -291.  595.  595.  624.
3 Snaive                               <SNAIVE>  0.964     NA    NA    NA    NA

# We use a Ljung-Box test >> large p-value, confirms residuals are similar / considered to white noise
fit_model %>% 
      select(Snaive) %>% 
      gg_tsresiduals()

fit_model %>% 
      select(arima_at1) %>% 
      gg_tsresiduals()

fit_model %>% 
      select(arima_at2) %>% 
      gg_tsresiduals()

augment(fit_model) %>%
      features(.innov, ljung_box, lag=7)

# A tibble: 3 × 4
  provincia .model    lb_stat lb_pvalue
  <chr>     <chr>       <dbl>     <dbl>
1 Barcelona arima_at1   13.5     0.0615
2 Barcelona arima_at2    3.11    0.874 
3 Barcelona Snaive    1610.      0

augment(fit_model) %>%
      features(.innov, ljung_box, lag=14)

# A tibble: 3 × 4
  provincia .model    lb_stat lb_pvalue
  <chr>     <chr>       <dbl>     <dbl>
1 Barcelona arima_at1   29.3    0.00944
2 Barcelona arima_at2    9.65   0.787  
3 Barcelona Snaive    2198.     0

augment(fit_model) %>%
      features(.innov, ljung_box, lag=21)

# A tibble: 3 × 4
  provincia .model    lb_stat lb_pvalue
  <chr>     <chr>       <dbl>     <dbl>
1 Barcelona arima_at1    32.8    0.0484
2 Barcelona arima_at2    14.3    0.856 
3 Barcelona Snaive     2259.     0

augment(fit_model) %>%
      features(.innov, ljung_box, lag=90)

# A tibble: 3 × 4
  provincia .model    lb_stat lb_pvalue
  <chr>     <chr>       <dbl>     <dbl>
1 Barcelona arima_at1   133.    0.00214
2 Barcelona arima_at2    86.8   0.576  
3 Barcelona Snaive     3453.    0

# To predict future values of infections, we need to pass the model future/prediction values of the complementary variables tmed and mobility
# We are going to select those from the test dataset
data_fc_h7 <- data_Barcelona_test %>% 
      select(-num_casos) %>% 
      slice(1:7)
data_fc_h14 <- data_Barcelona_test %>% 
      select(-num_casos) %>% 
      slice(1:14)
data_fc_h21 <- data_Barcelona_test %>% 
      select(-num_casos) %>% 
      slice(1:21)
data_fc_h90 <- data_Barcelona_test %>% 
      select(-num_casos) %>% 
      slice(1:90)

# Significant spikes out of 30 is still consistent with white noise
# To be sure, use a Ljung-Box test, which has a large p-value, confirming that
# the residuals are similar to white noise.
# Note that the alternative models also passed this test.
# 
# Forecast
fc_h7  <- fabletools::forecast(fit_model, new_data = data_fc_h7)
fc_h14 <- fabletools::forecast(fit_model, new_data = data_fc_h14)
fc_h21 <- fabletools::forecast(fit_model, new_data = data_fc_h21)
fc_h90 <- fabletools::forecast(fit_model, new_data = data_fc_h90)

7 days

# Plots
fc_h7 %>%
      autoplot(
            data_Barcelona_test %>% 
                  filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(9))
      ) +
      labs(y = "Nº cases", title = "Barcelona - forecast h7")

fc_h7 %>%
      autoplot(
            data_Barcelona %>% 
                  filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(9) &
                               fecha > as.Date("2021-07-01", format = "%Y-%m-%d"))
      ) +
      labs(y = "Nº cases", title = "Barcelona - forecast h7")

# Accuracy
fabletools::accuracy(fc_h7, data_Barcelona)

# A tibble: 3 × 11
  .model    provincia .type    ME  RMSE   MAE     MPE  MAPE  MASE  RMSSE  ACF1
  <chr>     <chr>     <chr> <dbl> <dbl> <dbl>   <dbl> <dbl> <dbl>  <dbl> <dbl>
1 arima_at1 Barcelona Test   16.6  64.7  56.2   0.729  30.0 0.149 0.0975 0.327
2 arima_at2 Barcelona Test   19.8  67.5  56.3   2.11   29.6 0.150 0.102  0.211
3 Snaive    Barcelona Test  -21.0  79.0  67.8 -15.5    40.3 0.180 0.119  0.184

14 days

# Plots
fc_h14 %>%
      autoplot(
            data_Barcelona_test %>% 
                  filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(16))
      ) +
      labs(y = "Nº cases", title = "Barcelona - forecast h14")

fc_h14 %>%
      autoplot(
            data_Barcelona %>% 
                  filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(16) &
                               fecha > as.Date("2021-07-01", format = "%Y-%m-%d"))
      ) +
      labs(y = "Nº cases", title = "Barcelona - forecast h14")

# Accuracy
fabletools::accuracy(fc_h14, data_Barcelona)

# A tibble: 3 × 11
  .model    provincia .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE   ACF1
  <chr>     <chr>     <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
1 arima_at1 Barcelona Test   36.6  75.7  59.2  10.6  27.1 0.157 0.114 0.308 
2 arima_at2 Barcelona Test   39.1  78.5  60.5  11.7  27.4 0.161 0.118 0.245 
3 Snaive    Barcelona Test  -15.3  87.1  76.0 -12.2  40.4 0.202 0.131 0.0610

21 days

# Plots
fc_h21 %>%
      autoplot(
            data_Barcelona_test %>% 
                  filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(23))
      ) +
      labs(y = "Nº cases", title = "Barcelona - forecast h21")

fc_h21 %>%
      autoplot(
            data_Barcelona %>% 
                  filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(23) &
                               fecha > as.Date("2021-07-01", format = "%Y-%m-%d"))
      ) +
      labs(y = "Nº cases", title = "Barcelona - forecast h21")

# Accuracy
fabletools::accuracy(fc_h21, data_Barcelona)

# A tibble: 3 × 11
  .model    provincia .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE   ACF1
  <chr>     <chr>     <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
1 arima_at1 Barcelona Test  60.6   109.  78.0  16.3  29.7 0.207 0.165  0.206
2 arima_at2 Barcelona Test  58.3   106.  76.7  15.5  29.5 0.204 0.160  0.122
3 Snaive    Barcelona Test  -2.10  119.  90.4 -10.9  41.3 0.240 0.179 -0.239

90 days

# Plots
fc_h90 %>%
      autoplot(
            data_Barcelona_test %>% 
                  filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(92))
      ) +
      labs(y = "Nº cases", title = "Barcelona - forecast h90")

fc_h90 %>%
      autoplot(
            data_Barcelona %>% 
                  filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(92) &
                               fecha > as.Date("2021-07-01", format = "%Y-%m-%d"))
      ) +
      labs(y = "Nº cases", title = "Barcelona - forecast h90")

# Accuracy
fabletools::accuracy(fc_h90, data_Barcelona)

# A tibble: 3 × 11
  .model    provincia .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
  <chr>     <chr>     <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Barcelona Test  5077. 9032. 5081.  72.5  75.6  13.5  13.6 0.868
2 arima_at2 Barcelona Test  4972. 8914. 4977.  68.6  71.8  13.2  13.4 0.868
3 Snaive    Barcelona Test  4916. 8890. 4939.  58.7  71.4  13.1  13.4 0.870

Madrid

data_Madrid <- covid_data %>% 
      filter(provincia == "Madrid") %>% 
      filter(fecha > as.Date("2020-06-14", format = "%Y-%m-%d")) %>% 
      select(provincia, fecha, num_casos, tmed) %>% 
      drop_na() %>% 
      as_tsibble(key = provincia, index = fecha)
data_Madrid

# A tsibble: 653 x 4 [1D]
# Key:       provincia [1]
   provincia fecha      num_casos  tmed
   <chr>     <date>         <dbl> <dbl>
 1 Madrid    2020-06-15       153  20.7
 2 Madrid    2020-06-16        91  21  
 3 Madrid    2020-06-17        93  20.2
 4 Madrid    2020-06-18        85  21.9
 5 Madrid    2020-06-19        78  22.6
 6 Madrid    2020-06-20        67  23.4
 7 Madrid    2020-06-21        42  25  
 8 Madrid    2020-06-22        60  27.3
 9 Madrid    2020-06-23        68  28.4
10 Madrid    2020-06-24        49  28  
# … with 643 more rows

data_Madrid_train <- data_Madrid %>% 
      filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d"))
summary(data_Madrid_train$fecha)

        Min.      1st Qu.       Median         Mean      3rd Qu.         Max. 
"2020-06-15" "2020-10-14" "2021-02-13" "2021-02-13" "2021-06-14" "2021-10-14"

data_Madrid_test <- data_Madrid %>% 
      filter(fecha > as.Date("2021-10-14", format = "%Y-%m-%d"))
summary(data_Madrid_test$fecha)

        Min.      1st Qu.       Median         Mean      3rd Qu.         Max. 
"2021-10-15" "2021-11-25" "2022-01-05" "2022-01-05" "2022-02-15" "2022-03-29"

# Lamba for num_casos
lambda_Madrid_casos <- data_Madrid %>%
      features(num_casos, features = guerrero) %>%
      pull(lambda_guerrero)
lambda_Madrid_casos

[1] 0.06260614

# Lamba for average temp
lambda_Madrid_tmed <- data_Madrid %>%
      features(tmed, features = guerrero) %>%
      pull(lambda_guerrero)
lambda_Madrid_tmed

[1] 0.936519

fit_model <- data_Madrid_train %>%
      model(
            arima_at1 = ARIMA(box_cox(num_casos, lambda_Madrid_casos) ~ 
                                    box_cox(tmed, lambda_Madrid_tmed)),
            arima_at2 = ARIMA(box_cox(num_casos, lambda_Madrid_casos) ~ 
                                    box_cox(tmed, lambda_Madrid_tmed),
                              stepwise = FALSE, approx = FALSE),
            Snaive = SNAIVE(box_cox(num_casos, lambda_Madrid_casos))
      )

# Show and report model
fit_model

# A mable: 1 x 4
# Key:     provincia [1]
  provincia                             arima_at1
  <chr>                                   <model>
1 Madrid    <LM w/ ARIMA(2,1,1)(1,1,1)[7] errors>
# … with 2 more variables: arima_at2 <model>, Snaive <model>

glance(fit_model)

# A tibble: 3 × 9
  provincia .model    sigma2 log_lik   AIC  AICc   BIC ar_roots  ma_roots 
  <chr>     <chr>      <dbl>   <dbl> <dbl> <dbl> <dbl> <list>    <list>   
1 Madrid    arima_at1 0.0669   -33.4  80.8  81.1  110. <cpl [9]> <cpl [8]>
2 Madrid    arima_at2 0.0669   -33.4  80.8  81.1  110. <cpl [9]> <cpl [8]>
3 Madrid    Snaive    0.291     NA    NA    NA     NA  <NULL>    <NULL>

fit_model %>% 
      pivot_longer(-provincia,
                   names_to = ".model",
                   values_to = "orders") %>% 
      left_join(glance(fit_model), by = c("provincia", ".model")) %>% 
      arrange(AICc) %>% 
      select(.model:BIC)

# A mable: 3 x 7
# Key:     .model [3]
  .model                                 orders sigma2 log_lik   AIC  AICc   BIC
  <chr>                                 <model>  <dbl>   <dbl> <dbl> <dbl> <dbl>
1 arima_… <LM w/ ARIMA(2,1,1)(1,1,1)[7] errors> 0.0669   -33.4  80.8  81.1  110.
2 arima_… <LM w/ ARIMA(2,1,1)(1,1,1)[7] errors> 0.0669   -33.4  80.8  81.1  110.
3 Snaive                               <SNAIVE> 0.291     NA    NA    NA     NA

# We use a Ljung-Box test >> large p-value, confirms residuals are similar / considered to white noise
fit_model %>% 
      select(Snaive) %>% 
      gg_tsresiduals()

fit_model %>% 
      select(arima_at1) %>% 
      gg_tsresiduals()

fit_model %>% 
      select(arima_at2) %>% 
      gg_tsresiduals()

augment(fit_model) %>%
      features(.innov, ljung_box, lag=7)

# A tibble: 3 × 4
  provincia .model    lb_stat lb_pvalue
  <chr>     <chr>       <dbl>     <dbl>
1 Madrid    arima_at1    10.8     0.146
2 Madrid    arima_at2    10.8     0.146
3 Madrid    Snaive     1678.      0

augment(fit_model) %>%
      features(.innov, ljung_box, lag=14)

# A tibble: 3 × 4
  provincia .model    lb_stat lb_pvalue
  <chr>     <chr>       <dbl>     <dbl>
1 Madrid    arima_at1    39.8  0.000272
2 Madrid    arima_at2    39.8  0.000272
3 Madrid    Snaive     2594.   0

augment(fit_model) %>%
      features(.innov, ljung_box, lag=21)

# A tibble: 3 × 4
  provincia .model    lb_stat lb_pvalue
  <chr>     <chr>       <dbl>     <dbl>
1 Madrid    arima_at1    48.4  0.000605
2 Madrid    arima_at2    48.4  0.000605
3 Madrid    Snaive     2854.   0

augment(fit_model) %>%
      features(.innov, ljung_box, lag=90)

# A tibble: 3 × 4
  provincia .model    lb_stat lb_pvalue
  <chr>     <chr>       <dbl>     <dbl>
1 Madrid    arima_at1    106.     0.126
2 Madrid    arima_at2    106.     0.126
3 Madrid    Snaive      3856.     0

# To predict future values of infections, we need to pass the model future/prediction values of the complementary variables tmed and mobility
# We are going to select those from the test dataset
data_fc_h7 <- data_Madrid_test %>% 
      select(-num_casos) %>% 
      slice(1:7)
data_fc_h14 <- data_Madrid_test %>% 
      select(-num_casos) %>% 
      slice(1:14)
data_fc_h21 <- data_Madrid_test %>% 
      select(-num_casos) %>% 
      slice(1:21)
data_fc_h90 <- data_Madrid_test %>% 
      select(-num_casos) %>% 
      slice(1:90)

# Significant spikes out of 30 is still consistent with white noise
# To be sure, use a Ljung-Box test, which has a large p-value, confirming that
# the residuals are similar to white noise.
# Note that the alternative models also passed this test.
# 
# Forecast
fc_h7  <- fabletools::forecast(fit_model, new_data = data_fc_h7)
fc_h14 <- fabletools::forecast(fit_model, new_data = data_fc_h14)
fc_h21 <- fabletools::forecast(fit_model, new_data = data_fc_h21)
fc_h90 <- fabletools::forecast(fit_model, new_data = data_fc_h90)

7 days

# Plots
fc_h7 %>%
      autoplot(
            data_Madrid_test %>% 
                  filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(9))
      ) +
      labs(y = "Nº cases", title = "Madrid - forecast h7")

fc_h7 %>%
      autoplot(
            data_Madrid %>% 
                  filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(9) &
                               fecha > as.Date("2021-07-01", format = "%Y-%m-%d"))
      ) +
      labs(y = "Nº cases", title = "Madrid - forecast h7")

# Accuracy
fabletools::accuracy(fc_h7, data_Madrid)

# A tibble: 3 × 11
  .model    provincia .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE    ACF1
  <chr>     <chr>     <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
1 arima_at1 Madrid    Test  135.   163.  144. 39.4   47.5 0.339 0.242 -0.0363
2 arima_at2 Madrid    Test  135.   163.  144. 39.4   47.5 0.339 0.242 -0.0363
3 Snaive    Madrid    Test   32.1  124.  105. -7.03  48.2 0.247 0.185 -0.109

14 days

# Plots
fc_h14 %>%
      autoplot(
            data_Madrid_test %>% 
                  filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(16))
      ) +
      labs(y = "Nº cases", title = "Madrid - forecast h14")

fc_h14 %>%
      autoplot(
            data_Madrid %>% 
                  filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(16) &
                               fecha > as.Date("2021-07-01", format = "%Y-%m-%d"))
      ) +
      labs(y = "Nº cases", title = "Madrid - forecast h14")

# Accuracy
fabletools::accuracy(fc_h14, data_Madrid)

# A tibble: 3 × 11
  .model    provincia .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE    ACF1
  <chr>     <chr>     <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
1 arima_at1 Madrid    Test  150.   178.  154. 45.1   49.4 0.364 0.265  0.0893
2 arima_at2 Madrid    Test  150.   178.  154. 45.1   49.4 0.364 0.265  0.0893
3 Snaive    Madrid    Test   26.3  126.  105. -8.58  46.6 0.246 0.188 -0.0472

21 days

# Plots
fc_h21 %>%
      autoplot(
            data_Madrid_test %>% 
                  filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(23))
      ) +
      labs(y = "Nº cases", title = "Madrid - forecast h21")

fc_h21 %>%
      autoplot(
            data_Madrid %>% 
                  filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(23) &
                               fecha > as.Date("2021-07-01", format = "%Y-%m-%d"))
      ) +
      labs(y = "Nº cases", title = "Madrid - forecast h21")

# Accuracy
fabletools::accuracy(fc_h21, data_Madrid)

# A tibble: 3 × 11
  .model    provincia .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
  <chr>     <chr>     <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Madrid    Test  174.   210.  177. 50.0   52.8 0.418 0.313 0.287
2 arima_at2 Madrid    Test  174.   210.  177. 50.0   52.8 0.418 0.313 0.287
3 Snaive    Madrid    Test   34.8  140.  117. -8.01  48.6 0.275 0.209 0.131

90 days

# Plots
fc_h90 %>%
      autoplot(
            data_Madrid_test %>% 
                  filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(92))
      ) +
      labs(y = "Nº cases", title = "Madrid - forecast h90")

fc_h90 %>%
      autoplot(
            data_Madrid %>% 
                  filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(92) &
                               fecha > as.Date("2021-07-01", format = "%Y-%m-%d"))
      ) +
      labs(y = "Nº cases", title = "Madrid - forecast h90")

# Accuracy
fabletools::accuracy(fc_h90, data_Madrid)

# A tibble: 3 × 11
  .model    provincia .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
  <chr>     <chr>     <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Madrid    Test  5007. 8744. 5008.  81.8  82.4  11.8  13.0 0.840
2 arima_at2 Madrid    Test  5007. 8744. 5008.  81.8  82.4  11.8  13.0 0.840
3 Snaive    Madrid    Test  4763. 8564. 4792.  51.8  68.9  11.3  12.7 0.840

Malaga

data_Malaga <- covid_data %>% 
      filter(provincia == "Málaga") %>% 
      filter(fecha > as.Date("2020-06-14", format = "%Y-%m-%d")) %>% 
      select(provincia, fecha, num_casos, tmed) %>% 
      drop_na() %>% 
      as_tsibble(key = provincia, index = fecha)
data_Malaga

# A tsibble: 653 x 4 [1D]
# Key:       provincia [1]
   provincia fecha      num_casos  tmed
   <chr>     <date>         <dbl> <dbl>
 1 Málaga    2020-06-15         1  27.2
 2 Málaga    2020-06-16         1  27.8
 3 Málaga    2020-06-17         2  26.9
 4 Málaga    2020-06-18         2  21.6
 5 Málaga    2020-06-19         1  24.7
 6 Málaga    2020-06-20         4  22.6
 7 Málaga    2020-06-21         4  22.7
 8 Málaga    2020-06-22         6  25  
 9 Málaga    2020-06-23         8  25.6
10 Málaga    2020-06-24        65  24.5
# … with 643 more rows

data_Malaga_train <- data_Malaga %>% 
      filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d"))
summary(data_Malaga_train$fecha)

        Min.      1st Qu.       Median         Mean      3rd Qu.         Max. 
"2020-06-15" "2020-10-14" "2021-02-13" "2021-02-13" "2021-06-14" "2021-10-14"

data_Malaga_test <- data_Malaga %>% 
      filter(fecha > as.Date("2021-10-14", format = "%Y-%m-%d"))
summary(data_Malaga_test$fecha)

        Min.      1st Qu.       Median         Mean      3rd Qu.         Max. 
"2021-10-15" "2021-11-25" "2022-01-05" "2022-01-05" "2022-02-15" "2022-03-29"

# Lamba for num_casos
lambda_Malaga_casos <- data_Malaga %>%
      features(num_casos, features = guerrero) %>%
      pull(lambda_guerrero)
lambda_Malaga_casos

[1] 0.2312255

# Lamba for average temp
lambda_Malaga_tmed <- data_Malaga %>%
      features(tmed, features = guerrero) %>%
      pull(lambda_guerrero)
lambda_Malaga_tmed

[1] 0.6288527

fit_model <- data_Malaga_train %>%
      model(
            arima_at1 = ARIMA(box_cox(num_casos, lambda_Malaga_casos) ~ 
                                    box_cox(tmed, lambda_Malaga_tmed)),
            arima_at2 = ARIMA(box_cox(num_casos, lambda_Malaga_casos) ~ 
                                    box_cox(tmed, lambda_Malaga_tmed),
                              stepwise = FALSE, approx = FALSE),
            Snaive = SNAIVE(box_cox(num_casos, lambda_Malaga_casos))
      )

# Show and report model
fit_model

# A mable: 1 x 4
# Key:     provincia [1]
  provincia                             arima_at1
  <chr>                                   <model>
1 Málaga    <LM w/ ARIMA(4,1,0)(2,0,0)[7] errors>
# … with 2 more variables: arima_at2 <model>, Snaive <model>

glance(fit_model)

# A tibble: 3 × 9
  provincia .model    sigma2 log_lik   AIC  AICc   BIC ar_roots   ma_roots 
  <chr>     <chr>      <dbl>   <dbl> <dbl> <dbl> <dbl> <list>     <list>   
1 Málaga    arima_at1  0.553   -545. 1105. 1105. 1139. <cpl [18]> <cpl [0]>
2 Málaga    arima_at2  0.550   -543. 1102. 1103. 1136. <cpl [14]> <cpl [4]>
3 Málaga    Snaive     2.15      NA    NA    NA    NA  <NULL>     <NULL>

fit_model %>% 
      pivot_longer(-provincia,
                   names_to = ".model",
                   values_to = "orders") %>% 
      left_join(glance(fit_model), by = c("provincia", ".model")) %>% 
      arrange(AICc) %>% 
      select(.model:BIC)

# A mable: 3 x 7
# Key:     .model [3]
  .model                                 orders sigma2 log_lik   AIC  AICc   BIC
  <chr>                                 <model>  <dbl>   <dbl> <dbl> <dbl> <dbl>
1 arima_… <LM w/ ARIMA(0,1,4)(2,0,0)[7] errors>  0.550   -543. 1102. 1103. 1136.
2 arima_… <LM w/ ARIMA(4,1,0)(2,0,0)[7] errors>  0.553   -545. 1105. 1105. 1139.
3 Snaive                               <SNAIVE>  2.15      NA    NA    NA    NA

# We use a Ljung-Box test >> large p-value, confirms residuals are similar / considered to white noise
fit_model %>% 
      select(Snaive) %>% 
      gg_tsresiduals()

fit_model %>% 
      select(arima_at1) %>% 
      gg_tsresiduals()

fit_model %>% 
      select(arima_at2) %>% 
      gg_tsresiduals()

augment(fit_model) %>%
      features(.innov, ljung_box, lag=7)

# A tibble: 3 × 4
  provincia .model    lb_stat lb_pvalue
  <chr>     <chr>       <dbl>     <dbl>
1 Málaga    arima_at1    6.37     0.497
2 Málaga    arima_at2    2.44     0.932
3 Málaga    Snaive    1361.       0

augment(fit_model) %>%
      features(.innov, ljung_box, lag=14)

# A tibble: 3 × 4
  provincia .model    lb_stat lb_pvalue
  <chr>     <chr>       <dbl>     <dbl>
1 Málaga    arima_at1    20.2     0.125
2 Málaga    arima_at2    17.1     0.252
3 Málaga    Snaive     1956.      0

augment(fit_model) %>%
      features(.innov, ljung_box, lag=21)

# A tibble: 3 × 4
  provincia .model    lb_stat lb_pvalue
  <chr>     <chr>       <dbl>     <dbl>
1 Málaga    arima_at1    26.7     0.179
2 Málaga    arima_at2    23.1     0.336
3 Málaga    Snaive     2100.      0

augment(fit_model) %>%
      features(.innov, ljung_box, lag=90)

# A tibble: 3 × 4
  provincia .model    lb_stat lb_pvalue
  <chr>     <chr>       <dbl>     <dbl>
1 Málaga    arima_at1    131.   0.00301
2 Málaga    arima_at2    119.   0.0227 
3 Málaga    Snaive      3195.   0

# To predict future values of infections, we need to pass the model future/prediction values of the complementary variables tmed and mobility
# We are going to select those from the test dataset
data_fc_h7 <- data_Malaga_test %>% 
      select(-num_casos) %>% 
      slice(1:7)
data_fc_h14 <- data_Malaga_test %>% 
      select(-num_casos) %>% 
      slice(1:14)
data_fc_h21 <- data_Malaga_test %>% 
      select(-num_casos) %>% 
      slice(1:21)
data_fc_h90 <- data_Malaga_test %>% 
      select(-num_casos) %>% 
      slice(1:90)

# Significant spikes out of 30 is still consistent with white noise
# To be sure, use a Ljung-Box test, which has a large p-value, confirming that
# the residuals are similar to white noise.
# Note that the alternative models also passed this test.
# 
# Forecast
fc_h7  <- fabletools::forecast(fit_model, new_data = data_fc_h7)
fc_h14 <- fabletools::forecast(fit_model, new_data = data_fc_h14)
fc_h21 <- fabletools::forecast(fit_model, new_data = data_fc_h21)
fc_h90 <- fabletools::forecast(fit_model, new_data = data_fc_h90)

7 days

# Plots
fc_h7 %>%
      autoplot(
            data_Malaga_test %>% 
                  filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(9))
      ) +
      labs(y = "Nº cases", title = "Malaga - forecast h7")

fc_h7 %>%
      autoplot(
            data_Malaga %>% 
                  filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(9) &
                               fecha > as.Date("2021-07-01", format = "%Y-%m-%d"))
      ) +
      labs(y = "Nº cases", title = "Malaga - forecast h7")

# Accuracy
fabletools::accuracy(fc_h7, data_Malaga)

# A tibble: 3 × 11
  .model    provincia .type    ME  RMSE   MAE    MPE  MAPE  MASE RMSSE   ACF1
  <chr>     <chr>     <chr> <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>  <dbl>
1 arima_at1 Málaga    Test  10.2   19.3  16.7   7.44  35.8 0.170 0.114 0.0661
2 arima_at2 Málaga    Test  10.7   19.8  17.2   8.32  36.8 0.175 0.117 0.0950
3 Snaive    Málaga    Test  -4.00  22.9  19.8 -27.0   52.2 0.201 0.135 0.0123

14 days

# Plots
fc_h14 %>%
      autoplot(
            data_Malaga_test %>% 
                  filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(16))
      ) +
      labs(y = "Nº cases", title = "Malaga - forecast h14")

fc_h14 %>%
      autoplot(
            data_Malaga %>% 
                  filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(16) &
                               fecha > as.Date("2021-07-01", format = "%Y-%m-%d"))
      ) +
      labs(y = "Nº cases", title = "Malaga - forecast h14")

# Accuracy
fabletools::accuracy(fc_h14, data_Malaga)

# A tibble: 3 × 11
  .model    provincia .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE    ACF1
  <chr>     <chr>     <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
1 arima_at1 Málaga    Test  13.4   22.2  18.0  14.1  32.1 0.183 0.131  0.179 
2 arima_at2 Málaga    Test  13.9   22.6  18.4  14.9  32.7 0.187 0.134  0.190 
3 Snaive    Málaga    Test  -3.86  22.7  18.7 -22.4  43.6 0.190 0.134 -0.0973

21 days

# Plots
fc_h21 %>%
      autoplot(
            data_Malaga_test %>% 
                  filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(23))
      ) +
      labs(y = "Nº cases", title = "Malaga - forecast h21")

fc_h21 %>%
      autoplot(
            data_Malaga %>% 
                  filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(23) &
                               fecha > as.Date("2021-07-01", format = "%Y-%m-%d"))
      ) +
      labs(y = "Nº cases", title = "Malaga - forecast h21")

# Accuracy
fabletools::accuracy(fc_h21, data_Malaga)

# A tibble: 3 × 11
  .model    provincia .type     ME  RMSE   MAE   MPE  MAPE  MASE RMSSE   ACF1
  <chr>     <chr>     <chr>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
1 arima_at1 Málaga    Test  18.4    25.9  21.7  20.9  33.3 0.220 0.153 0.249 
2 arima_at2 Málaga    Test  18.6    26.0  21.9  21.2  33.7 0.222 0.154 0.244 
3 Snaive    Málaga    Test  -0.951  22.0  18.6 -14.8  37.9 0.188 0.130 0.0886

90 days

# Plots
fc_h90 %>%
      autoplot(
            data_Malaga_test %>% 
                  filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(92))
      ) +
      labs(y = "Nº cases", title = "Malaga - forecast h90")

fc_h90 %>%
      autoplot(
            data_Malaga %>% 
                  filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(92) &
                               fecha > as.Date("2021-07-01", format = "%Y-%m-%d"))
      ) +
      labs(y = "Nº cases", title = "Malaga - forecast h90")

# Accuracy
fabletools::accuracy(fc_h90, data_Malaga)

# A tibble: 3 × 11
  .model    provincia .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
  <chr>     <chr>     <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Málaga    Test   606. 1013.  607.  55.8  59.8  6.17  5.99 0.927
2 arima_at2 Málaga    Test   596. 1001.  597.  54.1  58.3  6.07  5.91 0.926
3 Snaive    Málaga    Test   620. 1049.  628.  44.9  62.3  6.37  6.20 0.929

Sevilla

data_Sevilla <- covid_data %>% 
      filter(provincia == "Sevilla") %>% 
      filter(fecha > as.Date("2020-06-14", format = "%Y-%m-%d")) %>% 
      select(provincia, fecha, num_casos, tmed) %>% 
      drop_na() %>% 
      as_tsibble(key = provincia, index = fecha)
data_Sevilla

# A tsibble: 653 x 4 [1D]
# Key:       provincia [1]
   provincia fecha      num_casos  tmed
   <chr>     <date>         <dbl> <dbl>
 1 Sevilla   2020-06-15         2  23.1
 2 Sevilla   2020-06-16         1  24.0
 3 Sevilla   2020-06-17         0  24.9
 4 Sevilla   2020-06-18         0  25.8
 5 Sevilla   2020-06-19         0  26.6
 6 Sevilla   2020-06-20         0  27.5
 7 Sevilla   2020-06-21         0  28.4
 8 Sevilla   2020-06-22         1  29.3
 9 Sevilla   2020-06-23         0  30.2
10 Sevilla   2020-06-24         1  29.4
# … with 643 more rows

data_Sevilla_train <- data_Sevilla %>% 
      filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d"))
summary(data_Sevilla_train$fecha)

        Min.      1st Qu.       Median         Mean      3rd Qu.         Max. 
"2020-06-15" "2020-10-14" "2021-02-13" "2021-02-13" "2021-06-14" "2021-10-14"

data_Sevilla_test <- data_Sevilla %>% 
      filter(fecha > as.Date("2021-10-14", format = "%Y-%m-%d"))
summary(data_Sevilla_test$fecha)

        Min.      1st Qu.       Median         Mean      3rd Qu.         Max. 
"2021-10-15" "2021-11-25" "2022-01-05" "2022-01-05" "2022-02-15" "2022-03-29"

# Lamba for num_casos
lambda_Sevilla_casos <- data_Sevilla %>%
      features(num_casos, features = guerrero) %>%
      pull(lambda_guerrero)
lambda_Sevilla_casos

[1] 0.1933674

# Lamba for average temp
lambda_Sevilla_tmed <- data_Sevilla %>%
      features(tmed, features = guerrero) %>%
      pull(lambda_guerrero)
lambda_Sevilla_tmed

[1] 0.9333023

fit_model <- data_Sevilla_train %>%
      model(
            arima_at1 = ARIMA(box_cox(num_casos, lambda_Sevilla_casos) ~ 
                                    box_cox(tmed, lambda_Sevilla_tmed)),
            arima_at2 = ARIMA(box_cox(num_casos, lambda_Sevilla_casos) ~ 
                                    box_cox(tmed, lambda_Sevilla_tmed),
                              stepwise = FALSE, approx = FALSE),
            Snaive = SNAIVE(box_cox(num_casos, lambda_Sevilla_casos))
      )

# Show and report model
fit_model

# A mable: 1 x 4
# Key:     provincia [1]
  provincia                             arima_at1
  <chr>                                   <model>
1 Sevilla   <LM w/ ARIMA(3,1,1)(2,0,0)[7] errors>
# … with 2 more variables: arima_at2 <model>, Snaive <model>

glance(fit_model)

# A tibble: 3 × 9
  provincia .model    sigma2 log_lik   AIC  AICc   BIC ar_roots   ma_roots 
  <chr>     <chr>      <dbl>   <dbl> <dbl> <dbl> <dbl> <list>     <list>   
1 Sevilla   arima_at1  0.924   -669. 1355. 1355. 1388. <cpl [17]> <cpl [1]>
2 Sevilla   arima_at2  0.922   -669. 1354. 1354. 1387. <cpl [18]> <cpl [0]>
3 Sevilla   Snaive     2.02      NA    NA    NA    NA  <NULL>     <NULL>

fit_model %>% 
      pivot_longer(-provincia,
                   names_to = ".model",
                   values_to = "orders") %>% 
      left_join(glance(fit_model), by = c("provincia", ".model")) %>% 
      arrange(AICc) %>% 
      select(.model:BIC)

# A mable: 3 x 7
# Key:     .model [3]
  .model                                 orders sigma2 log_lik   AIC  AICc   BIC
  <chr>                                 <model>  <dbl>   <dbl> <dbl> <dbl> <dbl>
1 arima_… <LM w/ ARIMA(4,1,0)(2,0,0)[7] errors>  0.922   -669. 1354. 1354. 1387.
2 arima_… <LM w/ ARIMA(3,1,1)(2,0,0)[7] errors>  0.924   -669. 1355. 1355. 1388.
3 Snaive                               <SNAIVE>  2.02      NA    NA    NA    NA

# We use a Ljung-Box test >> large p-value, confirms residuals are similar / considered to white noise
fit_model %>% 
      select(Snaive) %>% 
      gg_tsresiduals()

fit_model %>% 
      select(arima_at1) %>% 
      gg_tsresiduals()

fit_model %>% 
      select(arima_at2) %>% 
      gg_tsresiduals()

augment(fit_model) %>%
      features(.innov, ljung_box, lag=7)

# A tibble: 3 × 4
  provincia .model    lb_stat lb_pvalue
  <chr>     <chr>       <dbl>     <dbl>
1 Sevilla   arima_at1    8.68     0.276
2 Sevilla   arima_at2    7.66     0.363
3 Sevilla   Snaive     757.       0

augment(fit_model) %>%
      features(.innov, ljung_box, lag=14)

# A tibble: 3 × 4
  provincia .model    lb_stat lb_pvalue
  <chr>     <chr>       <dbl>     <dbl>
1 Sevilla   arima_at1    24.1    0.0446
2 Sevilla   arima_at2    23.3    0.0555
3 Sevilla   Snaive     1110.     0

augment(fit_model) %>%
      features(.innov, ljung_box, lag=21)

# A tibble: 3 × 4
  provincia .model    lb_stat lb_pvalue
  <chr>     <chr>       <dbl>     <dbl>
1 Sevilla   arima_at1    37.5    0.0147
2 Sevilla   arima_at2    36.7    0.0181
3 Sevilla   Snaive     1235.     0

augment(fit_model) %>%
      features(.innov, ljung_box, lag=90)

# A tibble: 3 × 4
  provincia .model    lb_stat lb_pvalue
  <chr>     <chr>       <dbl>     <dbl>
1 Sevilla   arima_at1    103.     0.166
2 Sevilla   arima_at2    102.     0.182
3 Sevilla   Snaive      1692.     0

# To predict future values of infections, we need to pass the model future/prediction values of the complementary variables tmed and mobility
# We are going to select those from the test dataset
data_fc_h7 <- data_Sevilla_test %>% 
      select(-num_casos) %>% 
      slice(1:7)
data_fc_h14 <- data_Sevilla_test %>% 
      select(-num_casos) %>% 
      slice(1:14)
data_fc_h21 <- data_Sevilla_test %>% 
      select(-num_casos) %>% 
      slice(1:21)
data_fc_h90 <- data_Sevilla_test %>% 
      select(-num_casos) %>% 
      slice(1:90)

# Significant spikes out of 30 is still consistent with white noise
# To be sure, use a Ljung-Box test, which has a large p-value, confirming that
# the residuals are similar to white noise.
# Note that the alternative models also passed this test.
# 
# Forecast
fc_h7  <- fabletools::forecast(fit_model, new_data = data_fc_h7)
fc_h14 <- fabletools::forecast(fit_model, new_data = data_fc_h14)
fc_h21 <- fabletools::forecast(fit_model, new_data = data_fc_h21)
fc_h90 <- fabletools::forecast(fit_model, new_data = data_fc_h90)

7 days

# Plots
fc_h7 %>%
      autoplot(
            data_Sevilla_test %>% 
                  filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(9))
      ) +
      labs(y = "Nº cases", title = "Sevilla - forecast h7")

fc_h7 %>%
      autoplot(
            data_Sevilla %>% 
                  filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(9) &
                               fecha > as.Date("2021-07-01", format = "%Y-%m-%d"))
      ) +
      labs(y = "Nº cases", title = "Sevilla - forecast h7")

# Accuracy
fabletools::accuracy(fc_h7, data_Sevilla)

# A tibble: 3 × 11
  .model    provincia .type    ME  RMSE   MAE   MPE  MAPE   MASE  RMSSE    ACF1
  <chr>     <chr>     <chr> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>  <dbl>   <dbl>
1 arima_at1 Sevilla   Test   7.42  11.3  8.79  19.6  24.7 0.0869 0.0717 -0.137 
2 arima_at2 Sevilla   Test   7.51  11.4  8.93  19.8  25.1 0.0882 0.0722 -0.123 
3 Snaive    Sevilla   Test  -5.85  16.0 13.7  -22.0  40.6 0.135  0.102   0.0310

14 days

# Plots
fc_h14 %>%
      autoplot(
            data_Sevilla_test %>% 
                  filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(16))
      ) +
      labs(y = "Nº cases", title = "Sevilla - forecast h14")

fc_h14 %>%
      autoplot(
            data_Sevilla %>% 
                  filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(16) &
                               fecha > as.Date("2021-07-01", format = "%Y-%m-%d"))
      ) +
      labs(y = "Nº cases", title = "Sevilla - forecast h14")

# Accuracy
fabletools::accuracy(fc_h14, data_Sevilla)

# A tibble: 3 × 11
  .model    provincia .type     ME  RMSE   MAE    MPE  MAPE   MASE  RMSSE   ACF1
  <chr>     <chr>     <chr>  <dbl> <dbl> <dbl>  <dbl> <dbl>  <dbl>  <dbl>  <dbl>
1 arima_at1 Sevilla   Test    5.00  9.90  8.02   8.47  27.5 0.0793 0.0628 -0.116
2 arima_at2 Sevilla   Test    5.13  9.96  8.10   8.97  27.6 0.0801 0.0631 -0.108
3 Snaive    Sevilla   Test  -12.5  19.3  16.4  -57.6   66.9 0.162  0.122   0.275

21 days

# Plots
fc_h21 %>%
      autoplot(
            data_Sevilla_test %>% 
                  filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(23))
      ) +
      labs(y = "Nº cases", title = "Sevilla - forecast h21")

fc_h21 %>%
      autoplot(
            data_Sevilla %>% 
                  filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(23) &
                               fecha > as.Date("2021-07-01", format = "%Y-%m-%d"))
      ) +
      labs(y = "Nº cases", title = "Sevilla - forecast h21")

# Accuracy
fabletools::accuracy(fc_h21, data_Sevilla)

# A tibble: 3 × 11
  .model    provincia .type     ME  RMSE   MAE   MPE  MAPE   MASE  RMSSE  ACF1
  <chr>     <chr>     <chr>  <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>  <dbl> <dbl>
1 arima_at1 Sevilla   Test    6.47  11.8  8.94  11.5  26.9 0.0883 0.0745 0.191
2 arima_at2 Sevilla   Test    6.59  11.8  9.01  12.0  26.9 0.0890 0.0749 0.194
3 Snaive    Sevilla   Test  -13.9   22.1 19.1  -60.8  72.0 0.189  0.140  0.327

90 days

# Plots
fc_h90 %>%
      autoplot(
            data_Sevilla_test %>% 
                  filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(92))
      ) +
      labs(y = "Nº cases", title = "Sevilla - forecast h90")

fc_h90 %>%
      autoplot(
            data_Sevilla %>% 
                  filter(fecha <= as.Date("2021-10-14", format = "%Y-%m-%d")+days(92) &
                               fecha > as.Date("2021-07-01", format = "%Y-%m-%d"))
      ) +
      labs(y = "Nº cases", title = "Sevilla - forecast h90")

# Accuracy
fabletools::accuracy(fc_h90, data_Sevilla)

# A tibble: 3 × 11
  .model    provincia .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
  <chr>     <chr>     <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Sevilla   Test   686. 1166.  687.  61.8  66.2  6.79  7.39 0.892
2 arima_at2 Sevilla   Test   686. 1165.  687.  61.9  66.2  6.78  7.39 0.892
3 Snaive    Sevilla   Test   665. 1161.  677.  31.7  74.6  6.69  7.36 0.892

Middel of sixth epidemiological period

Asturias

data_asturias <- covid_data %>% 
      filter(provincia == "Asturias") %>% 
      filter(fecha > as.Date("2020-06-14", format = "%Y-%m-%d")) %>% 
      select(provincia, fecha, num_casos, tmed) %>% 
      drop_na() %>% 
      as_tsibble(key = provincia, index = fecha)
data_asturias

# A tsibble: 653 x 4 [1D]
# Key:       provincia [1]
   provincia fecha      num_casos  tmed
   <chr>     <date>         <dbl> <dbl>
 1 Asturias  2020-06-15         0  16  
 2 Asturias  2020-06-16         1  15.6
 3 Asturias  2020-06-17         0  15.6
 4 Asturias  2020-06-18         0  15.1
 5 Asturias  2020-06-19         0  14.8
 6 Asturias  2020-06-20         0  16.8
 7 Asturias  2020-06-21         0  18.2
 8 Asturias  2020-06-22         0  18.2
 9 Asturias  2020-06-23         0  20  
10 Asturias  2020-06-24         0  18.2
# … with 643 more rows

data_asturias_train <- data_asturias %>% 
      filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d"))
summary(data_asturias_train$fecha)

        Min.      1st Qu.       Median         Mean      3rd Qu.         Max. 
"2020-06-15" "2020-11-10" "2021-04-08" "2021-04-08" "2021-09-04" "2022-01-31"

data_asturias_test <- data_asturias %>% 
      filter(fecha > as.Date("2022-01-31", format = "%Y-%m-%d"))
summary(data_asturias_test$fecha)

        Min.      1st Qu.       Median         Mean      3rd Qu.         Max. 
"2022-02-01" "2022-02-15" "2022-03-01" "2022-03-01" "2022-03-15" "2022-03-29"

# Lamba for num_casos
lambda_asturias_casos <- data_asturias %>%
      features(num_casos, features = guerrero) %>%
      pull(lambda_guerrero)
lambda_asturias_casos

[1] 0.2278229

# Lamba for average temp
lambda_asturias_tmed <- data_asturias %>%
      features(tmed, features = guerrero) %>%
      pull(lambda_guerrero)
lambda_asturias_tmed

[1] 1.056074

fit_model <- data_asturias_train %>%
      model(
            arima_at1 = ARIMA(box_cox(num_casos, lambda_asturias_casos) ~ 
                                    box_cox(tmed, lambda_asturias_tmed)),
            arima_at2 = ARIMA(box_cox(num_casos, lambda_asturias_casos) ~ 
                                    box_cox(tmed, lambda_asturias_tmed),
                              stepwise = FALSE, approx = FALSE),
            Snaive = SNAIVE(box_cox(num_casos, lambda_asturias_casos))
      )

# Show and report model
fit_model

# A mable: 1 x 4
# Key:     provincia [1]
  provincia                             arima_at1
  <chr>                                   <model>
1 Asturias  <LM w/ ARIMA(3,1,1)(2,0,0)[7] errors>
# … with 2 more variables: arima_at2 <model>, Snaive <model>

glance(fit_model)

# A tibble: 3 × 9
  provincia .model    sigma2 log_lik   AIC  AICc   BIC ar_roots   ma_roots 
  <chr>     <chr>      <dbl>   <dbl> <dbl> <dbl> <dbl> <list>     <list>   
1 Asturias  arima_at1  0.917   -816. 1648. 1648. 1683. <cpl [17]> <cpl [1]>
2 Asturias  arima_at2  0.895   -810. 1631. 1631. 1653. <cpl [7]>  <cpl [8]>
3 Asturias  Snaive     2.53      NA    NA    NA    NA  <NULL>     <NULL>

fit_model %>% 
      pivot_longer(-provincia,
                   names_to = ".model",
                   values_to = "orders") %>% 
      left_join(glance(fit_model), by = c("provincia", ".model")) %>% 
      arrange(AICc) %>% 
      select(.model:BIC)

# A mable: 3 x 7
# Key:     .model [3]
  .model                                 orders sigma2 log_lik   AIC  AICc   BIC
  <chr>                                 <model>  <dbl>   <dbl> <dbl> <dbl> <dbl>
1 arima_… <LM w/ ARIMA(0,1,1)(1,0,1)[7] errors>  0.895   -810. 1631. 1631. 1653.
2 arima_… <LM w/ ARIMA(3,1,1)(2,0,0)[7] errors>  0.917   -816. 1648. 1648. 1683.
3 Snaive                               <SNAIVE>  2.53      NA    NA    NA    NA

# We use a Ljung-Box test >> large p-value, confirms residuals are similar / considered to white noise
fit_model %>% 
      select(Snaive) %>% 
      gg_tsresiduals()

fit_model %>% 
      select(arima_at1) %>% 
      gg_tsresiduals()

fit_model %>% 
      select(arima_at2) %>% 
      gg_tsresiduals()

augment(fit_model) %>%
      features(.innov, ljung_box, lag=7)

# A tibble: 3 × 4
  provincia .model    lb_stat lb_pvalue
  <chr>     <chr>       <dbl>     <dbl>
1 Asturias  arima_at1    13.8    0.0544
2 Asturias  arima_at2    13.0    0.0717
3 Asturias  Snaive      981.     0

augment(fit_model) %>%
      features(.innov, ljung_box, lag=14)

# A tibble: 3 × 4
  provincia .model    lb_stat lb_pvalue
  <chr>     <chr>       <dbl>     <dbl>
1 Asturias  arima_at1    38.1  0.000503
2 Asturias  arima_at2    34.9  0.00150 
3 Asturias  Snaive     1393.   0

augment(fit_model) %>%
      features(.innov, ljung_box, lag=21)

# A tibble: 3 × 4
  provincia .model    lb_stat lb_pvalue
  <chr>     <chr>       <dbl>     <dbl>
1 Asturias  arima_at1    59.1 0.0000173
2 Asturias  arima_at2    48.9 0.000522 
3 Asturias  Snaive     1470.  0

augment(fit_model) %>%
      features(.innov, ljung_box, lag=57)

# A tibble: 3 × 4
  provincia .model    lb_stat lb_pvalue
  <chr>     <chr>       <dbl>     <dbl>
1 Asturias  arima_at1   100.   0.000364
2 Asturias  arima_at2    78.5  0.0308  
3 Asturias  Snaive     1527.   0

# To predict future values of infections, we need to pass the model future/prediction values of the complementary variables tmed and mobility
# We are going to select those from the test dataset
data_fc_h7 <- data_asturias_test %>% 
      select(-num_casos) %>% 
      slice(1:7)
data_fc_h14 <- data_asturias_test %>% 
      select(-num_casos) %>% 
      slice(1:14)
data_fc_h21 <- data_asturias_test %>% 
      select(-num_casos) %>% 
      slice(1:21)
data_fc_h57 <- data_asturias_test %>% 
      select(-num_casos) %>% 
      slice(1:57)

# Significant spikes out of 30 is still consistent with white noise
# To be sure, use a Ljung-Box test, which has a large p-value, confirming that
# the residuals are similar to white noise.
# Note that the alternative models also passed this test.
# 
# Forecast
fc_h7  <- fabletools::forecast(fit_model, new_data = data_fc_h7)
fc_h14 <- fabletools::forecast(fit_model, new_data = data_fc_h14)
fc_h21 <- fabletools::forecast(fit_model, new_data = data_fc_h21)
fc_h57 <- fabletools::forecast(fit_model, new_data = data_fc_h57)

7 days

# Plots
fc_h7 %>%
      autoplot(
            data_asturias_test %>% 
                  filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(7))
      ) +
      labs(y = "Nº cases", title = "Asturias - forecast h7")

fc_h7 %>%
      autoplot(
            data_asturias %>% 
                  filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(7) &
                               fecha > as.Date("2022-01-01", format = "%Y-%m-%d"))
      ) +
      labs(y = "Nº cases", title = "Asturias - forecast h7")

# Accuracy
fabletools::accuracy(fc_h7, data_asturias)

# A tibble: 3 × 11
  .model    provincia .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE   ACF1
  <chr>     <chr>     <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl>
1 arima_at1 Asturias  Test  -174.  274.  253. -25.3  30.3  2.61  1.25  0.192
2 arima_at2 Asturias  Test  -256.  335.  318. -33.9  37.9  3.29  1.52  0.159
3 Snaive    Asturias  Test  -570.  586.  570. -62.8  62.8  5.89  2.66 -0.550

14 days

# Plots
fc_h14 %>%
      autoplot(
            data_asturias_test %>% 
                  filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(14))
      ) +
      labs(y = "Nº cases", title = "Asturias - forecast h14")

fc_h14 %>%
      autoplot(
            data_asturias %>% 
                  filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(14) &
                               fecha > as.Date("2022-01-01", format = "%Y-%m-%d"))
      ) +
      labs(y = "Nº cases", title = "Asturias - forecast h14")

# Accuracy
fabletools::accuracy(fc_h14, data_asturias)

# A tibble: 3 × 11
  .model    provincia .type    ME  RMSE   MAE    MPE  MAPE  MASE RMSSE  ACF1
  <chr>     <chr>     <chr> <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Asturias  Test  -273.  334.  313.  -44.4  46.9  3.23  1.52 0.375
2 arima_at2 Asturias  Test  -395.  449.  426.  -61.1  63.1  4.40  2.04 0.443
3 Snaive    Asturias  Test  -769.  812.  769. -107.  107.   7.94  3.69 0.299

21 days

# Plots
fc_h21 %>%
      autoplot(
            data_asturias_test %>% 
                  filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(21))
      ) +
      labs(y = "Nº cases", title = "Asturias - forecast h21")

fc_h21 %>%
      autoplot(
            data_asturias %>% 
                  filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(21) &
                               fecha > as.Date("2022-01-01", format = "%Y-%m-%d"))
      ) +
      labs(y = "Nº cases", title = "Asturias - forecast h21")

# Accuracy
fabletools::accuracy(fc_h21, data_asturias)

# A tibble: 3 × 11
  .model    provincia .type    ME  RMSE   MAE    MPE  MAPE  MASE RMSSE  ACF1
  <chr>     <chr>     <chr> <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Asturias  Test  -359.  414.  385.  -75.2  76.8  3.98  1.88 0.534
2 arima_at2 Asturias  Test  -495.  545.  516.  -98.4  99.8  5.34  2.48 0.595
3 Snaive    Asturias  Test  -920.  980.  920. -168.  168.   9.50  4.46 0.437

57 days

# Plots
fc_h57 %>%
      autoplot(
            data_asturias_test %>% 
                  filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(57))
      ) +
      labs(y = "Nº cases", title = "Asturias - forecast h57")

fc_h57 %>%
      autoplot(
            data_asturias %>% 
                  filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(57) &
                               fecha > as.Date("2022-01-01", format = "%Y-%m-%d"))
      ) +
      labs(y = "Nº cases", title = "Asturias - forecast h57")

# Accuracy
fabletools::accuracy(fc_h57, data_asturias)

# A tibble: 3 × 11
  .model    provincia .type     ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
  <chr>     <chr>     <chr>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Asturias  Test   -583.  634.  592.  -Inf   Inf  6.12  2.88 0.720
2 arima_at2 Asturias  Test   -694.  735.  702.  -Inf   Inf  7.25  3.34 0.712
3 Snaive    Asturias  Test  -1282. 1359. 1282.  -Inf   Inf 13.3   6.18 0.471

Barcelona

data_Barcelona <- covid_data %>% 
      filter(provincia == "Barcelona") %>% 
      filter(fecha > as.Date("2020-06-14", format = "%Y-%m-%d")) %>% 
      select(provincia, fecha, num_casos, tmed) %>% 
      drop_na() %>% 
      as_tsibble(key = provincia, index = fecha)
data_Barcelona

# A tsibble: 653 x 4 [1D]
# Key:       provincia [1]
   provincia fecha      num_casos  tmed
   <chr>     <date>         <dbl> <dbl>
 1 Barcelona 2020-06-15        62  21.2
 2 Barcelona 2020-06-16        66  20.8
 3 Barcelona 2020-06-17        70  20.3
 4 Barcelona 2020-06-18        68  18.8
 5 Barcelona 2020-06-19        65  20.4
 6 Barcelona 2020-06-20        53  21.8
 7 Barcelona 2020-06-21        38  22.8
 8 Barcelona 2020-06-22        69  23.8
 9 Barcelona 2020-06-23        95  23.4
10 Barcelona 2020-06-24        44  23.6
# … with 643 more rows

data_Barcelona_train <- data_Barcelona %>% 
      filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d"))
summary(data_Barcelona_train$fecha)

        Min.      1st Qu.       Median         Mean      3rd Qu.         Max. 
"2020-06-15" "2020-11-10" "2021-04-08" "2021-04-08" "2021-09-04" "2022-01-31"

data_Barcelona_test <- data_Barcelona %>% 
      filter(fecha > as.Date("2022-01-31", format = "%Y-%m-%d"))
summary(data_Barcelona_test$fecha)

        Min.      1st Qu.       Median         Mean      3rd Qu.         Max. 
"2022-02-01" "2022-02-15" "2022-03-01" "2022-03-01" "2022-03-15" "2022-03-29"

# Lamba for num_casos
lambda_Barcelona_casos <- data_Barcelona %>%
      features(num_casos, features = guerrero) %>%
      pull(lambda_guerrero)
lambda_Barcelona_casos

[1] 0.1434101

# Lamba for average temp
lambda_Barcelona_tmed <- data_Barcelona %>%
      features(tmed, features = guerrero) %>%
      pull(lambda_guerrero)
lambda_Barcelona_tmed

[1] 1.098957

fit_model <- data_Barcelona_train %>%
      model(
            arima_at1 = ARIMA(box_cox(num_casos, lambda_Barcelona_casos) ~ 
                                    box_cox(tmed, lambda_Barcelona_tmed)),
            arima_at2 = ARIMA(box_cox(num_casos, lambda_Barcelona_casos) ~ 
                                    box_cox(tmed, lambda_Barcelona_tmed),
                              stepwise = FALSE, approx = FALSE),
            Snaive = SNAIVE(box_cox(num_casos, lambda_Barcelona_casos))
      )

# Show and report model
fit_model

# A mable: 1 x 4
# Key:     provincia [1]
  provincia                             arima_at1
  <chr>                                   <model>
1 Barcelona <LM w/ ARIMA(1,0,2)(0,1,1)[7] errors>
# … with 2 more variables: arima_at2 <model>, Snaive <model>

glance(fit_model)

# A tibble: 3 × 9
  provincia .model    sigma2 log_lik   AIC  AICc   BIC ar_roots  ma_roots 
  <chr>     <chr>      <dbl>   <dbl> <dbl> <dbl> <dbl> <list>    <list>   
1 Barcelona arima_at1  0.342   -521. 1054. 1054. 1080. <cpl [1]> <cpl [9]>
2 Barcelona arima_at2  0.328   -510. 1035. 1035. 1070. <cpl [4]> <cpl [8]>
3 Barcelona Snaive     1.26      NA    NA    NA    NA  <NULL>    <NULL>

fit_model %>% 
      pivot_longer(-provincia,
                   names_to = ".model",
                   values_to = "orders") %>% 
      left_join(glance(fit_model), by = c("provincia", ".model")) %>% 
      arrange(AICc) %>% 
      select(.model:BIC)

# A mable: 3 x 7
# Key:     .model [3]
  .model                                 orders sigma2 log_lik   AIC  AICc   BIC
  <chr>                                 <model>  <dbl>   <dbl> <dbl> <dbl> <dbl>
1 arima_… <LM w/ ARIMA(4,0,1)(0,1,1)[7] errors>  0.328   -510. 1035. 1035. 1070.
2 arima_… <LM w/ ARIMA(1,0,2)(0,1,1)[7] errors>  0.342   -521. 1054. 1054. 1080.
3 Snaive                               <SNAIVE>  1.26      NA    NA    NA    NA

# We use a Ljung-Box test >> large p-value, confirms residuals are similar / considered to white noise
fit_model %>% 
      select(Snaive) %>% 
      gg_tsresiduals()

fit_model %>% 
      select(arima_at1) %>% 
      gg_tsresiduals()

fit_model %>% 
      select(arima_at2) %>% 
      gg_tsresiduals()

augment(fit_model) %>%
      features(.innov, ljung_box, lag=7)

# A tibble: 3 × 4
  provincia .model    lb_stat   lb_pvalue
  <chr>     <chr>       <dbl>       <dbl>
1 Barcelona arima_at1    35.9 0.00000742 
2 Barcelona arima_at2    43.1 0.000000313
3 Barcelona Snaive     1654.  0

augment(fit_model) %>%
      features(.innov, ljung_box, lag=14)

# A tibble: 3 × 4
  provincia .model    lb_stat   lb_pvalue
  <chr>     <chr>       <dbl>       <dbl>
1 Barcelona arima_at1    54.6 0.00000102 
2 Barcelona arima_at2    59.0 0.000000174
3 Barcelona Snaive     2253.  0

augment(fit_model) %>%
      features(.innov, ljung_box, lag=21)

# A tibble: 3 × 4
  provincia .model    lb_stat   lb_pvalue
  <chr>     <chr>       <dbl>       <dbl>
1 Barcelona arima_at1    60.9 0.00000917 
2 Barcelona arima_at2    70.9 0.000000253
3 Barcelona Snaive     2362.  0

augment(fit_model) %>%
      features(.innov, ljung_box, lag=57)

# A tibble: 3 × 4
  provincia .model    lb_stat lb_pvalue
  <chr>     <chr>       <dbl>     <dbl>
1 Barcelona arima_at1    164.  2.71e-12
2 Barcelona arima_at2    162.  4.88e-12
3 Barcelona Snaive      2651.  0

# To predict future values of infections, we need to pass the model future/prediction values of the complementary variables tmed and mobility
# We are going to select those from the test dataset
data_fc_h7 <- data_Barcelona_test %>% 
      select(-num_casos) %>% 
      slice(1:7)
data_fc_h14 <- data_Barcelona_test %>% 
      select(-num_casos) %>% 
      slice(1:14)
data_fc_h21 <- data_Barcelona_test %>% 
      select(-num_casos) %>% 
      slice(1:21)
data_fc_h57 <- data_Barcelona_test %>% 
      select(-num_casos) %>% 
      slice(1:57)

# Significant spikes out of 30 is still consistent with white noise
# To be sure, use a Ljung-Box test, which has a large p-value, confirming that
# the residuals are similar to white noise.
# Note that the alternative models also passed this test.
# 
# Forecast
fc_h7  <- fabletools::forecast(fit_model, new_data = data_fc_h7)
fc_h14 <- fabletools::forecast(fit_model, new_data = data_fc_h14)
fc_h21 <- fabletools::forecast(fit_model, new_data = data_fc_h21)
fc_h57 <- fabletools::forecast(fit_model, new_data = data_fc_h57)

7 days

# Plots
fc_h7 %>%
      autoplot(
            data_Barcelona_test %>% 
                  filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(7))
      ) +
      labs(y = "Nº cases", title = "Barcelona - forecast h7")

fc_h7 %>%
      autoplot(
            data_Barcelona %>% 
                  filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(7) &
                               fecha > as.Date("2022-01-01", format = "%Y-%m-%d"))
      ) +
      labs(y = "Nº cases", title = "Barcelona - forecast h7")

# Accuracy
fabletools::accuracy(fc_h7, data_Barcelona)

# A tibble: 3 × 11
  .model    provincia .type     ME  RMSE   MAE   MPE  MAPE  MASE RMSSE    ACF1
  <chr>     <chr>     <chr>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
1 arima_at1 Barcelona Test  -4666. 4908. 4666. -63.7  63.7  5.86 2.51  -0.0314
2 arima_at2 Barcelona Test  -1704. 1754. 1704. -22.3  22.3  2.14 0.896 -0.328 
3 Snaive    Barcelona Test  -7870. 8139. 7870. -99.0  99.0  9.88 4.16   0.551

14 days

# Plots
fc_h14 %>%
      autoplot(
            data_Barcelona_test %>% 
                  filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(14))
      ) +
      labs(y = "Nº cases", title = "Barcelona - forecast h14")

fc_h14 %>%
      autoplot(
            data_Barcelona %>% 
                  filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(14) &
                               fecha > as.Date("2022-01-01", format = "%Y-%m-%d"))
      ) +
      labs(y = "Nº cases", title = "Barcelona - forecast h14")

# Accuracy
fabletools::accuracy(fc_h14, data_Barcelona)

# A tibble: 3 × 11
  .model    provincia .type     ME   RMSE   MAE    MPE  MAPE  MASE RMSSE  ACF1
  <chr>     <chr>     <chr>  <dbl>  <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Barcelona Test  -6738.  7266. 6738. -136.  136.   8.46 3.71  0.459
2 arima_at2 Barcelona Test  -1843.  1947. 1843.  -33.4  33.4  2.31 0.994 0.180
3 Snaive    Barcelona Test  -9985. 10606. 9985. -188.  188.  12.5  5.42  0.548

21 days

# Plots
fc_h21 %>%
      autoplot(
            data_Barcelona_test %>% 
                  filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(21))
      ) +
      labs(y = "Nº cases", title = "Barcelona - forecast h21")

fc_h21 %>%
      autoplot(
            data_Barcelona %>% 
                  filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(21) &
                               fecha > as.Date("2022-01-01", format = "%Y-%m-%d"))
      ) +
      labs(y = "Nº cases", title = "Barcelona - forecast h21")

# Accuracy
fabletools::accuracy(fc_h21, data_Barcelona)

# A tibble: 3 × 11
  .model    provincia .type      ME   RMSE    MAE    MPE  MAPE  MASE RMSSE  ACF1
  <chr>     <chr>     <chr>   <dbl>  <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Barcelona Test   -8063.  8669.  8063. -206.  206.  10.1  4.43  0.569
2 arima_at2 Barcelona Test   -1589.  1743.  1589.  -33.3  33.3  2.00 0.890 0.446
3 Snaive    Barcelona Test  -11275. 12018. 11275. -272.  272.  14.2  6.14  0.602

57 days

# Plots
fc_h57 %>%
      autoplot(
            data_Barcelona_test %>% 
                  filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(57))
      ) +
      labs(y = "Nº cases", title = "Barcelona - forecast h57")

fc_h57 %>%
      autoplot(
            data_Barcelona %>% 
                  filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(57) &
                               fecha > as.Date("2022-01-01", format = "%Y-%m-%d"))
      ) +
      labs(y = "Nº cases", title = "Barcelona - forecast h57")

# Accuracy
fabletools::accuracy(fc_h57, data_Barcelona)

# A tibble: 3 × 11
  .model    provincia .type      ME   RMSE    MAE   MPE  MAPE  MASE RMSSE  ACF1
  <chr>     <chr>     <chr>   <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Barcelona Test  -12180. 13056. 12180.  -Inf   Inf 15.3  6.67  0.704
2 arima_at2 Barcelona Test    -582.  1198.   942.  -Inf   Inf  1.18 0.612 0.806
3 Snaive    Barcelona Test  -14809. 15796. 14809.  -Inf   Inf 18.6  8.06  0.592

Madrid

data_Madrid <- covid_data %>% 
      filter(provincia == "Madrid") %>% 
      filter(fecha > as.Date("2020-06-14", format = "%Y-%m-%d")) %>% 
      select(provincia, fecha, num_casos, tmed) %>% 
      drop_na() %>% 
      as_tsibble(key = provincia, index = fecha)
data_Madrid

# A tsibble: 653 x 4 [1D]
# Key:       provincia [1]
   provincia fecha      num_casos  tmed
   <chr>     <date>         <dbl> <dbl>
 1 Madrid    2020-06-15       153  20.7
 2 Madrid    2020-06-16        91  21  
 3 Madrid    2020-06-17        93  20.2
 4 Madrid    2020-06-18        85  21.9
 5 Madrid    2020-06-19        78  22.6
 6 Madrid    2020-06-20        67  23.4
 7 Madrid    2020-06-21        42  25  
 8 Madrid    2020-06-22        60  27.3
 9 Madrid    2020-06-23        68  28.4
10 Madrid    2020-06-24        49  28  
# … with 643 more rows

data_Madrid_train <- data_Madrid %>% 
      filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d"))
summary(data_Madrid_train$fecha)

        Min.      1st Qu.       Median         Mean      3rd Qu.         Max. 
"2020-06-15" "2020-11-10" "2021-04-08" "2021-04-08" "2021-09-04" "2022-01-31"

data_Madrid_test <- data_Madrid %>% 
      filter(fecha > as.Date("2022-01-31", format = "%Y-%m-%d"))
summary(data_Madrid_test$fecha)

        Min.      1st Qu.       Median         Mean      3rd Qu.         Max. 
"2022-02-01" "2022-02-15" "2022-03-01" "2022-03-01" "2022-03-15" "2022-03-29"

# Lamba for num_casos
lambda_Madrid_casos <- data_Madrid %>%
      features(num_casos, features = guerrero) %>%
      pull(lambda_guerrero)
lambda_Madrid_casos

[1] 0.06260614

# Lamba for average temp
lambda_Madrid_tmed <- data_Madrid %>%
      features(tmed, features = guerrero) %>%
      pull(lambda_guerrero)
lambda_Madrid_tmed

[1] 0.936519

fit_model <- data_Madrid_train %>%
      model(
            arima_at1 = ARIMA(box_cox(num_casos, lambda_Madrid_casos) ~ 
                                    box_cox(tmed, lambda_Madrid_tmed)),
            arima_at2 = ARIMA(box_cox(num_casos, lambda_Madrid_casos) ~ 
                                    box_cox(tmed, lambda_Madrid_tmed),
                              stepwise = FALSE, approx = FALSE),
            Snaive = SNAIVE(box_cox(num_casos, lambda_Madrid_casos))
      )

# Show and report model
fit_model

# A mable: 1 x 4
# Key:     provincia [1]
  provincia                             arima_at1
  <chr>                                   <model>
1 Madrid    <LM w/ ARIMA(2,0,2)(0,1,2)[7] errors>
# … with 2 more variables: arima_at2 <model>, Snaive <model>

glance(fit_model)

# A tibble: 3 × 9
  provincia .model    sigma2 log_lik   AIC  AICc   BIC ar_roots  ma_roots  
  <chr>     <chr>      <dbl>   <dbl> <dbl> <dbl> <dbl> <list>    <list>    
1 Madrid    arima_at1  0.133   -240.  496.  496.  531. <cpl [2]> <cpl [16]>
2 Madrid    arima_at2  0.133   -240.  496.  496.  531. <cpl [2]> <cpl [16]>
3 Madrid    Snaive     0.412     NA    NA    NA    NA  <NULL>    <NULL>

fit_model %>% 
      pivot_longer(-provincia,
                   names_to = ".model",
                   values_to = "orders") %>% 
      left_join(glance(fit_model), by = c("provincia", ".model")) %>% 
      arrange(AICc) %>% 
      select(.model:BIC)

# A mable: 3 x 7
# Key:     .model [3]
  .model                                 orders sigma2 log_lik   AIC  AICc   BIC
  <chr>                                 <model>  <dbl>   <dbl> <dbl> <dbl> <dbl>
1 arima_… <LM w/ ARIMA(2,0,2)(0,1,2)[7] errors>  0.133   -240.  496.  496.  531.
2 arima_… <LM w/ ARIMA(2,0,2)(0,1,2)[7] errors>  0.133   -240.  496.  496.  531.
3 Snaive                               <SNAIVE>  0.412     NA    NA    NA    NA

# We use a Ljung-Box test >> large p-value, confirms residuals are similar / considered to white noise
fit_model %>% 
      select(Snaive) %>% 
      gg_tsresiduals()

fit_model %>% 
      select(arima_at1) %>% 
      gg_tsresiduals()

fit_model %>% 
      select(arima_at2) %>% 
      gg_tsresiduals()

augment(fit_model) %>%
      features(.innov, ljung_box, lag=7)

# A tibble: 3 × 4
  provincia .model    lb_stat lb_pvalue
  <chr>     <chr>       <dbl>     <dbl>
1 Madrid    arima_at1    24.2   0.00104
2 Madrid    arima_at2    24.2   0.00104
3 Madrid    Snaive     1617.    0

augment(fit_model) %>%
      features(.innov, ljung_box, lag=14)

# A tibble: 3 × 4
  provincia .model    lb_stat lb_pvalue
  <chr>     <chr>       <dbl>     <dbl>
1 Madrid    arima_at1    37.9  0.000530
2 Madrid    arima_at2    37.9  0.000530
3 Madrid    Snaive     2282.   0

augment(fit_model) %>%
      features(.innov, ljung_box, lag=21)

# A tibble: 3 × 4
  provincia .model    lb_stat lb_pvalue
  <chr>     <chr>       <dbl>     <dbl>
1 Madrid    arima_at1    47.4  0.000825
2 Madrid    arima_at2    47.4  0.000825
3 Madrid    Snaive     2422.   0

augment(fit_model) %>%
      features(.innov, ljung_box, lag=57)

# A tibble: 3 × 4
  provincia .model    lb_stat  lb_pvalue
  <chr>     <chr>       <dbl>      <dbl>
1 Madrid    arima_at1    122. 0.00000139
2 Madrid    arima_at2    122. 0.00000139
3 Madrid    Snaive      2762. 0

# To predict future values of infections, we need to pass the model future/prediction values of the complementary variables tmed and mobility
# We are going to select those from the test dataset
data_fc_h7 <- data_Madrid_test %>% 
      select(-num_casos) %>% 
      slice(1:7)
data_fc_h14 <- data_Madrid_test %>% 
      select(-num_casos) %>% 
      slice(1:14)
data_fc_h21 <- data_Madrid_test %>% 
      select(-num_casos) %>% 
      slice(1:21)
data_fc_h57 <- data_Madrid_test %>% 
      select(-num_casos) %>% 
      slice(1:57)

# Significant spikes out of 30 is still consistent with white noise
# To be sure, use a Ljung-Box test, which has a large p-value, confirming that
# the residuals are similar to white noise.
# Note that the alternative models also passed this test.
# 
# Forecast
fc_h7  <- fabletools::forecast(fit_model, new_data = data_fc_h7)
fc_h14 <- fabletools::forecast(fit_model, new_data = data_fc_h14)
fc_h21 <- fabletools::forecast(fit_model, new_data = data_fc_h21)
fc_h57 <- fabletools::forecast(fit_model, new_data = data_fc_h57)

7 days

# Plots
fc_h7 %>%
      autoplot(
            data_Madrid_test %>% 
                  filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(7))
      ) +
      labs(y = "Nº cases", title = "Madrid - forecast h7")

fc_h7 %>%
      autoplot(
            data_Madrid %>% 
                  filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(7) &
                               fecha > as.Date("2022-01-01", format = "%Y-%m-%d"))
      ) +
      labs(y = "Nº cases", title = "Madrid - forecast h7")

# Accuracy
fabletools::accuracy(fc_h7, data_Madrid)

# A tibble: 3 × 11
  .model    provincia .type     ME  RMSE   MAE    MPE  MAPE  MASE RMSSE   ACF1
  <chr>     <chr>     <chr>  <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>  <dbl>
1 arima_at1 Madrid    Test    464.  796.  632.   8.60  11.7 0.846 0.446 0.0623
2 arima_at2 Madrid    Test    464.  796.  632.   8.60  11.7 0.846 0.446 0.0623
3 Snaive    Madrid    Test  -1836. 2070. 1836. -36.2   36.2 2.46  1.16  0.541

14 days

# Plots
fc_h14 %>%
      autoplot(
            data_Madrid_test %>% 
                  filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(14))
      ) +
      labs(y = "Nº cases", title = "Madrid - forecast h14")

fc_h14 %>%
      autoplot(
            data_Madrid %>% 
                  filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(14) &
                               fecha > as.Date("2022-01-01", format = "%Y-%m-%d"))
      ) +
      labs(y = "Nº cases", title = "Madrid - forecast h14")

# Accuracy
fabletools::accuracy(fc_h14, data_Madrid)

# A tibble: 3 × 11
  .model    provincia .type     ME  RMSE   MAE    MPE  MAPE  MASE RMSSE  ACF1
  <chr>     <chr>     <chr>  <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Madrid    Test    179.  613.  463.   3.20  10.8 0.620 0.343 0.206
2 arima_at2 Madrid    Test    179.  613.  463.   3.20  10.8 0.620 0.343 0.206
3 Snaive    Madrid    Test  -2914. 3386. 2914. -77.1   77.1 3.90  1.90  0.466

21 days

# Plots
fc_h21 %>%
      autoplot(
            data_Madrid_test %>% 
                  filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(21))
      ) +
      labs(y = "Nº cases", title = "Madrid - forecast h21")

fc_h21 %>%
      autoplot(
            data_Madrid %>% 
                  filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(21) &
                               fecha > as.Date("2022-01-01", format = "%Y-%m-%d"))
      ) +
      labs(y = "Nº cases", title = "Madrid - forecast h21")

# Accuracy
fabletools::accuracy(fc_h21, data_Madrid)

# A tibble: 3 × 11
  .model    provincia .type     ME  RMSE   MAE    MPE  MAPE  MASE RMSSE  ACF1
  <chr>     <chr>     <chr>  <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Madrid    Test   -216.  839.  654.  -16.1  26.0 0.875 0.470 0.503
2 arima_at2 Madrid    Test   -216.  839.  654.  -16.1  26.0 0.875 0.470 0.503
3 Snaive    Madrid    Test  -3903. 4584. 3903. -158.  158.  5.23  2.57  0.574

57 days

# Plots
fc_h57 %>%
      autoplot(
            data_Madrid_test %>% 
                  filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(57))
      ) +
      labs(y = "Nº cases", title = "Madrid - forecast h57")

fc_h57 %>%
      autoplot(
            data_Madrid %>% 
                  filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(57) &
                               fecha > as.Date("2022-01-01", format = "%Y-%m-%d"))
      ) +
      labs(y = "Nº cases", title = "Madrid - forecast h57")

# Accuracy
fabletools::accuracy(fc_h57, data_Madrid)

# A tibble: 3 × 11
  .model    provincia .type     ME  RMSE   MAE    MPE  MAPE  MASE RMSSE  ACF1
  <chr>     <chr>     <chr>  <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Madrid    Test  -1509. 2183. 1670. -2106. 2110.  2.24  1.22 0.698
2 arima_at2 Madrid    Test  -1509. 2183. 1670. -2106. 2110.  2.24  1.22 0.698
3 Snaive    Madrid    Test  -6507. 7446. 6507. -4965. 4965.  8.71  4.17 0.627

Malaga

data_Malaga <- covid_data %>% 
      filter(provincia == "Málaga") %>% 
      filter(fecha > as.Date("2020-06-14", format = "%Y-%m-%d")) %>% 
      select(provincia, fecha, num_casos, tmed) %>% 
      drop_na() %>% 
      as_tsibble(key = provincia, index = fecha)
data_Malaga

# A tsibble: 653 x 4 [1D]
# Key:       provincia [1]
   provincia fecha      num_casos  tmed
   <chr>     <date>         <dbl> <dbl>
 1 Málaga    2020-06-15         1  27.2
 2 Málaga    2020-06-16         1  27.8
 3 Málaga    2020-06-17         2  26.9
 4 Málaga    2020-06-18         2  21.6
 5 Málaga    2020-06-19         1  24.7
 6 Málaga    2020-06-20         4  22.6
 7 Málaga    2020-06-21         4  22.7
 8 Málaga    2020-06-22         6  25  
 9 Málaga    2020-06-23         8  25.6
10 Málaga    2020-06-24        65  24.5
# … with 643 more rows

data_Malaga_train <- data_Malaga %>% 
      filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d"))
summary(data_Malaga_train$fecha)

        Min.      1st Qu.       Median         Mean      3rd Qu.         Max. 
"2020-06-15" "2020-11-10" "2021-04-08" "2021-04-08" "2021-09-04" "2022-01-31"

data_Malaga_test <- data_Malaga %>% 
      filter(fecha > as.Date("2022-01-31", format = "%Y-%m-%d"))
summary(data_Malaga_test$fecha)

        Min.      1st Qu.       Median         Mean      3rd Qu.         Max. 
"2022-02-01" "2022-02-15" "2022-03-01" "2022-03-01" "2022-03-15" "2022-03-29"

# Lamba for num_casos
lambda_Malaga_casos <- data_Malaga %>%
      features(num_casos, features = guerrero) %>%
      pull(lambda_guerrero)
lambda_Malaga_casos

[1] 0.2312255

# Lamba for average temp
lambda_Malaga_tmed <- data_Malaga %>%
      features(tmed, features = guerrero) %>%
      pull(lambda_guerrero)
lambda_Malaga_tmed

[1] 0.6288527

fit_model <- data_Malaga_train %>%
      model(
            arima_at1 = ARIMA(box_cox(num_casos, lambda_Malaga_casos) ~ 
                                    box_cox(tmed, lambda_Malaga_tmed)),
            arima_at2 = ARIMA(box_cox(num_casos, lambda_Malaga_casos) ~ 
                                    box_cox(tmed, lambda_Malaga_tmed),
                              stepwise = FALSE, approx = FALSE),
            Snaive = SNAIVE(box_cox(num_casos, lambda_Malaga_casos))
      )

# Show and report model
fit_model

# A mable: 1 x 4
# Key:     provincia [1]
  provincia                             arima_at1
  <chr>                                   <model>
1 Málaga    <LM w/ ARIMA(4,1,0)(2,0,0)[7] errors>
# … with 2 more variables: arima_at2 <model>, Snaive <model>

glance(fit_model)

# A tibble: 3 × 9
  provincia .model    sigma2 log_lik   AIC  AICc   BIC ar_roots   ma_roots 
  <chr>     <chr>      <dbl>   <dbl> <dbl> <dbl> <dbl> <list>     <list>   
1 Málaga    arima_at1  0.650   -715. 1446. 1446. 1481. <cpl [18]> <cpl [0]>
2 Málaga    arima_at2  0.628   -706. 1425. 1425. 1456. <cpl [10]> <cpl [7]>
3 Málaga    Snaive     2.41      NA    NA    NA    NA  <NULL>     <NULL>

fit_model %>% 
      pivot_longer(-provincia,
                   names_to = ".model",
                   values_to = "orders") %>% 
      left_join(glance(fit_model), by = c("provincia", ".model")) %>% 
      arrange(AICc) %>% 
      select(.model:BIC)

# A mable: 3 x 7
# Key:     .model [3]
  .model                                 orders sigma2 log_lik   AIC  AICc   BIC
  <chr>                                 <model>  <dbl>   <dbl> <dbl> <dbl> <dbl>
1 arima_… <LM w/ ARIMA(3,1,0)(1,0,1)[7] errors>  0.628   -706. 1425. 1425. 1456.
2 arima_… <LM w/ ARIMA(4,1,0)(2,0,0)[7] errors>  0.650   -715. 1446. 1446. 1481.
3 Snaive                               <SNAIVE>  2.41      NA    NA    NA    NA

# We use a Ljung-Box test >> large p-value, confirms residuals are similar / considered to white noise
fit_model %>% 
      select(Snaive) %>% 
      gg_tsresiduals()

fit_model %>% 
      select(arima_at1) %>% 
      gg_tsresiduals()

fit_model %>% 
      select(arima_at2) %>% 
      gg_tsresiduals()

augment(fit_model) %>%
      features(.innov, ljung_box, lag=7)

# A tibble: 3 × 4
  provincia .model    lb_stat lb_pvalue
  <chr>     <chr>       <dbl>     <dbl>
1 Málaga    arima_at1    8.29    0.308 
2 Málaga    arima_at2   15.2     0.0335
3 Málaga    Snaive    1621.      0

augment(fit_model) %>%
      features(.innov, ljung_box, lag=14)

# A tibble: 3 × 4
  provincia .model    lb_stat lb_pvalue
  <chr>     <chr>       <dbl>     <dbl>
1 Málaga    arima_at1    28.4   0.0124 
2 Málaga    arima_at2    35.7   0.00115
3 Málaga    Snaive     2376.    0

augment(fit_model) %>%
      features(.innov, ljung_box, lag=21)

# A tibble: 3 × 4
  provincia .model    lb_stat lb_pvalue
  <chr>     <chr>       <dbl>     <dbl>
1 Málaga    arima_at1    47.6  0.000771
2 Málaga    arima_at2    48.3  0.000621
3 Málaga    Snaive     2555.   0

augment(fit_model) %>%
      features(.innov, ljung_box, lag=57)

# A tibble: 3 × 4
  provincia .model    lb_stat lb_pvalue
  <chr>     <chr>       <dbl>     <dbl>
1 Málaga    arima_at1    100. 0.000338 
2 Málaga    arima_at2    112. 0.0000197
3 Málaga    Snaive      3422. 0

# To predict future values of infections, we need to pass the model future/prediction values of the complementary variables tmed and mobility
# We are going to select those from the test dataset
data_fc_h7 <- data_Malaga_test %>% 
      select(-num_casos) %>% 
      slice(1:7)
data_fc_h14 <- data_Malaga_test %>% 
      select(-num_casos) %>% 
      slice(1:14)
data_fc_h21 <- data_Malaga_test %>% 
      select(-num_casos) %>% 
      slice(1:21)
data_fc_h57 <- data_Malaga_test %>% 
      select(-num_casos) %>% 
      slice(1:57)

# Significant spikes out of 30 is still consistent with white noise
# To be sure, use a Ljung-Box test, which has a large p-value, confirming that
# the residuals are similar to white noise.
# Note that the alternative models also passed this test.
# 
# Forecast
fc_h7  <- fabletools::forecast(fit_model, new_data = data_fc_h7)
fc_h14 <- fabletools::forecast(fit_model, new_data = data_fc_h14)
fc_h21 <- fabletools::forecast(fit_model, new_data = data_fc_h21)
fc_h57 <- fabletools::forecast(fit_model, new_data = data_fc_h57)

7 days

# Plots
fc_h7 %>%
      autoplot(
            data_Malaga_test %>% 
                  filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(7))
      ) +
      labs(y = "Nº cases", title = "Malaga - forecast h7")

fc_h7 %>%
      autoplot(
            data_Malaga %>% 
                  filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(7) &
                               fecha > as.Date("2022-01-01", format = "%Y-%m-%d"))
      ) +
      labs(y = "Nº cases", title = "Malaga - forecast h7")

# Accuracy
fabletools::accuracy(fc_h7, data_Malaga)

# A tibble: 3 × 11
  .model    provincia .type     ME  RMSE   MAE    MPE  MAPE  MASE RMSSE   ACF1
  <chr>     <chr>     <chr>  <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl>  <dbl>
1 arima_at1 Málaga    Test    45.0  168.  128.  -1.35  21.2 0.965 0.680 0.0875
2 arima_at2 Málaga    Test   -17.6  158.  112. -11.6   21.6 0.851 0.640 0.209 
3 Snaive    Málaga    Test  -170.   240.  191. -33.7   35.9 1.45  0.968 0.246

14 days

# Plots
fc_h14 %>%
      autoplot(
            data_Malaga_test %>% 
                  filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(14))
      ) +
      labs(y = "Nº cases", title = "Malaga - forecast h14")

fc_h14 %>%
      autoplot(
            data_Malaga %>% 
                  filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(14) &
                               fecha > as.Date("2022-01-01", format = "%Y-%m-%d"))
      ) +
      labs(y = "Nº cases", title = "Malaga - forecast h14")

# Accuracy
fabletools::accuracy(fc_h14, data_Malaga)

# A tibble: 3 × 11
  .model    provincia .type     ME  RMSE   MAE     MPE  MAPE  MASE RMSSE   ACF1
  <chr>     <chr>     <chr>  <dbl> <dbl> <dbl>   <dbl> <dbl> <dbl> <dbl>  <dbl>
1 arima_at1 Málaga    Test   149.   266.  204.  11.6    26.7  1.54 1.08  0.282 
2 arima_at2 Málaga    Test    66.4  214.  169.  -0.972  25.7  1.28 0.866 0.260 
3 Snaive    Málaga    Test  -141.   223.  170. -28.9    31.8  1.28 0.899 0.0239

21 days

# Plots
fc_h21 %>%
      autoplot(
            data_Malaga_test %>% 
                  filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(21))
      ) +
      labs(y = "Nº cases", title = "Malaga - forecast h21")

fc_h21 %>%
      autoplot(
            data_Malaga %>% 
                  filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(21) &
                               fecha > as.Date("2022-01-01", format = "%Y-%m-%d"))
      ) +
      labs(y = "Nº cases", title = "Malaga - forecast h21")

# Accuracy
fabletools::accuracy(fc_h21, data_Malaga)

# A tibble: 3 × 11
  .model    provincia .type     ME  RMSE   MAE    MPE  MAPE  MASE RMSSE  ACF1
  <chr>     <chr>     <chr>  <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Málaga    Test   137.   249.  183.  12.1   24.5  1.38 1.01  0.388
2 arima_at2 Málaga    Test    48.9  195.  148.  -2.29  23.1  1.12 0.786 0.354
3 Snaive    Málaga    Test  -208.   279.  227. -41.4   43.4  1.72 1.13  0.172

57 days

# Plots
fc_h57 %>%
      autoplot(
            data_Malaga_test %>% 
                  filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(57))
      ) +
      labs(y = "Nº cases", title = "Malaga - forecast h57")

fc_h57 %>%
      autoplot(
            data_Malaga %>% 
                  filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(57) &
                               fecha > as.Date("2022-01-01", format = "%Y-%m-%d"))
      ) +
      labs(y = "Nº cases", title = "Malaga - forecast h57")

# Accuracy
fabletools::accuracy(fc_h57, data_Malaga)

# A tibble: 3 × 11
  .model    provincia .type     ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
  <chr>     <chr>     <chr>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Málaga    Test    36.7  237.  175.  -Inf   Inf  1.33 0.957 0.556
2 arima_at2 Málaga    Test   -24.7  208.  158.  -Inf   Inf  1.19 0.842 0.501
3 Snaive    Málaga    Test  -436.   525.  443.  -Inf   Inf  3.35 2.12  0.513

Sevilla

data_Sevilla <- covid_data %>% 
      filter(provincia == "Sevilla") %>% 
      filter(fecha > as.Date("2020-06-14", format = "%Y-%m-%d")) %>% 
      select(provincia, fecha, num_casos, tmed) %>% 
      drop_na() %>% 
      as_tsibble(key = provincia, index = fecha)
data_Sevilla

# A tsibble: 653 x 4 [1D]
# Key:       provincia [1]
   provincia fecha      num_casos  tmed
   <chr>     <date>         <dbl> <dbl>
 1 Sevilla   2020-06-15         2  23.1
 2 Sevilla   2020-06-16         1  24.0
 3 Sevilla   2020-06-17         0  24.9
 4 Sevilla   2020-06-18         0  25.8
 5 Sevilla   2020-06-19         0  26.6
 6 Sevilla   2020-06-20         0  27.5
 7 Sevilla   2020-06-21         0  28.4
 8 Sevilla   2020-06-22         1  29.3
 9 Sevilla   2020-06-23         0  30.2
10 Sevilla   2020-06-24         1  29.4
# … with 643 more rows

data_Sevilla_train <- data_Sevilla %>% 
      filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d"))
summary(data_Sevilla_train$fecha)

        Min.      1st Qu.       Median         Mean      3rd Qu.         Max. 
"2020-06-15" "2020-11-10" "2021-04-08" "2021-04-08" "2021-09-04" "2022-01-31"

data_Sevilla_test <- data_Sevilla %>% 
      filter(fecha > as.Date("2022-01-31", format = "%Y-%m-%d"))
summary(data_Sevilla_test$fecha)

        Min.      1st Qu.       Median         Mean      3rd Qu.         Max. 
"2022-02-01" "2022-02-15" "2022-03-01" "2022-03-01" "2022-03-15" "2022-03-29"

# Lamba for num_casos
lambda_Sevilla_casos <- data_Sevilla %>%
      features(num_casos, features = guerrero) %>%
      pull(lambda_guerrero)
lambda_Sevilla_casos

[1] 0.1933674

# Lamba for average temp
lambda_Sevilla_tmed <- data_Sevilla %>%
      features(tmed, features = guerrero) %>%
      pull(lambda_guerrero)
lambda_Sevilla_tmed

[1] 0.9333023

fit_model <- data_Sevilla_train %>%
      model(
            arima_at1 = ARIMA(box_cox(num_casos, lambda_Sevilla_casos) ~ 
                                    box_cox(tmed, lambda_Sevilla_tmed)),
            arima_at2 = ARIMA(box_cox(num_casos, lambda_Sevilla_casos) ~ 
                                    box_cox(tmed, lambda_Sevilla_tmed),
                              stepwise = FALSE, approx = FALSE),
            Snaive = SNAIVE(box_cox(num_casos, lambda_Sevilla_casos))
      )

# Show and report model
fit_model

# A mable: 1 x 4
# Key:     provincia [1]
  provincia                             arima_at1
  <chr>                                   <model>
1 Sevilla   <LM w/ ARIMA(3,1,1)(2,0,0)[7] errors>
# … with 2 more variables: arima_at2 <model>, Snaive <model>

glance(fit_model)

# A tibble: 3 × 9
  provincia .model    sigma2 log_lik   AIC  AICc   BIC ar_roots   ma_roots 
  <chr>     <chr>      <dbl>   <dbl> <dbl> <dbl> <dbl> <list>     <list>   
1 Sevilla   arima_at1  0.961   -832. 1679. 1679. 1714. <cpl [17]> <cpl [1]>
2 Sevilla   arima_at2  0.961   -831. 1679. 1679. 1714. <cpl [18]> <cpl [0]>
3 Sevilla   Snaive     2.23      NA    NA    NA    NA  <NULL>     <NULL>

fit_model %>% 
      pivot_longer(-provincia,
                   names_to = ".model",
                   values_to = "orders") %>% 
      left_join(glance(fit_model), by = c("provincia", ".model")) %>% 
      arrange(AICc) %>% 
      select(.model:BIC)

# A mable: 3 x 7
# Key:     .model [3]
  .model                                 orders sigma2 log_lik   AIC  AICc   BIC
  <chr>                                 <model>  <dbl>   <dbl> <dbl> <dbl> <dbl>
1 arima_… <LM w/ ARIMA(4,1,0)(2,0,0)[7] errors>  0.961   -831. 1679. 1679. 1714.
2 arima_… <LM w/ ARIMA(3,1,1)(2,0,0)[7] errors>  0.961   -832. 1679. 1679. 1714.
3 Snaive                               <SNAIVE>  2.23      NA    NA    NA    NA

# We use a Ljung-Box test >> large p-value, confirms residuals are similar / considered to white noise
fit_model %>% 
      select(Snaive) %>% 
      gg_tsresiduals()

fit_model %>% 
      select(arima_at1) %>% 
      gg_tsresiduals()

fit_model %>% 
      select(arima_at2) %>% 
      gg_tsresiduals()

augment(fit_model) %>%
      features(.innov, ljung_box, lag=7)

# A tibble: 3 × 4
  provincia .model    lb_stat lb_pvalue
  <chr>     <chr>       <dbl>     <dbl>
1 Sevilla   arima_at1    6.88     0.441
2 Sevilla   arima_at2    6.06     0.533
3 Sevilla   Snaive     965.       0

augment(fit_model) %>%
      features(.innov, ljung_box, lag=14)

# A tibble: 3 × 4
  provincia .model    lb_stat lb_pvalue
  <chr>     <chr>       <dbl>     <dbl>
1 Sevilla   arima_at1    27.0    0.0190
2 Sevilla   arima_at2    26.4    0.0232
3 Sevilla   Snaive     1403.     0

augment(fit_model) %>%
      features(.innov, ljung_box, lag=21)

# A tibble: 3 × 4
  provincia .model    lb_stat lb_pvalue
  <chr>     <chr>       <dbl>     <dbl>
1 Sevilla   arima_at1    37.9    0.0132
2 Sevilla   arima_at2    37.3    0.0155
3 Sevilla   Snaive     1510.     0

augment(fit_model) %>%
      features(.innov, ljung_box, lag=57)

# A tibble: 3 × 4
  provincia .model    lb_stat lb_pvalue
  <chr>     <chr>       <dbl>     <dbl>
1 Sevilla   arima_at1    76.9    0.0406
2 Sevilla   arima_at2    76.4    0.0439
3 Sevilla   Snaive     1813.     0

# To predict future values of infections, we need to pass the model future/prediction values of the complementary variables tmed and mobility
# We are going to select those from the test dataset
data_fc_h7 <- data_Sevilla_test %>% 
      select(-num_casos) %>% 
      slice(1:7)
data_fc_h14 <- data_Sevilla_test %>% 
      select(-num_casos) %>% 
      slice(1:14)
data_fc_h21 <- data_Sevilla_test %>% 
      select(-num_casos) %>% 
      slice(1:21)
data_fc_h57 <- data_Sevilla_test %>% 
      select(-num_casos) %>% 
      slice(1:57)

# Significant spikes out of 30 is still consistent with white noise
# To be sure, use a Ljung-Box test, which has a large p-value, confirming that
# the residuals are similar to white noise.
# Note that the alternative models also passed this test.
# 
# Forecast
fc_h7  <- fabletools::forecast(fit_model, new_data = data_fc_h7)
fc_h14 <- fabletools::forecast(fit_model, new_data = data_fc_h14)
fc_h21 <- fabletools::forecast(fit_model, new_data = data_fc_h21)
fc_h57 <- fabletools::forecast(fit_model, new_data = data_fc_h57)

7 days

# Plots
fc_h7 %>%
      autoplot(
            data_Sevilla_test %>% 
                  filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(7))
      ) +
      labs(y = "Nº cases", title = "Sevilla - forecast h7")

fc_h7 %>%
      autoplot(
            data_Sevilla %>% 
                  filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(7) &
                               fecha > as.Date("2022-01-01", format = "%Y-%m-%d"))
      ) +
      labs(y = "Nº cases", title = "Sevilla - forecast h7")

# Accuracy
fabletools::accuracy(fc_h7, data_Sevilla)

# A tibble: 3 × 11
  .model    provincia .type     ME  RMSE   MAE    MPE  MAPE  MASE RMSSE  ACF1
  <chr>     <chr>     <chr>  <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Sevilla   Test    35.3  79.3  53.4   4.72  9.49 0.373 0.291 0.152
2 arima_at2 Sevilla   Test    37.0  79.8  53.3   5.02  9.48 0.372 0.292 0.149
3 Snaive    Sevilla   Test  -305.  368.  311.  -48.0  49.4  2.17  1.35  0.523

14 days

# Plots
fc_h14 %>%
      autoplot(
            data_Sevilla_test %>% 
                  filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(14))
      ) +
      labs(y = "Nº cases", title = "Sevilla - forecast h14")

fc_h14 %>%
      autoplot(
            data_Sevilla %>% 
                  filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(14) &
                               fecha > as.Date("2022-01-01", format = "%Y-%m-%d"))
      ) +
      labs(y = "Nº cases", title = "Sevilla - forecast h14")

# Accuracy
fabletools::accuracy(fc_h14, data_Sevilla)

# A tibble: 3 × 11
  .model    provincia .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
  <chr>     <chr>     <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Sevilla   Test   139.  228.  158.  15.1  21.4  1.10 0.834 0.436
2 arima_at2 Sevilla   Test   141.  229.  158.  15.5  21.5  1.11 0.839 0.435
3 Snaive    Sevilla   Test  -297.  338.  299. -50.4  51.1  2.09 1.24  0.411

21 days

# Plots
fc_h21 %>%
      autoplot(
            data_Sevilla_test %>% 
                  filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(21))
      ) +
      labs(y = "Nº cases", title = "Sevilla - forecast h21")

fc_h21 %>%
      autoplot(
            data_Sevilla %>% 
                  filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(21) &
                               fecha > as.Date("2022-01-01", format = "%Y-%m-%d"))
      ) +
      labs(y = "Nº cases", title = "Sevilla - forecast h21")

# Accuracy
fabletools::accuracy(fc_h21, data_Sevilla)

# A tibble: 3 × 11
  .model    provincia .type    ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
  <chr>     <chr>     <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Sevilla   Test   128.  214.  157.  11.9  26.2  1.10 0.784 0.502
2 arima_at2 Sevilla   Test   130.  215.  158.  12.3  26.3  1.11 0.789 0.501
3 Snaive    Sevilla   Test  -380.  430.  381. -76.5  77.0  2.67 1.58  0.507

57 days

# Plots
fc_h57 %>%
      autoplot(
            data_Sevilla_test %>% 
                  filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(57))
      ) +
      labs(y = "Nº cases", title = "Sevilla - forecast h57")

fc_h57 %>%
      autoplot(
            data_Sevilla %>% 
                  filter(fecha <= as.Date("2022-01-31", format = "%Y-%m-%d")+days(57) &
                               fecha > as.Date("2022-01-01", format = "%Y-%m-%d"))
      ) +
      labs(y = "Nº cases", title = "Sevilla - forecast h57")

# Accuracy
fabletools::accuracy(fc_h57, data_Sevilla)

# A tibble: 3 × 11
  .model    provincia .type     ME  RMSE   MAE   MPE  MAPE  MASE RMSSE  ACF1
  <chr>     <chr>     <chr>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 arima_at1 Sevilla   Test   -36.1  203.  151.  -Inf   Inf  1.06 0.744 0.710
2 arima_at2 Sevilla   Test   -34.1  203.  151.  -Inf   Inf  1.05 0.743 0.709
3 Snaive    Sevilla   Test  -737.   850.  738.  -Inf   Inf  5.16 3.11  0.642