ACF and PACF plots

Load packages and data:

pacman::p_load(
      here,      # file locator
      tidyverse, # data management and ggplot2 graphics
      skimr,     # get overview of data
      janitor,   # produce and adorn tabulations and cross-tabulations
      tsibble,
      fable,
      feasts
)

covid_data <- readRDS(here("data", "clean", "final_covid_data.rds"))

The ACF and PACF plots should be considered together.

For the AR process in the ARIMA, we expect that the ACF plot will gradually decrease and simultaneously the PACF should have a sharp drop after p significant lags.

To define a MA process in the ARIMA, we expect the opposite from the ACF and PACF plots, meaning that: the ACF should show a sharp drop after a certain q number of lags while PACF should show a geometric or gradual decreasing trend.

On the other hand, if both ACF and PACF plots demonstrate a gradual decreasing pattern, then the ARMA process should be considered for modeling.

Asturias

data_asturias <- covid_data %>% 
      filter(provincia == "Asturias") %>% 
      select(provincia, fecha, num_casos, num_hosp, tmed,mob_grocery_pharmacy, mob_parks,
             mob_residential, mob_retail_recreation, mob_transit_stations, mob_workplaces, mob_flujo) %>% 
      # Drop NAs dates except for mob_flujo since the data finish in 2021 while other sources are ut to 31/03/2022
      drop_na(-mob_flujo) %>%  
      as_tsibble(key = provincia, index = fecha)
data_asturias
# A tsibble: 774 x 12 [1D]
# Key:       provincia [1]
   provincia fecha      num_casos num_hosp  tmed mob_grocery_pharmacy mob_parks
   <chr>     <date>         <dbl>    <dbl> <dbl>                <dbl>     <dbl>
 1 Asturias  2020-02-15         0        0  12.2                   -1        20
 2 Asturias  2020-02-16         0        0  16.2                    1        11
 3 Asturias  2020-02-17         0        2   9.8                    0       -13
 4 Asturias  2020-02-18         0        0   8.8                    1        11
 5 Asturias  2020-02-19         0        0   8.6                    1        29
 6 Asturias  2020-02-20         0        0   9.9                    0        32
 7 Asturias  2020-02-21         0        0  11.2                   -1        18
 8 Asturias  2020-02-22         0        0  10.5                    1        29
 9 Asturias  2020-02-23         1        0   9.2                    6        23
10 Asturias  2020-02-24         0        0   8.6                    5        48
# … with 764 more rows, and 5 more variables: mob_residential <dbl>,
#   mob_retail_recreation <dbl>, mob_transit_stations <dbl>,
#   mob_workplaces <dbl>, mob_flujo <dbl>
# ACF
data_asturias %>%
      ACF(num_casos, lag_max = 30) %>%
      autoplot() +
      labs(title=" Asturias - ACF Nº Cases")

# PACF
data_asturias %>%
      PACF(num_casos, lag_max = 30) %>%
      autoplot() +
      labs(title=" Asturias - PACF Nº Cases")

Barcelona

data_barcelona <- covid_data %>% 
      filter(provincia == "Barcelona") %>% 
      select(provincia, fecha,num_casos, num_hosp, tmed, mob_grocery_pharmacy, mob_parks,
             mob_residential, mob_retail_recreation, mob_transit_stations, mob_workplaces, mob_flujo) %>% 
      # Drop NAs dates except for mob_flujo since the data finish in 2021 while other sources are ut to 31/03/2022
      drop_na(-mob_flujo) %>%  
      as_tsibble(key = provincia, index = fecha)
data_barcelona
# A tsibble: 774 x 12 [1D]
# Key:       provincia [1]
   provincia fecha      num_casos num_hosp  tmed mob_grocery_pharmacy mob_parks
   <chr>     <date>         <dbl>    <dbl> <dbl>                <dbl>     <dbl>
 1 Barcelona 2020-02-15        12        4  12.8                   -3        14
 2 Barcelona 2020-02-16         2        4  14.5                   -8         2
 3 Barcelona 2020-02-17         1        5  14.6                    1        11
 4 Barcelona 2020-02-18         5        7  11.5                    1         8
 5 Barcelona 2020-02-19         1        8  11.8                    1         6
 6 Barcelona 2020-02-20        11       10  10.4                    1         9
 7 Barcelona 2020-02-21        15        8  11.9                   -1        11
 8 Barcelona 2020-02-22         8       10  12                     -1        25
 9 Barcelona 2020-02-23         7        8  12.9                    0        26
10 Barcelona 2020-02-24         8        4  13.6                    1        21
# … with 764 more rows, and 5 more variables: mob_residential <dbl>,
#   mob_retail_recreation <dbl>, mob_transit_stations <dbl>,
#   mob_workplaces <dbl>, mob_flujo <dbl>
# ACF
data_barcelona %>%
      ACF(num_casos, lag_max = 30) %>%
      autoplot() +
      labs(title=" Barcelona - ACF Nº Cases")

data_barcelona %>%
      PACF(num_casos, lag_max = 30) %>%
      autoplot() +
      labs(title=" Barcelona - PACF Nº Cases")

Madrid

data_madrid <- covid_data %>% 
      filter(provincia == "Madrid") %>% 
      select(provincia, fecha, num_casos, num_hosp, tmed, mob_grocery_pharmacy, mob_parks,
             mob_residential, mob_retail_recreation, mob_transit_stations, mob_workplaces, mob_flujo) %>% 
      # Drop NAs dates except for mob_flujo since the data finish in 2021 while other sources are ut to 31/03/2022
      drop_na(-mob_flujo) %>%  
      as_tsibble(key = provincia, index = fecha)
data_madrid
# A tsibble: 774 x 12 [1D]
# Key:       provincia [1]
   provincia fecha      num_casos num_hosp  tmed mob_grocery_pharmacy mob_parks
   <chr>     <date>         <dbl>    <dbl> <dbl>                <dbl>     <dbl>
 1 Madrid    2020-02-15        13        4   8.6                   -2        31
 2 Madrid    2020-02-16        13        2   9.6                    4        34
 3 Madrid    2020-02-17        19        9   9.1                    3        10
 4 Madrid    2020-02-18        14       13   8.5                    1        11
 5 Madrid    2020-02-19        12       14   8.6                    1        18
 6 Madrid    2020-02-20        25       15   8                      1        18
 7 Madrid    2020-02-21        26       18  10.4                    1        22
 8 Madrid    2020-02-22        29        8  10.8                   -1        53
 9 Madrid    2020-02-23        44        8  11.6                    6        56
10 Madrid    2020-02-24        56       18  10.8                    6        35
# … with 764 more rows, and 5 more variables: mob_residential <dbl>,
#   mob_retail_recreation <dbl>, mob_transit_stations <dbl>,
#   mob_workplaces <dbl>, mob_flujo <dbl>
# ACF
data_madrid %>%
      ACF(num_casos, lag_max = 30) %>%
      autoplot() +
      labs(title=" Madrid - ACF Nº Cases")

# PACF
data_madrid %>%
      PACF(num_casos, lag_max = 30) %>%
      autoplot() +
      labs(title=" Madrid - PACF Nº Cases")

Málaga

data_malaga <- covid_data %>% 
      filter(provincia == "Málaga") %>% 
      select(provincia, fecha, num_casos, num_hosp, tmed, mob_grocery_pharmacy, mob_parks,
             mob_residential, mob_retail_recreation, mob_transit_stations, mob_workplaces, mob_flujo) %>% 
      # Drop NAs dates except for mob_flujo since the data finish in 2021 while other sources are ut to 31/03/2022
      drop_na(-mob_flujo) %>%  
      as_tsibble(key = provincia, index = fecha)
data_malaga
# A tsibble: 774 x 12 [1D]
# Key:       provincia [1]
   provincia fecha      num_casos num_hosp  tmed mob_grocery_pharmacy mob_parks
   <chr>     <date>         <dbl>    <dbl> <dbl>                <dbl>     <dbl>
 1 Málaga    2020-02-15         1        0  13.5                    0        29
 2 Málaga    2020-02-16         0        0  14                      4        16
 3 Málaga    2020-02-17         0        0  17.1                    1        14
 4 Málaga    2020-02-18         0        0  15.4                    0        -1
 5 Málaga    2020-02-19         0        2  15.9                    0        -1
 6 Málaga    2020-02-20         1        0  14.9                    1        13
 7 Málaga    2020-02-21         3        2  13.6                   -1         3
 8 Málaga    2020-02-22         2        1  14.2                   -1        25
 9 Málaga    2020-02-23         2        0  13.2                   10        15
10 Málaga    2020-02-24         2        1  14                      0        22
# … with 764 more rows, and 5 more variables: mob_residential <dbl>,
#   mob_retail_recreation <dbl>, mob_transit_stations <dbl>,
#   mob_workplaces <dbl>, mob_flujo <dbl>
# ACF
data_malaga %>%
      ACF(num_casos, lag_max = 30) %>%
      autoplot() +
      labs(title=" Malaga - ACF Nº Cases")

# PACF
data_malaga %>%
      PACF(num_casos, lag_max = 30) %>%
      autoplot() +
      labs(title=" Malaga - PACF Nº Cases")

Sevilla

data_sevilla <- covid_data %>% 
      filter(provincia == "Sevilla") %>% 
      select(provincia, fecha, num_casos, num_hosp, tmed, mob_grocery_pharmacy, mob_parks,
             mob_residential, mob_retail_recreation, mob_transit_stations, mob_workplaces, mob_flujo) %>% 
      # Drop NAs dates except for mob_flujo since the data finish in 2021 while other sources are ut to 31/03/2022
      drop_na(-mob_flujo) %>%   
      as_tsibble(key = provincia, index = fecha)
data_sevilla
# A tsibble: 774 x 12 [1D]
# Key:       provincia [1]
   provincia fecha      num_casos num_hosp  tmed mob_grocery_pharmacy mob_parks
   <chr>     <date>         <dbl>    <dbl> <dbl>                <dbl>     <dbl>
 1 Sevilla   2020-02-15         0        1  15.9                   -5        34
 2 Sevilla   2020-02-16         0        0  15.3                    1        15
 3 Sevilla   2020-02-17         0        0  16.1                    0        14
 4 Sevilla   2020-02-18         1        1  17.4                   -1        16
 5 Sevilla   2020-02-19         0        1  15.9                    0        14
 6 Sevilla   2020-02-20         1        1  16                     -1        13
 7 Sevilla   2020-02-21         0        0  15.8                   -1        18
 8 Sevilla   2020-02-22         0        0  16.6                   -2        33
 9 Sevilla   2020-02-23         0        0  16.2                    5        20
10 Sevilla   2020-02-24         2        0  16.3                    1        23
# … with 764 more rows, and 5 more variables: mob_residential <dbl>,
#   mob_retail_recreation <dbl>, mob_transit_stations <dbl>,
#   mob_workplaces <dbl>, mob_flujo <dbl>
# ACF
data_sevilla %>%
      ACF(num_casos, lag_max = 30) %>%
      autoplot() +
      labs(title=" Sevilla - ACF Nº Cases")

# PACF
data_sevilla %>%
      PACF(num_casos, lag_max = 30) %>%
      autoplot() +
      labs(title=" Sevilla - PACF Nº Cases")