::p_load(
pacman# file locator
here, # data management and ggplot2 graphics
tidyverse, # get overview of data
skimr, # produce and adorn tabulations and cross-tabulations
janitor,
tsibble,
fable,
feasts
)
<- readRDS(here("data", "clean", "final_covid_data.rds")) covid_data
ACF and PACF plots
Load packages and data:
The ACF and PACF plots should be considered together.
For the AR process in the ARIMA, we expect that the ACF plot will gradually decrease and simultaneously the PACF should have a sharp drop after p significant lags.
To define a MA process in the ARIMA, we expect the opposite from the ACF and PACF plots, meaning that: the ACF should show a sharp drop after a certain q number of lags while PACF should show a geometric or gradual decreasing trend.
On the other hand, if both ACF and PACF plots demonstrate a gradual decreasing pattern, then the ARMA process should be considered for modeling.
Asturias
<- covid_data %>%
data_asturias filter(provincia == "Asturias") %>%
select(provincia, fecha, num_casos, num_hosp, tmed,mob_grocery_pharmacy, mob_parks,
%>%
mob_residential, mob_retail_recreation, mob_transit_stations, mob_workplaces, mob_flujo) # Drop NAs dates except for mob_flujo since the data finish in 2021 while other sources are ut to 31/03/2022
drop_na(-mob_flujo) %>%
as_tsibble(key = provincia, index = fecha)
data_asturias
# A tsibble: 774 x 12 [1D]
# Key: provincia [1]
provincia fecha num_casos num_hosp tmed mob_grocery_pharmacy mob_parks
<chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Asturias 2020-02-15 0 0 12.2 -1 20
2 Asturias 2020-02-16 0 0 16.2 1 11
3 Asturias 2020-02-17 0 2 9.8 0 -13
4 Asturias 2020-02-18 0 0 8.8 1 11
5 Asturias 2020-02-19 0 0 8.6 1 29
6 Asturias 2020-02-20 0 0 9.9 0 32
7 Asturias 2020-02-21 0 0 11.2 -1 18
8 Asturias 2020-02-22 0 0 10.5 1 29
9 Asturias 2020-02-23 1 0 9.2 6 23
10 Asturias 2020-02-24 0 0 8.6 5 48
# … with 764 more rows, and 5 more variables: mob_residential <dbl>,
# mob_retail_recreation <dbl>, mob_transit_stations <dbl>,
# mob_workplaces <dbl>, mob_flujo <dbl>
# ACF
%>%
data_asturias ACF(num_casos, lag_max = 30) %>%
autoplot() +
labs(title=" Asturias - ACF Nº Cases")
# PACF
%>%
data_asturias PACF(num_casos, lag_max = 30) %>%
autoplot() +
labs(title=" Asturias - PACF Nº Cases")
Barcelona
<- covid_data %>%
data_barcelona filter(provincia == "Barcelona") %>%
select(provincia, fecha,num_casos, num_hosp, tmed, mob_grocery_pharmacy, mob_parks,
%>%
mob_residential, mob_retail_recreation, mob_transit_stations, mob_workplaces, mob_flujo) # Drop NAs dates except for mob_flujo since the data finish in 2021 while other sources are ut to 31/03/2022
drop_na(-mob_flujo) %>%
as_tsibble(key = provincia, index = fecha)
data_barcelona
# A tsibble: 774 x 12 [1D]
# Key: provincia [1]
provincia fecha num_casos num_hosp tmed mob_grocery_pharmacy mob_parks
<chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Barcelona 2020-02-15 12 4 12.8 -3 14
2 Barcelona 2020-02-16 2 4 14.5 -8 2
3 Barcelona 2020-02-17 1 5 14.6 1 11
4 Barcelona 2020-02-18 5 7 11.5 1 8
5 Barcelona 2020-02-19 1 8 11.8 1 6
6 Barcelona 2020-02-20 11 10 10.4 1 9
7 Barcelona 2020-02-21 15 8 11.9 -1 11
8 Barcelona 2020-02-22 8 10 12 -1 25
9 Barcelona 2020-02-23 7 8 12.9 0 26
10 Barcelona 2020-02-24 8 4 13.6 1 21
# … with 764 more rows, and 5 more variables: mob_residential <dbl>,
# mob_retail_recreation <dbl>, mob_transit_stations <dbl>,
# mob_workplaces <dbl>, mob_flujo <dbl>
# ACF
%>%
data_barcelona ACF(num_casos, lag_max = 30) %>%
autoplot() +
labs(title=" Barcelona - ACF Nº Cases")
%>%
data_barcelona PACF(num_casos, lag_max = 30) %>%
autoplot() +
labs(title=" Barcelona - PACF Nº Cases")
Madrid
<- covid_data %>%
data_madrid filter(provincia == "Madrid") %>%
select(provincia, fecha, num_casos, num_hosp, tmed, mob_grocery_pharmacy, mob_parks,
%>%
mob_residential, mob_retail_recreation, mob_transit_stations, mob_workplaces, mob_flujo) # Drop NAs dates except for mob_flujo since the data finish in 2021 while other sources are ut to 31/03/2022
drop_na(-mob_flujo) %>%
as_tsibble(key = provincia, index = fecha)
data_madrid
# A tsibble: 774 x 12 [1D]
# Key: provincia [1]
provincia fecha num_casos num_hosp tmed mob_grocery_pharmacy mob_parks
<chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Madrid 2020-02-15 13 4 8.6 -2 31
2 Madrid 2020-02-16 13 2 9.6 4 34
3 Madrid 2020-02-17 19 9 9.1 3 10
4 Madrid 2020-02-18 14 13 8.5 1 11
5 Madrid 2020-02-19 12 14 8.6 1 18
6 Madrid 2020-02-20 25 15 8 1 18
7 Madrid 2020-02-21 26 18 10.4 1 22
8 Madrid 2020-02-22 29 8 10.8 -1 53
9 Madrid 2020-02-23 44 8 11.6 6 56
10 Madrid 2020-02-24 56 18 10.8 6 35
# … with 764 more rows, and 5 more variables: mob_residential <dbl>,
# mob_retail_recreation <dbl>, mob_transit_stations <dbl>,
# mob_workplaces <dbl>, mob_flujo <dbl>
# ACF
%>%
data_madrid ACF(num_casos, lag_max = 30) %>%
autoplot() +
labs(title=" Madrid - ACF Nº Cases")
# PACF
%>%
data_madrid PACF(num_casos, lag_max = 30) %>%
autoplot() +
labs(title=" Madrid - PACF Nº Cases")
Málaga
<- covid_data %>%
data_malaga filter(provincia == "Málaga") %>%
select(provincia, fecha, num_casos, num_hosp, tmed, mob_grocery_pharmacy, mob_parks,
%>%
mob_residential, mob_retail_recreation, mob_transit_stations, mob_workplaces, mob_flujo) # Drop NAs dates except for mob_flujo since the data finish in 2021 while other sources are ut to 31/03/2022
drop_na(-mob_flujo) %>%
as_tsibble(key = provincia, index = fecha)
data_malaga
# A tsibble: 774 x 12 [1D]
# Key: provincia [1]
provincia fecha num_casos num_hosp tmed mob_grocery_pharmacy mob_parks
<chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Málaga 2020-02-15 1 0 13.5 0 29
2 Málaga 2020-02-16 0 0 14 4 16
3 Málaga 2020-02-17 0 0 17.1 1 14
4 Málaga 2020-02-18 0 0 15.4 0 -1
5 Málaga 2020-02-19 0 2 15.9 0 -1
6 Málaga 2020-02-20 1 0 14.9 1 13
7 Málaga 2020-02-21 3 2 13.6 -1 3
8 Málaga 2020-02-22 2 1 14.2 -1 25
9 Málaga 2020-02-23 2 0 13.2 10 15
10 Málaga 2020-02-24 2 1 14 0 22
# … with 764 more rows, and 5 more variables: mob_residential <dbl>,
# mob_retail_recreation <dbl>, mob_transit_stations <dbl>,
# mob_workplaces <dbl>, mob_flujo <dbl>
# ACF
%>%
data_malaga ACF(num_casos, lag_max = 30) %>%
autoplot() +
labs(title=" Malaga - ACF Nº Cases")
# PACF
%>%
data_malaga PACF(num_casos, lag_max = 30) %>%
autoplot() +
labs(title=" Malaga - PACF Nº Cases")
Sevilla
<- covid_data %>%
data_sevilla filter(provincia == "Sevilla") %>%
select(provincia, fecha, num_casos, num_hosp, tmed, mob_grocery_pharmacy, mob_parks,
%>%
mob_residential, mob_retail_recreation, mob_transit_stations, mob_workplaces, mob_flujo) # Drop NAs dates except for mob_flujo since the data finish in 2021 while other sources are ut to 31/03/2022
drop_na(-mob_flujo) %>%
as_tsibble(key = provincia, index = fecha)
data_sevilla
# A tsibble: 774 x 12 [1D]
# Key: provincia [1]
provincia fecha num_casos num_hosp tmed mob_grocery_pharmacy mob_parks
<chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Sevilla 2020-02-15 0 1 15.9 -5 34
2 Sevilla 2020-02-16 0 0 15.3 1 15
3 Sevilla 2020-02-17 0 0 16.1 0 14
4 Sevilla 2020-02-18 1 1 17.4 -1 16
5 Sevilla 2020-02-19 0 1 15.9 0 14
6 Sevilla 2020-02-20 1 1 16 -1 13
7 Sevilla 2020-02-21 0 0 15.8 -1 18
8 Sevilla 2020-02-22 0 0 16.6 -2 33
9 Sevilla 2020-02-23 0 0 16.2 5 20
10 Sevilla 2020-02-24 2 0 16.3 1 23
# … with 764 more rows, and 5 more variables: mob_residential <dbl>,
# mob_retail_recreation <dbl>, mob_transit_stations <dbl>,
# mob_workplaces <dbl>, mob_flujo <dbl>
# ACF
%>%
data_sevilla ACF(num_casos, lag_max = 30) %>%
autoplot() +
labs(title=" Sevilla - ACF Nº Cases")
# PACF
%>%
data_sevilla PACF(num_casos, lag_max = 30) %>%
autoplot() +
labs(title=" Sevilla - PACF Nº Cases")