::p_load(
pacman# file locator
here, # data management and ggplot2 graphics
tidyverse, # get overview of data
skimr, # produce and adorn tabulations and cross-tabulations
janitor,
tsibble,
fable,
feasts
)
<- readRDS(here("data", "clean", "final_covid_data.rds")) covid_data
STL decomposition / Transformations
Load packages and data:
Asturias
<- covid_data %>%
data_asturias filter(provincia == "Asturias") %>%
select(provincia, fecha, num_casos, num_hosp, tmed,mob_grocery_pharmacy, mob_parks,
%>%
mob_residential, mob_retail_recreation, mob_transit_stations, mob_workplaces, mob_flujo) # Drop NAs dates except for mob_flujo since the data finish in 2021 while other sources are ut to 31/03/2022
drop_na(-mob_flujo) %>%
as_tsibble(key = provincia, index = fecha)
data_asturias
# A tsibble: 774 x 12 [1D]
# Key: provincia [1]
provincia fecha num_casos num_hosp tmed mob_grocery_pharmacy mob_parks
<chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Asturias 2020-02-15 0 0 12.2 -1 20
2 Asturias 2020-02-16 0 0 16.2 1 11
3 Asturias 2020-02-17 0 2 9.8 0 -13
4 Asturias 2020-02-18 0 0 8.8 1 11
5 Asturias 2020-02-19 0 0 8.6 1 29
6 Asturias 2020-02-20 0 0 9.9 0 32
7 Asturias 2020-02-21 0 0 11.2 -1 18
8 Asturias 2020-02-22 0 0 10.5 1 29
9 Asturias 2020-02-23 1 0 9.2 6 23
10 Asturias 2020-02-24 0 0 8.6 5 48
# … with 764 more rows, and 5 more variables: mob_residential <dbl>,
# mob_retail_recreation <dbl>, mob_transit_stations <dbl>,
# mob_workplaces <dbl>, mob_flujo <dbl>
%>%
data_asturias model(STL(num_casos ~ season(window = 7) + trend(window = 7))) %>%
components() %>%
autoplot() +
labs(title = "Asturias STL") +
theme_bw()
%>%
data_asturias model(STL(num_casos ~ season(window = 7) + trend(window = 7))) %>%
components() %>%
as_tsibble() %>%
autoplot(num_casos, color = "grey") +
geom_line(aes(y = season_adjust), color = "red") +
geom_line(aes(y = trend), color = "blue") +
labs(title="Asturias season adjusted and trend") +
theme_bw()
<- data_asturias %>%
lambda_asturias features(num_casos, features = guerrero) %>%
pull(lambda_guerrero)
lambda_asturias
[1] 0.2253939
%>%
data_asturias gg_tsdisplay(
box_cox(num_casos, lambda_asturias),
plot_type = 'partial', lag = 30
+
) labs(title = "Asturias - Variance stb")
%>%
data_asturias gg_tsdisplay(
difference(box_cox(num_casos, lambda_asturias)),
plot_type = 'partial', lag = 30
+
) labs(title = "Asturias - Difference once")
%>%
data_asturias gg_tsdisplay(
difference(box_cox(num_casos, lambda_asturias)) %>% difference(),
plot_type = 'partial', lag = 30
+
) labs(title="Asturias - Difference double")
%>%
data_asturias model(
STL(num_casos ~ season(window = 7) + trend(window = 7),
robust = TRUE)
%>%
) components() %>%
gg_tsdisplay(season_adjust, plot_type = 'partial', lag = 30) +
labs(title="Asturias - Orginal STL season_adjust")
%>%
data_asturias model(
STL(num_casos ~ season(window = 7) + trend(window = 7),
robust = TRUE)
%>%
) components() %>%
gg_tsdisplay(trend, plot_type = 'partial', lag = 30) +
labs(title="Asturias - Orginal STL trend")
%>%
data_asturias model(
STL(num_casos ~ season(window = 7) + trend(window = 7),
robust = TRUE)
%>%
) components() %>%
select(trend) %>%
gg_tsdisplay(difference(trend), plot_type = 'partial', lag = 30) +
labs(title="Asturias - Orginal STL trend first difference")
%>%
data_asturias model(
STL(num_casos ~ season(window = 7) + trend(window = 7),
robust = TRUE)
%>%
) components() %>%
select(trend) %>%
gg_tsdisplay(difference(difference(trend)), plot_type = 'partial', lag = 30) +
labs(title="Asturias - Orginal STL trend second difference")
Barcelona
<- covid_data %>%
data_barcelona filter(provincia == "Barcelona") %>%
select(provincia, fecha, num_casos, num_hosp, tmed,mob_grocery_pharmacy, mob_parks,
%>%
mob_residential, mob_retail_recreation, mob_transit_stations, mob_workplaces, mob_flujo) # Drop NAs dates except for mob_flujo since the data finish in 2021 while other sources are ut to 31/03/2022
drop_na(-mob_flujo) %>%
as_tsibble(key = provincia, index = fecha)
data_barcelona
# A tsibble: 774 x 12 [1D]
# Key: provincia [1]
provincia fecha num_casos num_hosp tmed mob_grocery_pharmacy mob_parks
<chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Barcelona 2020-02-15 12 4 12.8 -3 14
2 Barcelona 2020-02-16 2 4 14.5 -8 2
3 Barcelona 2020-02-17 1 5 14.6 1 11
4 Barcelona 2020-02-18 5 7 11.5 1 8
5 Barcelona 2020-02-19 1 8 11.8 1 6
6 Barcelona 2020-02-20 11 10 10.4 1 9
7 Barcelona 2020-02-21 15 8 11.9 -1 11
8 Barcelona 2020-02-22 8 10 12 -1 25
9 Barcelona 2020-02-23 7 8 12.9 0 26
10 Barcelona 2020-02-24 8 4 13.6 1 21
# … with 764 more rows, and 5 more variables: mob_residential <dbl>,
# mob_retail_recreation <dbl>, mob_transit_stations <dbl>,
# mob_workplaces <dbl>, mob_flujo <dbl>
%>%
data_barcelona model(STL(num_casos ~ season(window = 7) + trend(window = 7))) %>%
components() %>%
autoplot() +
labs(title = "Barcelona STL") +
theme_bw()
%>%
data_barcelona model(STL(num_casos ~ season(window = 7) + trend(window = 7))) %>%
components() %>%
as_tsibble() %>%
autoplot(num_casos, color = "grey") +
geom_line(aes(y = season_adjust), color = "red") +
geom_line(aes(y = trend), color = "blue") +
labs(title="Barcelona season adjusted and trend") +
theme_bw()
<- data_barcelona %>%
lambda_barcelona features(num_casos, features = guerrero) %>%
pull(lambda_guerrero)
lambda_barcelona
[1] 0.1052672
%>%
data_barcelona gg_tsdisplay(
box_cox(num_casos, lambda_barcelona),
plot_type = 'partial', lag = 30
+
) labs(title = "Barcelona - Variance stb")
%>%
data_barcelona gg_tsdisplay(
difference(box_cox(num_casos, lambda_barcelona)),
plot_type = 'partial', lag = 30
+
) labs(title = "Barcelona - Difference once")
%>%
data_barcelona gg_tsdisplay(
difference(box_cox(num_casos, lambda_barcelona)) %>% difference(),
plot_type = 'partial', lag = 30
+
) labs(title="Barcelona - Difference double")
%>%
data_barcelona model(
STL(num_casos ~ season(window = 7) + trend(window = 7),
robust = TRUE)
%>%
) components() %>%
gg_tsdisplay(season_adjust, plot_type = 'partial', lag = 30) +
labs(title="Barcelona - Orginal STL season_adjust")
%>%
data_barcelona model(
STL(num_casos ~ season(window = 7) + trend(window = 7),
robust = TRUE)
%>%
) components() %>%
gg_tsdisplay(trend, plot_type = 'partial', lag = 30) +
labs(title="Barcelona - Orginal STL trend")
%>%
data_barcelona model(
STL(num_casos ~ season(window = 7) + trend(window = 7),
robust = TRUE)
%>%
) components() %>%
gg_tsdisplay(difference(trend), plot_type = 'partial', lag = 30) +
labs(title="Barcelona - Orginal STL trend first difference")
%>%
data_barcelona model(
STL(num_casos ~ season(window = 7) + trend(window = 7),
robust = TRUE)
%>%
) components() %>%
gg_tsdisplay(difference(difference(trend)), plot_type = 'partial', lag = 30) +
labs(title="Barcelona - Orginal STL trend second difference")
Madrid
<- covid_data %>%
data_madrid filter(provincia == "Madrid") %>%
select(provincia, fecha, num_casos, num_hosp, tmed,mob_grocery_pharmacy, mob_parks,
%>%
mob_residential, mob_retail_recreation, mob_transit_stations, mob_workplaces, mob_flujo) # Drop NAs dates except for mob_flujo since the data finish in 2021 while other sources are ut to 31/03/2022
drop_na(-mob_flujo) %>%
as_tsibble(key = provincia, index = fecha)
data_madrid
# A tsibble: 774 x 12 [1D]
# Key: provincia [1]
provincia fecha num_casos num_hosp tmed mob_grocery_pharmacy mob_parks
<chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Madrid 2020-02-15 13 4 8.6 -2 31
2 Madrid 2020-02-16 13 2 9.6 4 34
3 Madrid 2020-02-17 19 9 9.1 3 10
4 Madrid 2020-02-18 14 13 8.5 1 11
5 Madrid 2020-02-19 12 14 8.6 1 18
6 Madrid 2020-02-20 25 15 8 1 18
7 Madrid 2020-02-21 26 18 10.4 1 22
8 Madrid 2020-02-22 29 8 10.8 -1 53
9 Madrid 2020-02-23 44 8 11.6 6 56
10 Madrid 2020-02-24 56 18 10.8 6 35
# … with 764 more rows, and 5 more variables: mob_residential <dbl>,
# mob_retail_recreation <dbl>, mob_transit_stations <dbl>,
# mob_workplaces <dbl>, mob_flujo <dbl>
%>%
data_madrid model(STL(num_casos ~ season(window = 7) + trend(window = 7))) %>%
components() %>%
autoplot() +
labs(title = "Madrid STL") +
theme_bw()
%>%
data_madrid model(STL(num_casos ~ season(window = 7) + trend(window = 7))) %>%
components() %>%
as_tsibble() %>%
autoplot(num_casos, color = "grey") +
geom_line(aes(y = season_adjust), color = "red") +
geom_line(aes(y = trend), color = "blue") +
labs(title="Madrid season adjusted and trend") +
theme_bw()
<- data_madrid %>%
lambda_madrid features(num_casos, features = guerrero) %>%
pull(lambda_guerrero)
lambda_madrid
[1] 0.04702742
%>%
data_madrid gg_tsdisplay(
box_cox(num_casos, lambda_madrid),
plot_type = 'partial', lag = 30
+
) labs(title = "Madrid - Variance stb")
%>%
data_madrid gg_tsdisplay(
difference(box_cox(num_casos, lambda_madrid)),
plot_type = 'partial', lag = 30
+
) labs(title = "Madrid - Difference once")
%>%
data_madrid gg_tsdisplay(
difference(box_cox(num_casos, lambda_madrid)) %>% difference(),
plot_type = 'partial', lag = 30
+
) labs(title="Madrid - Difference double")
%>%
data_madrid model(
STL(num_casos ~ season(window = 7) + trend(window = 7),
robust = TRUE)
%>%
) components() %>%
gg_tsdisplay(season_adjust, plot_type = 'partial', lag = 30) +
labs(title="Madrid - Orginal STL season_adjust")
%>%
data_madrid model(
STL(num_casos ~ season(window = 7) + trend(window = 7),
robust = TRUE)
%>%
) components() %>%
gg_tsdisplay(trend, plot_type = 'partial', lag = 30) +
labs(title="Madrid - Orginal STL trend")
%>%
data_madrid model(
STL(num_casos ~ season(window = 7) + trend(window = 7),
robust = TRUE)
%>%
) components() %>%
gg_tsdisplay(difference(trend), plot_type = 'partial', lag = 30) +
labs(title="Madrid - Orginal STL trend first difference")
%>%
data_madrid model(
STL(num_casos ~ season(window = 7) + trend(window = 7),
robust = TRUE)
%>%
) components() %>%
gg_tsdisplay(difference(difference(trend)), plot_type = 'partial', lag = 30) +
labs(title="Madrid - Orginal STL trend second difference")
Malaga
<- covid_data %>%
data_malaga filter(provincia == "Málaga") %>%
select(provincia, fecha, num_casos, num_hosp, tmed,mob_grocery_pharmacy, mob_parks,
%>%
mob_residential, mob_retail_recreation, mob_transit_stations, mob_workplaces, mob_flujo) # Drop NAs dates except for mob_flujo since the data finish in 2021 while other sources are ut to 31/03/2022
drop_na(-mob_flujo) %>%
as_tsibble(key = provincia, index = fecha)
data_malaga
# A tsibble: 774 x 12 [1D]
# Key: provincia [1]
provincia fecha num_casos num_hosp tmed mob_grocery_pharmacy mob_parks
<chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Málaga 2020-02-15 1 0 13.5 0 29
2 Málaga 2020-02-16 0 0 14 4 16
3 Málaga 2020-02-17 0 0 17.1 1 14
4 Málaga 2020-02-18 0 0 15.4 0 -1
5 Málaga 2020-02-19 0 2 15.9 0 -1
6 Málaga 2020-02-20 1 0 14.9 1 13
7 Málaga 2020-02-21 3 2 13.6 -1 3
8 Málaga 2020-02-22 2 1 14.2 -1 25
9 Málaga 2020-02-23 2 0 13.2 10 15
10 Málaga 2020-02-24 2 1 14 0 22
# … with 764 more rows, and 5 more variables: mob_residential <dbl>,
# mob_retail_recreation <dbl>, mob_transit_stations <dbl>,
# mob_workplaces <dbl>, mob_flujo <dbl>
%>%
data_malaga model(STL(num_casos ~ season(window = 7) + trend(window = 7))) %>%
components() %>%
autoplot() +
labs(title = "Málaga STL") +
theme_bw()
%>%
data_malaga model(STL(num_casos ~ season(window = 7) + trend(window = 7))) %>%
components() %>%
as_tsibble() %>%
autoplot(num_casos, color = "grey") +
geom_line(aes(y = season_adjust), color = "red") +
geom_line(aes(y = trend), color = "blue") +
labs(title="Málaga season adjusted and trend") +
theme_bw()
<- data_malaga %>%
lambda_malaga features(num_casos, features = guerrero) %>%
pull(lambda_guerrero)
lambda_malaga
[1] 0.2118375
%>%
data_malaga gg_tsdisplay(
box_cox(num_casos, lambda_malaga),
plot_type = 'partial', lag = 30
+
) labs(title = "Málaga - Variance stb")
%>%
data_malaga gg_tsdisplay(
difference(box_cox(num_casos, lambda_malaga)),
plot_type = 'partial', lag = 30
+
) labs(title = "Málaga - Difference once")
%>%
data_malaga gg_tsdisplay(
difference(box_cox(num_casos, lambda_malaga)) %>% difference(),
plot_type = 'partial', lag = 30
+
) labs(title="Málaga - Difference double")
%>%
data_malaga model(
STL(num_casos ~ season(window = 7) + trend(window = 7),
robust = TRUE)
%>%
) components() %>%
gg_tsdisplay(season_adjust, plot_type = 'partial', lag = 30) +
labs(title="Málaga - Orginal STL season_adjust")
%>%
data_malaga model(
STL(num_casos ~ season(window = 7) + trend(window = 7),
robust = TRUE)
%>%
) components() %>%
gg_tsdisplay(trend, plot_type = 'partial', lag = 30) +
labs(title="Málaga - Orginal STL trend")
%>%
data_malaga model(
STL(num_casos ~ season(window = 7) + trend(window = 7),
robust = TRUE)
%>%
) components() %>%
gg_tsdisplay(difference(trend), plot_type = 'partial', lag = 30) +
labs(title="Málaga - Orginal STL trend first difference")
%>%
data_malaga model(
STL(num_casos ~ season(window = 7) + trend(window = 7),
robust = TRUE)
%>%
) components() %>%
gg_tsdisplay(difference(difference(trend)), plot_type = 'partial', lag = 30) +
labs(title="Málaga - Orginal STL trend second difference")
Sevilla
<- covid_data %>%
data_sevilla filter(provincia == "Sevilla") %>%
select(provincia, fecha, num_casos, num_hosp, tmed,mob_grocery_pharmacy, mob_parks,
%>%
mob_residential, mob_retail_recreation, mob_transit_stations, mob_workplaces, mob_flujo) # Drop NAs dates except for mob_flujo since the data finish in 2021 while other sources are ut to 31/03/2022
drop_na(-mob_flujo) %>%
as_tsibble(key = provincia, index = fecha)
data_sevilla
# A tsibble: 774 x 12 [1D]
# Key: provincia [1]
provincia fecha num_casos num_hosp tmed mob_grocery_pharmacy mob_parks
<chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Sevilla 2020-02-15 0 1 15.9 -5 34
2 Sevilla 2020-02-16 0 0 15.3 1 15
3 Sevilla 2020-02-17 0 0 16.1 0 14
4 Sevilla 2020-02-18 1 1 17.4 -1 16
5 Sevilla 2020-02-19 0 1 15.9 0 14
6 Sevilla 2020-02-20 1 1 16 -1 13
7 Sevilla 2020-02-21 0 0 15.8 -1 18
8 Sevilla 2020-02-22 0 0 16.6 -2 33
9 Sevilla 2020-02-23 0 0 16.2 5 20
10 Sevilla 2020-02-24 2 0 16.3 1 23
# … with 764 more rows, and 5 more variables: mob_residential <dbl>,
# mob_retail_recreation <dbl>, mob_transit_stations <dbl>,
# mob_workplaces <dbl>, mob_flujo <dbl>
%>%
data_sevilla model(STL(num_casos ~ season(window = 7) + trend(window = 7))) %>%
components() %>%
autoplot() +
labs(title = "Sevilla STL") +
theme_bw()
%>%
data_sevilla model(STL(num_casos ~ season(window = 7) + trend(window = 7))) %>%
components() %>%
as_tsibble() %>%
autoplot(num_casos, color = "grey") +
geom_line(aes(y = season_adjust), color = "red") +
geom_line(aes(y = trend), color = "blue") +
labs(title="Sevilla season adjusted and trend") +
theme_bw()
<- data_sevilla %>%
lambda_sevilla features(num_casos, features = guerrero) %>%
pull(lambda_guerrero)
lambda_sevilla
[1] 0.2009975
%>%
data_sevilla gg_tsdisplay(
box_cox(num_casos, lambda_sevilla),
plot_type = 'partial', lag = 30
+
) labs(title = "Sevilla - Variance stb")
%>%
data_sevilla gg_tsdisplay(
difference(box_cox(num_casos, lambda_sevilla)),
plot_type = 'partial', lag = 30
+
) labs(title = "Sevilla - Difference once")
%>%
data_sevilla gg_tsdisplay(
difference(box_cox(num_casos, lambda_sevilla)) %>% difference(),
plot_type = 'partial', lag = 30
+
) labs(title="Sevilla - Difference double")
%>%
data_sevilla model(
STL(num_casos ~ season(window = 7) + trend(window = 7),
robust = TRUE)
%>%
) components() %>%
gg_tsdisplay(season_adjust, plot_type = 'partial', lag = 30) +
labs(title="Sevilla - Orginal STL season_adjust")
%>%
data_sevilla model(
STL(num_casos ~ season(window = 7) + trend(window = 7),
robust = TRUE)
%>%
) components() %>%
gg_tsdisplay(trend, plot_type = 'partial', lag = 30) +
labs(title="Sevilla - Orginal STL trend")
%>%
data_sevilla model(
STL(num_casos ~ season(window = 7) + trend(window = 7),
robust = TRUE)
%>%
) components() %>%
gg_tsdisplay(difference(trend), plot_type = 'partial', lag = 30) +
labs(title="Sevilla - Orginal STL trend first difference")
%>%
data_sevilla model(
STL(num_casos ~ season(window = 7) + trend(window = 7),
robust = TRUE)
%>%
) components() %>%
gg_tsdisplay(difference(difference(trend)), plot_type = 'partial', lag = 30) +
labs(title="Sevilla - Orginal STL trend second difference")