/ deaths-by-week.R
deaths-by-week.R
1 # Deaths by week 2 3 options("max.print" = 1e4) 4 5 library(eurostat) 6 library(data.table) 7 library(ggplot2) 8 library(ISOcodes) 9 10 # Country names 11 cntry <- as.data.table(ISO_3166_1) 12 cntry[, .(geo = Alpha_2, cntry = Name)] 13 14 15 # Population 16 # https://ec.europa.eu/eurostat/databrowser/view/demo_gind/ 17 18 dat.pop <- get_eurostat(id = tolower("DEMO_GIND")) 19 setDT(dat.pop) 20 21 dat.pop 22 dat.pop[, .N, keyby = .(indic_de)] 23 24 dat.pop[, year := year(TIME_PERIOD)] 25 26 dat.pop[indic_de == "JAN", .(year, geo, pop = values)] 27 28 29 # https://ec.europa.eu/eurostat/web/products-datasets/-/demo_r_mwk_ts 30 dat <- get_eurostat(id = "demo_r_mwk_ts", time_format = "raw") 31 setDT(dat) 32 33 dat 34 35 sapply(dat, class) 36 dat[, time := TIME_PERIOD] 37 38 dat[, .N, keyby = .(sex)] 39 dat[, .N, keyby = .(unit)] 40 41 dat[, .N, keyby = .(geo)] 42 dat[, .N, keyby = .(time)] 43 44 dat[, year := as.integer(substr(time, 1, 4))] 45 dat[, week := as.integer(substr(time, 7, 8))] 46 47 dat[, .N, keyby = .(year)] 48 dat[, .(n = .N, Y = sum(values)), keyby = .(year)][, P := prop.table(Y)][] 49 50 dat[, .N, keyby = .(week)] 51 dat[, .(n = .N, Y = sum(values)), keyby = .(week)][, P := prop.table(Y)][] 52 53 dcast.data.table(data = dat[sex == "T" & week < 99], 54 formula = geo ~ year, fun.aggregate = length) 55 dat[geo == "DE", .N, keyby = .(week)] 56 57 dat[week > 53] 58 dat[week > 52] 59 60 dat[grepl("W99", time)] 61 dat <- dat[!grepl("W99", time)] 62 63 dat[, y2020 := factor(x = ifelse(year < 2020L, 0, year - 2019L), 64 levels = 0:5, 65 labels = c("Y<2020", "Y=2020", "Y=2021", "Y=2022", "Y=2023", "Y=2024"))] 66 dat[, .N, keyby = .(y2020)] 67 68 69 # Merge population total 70 dat1 <- merge(dat[sex == "T"], 71 dat.pop[indic_de == "JAN", .(year, geo, pop = values)], 72 by = c("year", "geo"), all.x = T) 73 74 # While latest population data isn't released, let's simply assume same as last year 75 current_year <- format(Sys.time(), "%Y") 76 last_year <- as.character(as.integer(current_year) - 1) 77 78 if (all(is.na(dat1[year == current_year & geo == "DE"]$pop))) { 79 dat1[year == current_year & geo == "DE"]$pop <- dat1[year == last_year & geo == "DE"]$pop[1] 80 } 81 82 dat1[, death.rate := values / pop * 1e6] 83 84 dat1[geo != "AD"][order(death.rate)] 85 86 dat1[geo == "EL", geo := "GR"] 87 dat1[geo == "UK", geo := "GB"] 88 89 dat2 <- merge(dat1, cntry[, .(geo = Alpha_2, cntry = Name)], 90 by = "geo", all.x = T) 91 92 dat2[is.na(cntry), .N, keyby = .(geo, cntry)] 93 94 setorder(dat2, geo, time) 95 96 dat2[order(time)] 97 dat2[geo == "LV"][order(time)] 98 99 dat2[time == max(time)] 100 101 # Filter total and !AD 102 dat3 <- dat2[sex == "T" & geo != "AD"] 103 104 last_obs <- dat3[, max(time)] 105 cat(last_obs, "\n") 106 107 pl1 <- ggplot(data = dat3, 108 mapping = aes(x = week, y = values, group = year, 109 colour = y2020, alpha = year)) + 110 geom_line() + 111 geom_point(data = dat3[year >= 2020], shape = 20) + 112 geom_vline(xintercept = 13 * 1:4, colour = "red", alpha = .2) + 113 scale_x_continuous(breaks = 13 * 1:4, minor_breaks = 1:53) + 114 # scale_colour_brewer(palette = "Dark2") + 115 facet_wrap(~ cntry, scales = "free_y") + 116 ggtitle("Deaths by week (total count)", 117 paste("Datasource: Eurostat [DEMO_R_MWK_TS],", 118 "last observation:", last_obs, 119 "date:", Sys.Date())) + 120 theme_bw() 121 122 pl2 <- ggplot(data = dat3[year <= dat.pop[, max(year)]], 123 mapping = aes(x = week, y = death.rate, group = year, 124 colour = y2020, alpha = year)) + 125 geom_line() + 126 geom_point(data = dat3[year >= 2020 & year <= dat.pop[, max(year)]], 127 shape = 20) + 128 geom_vline(xintercept = 13 * 1:4, colour = "red", alpha = .2) + 129 scale_x_continuous(breaks = 13 * 1:4, minor_breaks = 1:53) + 130 # scale_colour_brewer(palette = "Paired") + 131 facet_wrap(~ cntry) + 132 ggtitle("Death rate by week (per 1,000,000 individuals)", 133 paste("Datasource: Eurostat [DEMO_R_MWK_TS], [DEMO_GIND],", 134 "last observation:", last_obs, 135 "date:", Sys.Date())) + 136 theme_bw() 137 138 pl3 <- ggplot(data = dat3[year <= dat.pop[, max(year)]], 139 mapping = aes(x = week, y = death.rate, group = year, 140 colour = y2020, alpha = year)) + 141 geom_line() + 142 geom_point(data = dat3[year >= 2020 & year <= dat.pop[, max(year)]], 143 shape = 20) + 144 geom_vline(xintercept = 13 * 1:4, colour = "red", alpha = .2) + 145 scale_x_continuous(breaks = 13 * 1:4, minor_breaks = 1:53) + 146 # scale_colour_brewer(palette = "Paired") + 147 facet_wrap(~ cntry, scales = "free_y") + 148 ggtitle("Death rate by week (per 1,000,000 individuals)", 149 paste("Datasource: Eurostat [DEMO_R_MWK_TS], [DEMO_GIND],", 150 "last observation:", last_obs, 151 "date:", Sys.Date())) + 152 theme_bw() 153 154 ggsave(filename = "dths-by-wk-total.png", 155 plot = pl1, width = 16, height = 9) 156 ggsave(filename = "dths-by-wk-rate.png", 157 plot = pl2, width = 16, height = 9) 158 ggsave(filename = "dths-by-wk-rate-free-y.png", 159 plot = pl3, width = 16, height = 9) 160 161 cairo_pdf(filename = "dths-by-wk.pdf", width = 16, height = 9, onefile = TRUE) 162 print(pl1) 163 print(pl2) 164 print(pl3) 165 dev.off() 166 167 fwrite(x = dat3, file = "data.csv") 168