/ deaths-by-week.R
deaths-by-week.R
  1  # Deaths by week
  2  
  3  options("max.print" = 1e4)
  4  
  5  library(eurostat)
  6  library(data.table)
  7  library(ggplot2)
  8  library(ISOcodes)
  9  
 10  # Country names
 11  cntry <- as.data.table(ISO_3166_1)
 12  cntry[, .(geo = Alpha_2, cntry = Name)]
 13  
 14  
 15  # Population
 16  # https://ec.europa.eu/eurostat/databrowser/view/demo_gind/
 17  
 18  dat.pop <- get_eurostat(id = tolower("DEMO_GIND"))
 19  setDT(dat.pop)
 20  
 21  dat.pop
 22  dat.pop[, .N, keyby = .(indic_de)]
 23  
 24  dat.pop[, year := year(TIME_PERIOD)]
 25  
 26  dat.pop[indic_de == "JAN", .(year, geo, pop = values)]
 27  
 28  
 29  # https://ec.europa.eu/eurostat/web/products-datasets/-/demo_r_mwk_ts
 30  dat <- get_eurostat(id = "demo_r_mwk_ts", time_format = "raw")
 31  setDT(dat)
 32  
 33  dat
 34  
 35  sapply(dat, class)
 36  dat[, time := TIME_PERIOD]
 37  
 38  dat[, .N, keyby = .(sex)]
 39  dat[, .N, keyby = .(unit)]
 40  
 41  dat[, .N, keyby = .(geo)]
 42  dat[, .N, keyby = .(time)]
 43  
 44  dat[, year := as.integer(substr(time, 1, 4))]
 45  dat[, week := as.integer(substr(time, 7, 8))]
 46  
 47  dat[, .N, keyby = .(year)]
 48  dat[, .(n = .N, Y = sum(values)), keyby = .(year)][, P := prop.table(Y)][]
 49  
 50  dat[, .N, keyby = .(week)]
 51  dat[, .(n = .N, Y = sum(values)), keyby = .(week)][, P := prop.table(Y)][]
 52  
 53  dcast.data.table(data = dat[sex == "T" & week < 99],
 54                   formula = geo ~ year, fun.aggregate = length)
 55  dat[geo == "DE", .N, keyby = .(week)]
 56  
 57  dat[week > 53]
 58  dat[week > 52]
 59  
 60  dat[grepl("W99", time)]
 61  dat <- dat[!grepl("W99", time)]
 62  
 63  dat[, y2020 := factor(x = ifelse(year < 2020L, 0, year - 2019L),
 64                        levels = 0:5,
 65                        labels = c("Y<2020", "Y=2020", "Y=2021", "Y=2022", "Y=2023", "Y=2024"))]
 66  dat[, .N, keyby = .(y2020)]
 67  
 68  
 69  # Merge population total
 70  dat1 <- merge(dat[sex == "T"],
 71                dat.pop[indic_de == "JAN", .(year, geo, pop = values)],
 72                by = c("year", "geo"), all.x = T)
 73  
 74  # While latest population data isn't released, let's simply assume same as last year
 75  current_year <- format(Sys.time(), "%Y")
 76  last_year <- as.character(as.integer(current_year) - 1)
 77  
 78  if (all(is.na(dat1[year == current_year & geo == "DE"]$pop))) {
 79    dat1[year == current_year & geo == "DE"]$pop <- dat1[year == last_year & geo == "DE"]$pop[1]
 80  }
 81  
 82  dat1[, death.rate := values / pop * 1e6]
 83  
 84  dat1[geo != "AD"][order(death.rate)]
 85  
 86  dat1[geo == "EL", geo := "GR"]
 87  dat1[geo == "UK", geo := "GB"]
 88  
 89  dat2 <- merge(dat1, cntry[, .(geo = Alpha_2, cntry = Name)],
 90                by = "geo", all.x = T)
 91  
 92  dat2[is.na(cntry), .N, keyby = .(geo, cntry)]
 93  
 94  setorder(dat2, geo, time)
 95  
 96  dat2[order(time)]
 97  dat2[geo == "LV"][order(time)]
 98  
 99  dat2[time == max(time)]
100  
101  # Filter total and !AD
102  dat3 <- dat2[sex == "T" & geo != "AD"]
103  
104  last_obs <- dat3[, max(time)]
105  cat(last_obs, "\n")
106  
107  pl1 <- ggplot(data = dat3,
108                mapping = aes(x = week, y = values, group = year,
109                              colour = y2020, alpha = year)) +
110    geom_line() +
111    geom_point(data = dat3[year >= 2020], shape = 20) +
112    geom_vline(xintercept = 13 * 1:4, colour = "red", alpha = .2) +
113    scale_x_continuous(breaks = 13 * 1:4, minor_breaks = 1:53) +
114    # scale_colour_brewer(palette = "Dark2") +
115    facet_wrap(~ cntry, scales = "free_y") +
116    ggtitle("Deaths by week (total count)",
117            paste("Datasource: Eurostat [DEMO_R_MWK_TS],",
118                  "last observation:", last_obs,
119                  "date:", Sys.Date())) +
120    theme_bw()
121  
122  pl2 <- ggplot(data = dat3[year <= dat.pop[, max(year)]],
123                mapping = aes(x = week, y = death.rate, group = year,
124                              colour = y2020, alpha = year)) +
125    geom_line() +
126    geom_point(data = dat3[year >= 2020 & year <= dat.pop[, max(year)]],
127               shape = 20) +
128    geom_vline(xintercept = 13 * 1:4, colour = "red", alpha = .2) +
129    scale_x_continuous(breaks = 13 * 1:4, minor_breaks = 1:53) +
130    # scale_colour_brewer(palette = "Paired") +
131    facet_wrap(~ cntry) +
132    ggtitle("Death rate by week (per 1,000,000 individuals)",
133            paste("Datasource: Eurostat [DEMO_R_MWK_TS], [DEMO_GIND],",
134                  "last observation:", last_obs,
135                  "date:", Sys.Date())) +
136    theme_bw()
137  
138  pl3 <- ggplot(data = dat3[year <= dat.pop[, max(year)]],
139                mapping = aes(x = week, y = death.rate, group = year,
140                              colour = y2020, alpha = year)) +
141    geom_line() +
142    geom_point(data = dat3[year >= 2020 & year <= dat.pop[, max(year)]],
143               shape = 20) +
144    geom_vline(xintercept = 13 * 1:4, colour = "red", alpha = .2) +
145    scale_x_continuous(breaks = 13 * 1:4, minor_breaks = 1:53) +
146    # scale_colour_brewer(palette = "Paired") +
147    facet_wrap(~ cntry, scales = "free_y") +
148    ggtitle("Death rate by week (per 1,000,000 individuals)",
149            paste("Datasource: Eurostat [DEMO_R_MWK_TS], [DEMO_GIND],",
150                  "last observation:", last_obs,
151                  "date:", Sys.Date())) +
152    theme_bw()
153  
154  ggsave(filename = "dths-by-wk-total.png",
155         plot = pl1, width = 16, height = 9)
156  ggsave(filename = "dths-by-wk-rate.png",
157         plot = pl2, width = 16, height = 9)
158  ggsave(filename = "dths-by-wk-rate-free-y.png",
159         plot = pl3, width = 16, height = 9)
160  
161  cairo_pdf(filename = "dths-by-wk.pdf", width = 16, height = 9, onefile = TRUE)
162  print(pl1)
163  print(pl2)
164  print(pl3)
165  dev.off()
166  
167  fwrite(x = dat3, file = "data.csv")
168