## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:data.table':
## 
##     between, first, last

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

## ! epidatr cache is being used (set env var EPIDATR_USE_CACHE=FALSE if not intended).
## ℹ The cache directory is ~/.cache/R/epidatr.
## ℹ The cache will be cleared after 42 days and will be pruned if it exceeds 1024 MB.
## ℹ The log of cache transactions is stored at ~/.cache/R/epidatr/logfile.txt.
## Loading required package: epidatasets
## 
## Loading required package: parsnip
## 
## Registered S3 method overwritten by 'tsibble':
##   method               from 
##   as_tibble.grouped_df dplyr
## 
## Registered S3 method overwritten by 'epipredict':
##   method            from   
##   print.step_naomit recipes
## 
## 
## Attaching package: 'epipredict'
## 
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## 
## 
## Attaching package: 'epiprocess'
## 
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## 
## 
## Attaching package: 'ggplot2'
## 
## 
## The following object is masked from 'package:epipredict':
## 
##     layer
## 
## 
## here() starts at /home/dskel/repos/delphi/exploration-tooling
## 
## 
## Attaching package: 'hubValidations'
## 
## 
## The following object is masked from 'package:dplyr':
## 
##     combine
## 
## 
## 
## Attaching package: 'lubridate'
## 
## 
## The following objects are masked from 'package:data.table':
## 
##     hour, isoweek, mday, minute, month, quarter, second, wday, week,
##     yday, year
## 
## 
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
## 
## 
## 
## Attaching package: 'plotly'
## 
## 
## The following object is masked from 'package:paws.storage':
## 
##     config
## 
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## 
## The following object is masked from 'package:graphics':
## 
##     layout
## 
## 
## 
## Attaching package: 'purrr'
## 
## 
## The following object is masked from 'package:magrittr':
## 
##     set_names
## 
## 
## The following object is masked from 'package:jsonlite':
## 
##     flatten
## 
## 
## The following object is masked from 'package:data.table':
## 
##     transpose
## 
## 
## qs2 0.1.5
## 
## Loading required package: SparseM
## 
## 
## Attaching package: 'recipes'
## 
## 
## The following object is masked from 'package:stats':
## 
##     step
## 
## 
## 
## Attaching package: 'renv'
## 
## 
## The following object is masked from 'package:recipes':
## 
##     update
## 
## 
## The following object is masked from 'package:purrr':
## 
##     modify
## 
## 
## The following object is masked from 'package:pak':
## 
##     lockfile_create
## 
## 
## The following object is masked from 'package:languageserver':
## 
##     run
## 
## 
## The following objects are masked from 'package:stats':
## 
##     embed, update
## 
## 
## The following objects are masked from 'package:utils':
## 
##     history, upgrade
## 
## 
## The following objects are masked from 'package:base':
## 
##     autoload, load, remove, use
## 
## 
## 
## Attaching package: 'rlang'
## 
## 
## The following objects are masked from 'package:purrr':
## 
##     %@%, flatten, flatten_chr, flatten_dbl, flatten_int, flatten_lgl,
##     flatten_raw, invoke, splice
## 
## 
## The following object is masked from 'package:magrittr':
## 
##     set_names
## 
## 
## The following objects are masked from 'package:jsonlite':
## 
##     flatten, unbox
## 
## 
## The following object is masked from 'package:hubValidations':
## 
##     is_error
## 
## 
## The following object is masked from 'package:data.table':
## 
##     :=
## 
## 
## The following object is masked from 'package:assertthat':
## 
##     has_name
## 
## 
## 
## Attaching package: 'scales'
## 
## 
## The following object is masked from 'package:readr':
## 
##     col_factor
## 
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## 
## 
## Attaching package: 'stringr'
## 
## 
## The following object is masked from 'package:recipes':
## 
##     fixed
## 
## 
## 
## Attaching package: 'testthat'
## 
## 
## The following object is masked from 'package:targets':
## 
##     matches
## 
## 
## The following object is masked from 'package:tarchetypes':
## 
##     matches
## 
## 
## The following objects are masked from 'package:rlang':
## 
##     is_false, is_null, is_true
## 
## 
## The following objects are masked from 'package:readr':
## 
##     edition_get, local_edition
## 
## 
## The following object is masked from 'package:purrr':
## 
##     is_null
## 
## 
## The following objects are masked from 'package:magrittr':
## 
##     equals, is_less_than, not
## 
## 
## The following object is masked from 'package:dplyr':
## 
##     matches
## 
## 
## The following object is masked from 'package:crew':
## 
##     matches
## 
## 
## 
## Attaching package: 'tibble'
## 
## 
## The following object is masked from 'package:assertthat':
## 
##     has_name
## 
## 
## 
## Attaching package: 'tidyr'
## 
## 
## The following object is masked from 'package:testthat':
## 
##     matches
## 
## 
## The following object is masked from 'package:magrittr':
## 
##     extract

The scores on the first forecast day for COVID are all unusually bad, bad enough that we initially thought there was a bug. This notebook demonstrates that these forecasts are somewhat reasonable given the context; it is separated out because it would otherwise be distracting. The primary reason for the bad scores is that the data available on that day was later revised by an unusually large amount. In comparison, the initial flu forecast was delayed by a day, which allowed a new data revision to be used; this explains why flu doesn’t have this problem.

Revision Behavior

First, we load the necessary archive, forecasts, and scores, and plot the data as it was versioned on the first forecast day, 2024-11-20 (the week of 2024-11-23), alongside the final version in the archive:

# Read the stored scores, forecasts, and versioned archive from
# covid_hosp_prod/objects.
covid_scores <- qs2::qs_read(
  here::here("covid_hosp_prod", "objects", "scores")
)
covid_forecasts <- qs2::qs_read(
  here::here("covid_hosp_prod", "objects", "joined_forecasts_and_ensembles")
)
covid_archive <- qs2::qs_read(
  here::here("covid_hosp_prod", "objects", "nhsn_archive_data")
)
text_size <- 6
# Plot the data as of 2024-11-20 next to the archive's `versions_end`,
# restricted to 2024-09-01 through four weeks after 2024-11-23, with a
# vertical line marking 2024-11-23.
covid_archive$DT %>%
  filter(time_value < as.Date("2024-11-23") + 4 * 7) %>%
  filter(time_value > "2024-09-01") %>%
  as_epi_archive() %>%
  autoplot(
    .versions = c(as.Date("2024-11-20"), covid_archive$versions_end)
  ) +
  geom_vline(aes(xintercept = as.Date("2024-11-23")))

For most locations, the version available on 2024-11-20 differs significantly from the final version, in some cases by as much as a factor of 4.

# Plot one forecaster's forecast for the problem forecast date on top of the
# archive data around that date, with one facet per geo_value.
#
# Arguments:
#   forecaster:   name of the forecaster to plot; matched against the
#                 `forecaster` column of `covid_forecasts`.
#   text_size:    currently unused; kept so that existing calls keep working.
#   problem_date: forecast date to plot, as a Date or a string that as.Date()
#                 can parse. Defaults to 2024-11-23.
#
# Reads the globals `covid_forecasts` and `covid_archive`.
# Returns the plot produced by autoplot() with the forecast layers added:
# 10%-90% and 25%-75% ribbons plus the median line.
plot_problem_day <- function(forecaster,
                             text_size = 6,
                             problem_date = as.Date("2024-11-23")) {
  problem_date <- as.Date(problem_date)
  # `.env$` makes sure the function arguments are used, not same-named columns.
  forecast_wide <- covid_forecasts %>%
    filter(
      forecaster == .env$forecaster,
      forecast_date == .env$problem_date
    ) %>%
    # One column per quantile level so the ribbons can refer to `0.1`, `0.9`, ...
    pivot_wider(names_from = "quantile", values_from = "value")
  covid_archive$DT %>%
    filter(
      time_value < .env$problem_date + 4 * 7,
      time_value > as.Date("2024-09-01")
    ) %>%
    as_epi_archive() %>%
    autoplot() +
    geom_vline(aes(xintercept = problem_date)) +
    geom_ribbon(
      data = forecast_wide,
      aes(x = target_end_date, ymin = `0.1`, ymax = `0.9`),
      alpha = 0.3
    ) +
    geom_ribbon(
      data = forecast_wide,
      aes(x = target_end_date, ymin = `0.25`, ymax = `0.75`),
      alpha = 0.3
    ) +
    geom_line(data = forecast_wide, aes(x = target_end_date, y = `0.5`)) +
    # Spell out `scales`; the original `scale =` relied on partial matching.
    facet_wrap(~geo_value, scales = "free")
}

Windowed seasonal

# Problem-day forecast from the "windowed_seasonal" forecaster.
plot_problem_day(forecaster = "windowed_seasonal")

This forecaster is extrapolating in a straight line from trends that are reporting artifacts.

Covidhub-baseline

The CovidHub baseline forms a good sanity check, since it forecasts out from the versioned data (which explains why it is approximately as bad as windowed_seasonal_extra_sources).

# Problem-day forecast from the "CovidHub-baseline" forecaster.
plot_problem_day(forecaster = "CovidHub-baseline")

Linear

# Problem-day forecast from the "linear" forecaster.
plot_problem_day(forecaster = "linear")

This forecaster is extrapolating in a straight line from trends that are reporting artifacts.

Climate

# Problem-day forecast from the "climate_base" forecaster.
plot_problem_day(forecaster = "climate_base")

This one is wrong simply because this season was unusually low at this point.

CMU-TimeSeries

This is a bit odd, since the forecaster we were using at the time was a simple average of the linear and climate forecasters; it is therefore off for a combination of the reasons that the linear and climate forecasts are off.

# Problem-day forecast from the "CMU-TimeSeries" forecaster.
plot_problem_day(forecaster = "CMU-TimeSeries")