library(tidyverse)
library(qcc)
library(ggnewscale)
load("Data/fits_rolling_KFAS")
1. Obtain trend estimates
At least 10 + \(n\) WWTP observations, \(n \geq 1\) sub-sewershed observations
Read in cleaned WWTP series and obtain online trend estimates through the date of the first sub-sewershed observation. For this tutorial, we will read in the fitted value object from Algorithm 1, since it contains the observed and fitted values for all series. The algorithm is demonstrated with the Lift station B series compared to the online estimates of the WWTP. Results for all 4 lift station comparisons are at the end.
2. Handle missing data in LS series
Replace any missing sub-sewershed observations with WWTP online trend estimate for corresponding date.
# Subset to WWTP
<- fits_rolling_KFAS$`WWTP` %>% dplyr::filter(date> "2021-08-30"& fit == "filter")
mu_t
# Observed LS series and dates
<- fits_rolling_KFAS$`Lift station B` %>% dplyr::filter(date> "2021-08-30" & fit == "filter")
y_t_all
#Replace missing values in lift station series
$ts_missing, "obs"] <- dplyr::left_join(y_t_all[y_t_all$ts_missing,], mu_t, by = "date") %>% dplyr::pull(est.y)
y_t_all[y_t_all
# Keep just the series of observations and filled-in missing values
<- y_t_all %>% dplyr::pull(obs)
y_t
# Just the online estimates for WWTP
<- fits_rolling_KFAS$`WWTP` %>% dplyr::filter(date> "2021-08-30"& fit == "filter") %>% dplyr::pull(est) mu_t
3. Create difference time series
Create difference time series of sub-sewershed observed copies/liter (\(\log10\)) - WWTP Online Trend Estimate.
## compute the raw differences (numerator of d_tilde)
<- y_t-mu_t diff
4. Standardize the difference series
Compute the (approximate) standard deviation of the q1. Standardize the difference series by dividing by the standard deviation.
<- fits_rolling_KFAS$`Lift station B` %>%
var_y ::filter(date > "2021-08-30" & fit == "filter")%>%
dplyr::mutate(var_est = sigv^2) %>% select(var_est)
dplyr<- fits_rolling_KFAS$`WWTP` %>%
var_mu ::filter(date > "2021-08-30" & fit == "filter") %>%
dplyr::mutate(var_est = ((upr-est)/2)^2) %>%
dplyr::select(var_est)
dplyr
## compute the estimated covariance (scaled product of variances)
<- cor(y_t, mu_t, use ="pairwise.complete.obs")
cor_estimate <- as.numeric(cor_estimate)*sqrt(var_y$var_est)*sqrt(var_mu$var_est)
cov_est
## compute approximate variance using covariance (this is the correct variance for the numerator of our equation)
<- var_y$var_est + var_mu$var_est -2*cov_est
var_est
## standardize
<- diff/sqrt(var_est) standardized_diff
5. Construct EWMA chart
Construct EWMA chart for the standardized difference series.
# compute lag 1 autocorrelation of standardized difference series
<- acf(standardized_diff, plot=F, na.action = na.pass)$acf[2] ## we could do something fancier
lag1_est
# use qcc package to make ewma plot
<- qcc::ewma(standardized_diff, center = 0, sd = 1,
out lambda = lag1_est, nsigmas = 3, sizes = 1, plot = F)
## put NAs where we had missing values for either series
$y[is.na(y_t)] <- NA
out$data[is.na(y_t),1]<- NA
out
<- dplyr::filter(fits_rolling_KFAS$WWTP,
dates > "2021-08-30" & fit == "filter") %>% dplyr::pull(date)
date
# create plot
<- data.frame(x = dates,
dat ewma = out$y,
y = out$data[,1],
col = out$x %in% out$violations,
lwr = out$limits[20,1],
upr = out$limits[20,2])
<- data.frame(x = dat$x, y = out$data[,1], col = "black")
obs_dat <- ggplot2::ggplot(dat, aes(x = x, y = ewma)) +
p ::geom_vline(xintercept = NULL,
ggplot2col = "darkgrey",
lwd = 1) +
::geom_line()+
ggplot2::geom_point(aes(col = dat$col), size = 3) +
ggplot2::scale_color_manual(values = c(1,2), label = c("No", "Yes"), name = "Separation?") +
ggplot2::new_scale_color() +
ggnewscale::geom_point(data = obs_dat, aes(x = x, y = y, col =col), shape = 3) +
ggplot2::scale_color_manual(values = "black", label = "Observed \nStandardized \nDifference", name = "") +
ggplot2::geom_hline(aes(yintercept = out$limits[20,1]), lty = 2) +
ggplot2::geom_hline(aes(yintercept = out$limits[20,2]), lty = 2) +
ggplot2::geom_hline(aes(yintercept = 0), lty = 1) +
ggplot2::ggtitle("Lift station A - WWTP")+
ggplot2::xlab("Date") + ggplot2::ylab("Standardized Difference in Series") +
ggplot2::theme_minimal()
ggplot2
p
6. Inspect EWMA chart
Inspect EWMA chart for separation. Red dots outside the dotted lines indicate a statistically significant deviation. Note that all the above code can be run by calling the custom ww_ewma.r
function.
source("Code/ww_ewma.r")
<- fits_rolling_KFAS$`WWTP` %>% dplyr::filter(fit == "filter" & date > "2021-08-30")
mu2
<- fits_rolling_KFAS %>% dplyr::bind_rows() %>%
ewma_plots ::filter(name != "WWTP" & fit == "filter" & date > "2021-08-30") %>%
dplyr::group_nest(name, keep = T) %>%
dplyr::deframe() %>%
tibble::map(., ~ {
purrrww_ewma(.x, mu2, paste(.x$name[1], "-", mu2$name[1]))
})