diff --git a/DESCRIPTION b/DESCRIPTION index 5750afb7..6456d907 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -37,7 +37,7 @@ Imports: checkmate, matrixStats (>= 0.52), parallel, - posterior (>= 1.5.0), + posterior (>= 1.7.0), stats Suggests: bayesplot (>= 1.7.0), diff --git a/R/gpdfit.R b/R/gpdfit.R index 7bc7c312..6ced85e7 100644 --- a/R/gpdfit.R +++ b/R/gpdfit.R @@ -29,71 +29,10 @@ #' for the generalized Pareto distribution. *Technometrics* **51**, 316-325. #' gpdfit <- function(x, wip = TRUE, min_grid_pts = 30, sort_x = TRUE) { - # See section 4 of Zhang and Stephens (2009) - if (sort_x) { - x <- sort.int(x) - } - N <- length(x) - prior <- 3 - M <- min_grid_pts + floor(sqrt(N)) - jj <- seq_len(M) - xstar <- x[floor(N / 4 + 0.5)] # first quartile of sample - theta <- 1 / x[N] + (1 - sqrt(M / (jj - 0.5))) / prior / xstar - l_theta <- N * lx(theta, x) # profile log-lik - w_theta <- exp(l_theta - matrixStats::logSumExp(l_theta)) # normalize - theta_hat <- sum(theta * w_theta) - k <- mean.default(log1p(-theta_hat * x)) - sigma <- -k / theta_hat - - if (wip) { - k <- adjust_k_wip(k, n = N) - } - - if (is.na(k)) { - k <- Inf - } - - nlist(k, sigma) -} - - -# internal ---------------------------------------------------------------- - -lx <- function(a,x) { - a <- -a - k <- vapply(a, FUN = function(a_i) mean(log1p(a_i * x)), FUN.VALUE = numeric(1)) - log(a / k) - k - 1 -} - -#' Adjust k based on weakly informative prior, Gaussian centered on 0.5. This -#' will stabilize estimates for very small Monte Carlo sample sizes and low neff -#' cases. -#' -#' @noRd -#' @param k Scalar khat estimate. -#' @param n Integer number of tail samples used to fit GPD. -#' @return Scalar adjusted khat estimate. -#' -adjust_k_wip <- function(k, n) { - a <- 10 - n_plus_a <- n + a - k * n / n_plus_a + a * 0.5 / n_plus_a -} - - -#' Inverse CDF of generalized Pareto distribution -#' (assuming location parameter is 0) -#' -#' @noRd -#' @param p Vector of probabilities. -#' @param k Scalar shape parameter. -#' @param sigma Scalar scale parameter. -#' @return Vector of quantiles. -#' -qgpd <- function(p, k, sigma) { - if (is.nan(sigma) || sigma <= 0) { - return(rep(NaN, length(p))) - } - - sigma * expm1(-k * log1p(-p)) / k + posterior::gpdfit( + x = x, + wip = wip, + min_grid_pts = min_grid_pts, + sort_x = sort_x + ) } diff --git a/R/psis.R b/R/psis.R index ce5207d8..463d8caf 100644 --- a/R/psis.R +++ b/R/psis.R @@ -254,12 +254,12 @@ psis_smooth_tail <- function(x, cutoff) { exp_cutoff <- exp(cutoff) # save time not sorting since x already sorted - fit <- gpdfit(exp(x) - exp_cutoff, sort_x = FALSE) + fit <- posterior::gpdfit(exp(x) - exp_cutoff, sort_x = FALSE) k <- fit$k sigma <- fit$sigma if (is.finite(k)) { p <- (seq_len(len) - 0.5) / len - qq <- qgpd(p, k, sigma) + exp_cutoff + qq <- posterior::qgeneralized_pareto(p, 0, sigma, k) + exp_cutoff tail <- log(qq) } else { tail <- x diff --git a/R/psislw.R b/R/psislw.R index 9ed5ab15..808e657f 100644 --- a/R/psislw.R +++ b/R/psislw.R @@ -72,11 +72,11 @@ psislw <- function(lw, wcp = 0.2, wtrunc = 3/4, # body and gPd smoothed tail tail_ord <- order(x_tail) exp_cutoff <- exp(cutoff) - fit <- gpdfit(exp(x_tail) - exp_cutoff, wip=FALSE, min_grid_pts = 80) + fit <- posterior::gpdfit(exp(x_tail) - exp_cutoff, wip=FALSE, min_grid_pts = 80) k <- fit$k sigma <- fit$sigma prb <- (seq_len(tail_len) - 0.5) / tail_len - qq <- qgpd(prb, k, sigma) + exp_cutoff + qq <- posterior::qgeneralized_pareto(prb, 0, sigma, k) + exp_cutoff smoothed_tail <- rep.int(0, tail_len) smoothed_tail[tail_ord] <- log(qq) x_new <- x diff --git a/tests/testthat/test_gpdfit.R b/tests/testthat/test_gpdfit.R index df40a914..b25a3865 100644 --- a/tests/testthat/test_gpdfit.R +++ b/tests/testthat/test_gpdfit.R @@ -11,11 +11,3 @@ test_that("gpdfit returns correct result", { expect_snapshot_value(gpdfit_val_wip_default_grid, style = "serialize") }) -test_that("qgpd returns the correct result ", { - probs <- seq(from = 0, to = 1, by = 0.25) - q1 <- qgpd(probs, k = 1, sigma = 1) - expect_equal(q1, c(0, 1 / 3, 1, 3, Inf)) - - q2 <- qgpd(probs, k = 1, sigma = 0) - expect_true(all(is.nan(q2))) -})