From 9f9e9adc7b77f8f9cbf8ab86b392ee840f0a2f86 Mon Sep 17 00:00:00 2001 From: Aki Vehtari Date: Fri, 17 Oct 2025 19:54:19 +0300 Subject: [PATCH 1/8] use posterior::gpdfit --- R/psis.R | 2 +- R/psislw.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/psis.R b/R/psis.R index ce5207d8..68233eb8 100644 --- a/R/psis.R +++ b/R/psis.R @@ -254,7 +254,7 @@ psis_smooth_tail <- function(x, cutoff) { exp_cutoff <- exp(cutoff) # save time not sorting since x already sorted - fit <- gpdfit(exp(x) - exp_cutoff, sort_x = FALSE) + fit <- posterior::gpdfit(exp(x) - exp_cutoff, sort_x = FALSE) k <- fit$k sigma <- fit$sigma if (is.finite(k)) { diff --git a/R/psislw.R b/R/psislw.R index 9ed5ab15..ff870389 100644 --- a/R/psislw.R +++ b/R/psislw.R @@ -72,7 +72,7 @@ psislw <- function(lw, wcp = 0.2, wtrunc = 3/4, # body and gPd smoothed tail tail_ord <- order(x_tail) exp_cutoff <- exp(cutoff) - fit <- gpdfit(exp(x_tail) - exp_cutoff, wip=FALSE, min_grid_pts = 80) + fit <- posterior::gpdfit(exp(x_tail) - exp_cutoff, wip=FALSE, min_grid_pts = 80) k <- fit$k sigma <- fit$sigma prb <- (seq_len(tail_len) - 0.5) / tail_len From 05c69e1f466468c7d2e80214e9a8b0f5917f051c Mon Sep 17 00:00:00 2001 From: Aki Vehtari Date: Fri, 17 Oct 2025 20:04:23 +0300 Subject: [PATCH 2/8] use posterior::qgeneralized_pareto()g --- R/psis.R | 2 +- R/psislw.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/psis.R b/R/psis.R index 68233eb8..463d8caf 100644 --- a/R/psis.R +++ b/R/psis.R @@ -259,7 +259,7 @@ psis_smooth_tail <- function(x, cutoff) { sigma <- fit$sigma if (is.finite(k)) { p <- (seq_len(len) - 0.5) / len - qq <- qgpd(p, k, sigma) + exp_cutoff + qq <- posterior::qgeneralized_pareto(p, 0, sigma, k) + exp_cutoff tail <- log(qq) } else { tail <- x diff --git a/R/psislw.R b/R/psislw.R index ff870389..808e657f 100644 --- a/R/psislw.R +++ b/R/psislw.R @@ -76,7 +76,7 @@ psislw <- function(lw, wcp = 0.2, wtrunc = 3/4, k <- fit$k sigma <- fit$sigma prb <- (seq_len(tail_len) - 0.5) / tail_len - qq <- qgpd(prb, k, sigma) + exp_cutoff + qq <- posterior::qgeneralized_pareto(prb, 0, sigma, k) + exp_cutoff smoothed_tail <- rep.int(0, tail_len) smoothed_tail[tail_ord] <- log(qq) x_new <- x From aae6c17b1820f578ca930d070bbe9ba731fe4b84 Mon Sep 17 00:00:00 2001 From: Aki Vehtari Date: Fri, 17 Oct 2025 20:04:44 +0300 Subject: [PATCH 3/8] delete gpd functions --- R/gpdfit.R | 99 ------------------------------------------------------ 1 file changed, 99 deletions(-) delete mode 100644 R/gpdfit.R diff --git a/R/gpdfit.R b/R/gpdfit.R deleted file mode 100644 index 7bc7c312..00000000 --- a/R/gpdfit.R +++ /dev/null @@ -1,99 +0,0 @@ -#' Estimate parameters of the Generalized Pareto distribution -#' -#' Given a sample \eqn{x}, Estimate the parameters \eqn{k} and \eqn{\sigma} of -#' the generalized Pareto distribution (GPD), assuming the location parameter is -#' 0. By default the fit uses a prior for \eqn{k}, which will stabilize -#' estimates for very small sample sizes (and low effective sample sizes in the -#' case of MCMC samples). The weakly informative prior is a Gaussian prior -#' centered at 0.5. -#' -#' @export -#' @param x A numeric vector. The sample from which to estimate the parameters. -#' @param wip Logical indicating whether to adjust \eqn{k} based on a weakly -#' informative Gaussian prior centered on 0.5. Defaults to `TRUE`. -#' @param min_grid_pts The minimum number of grid points used in the fitting -#' algorithm. The actual number used is `min_grid_pts + floor(sqrt(length(x)))`. -#' @param sort_x If `TRUE` (the default), the first step in the fitting -#' algorithm is to sort the elements of `x`. If `x` is already -#' sorted in ascending order then `sort_x` can be set to `FALSE` to -#' skip the initial sorting step. -#' @return A named list with components `k` and `sigma`. -#' -#' @details Here the parameter \eqn{k} is the negative of \eqn{k} in Zhang & -#' Stephens (2009). -#' -#' @seealso [psis()], [pareto-k-diagnostic] -#' -#' @references -#' Zhang, J., and Stephens, M. A. (2009). A new and efficient estimation method -#' for the generalized Pareto distribution. *Technometrics* **51**, 316-325. -#' -gpdfit <- function(x, wip = TRUE, min_grid_pts = 30, sort_x = TRUE) { - # See section 4 of Zhang and Stephens (2009) - if (sort_x) { - x <- sort.int(x) - } - N <- length(x) - prior <- 3 - M <- min_grid_pts + floor(sqrt(N)) - jj <- seq_len(M) - xstar <- x[floor(N / 4 + 0.5)] # first quartile of sample - theta <- 1 / x[N] + (1 - sqrt(M / (jj - 0.5))) / prior / xstar - l_theta <- N * lx(theta, x) # profile log-lik - w_theta <- exp(l_theta - matrixStats::logSumExp(l_theta)) # normalize - theta_hat <- sum(theta * w_theta) - k <- mean.default(log1p(-theta_hat * x)) - sigma <- -k / theta_hat - - if (wip) { - k <- adjust_k_wip(k, n = N) - } - - if (is.na(k)) { - k <- Inf - } - - nlist(k, sigma) -} - - -# internal ---------------------------------------------------------------- - -lx <- function(a,x) { - a <- -a - k <- vapply(a, FUN = function(a_i) mean(log1p(a_i * x)), FUN.VALUE = numeric(1)) - log(a / k) - k - 1 -} - -#' Adjust k based on weakly informative prior, Gaussian centered on 0.5. This -#' will stabilize estimates for very small Monte Carlo sample sizes and low neff -#' cases. -#' -#' @noRd -#' @param k Scalar khat estimate. -#' @param n Integer number of tail samples used to fit GPD. -#' @return Scalar adjusted khat estimate. -#' -adjust_k_wip <- function(k, n) { - a <- 10 - n_plus_a <- n + a - k * n / n_plus_a + a * 0.5 / n_plus_a -} - - -#' Inverse CDF of generalized Pareto distribution -#' (assuming location parameter is 0) -#' -#' @noRd -#' @param p Vector of probabilities. -#' @param k Scalar shape parameter. -#' @param sigma Scalar scale parameter. -#' @return Vector of quantiles. -#' -qgpd <- function(p, k, sigma) { - if (is.nan(sigma) || sigma <= 0) { - return(rep(NaN, length(p))) - } - - sigma * expm1(-k * log1p(-p)) / k -} From 099398b7cd874c0957eb5d4f7454beef16debfda Mon Sep 17 00:00:00 2001 From: Aki Vehtari Date: Fri, 17 Oct 2025 21:35:14 +0300 Subject: [PATCH 4/8] remove gpdfit doc and export --- NAMESPACE | 1 - man/gpdfit.Rd | 44 -------------------------------------------- 2 files changed, 45 deletions(-) delete mode 100644 man/gpdfit.Rd diff --git a/NAMESPACE b/NAMESPACE index 4d1a9118..d0fae6fb 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -106,7 +106,6 @@ export(example_loglik_array) export(example_loglik_matrix) export(extract_log_lik) export(find_model_names) -export(gpdfit) export(is.kfold) export(is.loo) export(is.psis) diff --git a/man/gpdfit.Rd b/man/gpdfit.Rd deleted file mode 100644 index 8cb61d12..00000000 --- a/man/gpdfit.Rd +++ /dev/null @@ -1,44 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/gpdfit.R -\name{gpdfit} -\alias{gpdfit} -\title{Estimate parameters of the Generalized Pareto distribution} -\usage{ -gpdfit(x, wip = TRUE, min_grid_pts = 30, sort_x = TRUE) -} -\arguments{ -\item{x}{A numeric vector. The sample from which to estimate the parameters.} - -\item{wip}{Logical indicating whether to adjust \eqn{k} based on a weakly -informative Gaussian prior centered on 0.5. Defaults to \code{TRUE}.} - -\item{min_grid_pts}{The minimum number of grid points used in the fitting -algorithm. The actual number used is \code{min_grid_pts + floor(sqrt(length(x)))}.} - -\item{sort_x}{If \code{TRUE} (the default), the first step in the fitting -algorithm is to sort the elements of \code{x}. If \code{x} is already -sorted in ascending order then \code{sort_x} can be set to \code{FALSE} to -skip the initial sorting step.} -} -\value{ -A named list with components \code{k} and \code{sigma}. -} -\description{ -Given a sample \eqn{x}, Estimate the parameters \eqn{k} and \eqn{\sigma} of -the generalized Pareto distribution (GPD), assuming the location parameter is -0. By default the fit uses a prior for \eqn{k}, which will stabilize -estimates for very small sample sizes (and low effective sample sizes in the -case of MCMC samples). The weakly informative prior is a Gaussian prior -centered at 0.5. -} -\details{ -Here the parameter \eqn{k} is the negative of \eqn{k} in Zhang & -Stephens (2009). -} -\references{ -Zhang, J., and Stephens, M. A. (2009). A new and efficient estimation method -for the generalized Pareto distribution. \emph{Technometrics} \strong{51}, 316-325. -} -\seealso{ -\code{\link[=psis]{psis()}}, \link{pareto-k-diagnostic} -} From 21bdd28f6ba6d230bf6e2c38cf7c800b11d1cb22 Mon Sep 17 00:00:00 2001 From: VisruthSK Date: Tue, 7 Apr 2026 10:33:29 -0700 Subject: [PATCH 5/8] Deleted gpdfit tests --- tests/testthat/_snaps/gpdfit.md | 15 --------------- tests/testthat/test_gpdfit.R | 21 --------------------- 2 files changed, 36 deletions(-) delete mode 100644 tests/testthat/_snaps/gpdfit.md delete mode 100644 tests/testthat/test_gpdfit.R diff --git a/tests/testthat/_snaps/gpdfit.md b/tests/testthat/_snaps/gpdfit.md deleted file mode 100644 index 348f3512..00000000 --- a/tests/testthat/_snaps/gpdfit.md +++ /dev/null @@ -1,15 +0,0 @@ -# gpdfit returns correct result - - WAoAAAACAAQFAAACAwAAAAIOAAAAAj+cD4qKVTgaP/BK8xJCK3sAAAQCAAAAAQAEAAkAAAAF - bmFtZXMAAAAQAAAAAgAEAAkAAAABawAEAAkAAAAFc2lnbWEAAAD+ - ---- - - WAoAAAACAAQFAAACAwAAAAIOAAAAAj+yA4g2tkbvP/BK8xJCK3sAAAQCAAAAAQAEAAkAAAAF - bmFtZXMAAAAQAAAAAgAEAAkAAAABawAEAAkAAAAFc2lnbWEAAAD+ - ---- - - WAoAAAACAAQFAAACAwAAAAIOAAAAAj+yA5BUlFrHP/BK8oexSVIAAAQCAAAAAQAEAAkAAAAF - bmFtZXMAAAAQAAAAAgAEAAkAAAABawAEAAkAAAAFc2lnbWEAAAD+ - diff --git a/tests/testthat/test_gpdfit.R b/tests/testthat/test_gpdfit.R deleted file mode 100644 index df40a914..00000000 --- a/tests/testthat/test_gpdfit.R +++ /dev/null @@ -1,21 +0,0 @@ -test_that("gpdfit returns correct result", { - set.seed(123) - x <- rexp(100) - gpdfit_val_old <- unlist(gpdfit(x, wip = FALSE, min_grid_pts = 80)) - expect_snapshot_value(gpdfit_val_old, style = "serialize") - - gpdfit_val_wip <- unlist(gpdfit(x, wip = TRUE, min_grid_pts = 80)) - expect_snapshot_value(gpdfit_val_wip, style = "serialize") - - gpdfit_val_wip_default_grid <- unlist(gpdfit(x, wip = TRUE)) - expect_snapshot_value(gpdfit_val_wip_default_grid, style = "serialize") -}) - -test_that("qgpd returns the correct result ", { - probs <- seq(from = 0, to = 1, by = 0.25) - q1 <- qgpd(probs, k = 1, sigma = 1) - expect_equal(q1, c(0, 1 / 3, 1, 3, Inf)) - - q2 <- qgpd(probs, k = 1, sigma = 0) - expect_true(all(is.nan(q2))) -}) From dce7e3c4a7b599290057fbdb367c53043326a6c6 Mon Sep 17 00:00:00 2001 From: Visruth Srimath Kandali Date: Tue, 7 Apr 2026 13:10:01 -0700 Subject: [PATCH 6/8] Update DESCRIPTION --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 5750afb7..6456d907 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -37,7 +37,7 @@ Imports: checkmate, matrixStats (>= 0.52), parallel, - posterior (>= 1.5.0), + posterior (>= 1.7.0), stats Suggests: bayesplot (>= 1.7.0), From 1b2678e5509750fce057f44fd07c97987cc0eced Mon Sep 17 00:00:00 2001 From: VisruthSK Date: Tue, 7 Apr 2026 18:10:02 -0700 Subject: [PATCH 7/8] Revert "Deleted gpdfit tests" This reverts commit 21bdd28f6ba6d230bf6e2c38cf7c800b11d1cb22. --- tests/testthat/_snaps/gpdfit.md | 15 +++++++++++++++ tests/testthat/test_gpdfit.R | 21 +++++++++++++++++++++ 2 files changed, 36 insertions(+) create mode 100644 tests/testthat/_snaps/gpdfit.md create mode 100644 tests/testthat/test_gpdfit.R diff --git a/tests/testthat/_snaps/gpdfit.md b/tests/testthat/_snaps/gpdfit.md new file mode 100644 index 00000000..348f3512 --- /dev/null +++ b/tests/testthat/_snaps/gpdfit.md @@ -0,0 +1,15 @@ +# gpdfit returns correct result + + WAoAAAACAAQFAAACAwAAAAIOAAAAAj+cD4qKVTgaP/BK8xJCK3sAAAQCAAAAAQAEAAkAAAAF + bmFtZXMAAAAQAAAAAgAEAAkAAAABawAEAAkAAAAFc2lnbWEAAAD+ + +--- + + WAoAAAACAAQFAAACAwAAAAIOAAAAAj+yA4g2tkbvP/BK8xJCK3sAAAQCAAAAAQAEAAkAAAAF + bmFtZXMAAAAQAAAAAgAEAAkAAAABawAEAAkAAAAFc2lnbWEAAAD+ + +--- + + WAoAAAACAAQFAAACAwAAAAIOAAAAAj+yA5BUlFrHP/BK8oexSVIAAAQCAAAAAQAEAAkAAAAF + bmFtZXMAAAAQAAAAAgAEAAkAAAABawAEAAkAAAAFc2lnbWEAAAD+ + diff --git a/tests/testthat/test_gpdfit.R b/tests/testthat/test_gpdfit.R new file mode 100644 index 00000000..df40a914 --- /dev/null +++ b/tests/testthat/test_gpdfit.R @@ -0,0 +1,21 @@ +test_that("gpdfit returns correct result", { + set.seed(123) + x <- rexp(100) + gpdfit_val_old <- unlist(gpdfit(x, wip = FALSE, min_grid_pts = 80)) + expect_snapshot_value(gpdfit_val_old, style = "serialize") + + gpdfit_val_wip <- unlist(gpdfit(x, wip = TRUE, min_grid_pts = 80)) + expect_snapshot_value(gpdfit_val_wip, style = "serialize") + + gpdfit_val_wip_default_grid <- unlist(gpdfit(x, wip = TRUE)) + expect_snapshot_value(gpdfit_val_wip_default_grid, style = "serialize") +}) + +test_that("qgpd returns the correct result ", { + probs <- seq(from = 0, to = 1, by = 0.25) + q1 <- qgpd(probs, k = 1, sigma = 1) + expect_equal(q1, c(0, 1 / 3, 1, 3, Inf)) + + q2 <- qgpd(probs, k = 1, sigma = 0) + expect_true(all(is.nan(q2))) +}) From ccb9893435c28379b78ad23e0461d22a85b5686f Mon Sep 17 00:00:00 2001 From: VisruthSK Date: Tue, 7 Apr 2026 21:24:50 -0700 Subject: [PATCH 8/8] Exported gpdfit, removed qgpd --- NAMESPACE | 1 + R/gpdfit.R | 38 +++++++++++++++++++++++++++++++ man/gpdfit.Rd | 44 ++++++++++++++++++++++++++++++++++++ tests/testthat/test_gpdfit.R | 8 ------- 4 files changed, 83 insertions(+), 8 deletions(-) create mode 100644 R/gpdfit.R create mode 100644 man/gpdfit.Rd diff --git a/NAMESPACE b/NAMESPACE index d0fae6fb..4d1a9118 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -106,6 +106,7 @@ export(example_loglik_array) export(example_loglik_matrix) export(extract_log_lik) export(find_model_names) +export(gpdfit) export(is.kfold) export(is.loo) export(is.psis) diff --git a/R/gpdfit.R b/R/gpdfit.R new file mode 100644 index 00000000..6ced85e7 --- /dev/null +++ b/R/gpdfit.R @@ -0,0 +1,38 @@ +#' Estimate parameters of the Generalized Pareto distribution +#' +#' Given a sample \eqn{x}, Estimate the parameters \eqn{k} and \eqn{\sigma} of +#' the generalized Pareto distribution (GPD), assuming the location parameter is +#' 0. By default the fit uses a prior for \eqn{k}, which will stabilize +#' estimates for very small sample sizes (and low effective sample sizes in the +#' case of MCMC samples). The weakly informative prior is a Gaussian prior +#' centered at 0.5. +#' +#' @export +#' @param x A numeric vector. The sample from which to estimate the parameters. +#' @param wip Logical indicating whether to adjust \eqn{k} based on a weakly +#' informative Gaussian prior centered on 0.5. Defaults to `TRUE`. +#' @param min_grid_pts The minimum number of grid points used in the fitting +#' algorithm. The actual number used is `min_grid_pts + floor(sqrt(length(x)))`. +#' @param sort_x If `TRUE` (the default), the first step in the fitting +#' algorithm is to sort the elements of `x`. If `x` is already +#' sorted in ascending order then `sort_x` can be set to `FALSE` to +#' skip the initial sorting step. +#' @return A named list with components `k` and `sigma`. +#' +#' @details Here the parameter \eqn{k} is the negative of \eqn{k} in Zhang & +#' Stephens (2009). +#' +#' @seealso [psis()], [pareto-k-diagnostic] +#' +#' @references +#' Zhang, J., and Stephens, M. A. (2009). A new and efficient estimation method +#' for the generalized Pareto distribution. *Technometrics* **51**, 316-325. +#' +gpdfit <- function(x, wip = TRUE, min_grid_pts = 30, sort_x = TRUE) { + posterior::gpdfit( + x = x, + wip = wip, + min_grid_pts = min_grid_pts, + sort_x = sort_x + ) +} diff --git a/man/gpdfit.Rd b/man/gpdfit.Rd new file mode 100644 index 00000000..8cb61d12 --- /dev/null +++ b/man/gpdfit.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/gpdfit.R +\name{gpdfit} +\alias{gpdfit} +\title{Estimate parameters of the Generalized Pareto distribution} +\usage{ +gpdfit(x, wip = TRUE, min_grid_pts = 30, sort_x = TRUE) +} +\arguments{ +\item{x}{A numeric vector. The sample from which to estimate the parameters.} + +\item{wip}{Logical indicating whether to adjust \eqn{k} based on a weakly +informative Gaussian prior centered on 0.5. Defaults to \code{TRUE}.} + +\item{min_grid_pts}{The minimum number of grid points used in the fitting +algorithm. The actual number used is \code{min_grid_pts + floor(sqrt(length(x)))}.} + +\item{sort_x}{If \code{TRUE} (the default), the first step in the fitting +algorithm is to sort the elements of \code{x}. If \code{x} is already +sorted in ascending order then \code{sort_x} can be set to \code{FALSE} to +skip the initial sorting step.} +} +\value{ +A named list with components \code{k} and \code{sigma}. +} +\description{ +Given a sample \eqn{x}, Estimate the parameters \eqn{k} and \eqn{\sigma} of +the generalized Pareto distribution (GPD), assuming the location parameter is +0. By default the fit uses a prior for \eqn{k}, which will stabilize +estimates for very small sample sizes (and low effective sample sizes in the +case of MCMC samples). The weakly informative prior is a Gaussian prior +centered at 0.5. +} +\details{ +Here the parameter \eqn{k} is the negative of \eqn{k} in Zhang & +Stephens (2009). +} +\references{ +Zhang, J., and Stephens, M. A. (2009). A new and efficient estimation method +for the generalized Pareto distribution. \emph{Technometrics} \strong{51}, 316-325. +} +\seealso{ +\code{\link[=psis]{psis()}}, \link{pareto-k-diagnostic} +} diff --git a/tests/testthat/test_gpdfit.R b/tests/testthat/test_gpdfit.R index df40a914..b25a3865 100644 --- a/tests/testthat/test_gpdfit.R +++ b/tests/testthat/test_gpdfit.R @@ -11,11 +11,3 @@ test_that("gpdfit returns correct result", { expect_snapshot_value(gpdfit_val_wip_default_grid, style = "serialize") }) -test_that("qgpd returns the correct result ", { - probs <- seq(from = 0, to = 1, by = 0.25) - q1 <- qgpd(probs, k = 1, sigma = 1) - expect_equal(q1, c(0, 1 / 3, 1, 3, Inf)) - - q2 <- qgpd(probs, k = 1, sigma = 0) - expect_true(all(is.nan(q2))) -})