Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ Imports:
checkmate,
matrixStats (>= 0.52),
parallel,
posterior (>= 1.5.0),
posterior (>= 1.7.0),
stats
Suggests:
bayesplot (>= 1.7.0),
Expand Down
73 changes: 6 additions & 67 deletions R/gpdfit.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,71 +29,10 @@
#' for the generalized Pareto distribution. *Technometrics* **51**, 316-325.
#'
gpdfit <- function(x, wip = TRUE, min_grid_pts = 30, sort_x = TRUE) {
# See section 4 of Zhang and Stephens (2009)
if (sort_x) {
x <- sort.int(x)
}
N <- length(x)
prior <- 3
M <- min_grid_pts + floor(sqrt(N))
jj <- seq_len(M)
xstar <- x[floor(N / 4 + 0.5)] # first quartile of sample
theta <- 1 / x[N] + (1 - sqrt(M / (jj - 0.5))) / prior / xstar
l_theta <- N * lx(theta, x) # profile log-lik
w_theta <- exp(l_theta - matrixStats::logSumExp(l_theta)) # normalize
theta_hat <- sum(theta * w_theta)
k <- mean.default(log1p(-theta_hat * x))
sigma <- -k / theta_hat

if (wip) {
k <- adjust_k_wip(k, n = N)
}

if (is.na(k)) {
k <- Inf
}

nlist(k, sigma)
}


# internal ----------------------------------------------------------------

lx <- function(a,x) {
a <- -a
k <- vapply(a, FUN = function(a_i) mean(log1p(a_i * x)), FUN.VALUE = numeric(1))
log(a / k) - k - 1
}

#' Adjust k based on weakly informative prior, Gaussian centered on 0.5. This
#' will stabilize estimates for very small Monte Carlo sample sizes and low neff
#' cases.
#'
#' @noRd
#' @param k Scalar khat estimate.
#' @param n Integer number of tail samples used to fit GPD.
#' @return Scalar adjusted khat estimate.
#'
adjust_k_wip <- function(k, n) {
a <- 10
n_plus_a <- n + a
k * n / n_plus_a + a * 0.5 / n_plus_a
}


#' Inverse CDF of generalized Pareto distribution
#' (assuming location parameter is 0)
#'
#' @noRd
#' @param p Vector of probabilities.
#' @param k Scalar shape parameter.
#' @param sigma Scalar scale parameter.
#' @return Vector of quantiles.
#'
qgpd <- function(p, k, sigma) {
if (is.nan(sigma) || sigma <= 0) {
return(rep(NaN, length(p)))
}

sigma * expm1(-k * log1p(-p)) / k
posterior::gpdfit(
x = x,
wip = wip,
min_grid_pts = min_grid_pts,
sort_x = sort_x
)
}
4 changes: 2 additions & 2 deletions R/psis.R
Original file line number Diff line number Diff line change
Expand Up @@ -254,12 +254,12 @@ psis_smooth_tail <- function(x, cutoff) {
exp_cutoff <- exp(cutoff)

# save time not sorting since x already sorted
fit <- gpdfit(exp(x) - exp_cutoff, sort_x = FALSE)
fit <- posterior::gpdfit(exp(x) - exp_cutoff, sort_x = FALSE)
k <- fit$k
sigma <- fit$sigma
if (is.finite(k)) {
p <- (seq_len(len) - 0.5) / len
qq <- qgpd(p, k, sigma) + exp_cutoff
qq <- posterior::qgeneralized_pareto(p, 0, sigma, k) + exp_cutoff
tail <- log(qq)
} else {
tail <- x
Expand Down
4 changes: 2 additions & 2 deletions R/psislw.R
Original file line number Diff line number Diff line change
Expand Up @@ -72,11 +72,11 @@ psislw <- function(lw, wcp = 0.2, wtrunc = 3/4,
# body and gPd smoothed tail
tail_ord <- order(x_tail)
exp_cutoff <- exp(cutoff)
fit <- gpdfit(exp(x_tail) - exp_cutoff, wip=FALSE, min_grid_pts = 80)
fit <- posterior::gpdfit(exp(x_tail) - exp_cutoff, wip=FALSE, min_grid_pts = 80)
k <- fit$k
sigma <- fit$sigma
prb <- (seq_len(tail_len) - 0.5) / tail_len
qq <- qgpd(prb, k, sigma) + exp_cutoff
qq <- posterior::qgeneralized_pareto(prb, 0, sigma, k) + exp_cutoff
smoothed_tail <- rep.int(0, tail_len)
smoothed_tail[tail_ord] <- log(qq)
x_new <- x
Expand Down
8 changes: 0 additions & 8 deletions tests/testthat/test_gpdfit.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,3 @@ test_that("gpdfit returns correct result", {
expect_snapshot_value(gpdfit_val_wip_default_grid, style = "serialize")
})

test_that("qgpd returns the correct result ", {
probs <- seq(from = 0, to = 1, by = 0.25)
q1 <- qgpd(probs, k = 1, sigma = 1)
expect_equal(q1, c(0, 1 / 3, 1, 3, Inf))

q2 <- qgpd(probs, k = 1, sigma = 0)
expect_true(all(is.nan(q2)))
})
Loading