stan-dev · paul-buerkner · Mar 28, 2025 · Nov 12, 2024 · Mar 6, 2025 · Mar 14, 2025
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -11,7 +11,8 @@ Authors@R: c(person("Paul-Christian", "Bürkner", email = "paul.buerkner@gmail.c
              person("Ben", "Lambert", role = c("ctb")),
              person("Ozan", "Adıgüzel", role = c("ctb")), 
              person("Jacob", "Socolar", role = c("ctb")),
-             person("Noa", "Kallioinen", role = c("ctb")))
+             person("Noa", "Kallioinen", role = c("ctb")),
+             person("Teemu", "Säilynoja", role = c("ctb")))
 Description: Provides useful tools for both users and developers of packages 
   for fitting Bayesian models or working with output from Bayesian models. 
   The primary goals of the package are to: 

diff --git a/NAMESPACE b/NAMESPACE
@@ -242,6 +242,9 @@ S3method(pareto_min_ss,rvar)
 S3method(pareto_smooth,default)
 S3method(pareto_smooth,rvar)
 S3method(pillar_shaft,rvar)
+S3method(pit,default)
+S3method(pit,draws_matrix)
+S3method(pit,rvar)
 S3method(print,draws_array)
 S3method(print,draws_df)
 S3method(print,draws_list)
@@ -474,6 +477,7 @@ export(pareto_khat)
 export(pareto_khat_threshold)
 export(pareto_min_ss)
 export(pareto_smooth)
+export(pit)
 export(ps_convergence_rate)
 export(ps_khat_threshold)
 export(ps_min_ss)

diff --git a/NEWS.md b/NEWS.md
@@ -8,6 +8,7 @@
 
 * Convert lists of matrices to `draws_array` objects.
 * Improve the documentation in various places.
+* Implement `pit()` for draws and rvar objects. LOO-PIT can be computed using `weights`.
 
 # posterior 1.6.0
 

diff --git a/R/pit.R b/R/pit.R
@@ -0,0 +1,179 @@
+#' Probability integral transform
+#'
+#' Probability integral transform (PIT). LOO-PIT is given by a weighted sample.
+#'
+#' @name pit
+#' 
+#' @param x (draws) A [`draws_matrix`] object or one coercible to a
+#'   `draws_matrix` object, or an [`rvar`] object.
+#'
+#' @param y (observations) A 1D vector, or an array of dim(x), if x is `rvar`.
+#'   Each element of `y` corresponds to a variable in `x`.
+#'
+#' @param weights A matrix of weights for each draw and variable. `weights`
+#'   should have one column per variable in `x`, and `ndraws(x)` rows.
+#'
+#' @param log (logical) Are the weights passed already on the log scale? The
+#'   default is `FALSE`, that is, expecting `weights` to be on the standard
+#'   (non-log) scale.
+#'
+#' @template args-methods-dots
+#'
+#' @details The `pit()` function computes the probability integral transform of
+#'   `y` using the empirical cumulative distribution computed from the samples
+#'   in `x`. For continuous valued `y` and `x`, the PIT for the elements of `y`
+#'   is computed as the empirical cumulative distribution value:
+#'
+#'     PIT(y_i) = Pr(x_i < y_i),
+#'
+#'   where x_i, is the corresponding set of draws in `x`. For `draws` objects,
+#'   this corresponds to the draws of the *i*th variable, and for `rvar`
+#'   the elements of `y` and `x` are matched.
+#'
+#'   The draws in `x` can further be provided (log-)weights in
+#    `weights`, which enables for example the computation of LOO-PITs.
+#'
+#'   If `y` and `x` are discrete, randomisation is used to obtain continuous PIT
+#'   values. (see, e.g., Czado, C., Gneiting, T., Held, L.: Predictive model
+#'   assessment for count data.  Biometrics 65(4), 1254–1261 (2009).)
+#'
+#' @return A numeric vector of length `length(y)` containing the PIT values, or
+#'   an array of shape `dim(y)`, if `x` is an `rvar`.
+
+#' @examples
+#' # PIT for a draws object
+#' x <- example_draws()
+#' # Create a vector of observations
+#' y <- rnorm(nvariables(x), 5, 5)
+#' pit(x, y)
+#'
+#' # Compute weighted PIT (for example LOO-PIT)
+#' weights <- matrix(runif(length(x)), ncol = nvariables(x))
+#'
+#' pit(x, y, weights)
+#'
+#' # PIT for an rvar
+#' x <- rvar(example_draws())
+#' # Create an array of observations with the same dimensions as x.
+#' y_arr <- array(rnorm(length(x), 5, 5), dim = dim(x))
+#' pit(x, y_arr)
+#'
+NULL
+
+#' @rdname pit
+#' @export
+pit <- function(x, y, ...) UseMethod("pit")
+
+#' @rdname pit
+#' @export
+pit.default <- function(x, y, weights = NULL, log = FALSE, ...) {
+  x <- as_draws_matrix(x)
+  if (!is.null(weights)) {
+    weights <- as_draws_matrix(weights)
+  }
+  pit(x, y, weights, log)
+}
+
+#' @rdname pit
+#' @export
+pit.draws_matrix <- function(x, y, weights = NULL, log = FALSE, ...) {
+  y <- validate_y(y, x)
+  if (!is.null(weights)) {
+    weights <- sapply(seq_len(nvariables(x)), \(var_idx) {
+      validate_weights(weights[, var_idx], x[, var_idx], log)
+    })
+    weights <- normalize_log_weights(weights)
+  }
+  pit <- vapply(seq_len(ncol(x)), function(j) {
+    sel_min <- x[, j] < y[j]
+    if (!any(sel_min)) {
+      pit <- 0
+    } else {
+      if (is.null(weights)) {
+        pit <- mean(sel_min)
+      } else {
+        pit <- exp(log_sum_exp(weights[sel_min, j]))
+      }
+    }
+
+    sel_sup <- x[, j] == y[j]
+    if (any(sel_sup)) {
+      # randomized PIT for discrete y (see, e.g., Czado, C., Gneiting, T.,
+      # Held, L.: Predictive model assessment for count data.
+      # Biometrics 65(4), 1254–1261 (2009).)
+      if (is.null(weights)) {
+        pit_sup <- pit + mean(sel_sup)
+      } else {
+        pit_sup <- pit + exp(log_sum_exp(weights[sel_sup, j]))
+      }
+
+      pit <- runif(1, pit, pit_sup)
+    }
+    pit
+  }, FUN.VALUE = 1.0)
+
+  if (any(pit > 1 + 1e-10)) {
+    warning_no_call(
+      paste(
+        "Some PIT values larger than 1. ",
+        "This is usually due to numerical inaccuracies. ",
+        "Largest value: ",
+        max(pit),
+        "\nRounding PIT > 1 to 1.",
+        sep = ""
+      )
+    )
+  }
+
+  setNames(pmin(1, pit), variables(x))
+}
+
+#' @rdname pit
+#' @export
+pit.rvar <- function(x, y, weights = NULL, log = FALSE, ...) {
+  y <- validate_y(y, x)
+  if (is.null(weights)) {
+    out <- array(
+      runif(length(y), Pr(x < y), Pr(x <= y)),
+      dim(x),
+      dimnames(x)
+    )
+  } else {
+    out <- array(
+      data = pit(
+        x = as_draws_matrix(c(x)),
+        y = c(y),
+        weights = weights,
+        log = log
+      ),
+      dim = dim(x),
+      dimnames = dimnames(x)
+    )
+  }
+  out
+}
+
+# internal ----------------------------------------------------------------
+
+validate_y <- function(y, x = NULL) {
+  if (!is.numeric(y)) {
+    stop_no_call("`y` must be numeric.")
+  }
+  if (anyNA(y)) {
+    stop_no_call("NAs not allowed in `y`.")
+  }
+  if (is_rvar(x)) {
+    if (length(x) != length(y) || any(dim(y) != dim(x))) {
+      stop_no_call("`dim(y)` must match `dim(x)`.")
+    }
+  } else if (is_draws(x)) {
+    if (!is.vector(y, mode = "numeric") || length(y) != nvariables(x)) {
+      stop_no_call("`y` must be a vector of length `nvariables(x)`.")
+    }
+  }
+  y
+}
+
+normalize_log_weights <- function(log_weights) {
+  apply(log_weights, 2, function(col) col - log_sum_exp(col))
+}
diff --git a/R/weight_draws.R b/R/weight_draws.R
@@ -184,6 +184,9 @@ validate_weights <- function(weights, draws, log = FALSE) {
   if (length(weights) != ndraws(draws)) {
     stop_no_call("Number of weights must match the number of draws.")
   }
+  if (any(weights == Inf)) {
+    stop_no_call("Weights must not be positive infinite.")
+  }
   if (!log) {
     if (any(weights < 0)) {
       stop_no_call("Weights must be non-negative.")

diff --git a/_pkgdown.yml b/_pkgdown.yml
@@ -121,6 +121,7 @@ reference:
       - entropy
       - dissent
       - modal_category
+      - pit
   - title: "Functionality specific to the rvar datatype"
     desc: >
       The `draws_rvar` format (a structured list of `rvar` objects) has the same

diff --git a/man/pit.Rd b/man/pit.Rd
diff --git a/man/posterior-package.Rd b/man/posterior-package.Rd
-Original file line number
+Diff line change
@@ Expand Up / @@ -121,6 +121,7 @@ reference: @@
           - entropy
           - dissent
           - modal_category
+          - pit
       - title: "Functionality specific to the rvar datatype"
         desc: >
           The `draws_rvar` format (a structured list of `rvar` objects) has the same
@@ Expand Down @@