Skip to content
149 changes: 68 additions & 81 deletions R/expected_event.R
Original file line number Diff line number Diff line change
Expand Up @@ -141,99 +141,86 @@ expected_event <- function(
# Report error where there is >=2 strata
if(length(unique(fail_rate$stratum)) >= 2 || length(unique(enroll_rate$stratum)) >= 2) {stop("Please calculate the expected event by stratum, see examples. ")}

# Divide the time line into sub-intervals ----
# Compute breakpoints for the timeline ----
# Enrollment breakpoints (time since start of enrollment)
enroll_breaks <- c(0, cumsum(enroll_rate$duration))
# Failure/dropout rate breakpoints (time on study)
fail_breaks <- c(0, cumsum(fail_rate$duration))

## by piecewise enrollment rates
df_1 <- data.frame(start_enroll = c(0, cumsum(enroll_rate$duration)))
df_1$end_fail <- total_duration - df_1$start_enroll
df_1 <- df_1[df_1$end_fail > 0, ]
# Map enrollment start times to end_fail (time at risk)
start_enroll_1 <- enroll_breaks
end_fail_1 <- total_duration - start_enroll_1
keep <- end_fail_1 > 0
start_enroll_1 <- start_enroll_1[keep]
end_fail_1 <- end_fail_1[keep]

## by piecewise failure & dropout rates
df_2 <- data.frame(
end_fail = cumsum(fail_rate$duration),
fail_rate_var = fail_rate$fail_rate,
dropout_rate_var = fail_rate$dropout_rate
)
df_2$start_enroll <- total_duration - df_2$end_fail

temp <- cumsum(fail_rate$duration)
if (temp[length(temp)] < total_duration) {
df_2 <- df_2[-nrow(df_2), ]
# Map failure breakpoints to start_enroll times
end_fail_2 <- cumsum(fail_rate$duration)
start_enroll_2 <- total_duration - end_fail_2
temp_last <- end_fail_2[length(end_fail_2)]
if (temp_last < total_duration) {
end_fail_2 <- end_fail_2[-length(end_fail_2)]
start_enroll_2 <- start_enroll_2[-length(start_enroll_2)]
} else {
df_2 <- df_2[df_2$start_enroll > 0, ]
keep2 <- start_enroll_2 > 0
end_fail_2 <- end_fail_2[keep2]
start_enroll_2 <- start_enroll_2[keep2]
}

# Create 3 step functions (sf) ----
# Step function to define enrollment rates over time
sf_enroll_rate <- stats::stepfun(c(0, cumsum(enroll_rate$duration)),
c(0, enroll_rate$rate, 0),
right = FALSE
)
# step function to define failure rates over time
start_fail <- c(0, cumsum(fail_rate$duration))
fail_rate_last <- nrow(fail_rate)
sf_fail_rate <- stats::stepfun(start_fail,
c(0, fail_rate$fail_rate, fail_rate$fail_rate[fail_rate_last]),
right = FALSE
)
# step function to define dropout rates over time
sf_dropout_rate <- stats::stepfun(start_fail,
c(0, fail_rate$dropout_rate, fail_rate$dropout_rate[fail_rate_last]),
right = FALSE
)
# Union of all breakpoints (sorted by end_fail)
all_end_fail <- sort(unique(c(end_fail_1, end_fail_2)))
all_start_enroll <- total_duration - all_end_fail
n_intervals <- length(all_end_fail)

# Use fast step functions for rate lookups (right = FALSE means right-continuous)
sf_enroll <- stepfun2(enroll_breaks, c(0, enroll_rate$rate, 0))
sf_fail <- stepfun2(fail_breaks, c(0, fail_rate$fail_rate, fail_rate$fail_rate[nrow(fail_rate)]))
sf_dropout <- stepfun2(fail_breaks, c(0, fail_rate$dropout_rate, fail_rate$dropout_rate[nrow(fail_rate)]))

# Compute interval properties
end_enroll <- c(total_duration, all_start_enroll[-n_intervals])
start_fail <- c(0, all_end_fail[-n_intervals])
duration <- end_enroll - all_start_enroll

fail_rate_var <- sf_fail(start_fail)
dropout_rate_var <- sf_dropout(start_fail)
enroll_rate_var <- sf_enroll(all_start_enroll)

# Compute q and big_q (survival through interval)
q <- exp(-duration * (fail_rate_var + dropout_rate_var))
big_q <- c(1, cumprod(q[-n_intervals]))

# Compute g and big_g (accumulated enrollment, reversed order)
g <- enroll_rate_var * duration
# big_g needs cumulative sum in reverse order of start_fail
rev_g <- rev(g)
rev_big_g <- c(0, cumsum(rev_g[-n_intervals]))
big_g <- rev(rev_big_g)

# combine sub-intervals from enroll + failure + dropout #
# impute the NA by step functions
df <- merge(df_1, df_2, by = c("start_enroll", "end_fail"), all = TRUE, sort = FALSE)
df <- df[order(df$end_fail), ]
df$end_enroll <- fastlag(df$start_enroll, first = total_duration)
df$start_fail <- fastlag(df$end_fail)
df$duration <- df$end_enroll - df$start_enroll
df$fail_rate_var <- sf_fail_rate(df$start_fail)
df$dropout_rate_var <- sf_dropout_rate(df$start_fail)
df$enroll_rate_var <- sf_enroll_rate(df$start_enroll)
# create 2 auxiliary variable for failure & dropout rate
# q: number of expected events in a sub-interval
# big_q: cumulative product of q (pool all sub-intervals)
df$q <- exp(-df$duration * (df$fail_rate_var + df$dropout_rate_var))
df$big_q <- fastlag(cumprod(df$q), first = 1)
df <- df[order(df$start_fail, decreasing = TRUE), ]
# create another 2 auxiliary variable for enroll rate
# g: number of expected subjects in a sub-interval
# big_g: cumulative sum of g (pool all sub-intervals)
df$g <- df$enroll_rate_var * df$duration
df$big_g <- fastlag(cumsum(df$g))
df <- df[order(df$start_fail), ]
# compute expected events as nbar in a sub-interval
df$d <- ifelse(
df$fail_rate_var == 0,
0,
df$big_q * (1 - df$q) * df$fail_rate_var / (df$fail_rate_var + df$dropout_rate_var)
)
df$nbar <- ifelse(
df$fail_rate_var == 0,
0,
df$big_g * df$d +
(df$fail_rate_var * df$big_q * df$enroll_rate_var) /
(df$fail_rate_var + df$dropout_rate_var) *
(df$duration - (1 - df$q) / (df$fail_rate_var + df$dropout_rate_var))
)
# Compute expected events (nbar) per interval
rate_sum <- fail_rate_var + dropout_rate_var
nz <- fail_rate_var != 0 # non-zero failure rate mask
d <- numeric(n_intervals)
nbar <- numeric(n_intervals)
d[nz] <- big_q[nz] * (1 - q[nz]) * fail_rate_var[nz] / rate_sum[nz]
nbar[nz] <- big_g[nz] * d[nz] +
(fail_rate_var[nz] * big_q[nz] * enroll_rate_var[nz]) / rate_sum[nz] *
(duration[nz] - (1 - q[nz]) / rate_sum[nz])

# Output results ----
if (simple) {
ans <- sum(df$nbar)
ans <- sum(nbar)
} else {
sf_start_fail <- stats::stepfun(start_fail, c(0, start_fail), right = FALSE)
# Map each interval to its failure rate period start
period_id <- fail_breaks[findInterval(start_fail, fail_breaks)]
# Aggregate by period
unique_periods <- unique(period_id)
ans <- data.frame(
fail_rate = df$fail_rate_var,
event = df$nbar,
start_fail = sf_start_fail(df$start_fail)
t = unique_periods,
fail_rate = sf_fail(unique_periods),
event = vapply(unique_periods, function(p) sum(nbar[period_id == p]), numeric(1))
)
ans <- lapply(split(ans, ~start_fail), function(s) {
data.frame(t = s$start_fail[1], fail_rate = s$fail_rate[1], event = sum(s$event))
})
ans <- do.call(rbind, ans)
row.names(ans) <- NULL
rownames(ans) <- NULL
}
return(ans)
}
2 changes: 1 addition & 1 deletion R/expected_time.R
Original file line number Diff line number Diff line change
Expand Up @@ -118,5 +118,5 @@ expected_time <- function(
tryCatch(uniroot(event_diff, interval, check.conv = TRUE), error = function(e) {
stop("solution not found (", e$message, ")")
})
ans |> select(-n)
ans[, names(ans) != "n", drop = FALSE]
}
105 changes: 52 additions & 53 deletions R/gs_design_ahr.R
Original file line number Diff line number Diff line change
Expand Up @@ -254,31 +254,30 @@ gs_design_ahr <- function(

y$analysis <- n_analysis

y <- rbind(
expected_time(
enroll_rate = enroll_rate, fail_rate = fail_rate,
ratio = ratio, target_event = info_frac[n_analysis - i] * final_event,
interval = c(.01, next_time)
) |>
mutate(theta = -log(ahr), analysis = n_analysis - i),
y
et <- expected_time(
enroll_rate = enroll_rate, fail_rate = fail_rate,
ratio = ratio, target_event = info_frac[n_analysis - i] * final_event,
interval = c(.01, next_time)
)
et$theta <- -log(et$ahr)
et$analysis <- n_analysis - i
y <- rbind(et, y)

next_time <- y$time[1]
# If the planned info_frac input by the user > event fraction
# Equivalently, the planned info_frac happens later than planned calendar time
# We will wait until the planned info_frac arrives
} else if (info_frac[n_analysis - i] > if_alt[n_analysis - i]) {
y[n_analysis - i, ] <- expected_time(
et <- expected_time(
enroll_rate = enroll_rate, fail_rate = fail_rate,
ratio = ratio, target_event = info_frac[n_analysis - i] * final_event,
interval = c(.01, next_time)
) |>
dplyr::transmute(
analysis = n_analysis - i, time,
event, ahr, theta = -log(ahr),
info, info0
)
)
y[n_analysis - i, ] <- data.frame(
analysis = n_analysis - i, time = et$time,
event = et$event, ahr = et$ahr, theta = -log(et$ahr),
info = et$info, info0 = et$info0
)

next_time <- y$time[n_analysis - i]
}
Expand Down Expand Up @@ -313,44 +312,42 @@ gs_design_ahr <- function(
)
)

allout <- allout |>
# Add `~hr at bound`, `hr generic` and `nominal p`
mutate(
"~hr at bound" = exp(-z / sqrt(info0)),
"nominal p" = pnorm(-z)
) |>
# Add `time`, `event`, `ahr`, `n` from gs_info_ahr call above
full_join(y |> select(-c(info, info0, theta)),
by = "analysis"
) |>
# Select variables to be output
select(c(
"analysis", "bound", "time", "n", "event", "z",
"probability", "probability0", "ahr", "theta",
"info", "info0", "info_frac", "~hr at bound", "nominal p"
)) |>
# Arrange the output table
arrange(analysis, desc(bound))

inflac_fct <- (allout |> filter(analysis == n_analysis, bound == "upper"))$info /
(y |> filter(analysis == n_analysis))$info
# Add computed columns
allout[["~hr at bound"]] <- exp(-allout$z / sqrt(allout$info0))
allout[["nominal p"]] <- pnorm(-allout$z)

# Merge time, event, ahr, n from gs_info_ahr call above
y_merge <- y[, setdiff(names(y), c("info", "info0", "theta")), drop = FALSE]
allout <- merge(allout, y_merge, by = "analysis", all.x = TRUE)

# Select and reorder columns
out_cols <- c("analysis", "bound", "time", "n", "event", "z",
"probability", "probability0", "ahr", "theta",
"info", "info0", "info_frac", "~hr at bound", "nominal p")
allout <- allout[, out_cols, drop = FALSE]

# Sort: by analysis, then upper before lower
allout <- allout[order(allout$analysis, allout$bound != "upper"), ]
rownames(allout) <- NULL

inflac_fct <- allout$info[allout$analysis == n_analysis & allout$bound == "upper"] /
y$info[y$analysis == n_analysis]
allout$event <- allout$event * inflac_fct
allout$n <- allout$n * inflac_fct


# Get bounds to output ----
bound <- allout |>
select(all_of(c(
"analysis", "bound", "probability", "probability0",
"z", "~hr at bound", "nominal p"
))) |>
arrange(analysis, desc(bound))
bound <- allout[, c("analysis", "bound", "probability", "probability0",
"z", "~hr at bound", "nominal p"), drop = FALSE]
bound <- bound[order(bound$analysis, bound$bound != "upper"), ]
rownames(bound) <- NULL

# Output spending time to the bounds table
info0 <- (allout |> filter(bound == "upper"))$info0
info <- (allout |> filter(bound == "upper"))$info
info0_final <- (allout |> filter(analysis == n_analysis, bound == "upper"))$info0
info_final <- (allout |> filter(analysis == n_analysis, bound == "upper"))$info
upper_rows <- allout$bound == "upper"
info0 <- allout$info0[upper_rows]
info <- allout$info[upper_rows]
info0_final <- allout$info0[allout$analysis == n_analysis & upper_rows]
info_final <- allout$info[allout$analysis == n_analysis & upper_rows]

bound$spending_time <- NA

Expand Down Expand Up @@ -383,11 +380,10 @@ gs_design_ahr <- function(
}

# Get analysis summary to output ----
analysis <- allout |>
select(analysis, time, n, event, ahr, theta, info, info0, info_frac) |>
mutate(info_frac0 = event / last_(event)) |>
unique() |>
arrange(analysis)
analysis <- unique(allout[, c("analysis", "time", "n", "event", "ahr", "theta", "info", "info0", "info_frac"), drop = FALSE])
analysis$info_frac0 <- analysis$event / last_(analysis$event)
analysis <- analysis[order(analysis$analysis), ]
rownames(analysis) <- NULL

# Get input parameter to output ----
input <- list(
Expand All @@ -403,13 +399,16 @@ gs_design_ahr <- function(
)

# Return the output ----
enroll_rate_out <- enroll_rate
enroll_rate_out$rate <- enroll_rate_out$rate * inflac_fct

ans <- structure(
list(
design = "ahr",
input = input,
enroll_rate = enroll_rate |> mutate(rate = rate * inflac_fct),
enroll_rate = enroll_rate_out,
fail_rate = fail_rate,
bound = bound |> filter(!is.infinite(z)),
bound = bound[is.finite(bound$z), ],
analysis = analysis
),
class = "gs_design",
Expand Down
22 changes: 10 additions & 12 deletions R/gs_design_npe.R
Original file line number Diff line number Diff line change
Expand Up @@ -360,19 +360,17 @@ gs_design_npe <- function(
binding = binding, r = r, tol = tol
)

# combine probability under H0 and H1
suppressMessages(
ans <- ans_h1 |>
full_join(
ans_h0 |>
select(analysis, bound, probability) |>
rename(probability0 = probability)
)
# combine probability under H0 and H1 via direct merge on analysis+bound
ans_h0_sub <- data.frame(
analysis = ans_h0$analysis,
bound = ans_h0$bound,
probability0 = ans_h0$probability,
stringsAsFactors = FALSE
)
ans <- merge(as.data.frame(ans_h1), ans_h0_sub, by = c("analysis", "bound"), all.x = TRUE)

ans <- ans |> select(analysis, bound, z, probability, probability0, theta, info_frac, info, info0, info1)

ans <- ans |> arrange(analysis)
ans <- ans[order(ans$analysis, ans$bound != "upper"), c("analysis", "bound", "z", "probability", "probability0", "theta", "info_frac", "info", "info0", "info1")]
rownames(ans) <- NULL

return(ans)
return(tibble::as_tibble(ans))
}
3 changes: 2 additions & 1 deletion R/gs_info_ahr.R
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,8 @@ gs_info_ahr <- function(enroll_rate = define_enroll_rate(duration = c(2, 2, 10),
if (!is.null(analysis_time)) {
# calculate events given the `analysis_time`
avehr <- ahr(enroll_rate = enroll_rate, fail_rate = fail_rate,
ratio = ratio, total_duration = analysis_time) |> select(-n)
ratio = ratio, total_duration = analysis_time)
avehr$n <- NULL
# check if the above events >= targeted events
for (i in seq_along(event)) {
if (avehr$event[i] < event[i]) {
Expand Down
Loading
Loading