diff --git a/packages/opencode/src/cli/cmd/stats.ts b/packages/opencode/src/cli/cmd/stats.ts
index 34af56ad7a90..a505a854f47f 100644
--- a/packages/opencode/src/cli/cmd/stats.ts
+++ b/packages/opencode/src/cli/cmd/stats.ts
@@ -1,3 +1,5 @@
+import fs from "fs"
+import path from "path"
 import type { Argv } from "yargs"
 import { cmd } from "./cmd"
 import { Session } from "../../session"
@@ -7,6 +9,7 @@ import { SessionTable } from "../../session/session.sql"
 import { Project } from "../../project"
 import { Instance } from "../../project/instance"
 import { AppRuntime } from "@/effect/app-runtime"
+import { Global } from "../../global"
 
 interface SessionStats {
   totalSessions: number
@@ -80,6 +83,7 @@ export const StatsCommand = cmd({
       }
 
       displayStats(stats, args.tools, modelLimit)
+      displayRateLimits()
     })
   },
 })
@@ -411,3 +415,64 @@ function formatNumber(num: number): string {
   }
   return num.toString()
 }
+
+/**
+ * Prints a boxed "RATE LIMITS" table: one section per provider in the `opencode.json` under
+ * `Global.Path.config` that has `options.rateLimit`, showing whichever of the four limits
+ * (requests/tokens per minute/day) are set. Prints nothing when the file cannot be read or
+ * parsed, has no `provider` object, or no provider carries a rate limit.
+ */
+function displayRateLimits() {
+  const width = 56
+  const rule = "─".repeat(width)
+  function renderRow(label: string, value: string): string {
+    const availableWidth = width - 1
+    const paddingNeeded = availableWidth - label.length - value.length
+    const padding = Math.max(0, paddingNeeded)
+    return `│${label}${" ".repeat(padding)}${value} │`
+  }
+
+  const jsonPath = path.join(Global.Path.config, "opencode.json")
+  let data: any
+  try {
+    data = JSON.parse(fs.readFileSync(jsonPath, "utf8"))
+  } catch {
+    return
+  }
+  const providers = data?.provider
+  if (!providers || typeof providers !== "object") return
+
+  const rows: Array<{ id: string; perMinute?: number; perDay?: number; tokensPerMinute?: number; tokensPerDay?: number }> = []
+  for (const [id, cfg] of Object.entries(providers)) {
+    const rl = cfg?.options?.rateLimit
+    if (!rl) continue
+    rows.push({
+      id,
+      perMinute: rl.perMinute,
+      perDay: rl.perDay,
+      tokensPerMinute: rl.tokensPerMinute,
+      tokensPerDay: rl.tokensPerDay,
+    })
+  }
+  if (rows.length === 0) return
+
+  const title = "RATE LIMITS (learned/set)"
+  const titleLeft = Math.floor((width - title.length) / 2)
+  const titleRight = width - title.length - titleLeft
+  console.log(`┌${rule}┐`)
+  console.log(`│${" ".repeat(titleLeft)}${title}${" ".repeat(titleRight)}│`)
+  console.log(`├${rule}┤`)
+  rows.forEach((r, index) => {
+    // Draw the rule between providers instead of after each one. That removes the need for a
+    // cursor-up escape ("\x1B[1A") to erase a trailing rule, which only works on a terminal and
+    // is written out verbatim when stdout is piped or redirected.
+    if (index > 0) console.log(`├${rule}┤`)
+    console.log(`│ ${r.id.padEnd(54)} │`)
+    if (r.perMinute !== undefined) console.log(renderRow(" Requests/min", formatNumber(r.perMinute)))
+    if (r.perDay !== undefined) console.log(renderRow(" Requests/day", formatNumber(r.perDay)))
+    if (r.tokensPerMinute !== undefined) console.log(renderRow(" Tokens/min", formatNumber(r.tokensPerMinute)))
+    if (r.tokensPerDay !== undefined) console.log(renderRow(" Tokens/day", formatNumber(r.tokensPerDay)))
+  })
+  console.log(`└${rule}┘`)
+  console.log()
+}
diff --git a/packages/opencode/src/config/provider.ts b/packages/opencode/src/config/provider.ts
index 49dd2a3760aa..f405529769e5 100644
--- a/packages/opencode/src/config/provider.ts
+++ b/packages/opencode/src/config/provider.ts
@@ -110,9 +110,15 @@ export class Info extends Schema.Class("ProviderConfig")({
             perDay: Schema.optional(PositiveInt).annotate({
               description: "Learned or user-set request limit per 24 hours.",
             }),
+            tokensPerMinute: Schema.optional(PositiveInt).annotate({
+              description: "Learned or user-set token limit per 60 seconds.",
+            }),
+            tokensPerDay: Schema.optional(PositiveInt).annotate({
+              description: "Learned or user-set token limit per 24 hours.",
+            }),
           }).annotate({
             description:
-              "Request-rate limits for this provider. Populated automatically the first time a 429 response is received, or can be set manually.",
+              "Request- and token-rate limits for this provider. 
Populated automatically the first time a 429 response is received, or can be set manually.", }), ), }), diff --git a/packages/opencode/src/provider/error.ts b/packages/opencode/src/provider/error.ts index 37299df59598..a16d8f447e43 100644 --- a/packages/opencode/src/provider/error.ts +++ b/packages/opencode/src/provider/error.ts @@ -182,14 +182,20 @@ export function parseAPICallError(input: { providerID: ProviderID; error: APICal } const metadata = input.error.url ? { url: input.error.url } : undefined - if (input.error.statusCode === 429) { + const is429 = input.error.statusCode === 429 + if (is429) { RateLimit.onRateLimitError(input.providerID) } + const friendlyMessage = is429 ? `Rate limit hit on ${input.providerID} — retrying` : m return { type: "api_error", - message: m, + message: friendlyMessage, statusCode: input.error.statusCode, - isRetryable: input.providerID.startsWith("openai") ? isOpenAiErrorRetryable(input.error) : input.error.isRetryable, + isRetryable: is429 + ? true + : input.providerID.startsWith("openai") + ? 
isOpenAiErrorRetryable(input.error) + : input.error.isRetryable, responseHeaders: input.error.responseHeaders, responseBody: input.error.responseBody, metadata, diff --git a/packages/opencode/src/provider/provider.ts b/packages/opencode/src/provider/provider.ts index fc83c4a56bbd..8400f28f40d5 100644 --- a/packages/opencode/src/provider/provider.ts +++ b/packages/opencode/src/provider/provider.ts @@ -28,7 +28,7 @@ import { withStatics } from "@/util/schema" import * as ProviderTransform from "./transform" import { ModelID, ProviderID } from "./schema" -import { RateLimit } from "./rate-limit" +import { RateLimit, RateLimitError, formatGateMessage } from "./rate-limit" const log = Log.create({ service: "provider" }) @@ -1443,6 +1443,11 @@ const layer: Layer.Layer< const chunkTimeout = options["chunkTimeout"] delete options["chunkTimeout"] + if (options["rateLimit"] && typeof options["rateLimit"] === "object") { + RateLimit.configure(model.providerID, options["rateLimit"] as any) + } + delete options["rateLimit"] + options["fetch"] = async (input: any, init?: BunFetchRequestInit) => { const fetchFn = customFetch ?? fetch const opts = init ?? 
{}
@@ -1472,7 +1477,19 @@ const layer: Layer.Layer<
             }
           }
 
-          RateLimit.tick(model.providerID)
+          const estimate = RateLimit.estimateRequestTokens(opts.body)
+          const gate = RateLimit.check(model.providerID, estimate)
+          if (!gate.ok) {
+            throw new RateLimitError({
+              providerID: model.providerID,
+              reason: gate.reason,
+              limit: gate.limit,
+              current: gate.current,
+              resetAt: gate.resetAt,
+              message: formatGateMessage(model.providerID, gate),
+            })
+          }
+          RateLimit.tick(model.providerID, estimate)
           const res = await fetchFn(input, {
             ...opts,
             // @ts-ignore see here: https://github.com/oven-sh/bun/issues/16682
diff --git a/packages/opencode/src/provider/rate-limit.ts b/packages/opencode/src/provider/rate-limit.ts
index a72b81e96ded..57280166847c 100644
--- a/packages/opencode/src/provider/rate-limit.ts
+++ b/packages/opencode/src/provider/rate-limit.ts
@@ -1,7 +1,10 @@
 import fs from "fs"
 import path from "path"
+import z from "zod"
+import { NamedError } from "@opencode-ai/shared/util/error"
 import { Global } from "../global"
 import { Log } from "../util"
+import { estimate as estimateTokens } from "../util/token"
 import type { ProviderID } from "./schema"
 
 const log = Log.create({ service: "provider.rate-limit" })
@@ -12,10 +15,19 @@ type HeaderSnapshot = {
   resetAt?: number
 }
 
+/** `count` tokens recorded at time `t` (ms); `pending` is true while `count` is still a pre-request estimate. */
+type TokenEntry = {
+  t: number
+  count: number
+  pending: boolean
+}
+
 type State = {
   minute: number[]
   day: number[]
-  learned: { perMinute?: number; perDay?: number }
+  tokensMinute: TokenEntry[]
+  tokensDay: TokenEntry[]
+  learned: { perMinute?: number; perDay?: number; tokensPerMinute?: number; tokensPerDay?: number }
   headers?: { requests?: HeaderSnapshot; tokens?: HeaderSnapshot }
   loggedHeaders: boolean
 }
@@ -25,7 +37,14 @@ const state = new Map()
 function ensure(providerID: ProviderID): State {
   const existing = state.get(providerID)
   if (existing) return existing
-  const next: State = { minute: [], day: [], learned: {}, loggedHeaders: false }
+  const next: State = {
+    minute: [],
+    day: [],
+    tokensMinute: [],
+    tokensDay: [],
+    learned: {},
+    loggedHeaders: false,
+  }
   state.set(providerID, next)
   return next
 }
@@ -34,13 +53,77 @@ function prune(s: State) {
   const now = Date.now()
   s.minute = s.minute.filter((t) => t > now - 60_000)
   s.day = s.day.filter((t) => t > now - 86_400_000)
+  s.tokensMinute = s.tokensMinute.filter((e) => e.t > now - 60_000)
+  s.tokensDay = s.tokensDay.filter((e) => e.t > now - 86_400_000)
+}
+
+/** Total `count` across the given entries. */
+function sumTokens(entries: TokenEntry[]): number {
+  let total = 0
+  for (const e of entries) total += e.count
+  return total
+}
+
+/**
+ * Estimates the token count of a request body with `estimateTokens`. Strings are measured
+ * directly, binary bodies (`ArrayBuffer` or any typed array / `DataView`) after text decoding,
+ * anything else via its JSON form. Returns 0 for null/undefined bodies or if any step throws.
+ */
+export function estimateRequestTokens(body: unknown): number {
+  if (body == null) return 0
+  try {
+    if (typeof body === "string") return estimateTokens(body)
+    if (body instanceof ArrayBuffer || ArrayBuffer.isView(body)) return estimateTokens(new TextDecoder().decode(body))
+    return estimateTokens(JSON.stringify(body))
+  } catch {
+    return 0
+  }
 }
 
+/**
+ * Records one outgoing request for the provider in the minute and day windows, then prunes.
+ * A positive `estimatedTokens` is also recorded in both token windows as a pending estimate.
+ */
-export function tick(providerID: ProviderID) {
+export function tick(providerID: ProviderID, estimatedTokens = 0) {
   const s = ensure(providerID)
   const now = Date.now()
   s.minute.push(now)
   s.day.push(now)
+  if (estimatedTokens > 0) {
+    const entry: TokenEntry = { t: now, count: estimatedTokens, pending: true }
+    s.tokensMinute.push(entry)
+    s.tokensDay.push({ ...entry })
+  }
+  prune(s)
+}
+
+/**
+ * Records actual usage (input + output tokens, rounded) for the provider. In each token window
+ * the oldest pending estimate is overwritten with the actual total; a window with no pending
+ * entry gets a new settled entry stamped with the current time. Totals that are zero or not
+ * finite are ignored, so a NaN from a malformed usage report cannot poison the window sums.
+ */
+export function recordUsage(providerID: ProviderID, inputTokens: number, outputTokens: number) {
+  const s = ensure(providerID)
+  const actual = Math.max(0, Math.round((inputTokens ?? 0) + (outputTokens ?? 0)))
+  if (!Number.isFinite(actual) || actual === 0) return
+  const replaceOldestPending = (entries: TokenEntry[]) => {
+    for (const e of entries) {
+      if (e.pending) {
+        e.count = actual
+        e.pending = false
+        return true
+      }
+    }
+    return false
+  }
+  const now = Date.now()
+  if (!replaceOldestPending(s.tokensMinute)) {
+    s.tokensMinute.push({ t: now, count: actual, pending: false })
+  }
+  if (!replaceOldestPending(s.tokensDay)) {
+    s.tokensDay.push({ t: now, count: actual, pending: false })
+  }
   prune(s)
 }
 
@@ -71,7 +154,7 @@ function parseFamily(headers: Headers, family: Array<[string, string, string]>):
   return undefined
 }
 
-function parseReset(value: string | null): number | undefined {
+export function parseReset(value: string | null | undefined): number | undefined {
   if (!value) return undefined
   const asNumber = Number.parseFloat(value)
   if (!Number.isNaN(asNumber)) {
@@ -105,18 +188,28 @@ export function onRateLimitError(providerID: ProviderID) {
   prune(s)
   const perMinute = s.minute.length
   const perDay = s.day.length
-  if (perMinute === 0 && perDay === 0) return
-  s.learned.perMinute = Math.max(s.learned.perMinute ?? 0, perMinute)
-  s.learned.perDay = Math.max(s.learned.perDay ?? 0, perDay)
+  const tokensPerMinute = Math.round(sumTokens(s.tokensMinute))
+  const tokensPerDay = Math.round(sumTokens(s.tokensDay))
+  if (perMinute === 0 && perDay === 0 && tokensPerMinute === 0 && tokensPerDay === 0) return
+  if (perMinute > 0) s.learned.perMinute = Math.max(s.learned.perMinute ?? 0, perMinute)
+  if (perDay > 0) s.learned.perDay = Math.max(s.learned.perDay ?? 0, perDay)
+  if (tokensPerMinute > 0) s.learned.tokensPerMinute = Math.max(s.learned.tokensPerMinute ?? 0, tokensPerMinute)
+  if (tokensPerDay > 0) s.learned.tokensPerDay = Math.max(s.learned.tokensPerDay ?? 0, tokensPerDay)
   try {
-    persistLearnedLimits(providerID, s.learned.perMinute, s.learned.perDay)
-    log.info("learned rate limit from 429", { providerID, perMinute, perDay })
+    persistLearnedLimits(providerID, s.learned)
+    log.info("learned rate limit from 429", {
+      providerID,
+      perMinute,
+      perDay,
+      tokensPerMinute,
+      tokensPerDay,
+    })
   } catch (e) {
     log.warn("failed to persist learned rate limit", { providerID, error: String(e) })
   }
 }
 
-function persistLearnedLimits(providerID: ProviderID, perMinute: number, perDay: number) {
+function persistLearnedLimits(providerID: ProviderID, learned: State["learned"]) {
   const jsoncPath = path.join(Global.Path.config, "opencode.jsonc")
   if (fs.existsSync(jsoncPath)) {
     log.warn("opencode.jsonc detected; skipping learned-limit write to preserve comments", {
@@ -131,8 +224,13 @@ function persistLearnedLimits(providerID: ProviderID, perMinute: number, perDay:
   data.provider[providerID] ??= {}
   data.provider[providerID].options ??= {}
   const rateLimit = (data.provider[providerID].options.rateLimit ??= {})
-  rateLimit.perMinute = Math.max(rateLimit.perMinute ?? 0, perMinute)
-  rateLimit.perDay = Math.max(rateLimit.perDay ?? 0, perDay)
+  if (learned.perMinute !== undefined)
+    rateLimit.perMinute = Math.max(rateLimit.perMinute ?? 0, learned.perMinute)
+  if (learned.perDay !== undefined) rateLimit.perDay = Math.max(rateLimit.perDay ?? 0, learned.perDay)
+  if (learned.tokensPerMinute !== undefined)
+    rateLimit.tokensPerMinute = Math.max(rateLimit.tokensPerMinute ?? 0, learned.tokensPerMinute)
+  if (learned.tokensPerDay !== undefined)
+    rateLimit.tokensPerDay = Math.max(rateLimit.tokensPerDay ?? 0, learned.tokensPerDay)
   fs.writeFileSync(jsonPath, JSON.stringify(data, null, 2) + "\n")
 }
 
@@ -146,9 +244,177 @@ function readJsonSafe(p: string): Record {
   return raw.trim() === "" ? {} : JSON.parse(raw)
 }
 
+/**
+ * Seeds the provider's in-memory limits from configuration. Each positive value given is
+ * merged with `Math.max`, so it can raise but never lower a limit already held in memory.
+ */
+export function configure(
+  providerID: ProviderID,
+  limits: { perMinute?: number; perDay?: number; tokensPerMinute?: number; tokensPerDay?: number } | undefined,
+) {
+  if (!limits) return
+  const s = ensure(providerID)
+  if (limits.perMinute !== undefined && limits.perMinute > 0)
+    s.learned.perMinute = Math.max(s.learned.perMinute ?? 0, limits.perMinute)
+  if (limits.perDay !== undefined && limits.perDay > 0)
+    s.learned.perDay = Math.max(s.learned.perDay ?? 0, limits.perDay)
+  if (limits.tokensPerMinute !== undefined && limits.tokensPerMinute > 0)
+    s.learned.tokensPerMinute = Math.max(s.learned.tokensPerMinute ?? 0, limits.tokensPerMinute)
+  if (limits.tokensPerDay !== undefined && limits.tokensPerDay > 0)
+    s.learned.tokensPerDay = Math.max(s.learned.tokensPerDay ?? 0, limits.tokensPerDay)
+}
+
+export type GateReason = "requests-minute" | "requests-day" | "tokens-minute" | "tokens-day"
+
+/** Result of `check`: ok, or which limit tripped, its size, current usage and `resetAt` (epoch ms). */
+export type Gate =
+  | { ok: true }
+  | {
+      ok: false
+      reason: GateReason
+      limit: number
+      current: number
+      resetAt: number
+    }
+
+/** Named error carrying the fields of a failed `Gate` plus the provider id and a display message. */
+export const RateLimitError = NamedError.create(
+  "RateLimitError",
+  z.object({
+    providerID: z.string(),
+    reason: z.enum(["requests-minute", "requests-day", "tokens-minute", "tokens-day"]),
+    limit: z.number(),
+    current: z.number(),
+    resetAt: z.number(),
+    message: z.string(),
+  }),
+)
+export type RateLimitError = z.infer<typeof RateLimitError>
+
+/** Renders a failed gate as a one-line message; the retry delay is in whole seconds, minimum 1. */
+export function formatGateMessage(providerID: ProviderID, gate: Extract<Gate, { ok: false }>): string {
+  const seconds = Math.max(1, Math.ceil((gate.resetAt - Date.now()) / 1000))
+  const labels: Record<GateReason, string> = {
+    "requests-minute": "requests/minute",
+    "requests-day": "requests/day",
+    "tokens-minute": "tokens/minute",
+    "tokens-day": "tokens/day",
+  }
+  return `Rate limit on ${providerID}: ${gate.current}/${gate.limit} ${labels[gate.reason]}. Retry in ${seconds}s.`
+}
+
+// Each helper below returns when the first entry leaves its window, or one full window from now if there are none.
+function minuteResetAt(timestamps: number[]): number {
+  if (timestamps.length === 0) return Date.now() + 60_000
+  return timestamps[0]! + 60_000
+}
+
+function dayResetAt(timestamps: number[]): number {
+  if (timestamps.length === 0) return Date.now() + 86_400_000
+  return timestamps[0]! + 86_400_000
+}
+
+function minuteResetAtTokens(entries: TokenEntry[]): number {
+  if (entries.length === 0) return Date.now() + 60_000
+  return entries[0]!.t + 60_000
+}
+
+function dayResetAtTokens(entries: TokenEntry[]): number {
+  if (entries.length === 0) return Date.now() + 86_400_000
+  return entries[0]!.t + 86_400_000
+}
+
+/**
+ * Decides whether one more request of `estimatedTokens` may be sent to the provider right now.
+ *
+ * Checks run in this order: server-advertised request headers, server-advertised token headers,
+ * then the in-memory limits for requests/minute, requests/day, tokens/minute and tokens/day.
+ * The first limit that would be exceeded is returned; otherwise the result is `{ ok: true }`.
+ */
+export function check(providerID: ProviderID, estimatedTokens = 0): Gate {
+  const s = ensure(providerID)
+  prune(s)
+  const now = Date.now()
+
+  const effective = {
+    perMinute: s.learned.perMinute,
+    perDay: s.learned.perDay,
+    tokensPerMinute: s.learned.tokensPerMinute,
+    tokensPerDay: s.learned.tokensPerDay,
+  }
+
+  // Server-advertised remaining takes precedence if it would trip sooner. A tripped gate sends
+  // no request, so nothing refreshes the stored header snapshot; it may therefore only gate
+  // while its reset time is known and still in the future. Gating on an expired or undated
+  // snapshot would block the provider permanently.
+  const isLive = (h: HeaderSnapshot | undefined): h is HeaderSnapshot & { resetAt: number } =>
+    h?.resetAt !== undefined && h.resetAt > now
+  const hdrReq = s.headers?.requests
+  if (isLive(hdrReq) && hdrReq.remaining !== undefined && hdrReq.remaining <= 0) {
+    return {
+      ok: false,
+      reason: "requests-minute",
+      limit: hdrReq.limit ?? s.minute.length,
+      current: s.minute.length,
+      resetAt: hdrReq.resetAt,
+    }
+  }
+  const hdrTok = s.headers?.tokens
+  if (isLive(hdrTok) && hdrTok.remaining !== undefined && hdrTok.remaining < estimatedTokens) {
+    return {
+      ok: false,
+      reason: "tokens-minute",
+      limit: hdrTok.limit ?? Math.round(sumTokens(s.tokensMinute)),
+      current: Math.round(sumTokens(s.tokensMinute)),
+      resetAt: hdrTok.resetAt,
+    }
+  }
+
+  if (effective.perMinute && s.minute.length + 1 > effective.perMinute) {
+    return {
+      ok: false,
+      reason: "requests-minute",
+      limit: effective.perMinute,
+      current: s.minute.length,
+      resetAt: minuteResetAt(s.minute),
+    }
+  }
+  if (effective.perDay && s.day.length + 1 > effective.perDay) {
+    return {
+      ok: false,
+      reason: "requests-day",
+      limit: effective.perDay,
+      current: s.day.length,
+      resetAt: dayResetAt(s.day),
+    }
+  }
+  if (effective.tokensPerMinute && sumTokens(s.tokensMinute) + estimatedTokens > effective.tokensPerMinute) {
+    return {
+      ok: false,
+      reason: "tokens-minute",
+      limit: effective.tokensPerMinute,
+      current: Math.round(sumTokens(s.tokensMinute)),
+      resetAt: minuteResetAtTokens(s.tokensMinute),
+    }
+  }
+  if (effective.tokensPerDay && sumTokens(s.tokensDay) + estimatedTokens > effective.tokensPerDay) {
+    return {
+      ok: false,
+      reason: "tokens-day",
+      limit: effective.tokensPerDay,
+      current: Math.round(sumTokens(s.tokensDay)),
+      resetAt: dayResetAtTokens(s.tokensDay),
+    }
+  }
+
+  return { ok: true }
+}
+
 export type Snapshot = {
   minute: { count: number; limit?: number }
   day: { count: number; limit?: number }
+  tokensMinute: { count: number; limit?: number }
+  tokensDay: { count: number; limit?: number }
   headers?: { requests?: HeaderSnapshot; tokens?: HeaderSnapshot }
 }
 
@@ -158,10 +424,19 @@ export function snapshot(providerID: ProviderID): Snapshot {
   return {
     minute: { count: s.minute.length, limit: s.learned.perMinute },
     day: { count: s.day.length, limit: s.learned.perDay },
+    tokensMinute: { count: Math.round(sumTokens(s.tokensMinute)), limit: s.learned.tokensPerMinute },
+    tokensDay: { count: Math.round(sumTokens(s.tokensDay)), limit: s.learned.tokensPerDay },
     headers: s.headers,
   }
 }
 
+/** Returns a `snapshot` for every provider that currently has in-memory state, keyed by provider id. */
+export function snapshotAll(): Record<string, Snapshot> {
+  const result: Record<string, Snapshot> = {}
+  for (const id of state.keys()) result[id] = snapshot(id)
+  return result
+}
+
 export function reset(providerID?: ProviderID) {
   if
(providerID) { state.delete(providerID) diff --git a/packages/opencode/src/session/message-v2.ts b/packages/opencode/src/session/message-v2.ts index 20528763b8b1..2739c714d073 100644 --- a/packages/opencode/src/session/message-v2.ts +++ b/packages/opencode/src/session/message-v2.ts @@ -56,6 +56,8 @@ export const APIError = NamedError.create( }), ) export type APIError = z.infer +import { RateLimitError } from "@/provider/rate-limit" +export { RateLimitError } export const ContextOverflowError = NamedError.create( "ContextOverflowError", z.object({ message: z.string(), responseBody: z.string().optional() }), @@ -966,6 +968,22 @@ export function fromError( ).toObject() case OutputLengthError.isInstance(e): return e + case RateLimitError.isInstance(e): + return new APIError( + { + message: e.data.message, + statusCode: 429, + isRetryable: true, + metadata: { + providerID: e.data.providerID, + reason: e.data.reason, + limit: String(e.data.limit), + current: String(e.data.current), + resetAt: String(e.data.resetAt), + }, + }, + { cause: e }, + ).toObject() case LoadAPIKeyError.isInstance(e): return new AuthError( { diff --git a/packages/opencode/src/session/processor.ts b/packages/opencode/src/session/processor.ts index 21f9329c6fce..7b564f89e2bc 100644 --- a/packages/opencode/src/session/processor.ts +++ b/packages/opencode/src/session/processor.ts @@ -16,6 +16,7 @@ import { SessionRetry } from "./retry" import { SessionStatus } from "./status" import { SessionSummary } from "./summary" import type { Provider } from "@/provider" +import { RateLimit } from "@/provider/rate-limit" import { Question } from "@/question" import { errorMessage } from "@/util/error" import { Log } from "@/util" @@ -360,6 +361,11 @@ export const layer: Layer.Layer< usage: value.usage, metadata: value.providerMetadata, }) + RateLimit.recordUsage( + ctx.model.providerID, + Number(value.usage?.inputTokens ?? 0), + Number(value.usage?.outputTokens ?? 
0), + ) ctx.assistantMessage.finish = value.finishReason ctx.assistantMessage.cost += usage.cost ctx.assistantMessage.tokens = usage.tokens diff --git a/packages/opencode/src/session/retry.ts b/packages/opencode/src/session/retry.ts index 12fd4d345d06..524b9eac7213 100644 --- a/packages/opencode/src/session/retry.ts +++ b/packages/opencode/src/session/retry.ts @@ -18,9 +18,16 @@ function cap(ms: number) { return Math.min(ms, RETRY_MAX_DELAY) } -export function delay(attempt: number, error?: MessageV2.APIError) { +export function delay(attempt: number, error?: MessageV2.APIError | MessageV2.RateLimitError) { + if (error && "resetAt" in (error as any).data) { + const resetAt = Number((error as any).data.resetAt) + if (Number.isFinite(resetAt)) { + const wait = resetAt - Date.now() + if (wait > 0) return cap(wait) + } + } if (error) { - const headers = error.data.responseHeaders + const headers = (error as MessageV2.APIError).data.responseHeaders if (headers) { const retryAfterMs = headers["retry-after-ms"] if (retryAfterMs) { @@ -54,6 +61,9 @@ export function delay(attempt: number, error?: MessageV2.APIError) { export function retryable(error: Err) { // context overflow errors should not be retried if (MessageV2.ContextOverflowError.isInstance(error)) return undefined + if (MessageV2.RateLimitError.isInstance(error)) { + return error.data.message + } if (MessageV2.APIError.isInstance(error)) { const status = error.data.statusCode // 5xx errors are transient server failures and should always be retried, @@ -113,7 +123,10 @@ export function policy(opts: { const message = retryable(error) if (!message) return Cause.done(meta.attempt) return Effect.gen(function* () { - const wait = delay(meta.attempt, MessageV2.APIError.isInstance(error) ? error : undefined) + const wait = delay( + meta.attempt, + MessageV2.APIError.isInstance(error) || MessageV2.RateLimitError.isInstance(error) ? 
error : undefined, + ) const now = yield* Clock.currentTimeMillis yield* opts.set({ attempt: meta.attempt, message, next: now + wait }) return [meta.attempt, Duration.millis(wait)] as [number, Duration.Duration] diff --git a/packages/opencode/test/rate-limit.test.ts b/packages/opencode/test/rate-limit.test.ts index b32681128716..a5686c42cbe9 100644 --- a/packages/opencode/test/rate-limit.test.ts +++ b/packages/opencode/test/rate-limit.test.ts @@ -76,4 +76,80 @@ describe("RateLimit", () => { if (fs.existsSync(jsoncPath)) fs.rmSync(jsoncPath) } }) + + test("check returns ok:true when no limits configured", () => { + const gate = RateLimit.check(provider, 1000) + expect(gate.ok).toBe(true) + }) + + test("check trips on requests-minute when at limit", () => { + RateLimit.configure(provider, { perMinute: 3 }) + RateLimit.tick(provider) + RateLimit.tick(provider) + RateLimit.tick(provider) + const gate = RateLimit.check(provider) + expect(gate.ok).toBe(false) + if (!gate.ok) { + expect(gate.reason).toBe("requests-minute") + expect(gate.limit).toBe(3) + expect(gate.current).toBe(3) + expect(gate.resetAt).toBeGreaterThan(Date.now()) + } + }) + + test("check trips on tokens-minute when estimate would exceed limit", () => { + RateLimit.configure(provider, { tokensPerMinute: 1000 }) + RateLimit.tick(provider, 900) + const gate = RateLimit.check(provider, 200) + expect(gate.ok).toBe(false) + if (!gate.ok) { + expect(gate.reason).toBe("tokens-minute") + expect(gate.limit).toBe(1000) + } + }) + + test("check respects tokens-day limit", () => { + RateLimit.configure(provider, { tokensPerDay: 500 }) + RateLimit.tick(provider, 400) + const gate = RateLimit.check(provider, 200) + expect(gate.ok).toBe(false) + if (!gate.ok) expect(gate.reason).toBe("tokens-day") + }) + + test("recordUsage replaces the oldest pending token estimate", () => { + RateLimit.tick(provider, 500) + RateLimit.recordUsage(provider, 200, 50) + const snap = RateLimit.snapshot(provider) + 
expect(snap.tokensMinute.count).toBe(250) + }) + + test("estimateRequestTokens handles string, bytes, and objects", () => { + expect(RateLimit.estimateRequestTokens(undefined)).toBe(0) + expect(RateLimit.estimateRequestTokens("a".repeat(40))).toBe(10) + expect(RateLimit.estimateRequestTokens(new TextEncoder().encode("b".repeat(40)))).toBe(10) + expect(RateLimit.estimateRequestTokens({ content: "hello world" })).toBeGreaterThan(0) + }) + + test("onRateLimitError persists learned token limits", () => { + RateLimit.tick(provider, 1500) + RateLimit.tick(provider, 2500) + RateLimit.onRateLimitError(provider) + const written = JSON.parse(fs.readFileSync(configPath, "utf8")) + expect(written.provider[provider].options.rateLimit.tokensPerMinute).toBe(4000) + expect(written.provider[provider].options.rateLimit.tokensPerDay).toBe(4000) + }) + + test("check prefers server-advertised remaining when it's tighter", () => { + RateLimit.recordResponse( + provider, + new Headers({ + "x-ratelimit-limit-requests": "100", + "x-ratelimit-remaining-requests": "0", + "x-ratelimit-reset-requests": "30", + }), + ) + const gate = RateLimit.check(provider) + expect(gate.ok).toBe(false) + if (!gate.ok) expect(gate.reason).toBe("requests-minute") + }) })