OpenSIN-Code · Delqhi · May 30, 2026 · May 30, 2026 · May 30, 2026
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
@@ -0,0 +1,25 @@
+# Builds the Python distribution and publishes it to PyPI whenever a tag
+# whose name starts with "v" is pushed.
+name: release
+
+on:
+  push:
+    tags: ["v*"]
+
+permissions:
+  contents: read
+  id-token: write  # required for PyPI Trusted Publishing (no API token needed)
+
+jobs:
+  build-and-publish:
+    runs-on: ubuntu-latest
+    # The job runs in the GitHub environment named "pypi".
+    environment: pypi
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+      - name: Build distribution
+        run: |
+          python -m pip install --upgrade build hatchling
+          python -m build
+      # No credentials are passed to this step; it relies on the id-token
+      # permission granted above.
+      # NOTE(review): nothing here checks that the pushed tag matches the
+      # package version being built - confirm that is enforced elsewhere.
+      - name: Publish to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
diff --git a/.github/workflows/sin-verify.yml b/.github/workflows/sin-verify.yml
@@ -0,0 +1,33 @@
+# Runs the test suite and the audit-chain check on pull requests that target
+# the main branch.
+name: sin-verify
+
+on:
+  pull_request:
+    branches: ["main"]
+
+# Least privilege: every step below only reads the repository, so the job
+# token is restricted to read-only access to contents.
+permissions:
+  contents: read
+
+jobs:
+  verify:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0  # full history instead of a shallow clone
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      # NOTE(review): this installs the published package from the index, not
+      # the code in the checkout. If this workflow is meant to test the pull
+      # request's own copy of the package, install from the working tree
+      # instead - confirm the intent.
+      - name: Install SIN-Code Bundle
+        run: pip install "sin-code-bundle[dev]"
+
+      - name: Run test suite
+        run: pytest -q
+
+      # Fails the job (exit status 1) when verify_chain() returns a falsy value.
+      - name: Audit chain integrity
+        run: |
+          # Passes if no audit log exists yet (clean repo).
+          python -c "
+          from pathlib import Path
+          from sin_code_bundle.policy import AuditLog
+          ok = AuditLog(Path('.')).verify_chain()
+          print('Audit chain:', 'intact' if ok else 'TAMPERED')
+          raise SystemExit(0 if ok else 1)
+          "
diff --git a/.opencode/plugin/package.json b/.opencode/plugin/package.json
@@ -0,0 +1,9 @@
+{
+  "name": "sin-opencode-plugin",
+  "version": "0.1.0",
+  "private": true,
+  "type": "module",
+  "dependencies": {
+    "@opencode-ai/plugin": "^0.4.0"
+  }
+}
diff --git a/.opencode/plugin/sin.ts b/.opencode/plugin/sin.ts
@@ -0,0 +1,236 @@
+/**
+ * SIN-Code Bundle — opencode plugin
+ *
+ * Turns the AGENTS.md doctrine into an *enforced* protocol:
+ *   - after every file edit  -> run semantic_diff + architectural_debt
+ *   - before a session ends   -> require a GREEN Oracle verification
+ *   - on a tripped ADW breaker -> hard-stop the agent
+ *
+ * Docs: https://opencode.ai/docs/plugins
+ *
+ * The plugin talks to the SIN MCP tools that opencode already loaded via
+ * `opencode.json` (mcp.sin). It does not shell out to `sin` itself; instead it
+ * reads/writes a small session ledger under `.sin/session/` so the gate state
+ * survives across tool calls.
+ */
+
+import type { Plugin } from "@opencode-ai/plugin"
+import { mkdir, readFile, writeFile } from "node:fs/promises"
+import { join } from "node:path"
+
+// --------------------------------------------------------------------------- //
+// Config (overridable via env)
+// --------------------------------------------------------------------------- //
+// On-disk location of the gate ledger, relative to the working directory.
+const SIN_DIR = ".sin"
+const SESSION_DIR = join(SIN_DIR, "session")
+const LEDGER = join(SESSION_DIR, "gate.json")
+
+/**
+ * Risk level at or above which an edit triggers the warning
+ * (env SIN_RISK_BLOCK, default "high").
+ *
+ * Unrecognised values fall back to "high": an unknown level has no entry in
+ * RISK_ORDER, so comparing against it would be false for every edit and the
+ * warning would silently never fire.
+ */
+const RISK_BLOCK_LEVEL = (() => {
+  const raw = (process.env.SIN_RISK_BLOCK ?? "high").trim().toLowerCase()
+  return raw === "low" || raw === "medium" || raw === "high" ? raw : "high"
+})()
+
+/**
+ * Debt score (0-100) at which the breaker trips
+ * (env SIN_DEBT_BREAKER, default 85).
+ *
+ * A blank or non-numeric value falls back to 85. Without this, a non-numeric
+ * value becomes NaN (the breaker can never trip) and an empty string becomes
+ * 0 (the breaker trips on every edit).
+ */
+const DEBT_BREAKER = (() => {
+  const raw = (process.env.SIN_DEBT_BREAKER ?? "").trim()
+  const parsed = Number(raw)
+  return raw !== "" && Number.isFinite(parsed) ? parsed : 85
+})()
+
+/** Enforcement is on unless SIN_ENFORCE is exactly "0". */
+const ENFORCE = (process.env.SIN_ENFORCE ?? "1") !== "0"
+
+/** Risk levels in ascending severity; ranked numerically by RISK_ORDER. */
+type RiskLevel = "low" | "medium" | "high"
+
+/**
+ * Gate state persisted as JSON between hook invocations, so the verdict and
+ * the list of unverified files survive across tool calls.
+ */
+interface Ledger {
+  /** files edited but not yet verified green; emptied when a verification passes */
+  dirty: string[]
+  /** last Oracle verdict: "pass" | "fail" | "unknown"; any edit sets it back to "unknown" */
+  oracle: "pass" | "fail" | "unknown"
+  /** last architectural debt score 0-100 */
+  debt: number
+  /** highest risk seen since last green verification */
+  risk: RiskLevel
+  /** human-readable reasons accumulated for the current gate */
+  notes: string[]
+  /** ISO-8601 timestamp, overwritten by writeLedger on every save */
+  updatedAt: string
+}
+
+/**
+ * Default ledger: nothing dirty, verdict "unknown", debt 0, risk "low".
+ * Used to fill in missing fields on read and as the template for a reset.
+ *
+ * NOTE: `{ ...EMPTY_LEDGER }` is a shallow copy. The copy shares the `dirty`
+ * and `notes` arrays with this object, so pushing into the copy also mutates
+ * these defaults. Give a copy its own arrays before mutating it.
+ */
+const EMPTY_LEDGER: Ledger = {
+  dirty: [],
+  oracle: "unknown",
+  debt: 0,
+  risk: "low",
+  notes: [],
+  // Unix-epoch placeholder; writeLedger overwrites it on every save.
+  updatedAt: new Date(0).toISOString(),
+}
+
+// --------------------------------------------------------------------------- //
+// Ledger persistence
+// --------------------------------------------------------------------------- //
+/**
+ * Load the gate ledger from disk.
+ *
+ * Fields missing from the stored JSON are filled in from EMPTY_LEDGER. A
+ * missing, unreadable or malformed file yields a pristine ledger rather than
+ * an error.
+ *
+ * The returned object always owns fresh `dirty` and `notes` arrays. Callers
+ * push into them, so handing out the arrays of EMPTY_LEDGER itself (which a
+ * plain spread does) would pollute the shared defaults and make a later
+ * "reset" write stale entries back to disk.
+ *
+ * @returns a ledger that is safe to mutate.
+ */
+async function readLedger(): Promise<Ledger> {
+  let stored: Partial<Ledger> = {}
+  try {
+    const parsed: unknown = JSON.parse(await readFile(LEDGER, "utf8"))
+    // Only a plain JSON object can carry ledger fields; ignore null, arrays
+    // and scalars instead of spreading them into the ledger.
+    if (parsed !== null && typeof parsed === "object" && !Array.isArray(parsed)) {
+      stored = parsed as Partial<Ledger>
+    }
+  } catch {
+    // No ledger yet, or it cannot be read/parsed: continue with the defaults.
+  }
+  const ledger: Ledger = { ...EMPTY_LEDGER, ...stored }
+  ledger.dirty = Array.isArray(stored.dirty) ? [...stored.dirty] : []
+  ledger.notes = Array.isArray(stored.notes) ? [...stored.notes] : []
+  return ledger
+}
+
+/**
+ * Persist the ledger as pretty-printed JSON, creating the session directory
+ * first if it does not exist.
+ *
+ * The `updatedAt` field is stamped on the object that was passed in, so the
+ * caller's ledger is mutated.
+ */
+async function writeLedger(ledger: Ledger): Promise<void> {
+  const now = new Date()
+  ledger.updatedAt = now.toISOString()
+  await mkdir(SESSION_DIR, { recursive: true })
+  const payload = JSON.stringify(ledger, null, 2)
+  await writeFile(LEDGER, payload, "utf8")
+}
+
+/** Numeric rank of each risk level, so that levels can be compared. */
+const RISK_ORDER: Record<RiskLevel, number> = { low: 0, medium: 1, high: 2 }
+
+/** Return the more severe of two risk levels; `a` is returned on a tie. */
+function maxRisk(a: RiskLevel, b: RiskLevel): RiskLevel {
+  const rankA = RISK_ORDER[a]
+  const rankB = RISK_ORDER[b]
+  if (rankA >= rankB) {
+    return a
+  }
+  return b
+}
+
+// --------------------------------------------------------------------------- //
+// Helpers to call the SIN MCP tools through the opencode client
+// --------------------------------------------------------------------------- //
+/**
+ * Invoke one tool on the "sin" server through the client.
+ *
+ * Never throws: any failure is converted into `{ ok: false, error }` so that
+ * an unavailable subsystem degrades gracefully instead of crashing the agent.
+ *
+ * NOTE(review): confirm that the client really exposes `tool.call`. If it
+ * does not, every invocation lands in the catch branch and callers only ever
+ * see the degraded result.
+ *
+ * @param client - client object handed to the plugin
+ * @param tool - name of the tool to call
+ * @param args - arguments forwarded to the tool
+ * @returns the tool's result, or `{ ok: false, error }` on failure
+ */
+async function callSin(
+  client: any,
+  tool: string,
+  args: Record<string, unknown>,
+): Promise<any> {
+  try {
+    const request = { server: "sin", tool, arguments: args }
+    const response = await client.tool.call(request)
+    return response
+  } catch (failure) {
+    const error = String(failure)
+    return { ok: false, error }
+  }
+}
+
+/**
+ * Map a tool result onto a RiskLevel.
+ *
+ * Reads `risk`, falling back to `risk_level`; the value is compared
+ * case-insensitively. "high"/"critical" map to "high", "medium"/"moderate" to
+ * "medium", and anything else (including a missing value) to "low".
+ */
+function parseRisk(result: any): RiskLevel {
+  const raw = result?.risk ?? result?.risk_level ?? "low"
+  switch (String(raw).toLowerCase()) {
+    case "high":
+    case "critical":
+      return "high"
+    case "medium":
+    case "moderate":
+      return "medium"
+    default:
+      return "low"
+  }
+}
+
+/**
+ * Extract a debt score from a tool result.
+ *
+ * Uses the first of `score`, `debt`, `complexity` that is neither null nor
+ * undefined, converted with Number(). A missing or non-finite value yields 0.
+ */
+function parseDebt(result: any): number {
+  const candidate = result?.score ?? result?.debt ?? result?.complexity ?? 0
+  const value = Number(candidate)
+  if (!Number.isFinite(value)) {
+    return 0
+  }
+  return value
+}
+
+/**
+ * Map a verification tool result onto an Oracle verdict.
+ *
+ * An explicit verdict string (`verdict`, falling back to `status`) always
+ * wins. The boolean `ok` flag is consulted only when no recognised verdict
+ * string is present. This keeps a result such as
+ * `{ verdict: "fail", ok: true }` - the call succeeded but the checks failed -
+ * from being read as a pass and opening the gate.
+ *
+ * @returns "pass", "fail", or "unknown" when the result carries no usable signal
+ */
+function parseOracle(result: any): "pass" | "fail" | "unknown" {
+  const v = String(result?.verdict ?? result?.status ?? "").toLowerCase()
+
+  // 1) explicit verdict; failure is checked first so the gate fails closed
+  if (v === "fail" || v === "failed" || v === "red") return "fail"
+  if (v === "pass" || v === "passed" || v === "green") return "pass"
+
+  // 2) no recognised verdict string: fall back to the boolean flag
+  if (result?.ok === true) return "pass"
+  if (result?.ok === false) return "fail"
+  return "unknown"
+}
+
+// --------------------------------------------------------------------------- //
+// Plugin
+// --------------------------------------------------------------------------- //
+export const SinPlugin: Plugin = async ({ client, $ }) => {
+  return {
+    /**
+     * After any file edit: assess the change semantically and update debt.
+     * This is the "review" + "guard debt" steps of the SIN loop, automated.
+     *
+     * Side effects: marks the file dirty, sets the Oracle verdict back to
+     * "unknown", appends a note, and persists the ledger before any gate
+     * fires.
+     *
+     * NOTE(review): confirm that the plugin API invokes a top-level
+     * "file.edited" key with `{ file }`. If edits are delivered through a
+     * generic event hook instead, this handler never runs.
+     *
+     * @throws Error when enforcement is on and the debt score has reached
+     *   DEBT_BREAKER (the ledger has already been written at that point).
+     */
+    "file.edited": async ({ file }) => {
+      if (!file) return
+      const ledger = await readLedger()
+
+      // 1) semantic diff against git HEAD for this file
+      const diff = await callSin(client, "semantic_diff", {
+        file_a: `git:HEAD:${file}`,
+        file_b: file,
+      })
+      const risk = parseRisk(diff)
+      ledger.risk = maxRisk(ledger.risk, risk)
+
+      // 2) architectural debt snapshot
+      const debt = await callSin(client, "architectural_debt", {})
+      ledger.debt = parseDebt(debt)
+
+      // any edit invalidates the previous green verification
+      ledger.oracle = "unknown"
+      if (!ledger.dirty.includes(file)) ledger.dirty.push(file)
+
+      const note = `edited ${file} (risk=${risk}, debt=${ledger.debt})`
+      ledger.notes.push(note)
+      await writeLedger(ledger)
+
+      // 3) ADW breaker: hard stop
+      if (ENFORCE && ledger.debt >= DEBT_BREAKER) {
+        throw new Error(
+          `[SIN] ADW breaker tripped: debt ${ledger.debt} >= ${DEBT_BREAKER}. ` +
+            `Stop adding code and refactor. Re-run architectural_debt after refactor.`,
+        )
+      }
+
+      // 4) risk warning: advisory only. It neither stops the edit nor blocks
+      //    finishing; the finish gate looks at the Oracle verdict alone.
+      //    An unrecognised block level has no rank in RISK_ORDER, so fall back
+      //    to the "high" threshold instead of comparing against undefined
+      //    (which is always false and would silence the warning).
+      const threshold =
+        RISK_ORDER[RISK_BLOCK_LEVEL as RiskLevel] ?? RISK_ORDER.high
+      if (RISK_ORDER[risk] >= threshold) {
+        // NOTE(review): with `?.` a client that lacks session.log skips this
+        // warning silently - confirm the method exists on the client.
+        await client.session.log?.({
+          level: "warn",
+          message:
+            `[SIN] High-risk change in ${file}. Justify it and run ` +
+            `verify_tests before reporting done.`,
+        })
+      }
+    },
+
+    /**
+     * Before a tool runs: if the agent tries to "finish" while the gate is not
+     * green, intercept and force a verification first.
+     *
+     * A tool counts as a finish signal when its lower-cased name contains
+     * "done", "finish" or "complete". Does nothing when enforcement is off.
+     *
+     * Once the gate is green the ledger is reset for the next task. A passing
+     * verification empties `dirty`, so the reset must not be skipped just
+     * because no dirty files remain; otherwise notes and debt from finished
+     * tasks would never be cleared.
+     *
+     * @throws Error when files are still awaiting a green verification.
+     */
+    "tool.execute.before": async ({ tool }, output) => {
+      if (!ENFORCE) return
+      const name = (tool ?? "").toLowerCase()
+      const isFinishSignal =
+        name.includes("done") ||
+        name.includes("finish") ||
+        name.includes("complete")
+      if (!isFinishSignal) return
+
+      const ledger = await readLedger()
+      const green = ledger.oracle === "pass"
+
+      if (ledger.dirty.length > 0 && !green) {
+        throw new Error(
+          `[SIN] Cannot report done: Oracle verification is "${ledger.oracle}". ` +
+            `Files awaiting green verification: ${ledger.dirty.join(", ")}. ` +
+            `Run the SIN "verify_tests" tool until it returns pass.`,
+        )
+      }
+
+      // nothing edited and nothing verified: leave the ledger untouched
+      if (!green) return
+
+      // gate is green -> reset ledger for next task (with its own arrays, so
+      // the shared defaults are never handed out)
+      await writeLedger({ ...EMPTY_LEDGER, dirty: [], notes: [] })
+    },
+
+    /**
+     * After a verification tool runs: record the Oracle verdict so the finish
+     * gate can open. We watch for verify_tests / prove / verify_change results.
+     *
+     * A tool is treated as a verification when one of the words in its name
+     * starts with "verify", "prove" or "oracle". Words are split on
+     * non-alphanumeric characters and camelCase humps. Plain substring
+     * matching would also accept names such as "approve" or "improve", which
+     * merely contain "prove", and would let unrelated tools set the verdict.
+     *
+     * NOTE(review): confirm the shape of `output` for this hook. The verdict
+     * is parsed from `output.result` when present, otherwise from `output`
+     * itself; anything parseOracle does not recognise is stored as "unknown".
+     */
+    "tool.execute.after": async ({ tool }, output) => {
+      const words = String(tool ?? "")
+        .replace(/([a-z0-9])([A-Z])/g, "$1 $2")
+        .toLowerCase()
+        .split(/[^a-z0-9]+/)
+      const isVerify = words.some(
+        (word) =>
+          word.startsWith("verify") ||
+          word.startsWith("prove") ||
+          word.startsWith("oracle"),
+      )
+      if (!isVerify) return
+
+      const ledger = await readLedger()
+      const verdict = parseOracle(output?.result ?? output)
+      ledger.oracle = verdict
+      if (verdict === "pass") {
+        // a green verification clears the pending files and the risk level
+        ledger.dirty = []
+        ledger.risk = "low"
+        ledger.notes.push("oracle: PASS")
+      } else if (verdict === "fail") {
+        ledger.notes.push("oracle: FAIL")
+      }
+      await writeLedger(ledger)
+    },
+
+    /**
+     * Session idle: if edited files are still waiting for a green
+     * verification, log an informational reminder. Never throws on its own
+     * account and never changes the ledger.
+     */
+    "session.idle": async () => {
+      const { dirty, oracle } = await readLedger()
+      const pending = dirty.length
+      if (pending === 0 || oracle === "pass") return
+
+      await client.session.log?.({
+        level: "info",
+        message:
+          `[SIN] ${pending} file(s) edited without a green ` +
+          `verification. Run verify_tests before finishing.`,
+      })
+    },
+  }
+}
+
+export default SinPlugin
diff --git a/BENCHMARKS.md b/BENCHMARKS.md
@@ -0,0 +1,43 @@
+# SIN-Code Benchmarks
+
+We measure one thing: **does exposing the SIN tools improve an agent's
+resolved-rate?** The harness (`sin bench`) runs the same task set twice — once
+with SIN tools disabled (`control`) and once enabled (`sin`) — and reports the
+delta in percentage points.
+
+## Reproduce
+
+```bash
+pip install "sin-code-bundle[bench]"
+
+# Smoke test (no LLM cost — validates the clone/apply/test pipeline)
+sin bench --runner dry --limit 5
+
+# Full A/B on SWE-bench Lite with opencode
+sin bench --runner opencode --limit 100 --out report.json
+```
+
+## Methodology
+
+- **Dataset:** SWE-bench Lite (`princeton-nlp/SWE-bench_Lite`, test split).
+- **Arms:** `control` (SIN tools disabled, SIN_ENFORCE=0) vs `sin` (SIN tools loaded via MCP, SIN_ENFORCE=1).
+- **Resolved:** patch applies cleanly AND all FAIL_TO_PASS tests pass.
+- **Isolation:** each task runs in a fresh git clone at `base_commit`.
+
+## Results
+
+| Arm | Resolved | Rate | Mean time |
+|-----|----------|------|-----------|
+| control | *TBD* | *TBD* | *TBD* |
+| sin | *TBD* | *TBD* | *TBD* |
+| **delta** | | ***TBD* pp** | |
+
+> Fill this table from `report.json` after a full run and commit the
+> `report.json` alongside the version tag so results are auditable.
+
+## Interpretation
+
+A positive delta means the agent resolved more tasks with the SIN tools (impact
+analysis, semantic diff, Oracle verification) enabled. Agent runs are noisy, so
+confirm small deltas with repeated runs. The harness is runner-agnostic — the
+same JSON report can compare opencode, codex, and hermes on identical tasks.