flanglet · flanglet · Jun 12, 2026 · Jun 10, 2026
diff --git a/.github/scripts/msvc_build.cmd b/.github/scripts/msvc_build.cmd
@@ -0,0 +1,58 @@
+@echo off
+setlocal EnableExtensions
+rem Build one Kanzi target with MSVC. Without a fourth argument, print usage and exit with code 2.
+if "%~4"=="" (
+  echo Usage: msvc_build.cmd ROOT OUT_DIR TARGET ARCH
+  echo TARGET is testTransforms or kanzi
+  exit /b 2
+)
+rem ROOT and OUT_DIR are expanded to full paths. Objects go to a folder named after target and arch.
+set "ROOT=%~f1"
+set "OUT_DIR=%~f2"
+set "TARGET=%~3"
+set "ARCH=%~4"
+set "OBJ_DIR=%OUT_DIR%\obj-%TARGET%-%ARCH%"
+rem Ask vswhere for the latest Visual Studio install that provides the x86 and x64 VC tools.
+for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath`) do set "VSINSTALL=%%i"
+if "%VSINSTALL%"=="" (
+  echo Visual Studio installation not found
+  exit /b 1
+)
+rem Load the developer environment for the requested target arch with an x64 host toolchain.
+call "%VSINSTALL%\Common7\Tools\VsDevCmd.bat" -arch=%ARCH% -host_arch=x64
+if errorlevel 1 exit /b %errorlevel%
+rem Create the output and object folders when they do not exist yet.
+if not exist "%OUT_DIR%" mkdir "%OUT_DIR%"
+if not exist "%OBJ_DIR%" mkdir "%OBJ_DIR%"
+rem Work from ROOT so the git pathspecs and the relative source paths resolve there.
+pushd "%ROOT%"
+if errorlevel 1 exit /b %errorlevel%
+rem Library source list: tracked cpp files under src, leaving out src/test and src/app.
+git ls-files "src/*.cpp" "src/*/*.cpp" | findstr /v /i /c:"src/test/" /c:"src/app/" > "%OUT_DIR%\kanzi_lib_sources.rsp"
+if errorlevel 1 exit /b %errorlevel%
+rem CLI source list: the same query, leaving out only src/test.
+git ls-files "src/*.cpp" "src/*/*.cpp" | findstr /v /i /c:"src/test/" > "%OUT_DIR%\kanzi_cli_sources.rsp"
+if errorlevel 1 exit /b %errorlevel%
+rem Dispatch on TARGET, compared without regard to case.
+if /i "%TARGET%"=="testTransforms" goto build_testTransforms
+if /i "%TARGET%"=="kanzi" goto build_kanzi
+rem No label matched: leave ROOT again and fail with exit code 2.
+popd
+echo Unknown target: %TARGET%
+exit /b 2
+rem testTransforms: list the test driver ahead of the library sources, then compile the response file.
+:build_testTransforms
+(
+  echo src\test\TestTransforms.cpp
+  type "%OUT_DIR%\kanzi_lib_sources.rsp"
+) > "%OUT_DIR%\test_transform_sources.rsp"
+cl /nologo /EHsc /MT /O2 /DNDEBUG /D_CRT_SECURE_NO_WARNINGS /DTestTransforms_main=main /GR- /std:c++17 /Zc:__cplusplus /I "%ROOT%\src" /Fo"%OBJ_DIR%\\" /Fe"%OUT_DIR%\testTransforms.exe" @"%OUT_DIR%\test_transform_sources.rsp"
+set "RC=%ERRORLEVEL%"
+popd
+exit /b %RC%
+rem kanzi: compile the CLI source list into kanzi.exe. The compiler exit code is kept across popd.
+:build_kanzi
+cl /nologo /EHsc /MT /O2 /DNDEBUG /D_CRT_SECURE_NO_WARNINGS /GR- /std:c++17 /Zc:__cplusplus /I "%ROOT%\src" /Fo"%OBJ_DIR%\\" /Fe"%OUT_DIR%\kanzi.exe" @"%OUT_DIR%\kanzi_cli_sources.rsp"
+set "RC=%ERRORLEVEL%"
+popd
+exit /b %RC%
diff --git a/.github/scripts/stream_roundtrip.py b/.github/scripts/stream_roundtrip.py
@@ -0,0 +1,124 @@
+#!/usr/bin/env python3
+import argparse
+import hashlib
+import random
+import subprocess
+import tempfile
+from pathlib import Path
+
+
+SAMPLE_SIZE = 4 * 1024 * 1024  # total bytes the writers aim to put in each sample file (4 MiB)
+CHUNK_SIZE = 256 * 1024  # bytes generated and written per step by the sample writers (256 KiB)
+LEVELS = tuple(range(1, 10))  # values 1 through 9, each passed to kanzi after "-l"
+
+
+def sha256(path):
+    digest = hashlib.sha256()
+
+    with path.open("rb") as source:
+        for chunk in iter(lambda: source.read(1024 * 1024), b""):
+            digest.update(chunk)
+
+    return digest.hexdigest()
+
+
+def write_random(path):
+    rng = random.Random(0x4B414E5A49)
+    remaining = SAMPLE_SIZE
+
+    with path.open("wb") as target:
+        while remaining > 0:
+            size = min(CHUNK_SIZE, remaining)
+            target.write(bytes(rng.getrandbits(8) for _ in range(size)))
+            remaining -= size
+
+
+def write_structured(path):
+    line = (
+        b"kanzi stream transform check 0123456789 "
+        b"abcdefghijklmnopqrstuvwxyz repeated fields ROLZ RANK SRT "
+        + b"A" * 96
+        + b"\n"
+    )
+    chunk = (line * ((CHUNK_SIZE // len(line)) + 1))[:CHUNK_SIZE]
+    remaining = SAMPLE_SIZE
+
+    with path.open("wb") as target:
+        while remaining > 0:
+            target.write(chunk[: min(CHUNK_SIZE, remaining)])
+            remaining -= CHUNK_SIZE
+
+
+def write_mixed(path):
+    rng = random.Random(0x53545245414D)
+    chunks = SAMPLE_SIZE // CHUNK_SIZE
+
+    with path.open("wb") as target:
+        for block in range(chunks):
+            mode = block % 4
+
+            if mode == 0:
+                data = bytes((idx + block) & 255 for idx in range(CHUNK_SIZE))
+            elif mode == 1:
+                data = bytes(65 + (((idx // 257) + block) % 26) for idx in range(CHUNK_SIZE))
+            elif mode == 2:
+                data = bytes(rng.getrandbits(8) for _ in range(CHUNK_SIZE))
+            else:
+                data = b"\0" * CHUNK_SIZE
+
+            target.write(data)
+
+
+def run(command):
+    print("+ " + " ".join(str(part) for part in command), flush=True)
+    subprocess.run(command, check=True)
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("kanzi", type=Path)
+    args = parser.parse_args()
+
+    kanzi = args.kanzi.resolve()
+
+    if not kanzi.is_file():
+        raise SystemExit(f"Missing Kanzi executable: {kanzi}")
+
+    with tempfile.TemporaryDirectory(prefix="kanzi-stream-") as tmp_dir:
+        root = Path(tmp_dir)
+        samples = {
+            "random": root / "random-4m.bin",
+            "structured": root / "structured-4m.txt",
+            "mixed": root / "mixed-4m.bin",
+        }
+        writers = {
+            "random": write_random,
+            "structured": write_structured,
+            "mixed": write_mixed,
+        }
+
+        for name, writer in writers.items():
+            writer(samples[name])
+
+        for name, source in samples.items():
+            source_hash = sha256(source)
+
+            for level in LEVELS:
+                compressed = root / f"{name}-l{level}.knz"
+                decoded = root / f"{name}-l{level}.out"
+                run([str(kanzi), "-c", "-i", str(source), "-o", str(compressed), "-f", "-b", "1m", "-l", str(level), "-x32", "-j", "1", "-v", "1"])
+                run([str(kanzi), "-d", "-i", str(compressed), "-o", str(decoded), "-f", "-j", "1", "-v", "1"])
+                decoded_hash = sha256(decoded)
+
+                if decoded_hash != source_hash:
+                    raise SystemExit(f"SHA-256 mismatch for {name} level {level}")
+
+                print(
+                    f"{name} level {level}: {source.stat().st_size} => "
+                    f"{compressed.stat().st_size}, SHA-256 match",
+                    flush=True,
+                )
+
+
+if __name__ == "__main__":  # run the round trips only when executed as a script
+    main()
diff --git a/.github/scripts/transform_regression.py b/.github/scripts/transform_regression.py
@@ -0,0 +1,89 @@
+#!/usr/bin/env python3
+import argparse
+import subprocess
+import tempfile
+from pathlib import Path
+
+
+COMMANDS = (  # (case name, arguments appended to the test executable) pairs, run in this order
+    ("ROLZ", ("-type=ROLZ", "-noperf")),
+    ("SRT", ("-type=SRT", "-noperf")),
+    ("RANK", ("-type=RANK", "-noperf")),
+    ("all", ("-type=all", "-noperf")),
+)
+
+
+def print_tail(path, lines=80):
+    try:
+        content = path.read_text(errors="replace").splitlines()
+    except OSError as exc:
+        print(f"Could not read log {path}: {exc}")
+        return
+
+    tail = content[-lines:]
+
+    if tail:
+        print(f"--- tail of {path.name} ---")
+        print("\n".join(tail))
+        print("--- end tail ---")
+
+
+def run_case(executable, name, args, timeout):
+    log_file = Path(tempfile.gettempdir()) / f"kanzi-transform-{name}.log"
+    command = [str(executable), *args]
+    print("+ " + " ".join(command), flush=True)
+
+    with log_file.open("w", encoding="utf-8", errors="replace") as log:
+        try:
+            result = subprocess.run(
+                command,
+                stdout=log,
+                stderr=subprocess.STDOUT,
+                timeout=timeout,
+                check=False,
+            )
+            return result.returncode, log_file
+        except subprocess.TimeoutExpired:
+            log.write(f"\nTIMEOUT after {timeout} seconds\n")
+            return 124, log_file
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("executable", type=Path)
+    parser.add_argument("--expect", choices=("pass", "fail"), required=True)
+    parser.add_argument("--timeout", type=int, default=300)
+    args = parser.parse_args()
+
+    executable = args.executable.resolve()
+
+    if not executable.is_file():
+        raise SystemExit(f"Missing test executable: {executable}")
+
+    failures = []
+
+    for name, command_args in COMMANDS:
+        code, log_file = run_case(executable, name, command_args, args.timeout)
+        print(f"{name}: exit {code}, log {log_file}", flush=True)
+
+        if code != 0:
+            failures.append((name, code, log_file))
+            print_tail(log_file)
+
+    if args.expect == "pass":
+        if failures:
+            failed = ", ".join(f"{name}={code}" for name, code, _ in failures)
+            raise SystemExit(f"Expected all transform cases to pass, got {failed}")
+
+        print("All transform regression cases passed", flush=True)
+        return
+
+    if not failures:
+        raise SystemExit("Expected the baseline to reproduce at least one transform failure")
+
+    failed = ", ".join(f"{name}={code}" for name, code, _ in failures)
+    print(f"Baseline reproduced transform failure(s): {failed}", flush=True)
+
+
+if __name__ == "__main__":  # run the regression cases only when executed as a script
+    main()
diff --git a/.github/workflows/kanzi-quality.yml b/.github/workflows/kanzi-quality.yml
@@ -0,0 +1,97 @@
+name: Kanzi quality
+
+on:
+  push:
+    branches: [ master ]  # pushes to master
+  pull_request:
+    branches: [ master ]  # pull requests targeting master
+  workflow_dispatch:  # manual run; the input below can override the contrast job's baseline
+    inputs:
+      baseline_ref:
+        description: Known-bad ref used by the MSVC contrast job
+        required: false
+        default: f583957239707a6913a2a9994527bb1d3f623aa7  # same commit as KANZI_REGRESSION_BASE below
+
+env:
+  KANZI_REGRESSION_BASE: f583957239707a6913a2a9994527bb1d3f623aa7  # baseline used when no baseline_ref input is supplied
+
+jobs:
+  windows-regression-contrast:  # builds a known-bad ref and HEAD; the first must fail, the second must pass
+    name: windows-2025-vs2026 / msvc x64 baseline contrast
+    runs-on: windows-2025-vs2026
+    steps:
+      - uses: actions/checkout@v6.0.1
+        with:
+          fetch-depth: 0  # full history; the baseline commit must be present for git worktree add
+      - name: Select known-bad baseline
+        shell: pwsh
+        env:
+          REQUESTED_BASELINE: ${{ github.event.inputs.baseline_ref }}  # handed over as data, never spliced into the script text
+        run: |
+          $baseline = if ([string]::IsNullOrWhiteSpace($env:REQUESTED_BASELINE)) { $env:KANZI_REGRESSION_BASE } else { $env:REQUESTED_BASELINE }
+          "BASELINE_REF=$baseline" >> $env:GITHUB_ENV
+      - name: Build baseline and current testTransforms
+        shell: cmd
+        run: |
+          git worktree add "%RUNNER_TEMP%\baseline" "%BASELINE_REF%"
+          if errorlevel 1 exit /b %errorlevel%
+          call .github\scripts\msvc_build.cmd "%RUNNER_TEMP%\baseline" "%RUNNER_TEMP%\baseline-msvc-x64" testTransforms x64
+          if errorlevel 1 exit /b %errorlevel%
+          call .github\scripts\msvc_build.cmd "%GITHUB_WORKSPACE%" "%RUNNER_TEMP%\head-msvc-x64" testTransforms x64
+          if errorlevel 1 exit /b %errorlevel%
+      - name: Confirm baseline reproduces transform failure
+        shell: cmd
+        run: python .github\scripts\transform_regression.py --expect fail "%RUNNER_TEMP%\baseline-msvc-x64\testTransforms.exe"
+      - name: Confirm current head fixes transform failure
+        shell: cmd
+        run: python .github\scripts\transform_regression.py --expect pass "%RUNNER_TEMP%\head-msvc-x64\testTransforms.exe"
+
+  unix-transforms:  # make-based build, then transform tests and stream round trips, per matrix entry
+    name: ${{ matrix.os }} / ${{ matrix.cxx }}
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false  # keep the other matrix entries running when one fails
+      matrix:
+        include:  # explicit runner and compiler pairs
+          - os: ubuntu-latest
+            cc: gcc
+            cxx: g++
+          - os: ubuntu-latest
+            cc: clang
+            cxx: clang++
+          - os: macos-latest
+            cc: clang
+            cxx: clang++
+    steps:
+      - uses: actions/checkout@v6.0.1
+      - name: Build testTransforms and kanzi
+        run: |  # the matrix cc and cxx values are passed to make as CC and CXX
+          cd src
+          make clean
+          make testTransforms kanzi CC=${{ matrix.cc }} CXX=${{ matrix.cxx }}
+      - name: Run transform tests
+        run: python3 .github/scripts/transform_regression.py --expect pass bin/testTransforms
+      - name: Run stream round trips
+        run: python3 .github/scripts/stream_roundtrip.py bin/kanzi
+
+  windows-transforms:  # MSVC build of the checked-out tree per arch, then the same two test scripts
+    name: windows-2025-vs2026 / msvc ${{ matrix.arch }}
+    runs-on: windows-2025-vs2026
+    strategy:
+      fail-fast: false  # keep the other arch running when one fails
+      matrix:
+        arch: [ x64, x86 ]  # passed to msvc_build.cmd as its ARCH argument
+    steps:
+      - uses: actions/checkout@v6.0.1
+      - name: Build and run transform tests
+        shell: cmd
+        run: |
+          call .github\scripts\msvc_build.cmd "%GITHUB_WORKSPACE%" "%RUNNER_TEMP%\head-msvc-${{ matrix.arch }}" testTransforms ${{ matrix.arch }}
+          if errorlevel 1 exit /b %errorlevel%
+          python .github\scripts\transform_regression.py --expect pass "%RUNNER_TEMP%\head-msvc-${{ matrix.arch }}\testTransforms.exe"
+      - name: Build and run stream round trips
+        shell: cmd
+        run: |
+          call .github\scripts\msvc_build.cmd "%GITHUB_WORKSPACE%" "%RUNNER_TEMP%\head-msvc-${{ matrix.arch }}" kanzi ${{ matrix.arch }}
+          if errorlevel 1 exit /b %errorlevel%
+          python .github\scripts\stream_roundtrip.py "%RUNNER_TEMP%\head-msvc-${{ matrix.arch }}\kanzi.exe"