Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 58 additions & 0 deletions .github/scripts/msvc_build.cmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
@echo off
rem Build one Kanzi target with MSVC from the git-tracked sources of a checkout.
rem
rem Usage: msvc_build.cmd ROOT OUT_DIR TARGET ARCH
rem   ROOT     checkout whose src tree is compiled
rem   OUT_DIR  receives the response files, the object directory and the exe
rem   TARGET   testTransforms or kanzi
rem   ARCH     passed to VsDevCmd.bat as -arch
rem
rem Exit codes: 2 for a usage error or an unknown target, 1 when Visual Studio
rem is not found, otherwise the status of the first failing step.
setlocal EnableExtensions

if "%~4"=="" (
  echo Usage: msvc_build.cmd ROOT OUT_DIR TARGET ARCH
  echo TARGET is testTransforms or kanzi
  exit /b 2
)

rem ROOT and OUT_DIR are expanded to full paths so later directory changes
rem do not affect them.
set "ROOT=%~f1"
set "OUT_DIR=%~f2"
set "TARGET=%~3"
set "ARCH=%~4"
set "OBJ_DIR=%OUT_DIR%\obj-%TARGET%-%ARCH%"

rem Reject an unknown target before paying for toolchain setup and the
rem source listing.
if /i "%TARGET%"=="testTransforms" goto target_ok
if /i "%TARGET%"=="kanzi" goto target_ok
echo Unknown target: %TARGET%
exit /b 2

:target_ok
rem Start from an empty value so a VSINSTALL inherited from the caller cannot
rem hide a failed vswhere lookup.
set "VSINSTALL="
for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath`) do set "VSINSTALL=%%i"
if not defined VSINSTALL (
  echo Visual Studio installation not found
  exit /b 1
)

rem Load the compiler environment for the requested target architecture.
call "%VSINSTALL%\Common7\Tools\VsDevCmd.bat" -arch=%ARCH% -host_arch=x64
if errorlevel 1 exit /b %errorlevel%

if not exist "%OUT_DIR%" mkdir "%OUT_DIR%"
if not exist "%OBJ_DIR%" mkdir "%OBJ_DIR%"

rem The source lists below hold paths relative to ROOT, so compile from there.
pushd "%ROOT%"
if errorlevel 1 exit /b %errorlevel%

rem Library sources: tracked .cpp files outside src/test/ and src/app/.
git ls-files "src/*.cpp" "src/*/*.cpp" | findstr /v /i /c:"src/test/" /c:"src/app/" > "%OUT_DIR%\kanzi_lib_sources.rsp"
if errorlevel 1 exit /b %errorlevel%

rem Command-line tool sources: tracked .cpp files outside src/test/ only.
git ls-files "src/*.cpp" "src/*/*.cpp" | findstr /v /i /c:"src/test/" > "%OUT_DIR%\kanzi_cli_sources.rsp"
if errorlevel 1 exit /b %errorlevel%

if /i "%TARGET%"=="testTransforms" goto build_testTransforms
if /i "%TARGET%"=="kanzi" goto build_kanzi

rem Not reachable after the early target check; kept as a guard.
popd
echo Unknown target: %TARGET%
exit /b 2

:build_testTransforms
rem The test build compiles TestTransforms.cpp together with the library list.
(
  echo src\test\TestTransforms.cpp
  type "%OUT_DIR%\kanzi_lib_sources.rsp"
) > "%OUT_DIR%\test_transform_sources.rsp"
rem TestTransforms_main is defined as main, presumably so the test entry point
rem becomes the program entry point - confirm against TestTransforms.cpp.
cl /nologo /EHsc /MT /O2 /DNDEBUG /D_CRT_SECURE_NO_WARNINGS /DTestTransforms_main=main /GR- /std:c++17 /Zc:__cplusplus /I "%ROOT%\src" /Fo"%OBJ_DIR%\\" /Fe"%OUT_DIR%\testTransforms.exe" @"%OUT_DIR%\test_transform_sources.rsp"
rem Save the compiler status before popd so it is the value returned.
set "RC=%ERRORLEVEL%"
popd
exit /b %RC%

:build_kanzi
cl /nologo /EHsc /MT /O2 /DNDEBUG /D_CRT_SECURE_NO_WARNINGS /GR- /std:c++17 /Zc:__cplusplus /I "%ROOT%\src" /Fo"%OBJ_DIR%\\" /Fe"%OUT_DIR%\kanzi.exe" @"%OUT_DIR%\kanzi_cli_sources.rsp"
rem Save the compiler status before popd so it is the value returned.
set "RC=%ERRORLEVEL%"
popd
exit /b %RC%
124 changes: 124 additions & 0 deletions .github/scripts/stream_roundtrip.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
#!/usr/bin/env python3
import argparse
import hashlib
import random
import subprocess
import tempfile
from pathlib import Path


# Every generated sample is SAMPLE_SIZE bytes (4 MiB), written in CHUNK_SIZE
# (256 KiB) pieces.
SAMPLE_SIZE = 4 << 20
CHUNK_SIZE = 256 << 10
# Compression levels exercised for each sample (passed to kanzi with -l).
LEVELS = (1, 2, 3, 4, 5, 6, 7, 8, 9)


def sha256(path):
    """Return the hexadecimal SHA-256 digest of the file at ``path``.

    ``path`` must offer ``open`` the way ``pathlib.Path`` does. The file is
    read in 1 MiB pieces rather than all at once.
    """
    hasher = hashlib.sha256()
    block_size = 1024 * 1024

    with path.open("rb") as stream:
        while True:
            block = stream.read(block_size)
            if not block:
                break
            hasher.update(block)

    return hasher.hexdigest()


def write_random(path):
    """Fill ``path`` with SAMPLE_SIZE pseudo-random bytes.

    The generator is seeded with a fixed constant, so the byte sequence is
    the same on every run.
    """
    generator = random.Random(0x4B414E5A49)

    with path.open("wb") as sink:
        for offset in range(0, SAMPLE_SIZE, CHUNK_SIZE):
            # The last piece is shorter when SAMPLE_SIZE is not a multiple
            # of CHUNK_SIZE.
            count = min(CHUNK_SIZE, SAMPLE_SIZE - offset)
            sink.write(bytes(generator.getrandbits(8) for _ in range(count)))


def write_structured(path):
    """Fill ``path`` with SAMPLE_SIZE bytes of one repeated text line."""
    pattern = (
        b"kanzi stream transform check 0123456789 "
        b"abcdefghijklmnopqrstuvwxyz repeated fields ROLZ RANK SRT "
        + b"A" * 96
        + b"\n"
    )
    # One CHUNK_SIZE tile of the repeated line; the tile may end mid-line.
    repeats = CHUNK_SIZE // len(pattern) + 1
    tile = (pattern * repeats)[:CHUNK_SIZE]

    with path.open("wb") as sink:
        for offset in range(0, SAMPLE_SIZE, CHUNK_SIZE):
            sink.write(tile[: min(CHUNK_SIZE, SAMPLE_SIZE - offset)])


def write_mixed(path):
    """Fill ``path`` with CHUNK_SIZE blocks that cycle through four patterns.

    Block ``n`` uses pattern ``n % 4``: a byte ramp shifted by the block
    number, runs of upper-case letters, seeded pseudo-random bytes, or zeros.
    Only whole blocks are written (``SAMPLE_SIZE // CHUNK_SIZE`` of them).
    """
    generator = random.Random(0x53545245414D)

    def ramp(block):
        return bytes((idx + block) & 255 for idx in range(CHUNK_SIZE))

    def letters(block):
        return bytes(65 + (((idx // 257) + block) % 26) for idx in range(CHUNK_SIZE))

    def noise(block):
        # Shares one generator across blocks, so the random blocks continue
        # a single seeded sequence.
        return bytes(generator.getrandbits(8) for _ in range(CHUNK_SIZE))

    def zeros(block):
        return b"\0" * CHUNK_SIZE

    makers = (ramp, letters, noise, zeros)

    with path.open("wb") as sink:
        for block in range(SAMPLE_SIZE // CHUNK_SIZE):
            sink.write(makers[block % 4](block))


def run(command):
    """Echo ``command`` prefixed with ``+ ``, then execute it.

    Raises ``subprocess.CalledProcessError`` when the command exits with a
    non-zero status.
    """
    rendered = " ".join(map(str, command))
    print(f"+ {rendered}", flush=True)
    subprocess.run(command, check=True)


def main():
    """Round-trip generated samples through the kanzi executable.

    The single positional argument is the path to the kanzi binary. Three
    sample files are generated in a temporary directory; each is compressed
    and decompressed once per entry in LEVELS, and the SHA-256 of the decoded
    file must equal that of the sample.

    Raises:
        SystemExit: if the executable is missing or a digest differs.
        subprocess.CalledProcessError: if a kanzi invocation fails.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("kanzi", type=Path)
    kanzi = parser.parse_args().kanzi.resolve()

    if not kanzi.is_file():
        raise SystemExit(f"Missing Kanzi executable: {kanzi}")

    generators = (
        ("random", "random-4m.bin", write_random),
        ("structured", "structured-4m.txt", write_structured),
        ("mixed", "mixed-4m.bin", write_mixed),
    )

    with tempfile.TemporaryDirectory(prefix="kanzi-stream-") as tmp_dir:
        root = Path(tmp_dir)
        samples = {}

        # Generate every sample before the first kanzi run.
        for name, file_name, writer in generators:
            samples[name] = root / file_name
            writer(samples[name])

        for name, source in samples.items():
            expected = sha256(source)

            for level in LEVELS:
                stem = f"{name}-l{level}"
                compressed = root / f"{stem}.knz"
                decoded = root / f"{stem}.out"

                compress = [
                    str(kanzi), "-c",
                    "-i", str(source),
                    "-o", str(compressed),
                    "-f",
                    "-b", "1m",
                    "-l", str(level),
                    "-x32",
                    "-j", "1",
                    "-v", "1",
                ]
                decompress = [
                    str(kanzi), "-d",
                    "-i", str(compressed),
                    "-o", str(decoded),
                    "-f",
                    "-j", "1",
                    "-v", "1",
                ]
                run(compress)
                run(decompress)

                if sha256(decoded) != expected:
                    raise SystemExit(f"SHA-256 mismatch for {name} level {level}")

                original_size = source.stat().st_size
                packed_size = compressed.stat().st_size
                print(
                    f"{name} level {level}: {original_size} => "
                    f"{packed_size}, SHA-256 match",
                    flush=True,
                )


if __name__ == "__main__":
    main()
89 changes: 89 additions & 0 deletions .github/scripts/transform_regression.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
#!/usr/bin/env python3
import argparse
import subprocess
import tempfile
from pathlib import Path


# Regression cases as (label, arguments for the test executable). Each case
# selects one transform type with -type=<label> and always adds -noperf.
COMMANDS = tuple(
    (kind, (f"-type={kind}", "-noperf"))
    for kind in ("ROLZ", "SRT", "RANK", "all")
)


def print_tail(path, lines=80):
    """Print the last ``lines`` lines of the log file at ``path``.

    Args:
        path: ``pathlib.Path`` of the log to read.
        lines: maximum number of trailing lines to show. Zero or a negative
            value shows nothing.

    A log that cannot be read is reported on stdout instead of raising, and
    an empty log prints nothing.
    """
    try:
        # run_case opens the log as UTF-8, so decode it the same way instead
        # of relying on the platform default encoding.
        content = path.read_text(encoding="utf-8", errors="replace").splitlines()
    except OSError as exc:
        print(f"Could not read log {path}: {exc}", flush=True)
        return

    # content[-0:] is the whole list, so a non-positive count must be mapped
    # to an empty tail explicitly.
    tail = content[-lines:] if lines > 0 else []

    if tail:
        print(f"--- tail of {path.name} ---")
        print("\n".join(tail))
        print("--- end tail ---", flush=True)


def run_case(executable, name, args, timeout):
    """Run one test case with its combined output redirected to a log file.

    Args:
        executable: program to launch.
        name: case label; it becomes part of the log file name.
        args: additional command-line arguments (strings).
        timeout: seconds allowed before the run is abandoned.

    Returns:
        ``(exit_code, log_path)``. ``exit_code`` is the process return code,
        or 124 when the timeout expired, in which case a ``TIMEOUT`` note is
        written to the log.
    """
    log_path = Path(tempfile.gettempdir()) / f"kanzi-transform-{name}.log"
    argv = [str(executable)]
    argv.extend(args)
    print(f"+ {' '.join(argv)}", flush=True)

    with log_path.open("w", encoding="utf-8", errors="replace") as sink:
        try:
            completed = subprocess.run(
                argv,
                stdout=sink,
                stderr=subprocess.STDOUT,
                timeout=timeout,
                check=False,
            )
        except subprocess.TimeoutExpired:
            sink.write(f"\nTIMEOUT after {timeout} seconds\n")
            exit_code = 124
        else:
            exit_code = completed.returncode

    return exit_code, log_path


def main():
    """Run every case in COMMANDS and compare the outcome with ``--expect``.

    Command line: ``executable --expect {pass,fail} [--timeout SECONDS]``.
    Each failing case (non-zero exit code) has the tail of its log printed.

    With ``--expect pass`` any failing case is an error. With
    ``--expect fail`` at least one case has to fail, otherwise the run is an
    error.

    Raises:
        SystemExit: if the executable is missing or the outcome does not
            match the expectation.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("executable", type=Path)
    parser.add_argument("--expect", choices=("pass", "fail"), required=True)
    parser.add_argument("--timeout", type=int, default=300)
    options = parser.parse_args()

    executable = options.executable.resolve()

    if not executable.is_file():
        raise SystemExit(f"Missing test executable: {executable}")

    failed_cases = []

    for name, command_args in COMMANDS:
        code, log_file = run_case(executable, name, command_args, options.timeout)
        print(f"{name}: exit {code}, log {log_file}", flush=True)

        if code == 0:
            continue

        failed_cases.append(f"{name}={code}")
        print_tail(log_file)

    failed = ", ".join(failed_cases)

    if options.expect == "pass":
        if failed_cases:
            raise SystemExit(f"Expected all transform cases to pass, got {failed}")

        print("All transform regression cases passed", flush=True)
    elif failed_cases:
        print(f"Baseline reproduced transform failure(s): {failed}", flush=True)
    else:
        raise SystemExit("Expected the baseline to reproduce at least one transform failure")


if __name__ == "__main__":
    main()
97 changes: 97 additions & 0 deletions .github/workflows/kanzi-quality.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
# Builds Kanzi on Linux, macOS and Windows, runs the transform regression
# cases and the stream round-trip check, and on Windows also confirms that a
# known-bad baseline still reproduces the transform failure.
name: Kanzi quality

on:
  push:
    branches: [ master ]
  pull_request:
    branches: [ master ]
  workflow_dispatch:
    inputs:
      baseline_ref:
        description: Known-bad ref used by the MSVC contrast job
        required: false
        default: f583957239707a6913a2a9994527bb1d3f623aa7

# Every job only checks out and builds the repository, so the workflow token
# is limited to read access.
permissions:
  contents: read

env:
  # Baseline used when no baseline_ref input is supplied (push, pull_request).
  KANZI_REGRESSION_BASE: f583957239707a6913a2a9994527bb1d3f623aa7

jobs:
  windows-regression-contrast:
    name: windows-2025-vs2026 / msvc x64 baseline contrast
    runs-on: windows-2025-vs2026
    steps:
      - uses: actions/checkout@v6.0.1
        with:
          # Full history, so the baseline commit is available to git worktree.
          fetch-depth: 0
      - name: Select known-bad baseline
        shell: pwsh
        env:
          # The input reaches the script through the environment and is never
          # expanded into the script text, so its content cannot be executed.
          BASELINE_INPUT: ${{ github.event.inputs.baseline_ref }}
        run: |
          $baseline = $env:BASELINE_INPUT
          if ([string]::IsNullOrWhiteSpace($baseline)) {
            $baseline = $env:KANZI_REGRESSION_BASE
          }
          "BASELINE_REF=$baseline" >> $env:GITHUB_ENV
      - name: Build baseline and current testTransforms
        shell: cmd
        run: |
          git worktree add --detach "%RUNNER_TEMP%\baseline" "%BASELINE_REF%"
          if errorlevel 1 exit /b %errorlevel%
          call .github\scripts\msvc_build.cmd "%RUNNER_TEMP%\baseline" "%RUNNER_TEMP%\baseline-msvc-x64" testTransforms x64
          if errorlevel 1 exit /b %errorlevel%
          call .github\scripts\msvc_build.cmd "%GITHUB_WORKSPACE%" "%RUNNER_TEMP%\head-msvc-x64" testTransforms x64
          if errorlevel 1 exit /b %errorlevel%
      - name: Confirm baseline reproduces transform failure
        shell: cmd
        run: python .github\scripts\transform_regression.py --expect fail "%RUNNER_TEMP%\baseline-msvc-x64\testTransforms.exe"
      - name: Confirm current head fixes transform failure
        shell: cmd
        run: python .github\scripts\transform_regression.py --expect pass "%RUNNER_TEMP%\head-msvc-x64\testTransforms.exe"

  unix-transforms:
    name: ${{ matrix.os }} / ${{ matrix.cxx }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        include:
          - os: ubuntu-latest
            cc: gcc
            cxx: g++
          - os: ubuntu-latest
            cc: clang
            cxx: clang++
          - os: macos-latest
            cc: clang
            cxx: clang++
    steps:
      - uses: actions/checkout@v6.0.1
      - name: Build testTransforms and kanzi
        run: |
          cd src
          make clean
          make testTransforms kanzi CC=${{ matrix.cc }} CXX=${{ matrix.cxx }}
      - name: Run transform tests
        run: python3 .github/scripts/transform_regression.py --expect pass bin/testTransforms
      - name: Run stream round trips
        run: python3 .github/scripts/stream_roundtrip.py bin/kanzi

  windows-transforms:
    name: windows-2025-vs2026 / msvc ${{ matrix.arch }}
    runs-on: windows-2025-vs2026
    strategy:
      fail-fast: false
      matrix:
        arch: [ x64, x86 ]
    steps:
      - uses: actions/checkout@v6.0.1
      - name: Build and run transform tests
        shell: cmd
        run: |
          call .github\scripts\msvc_build.cmd "%GITHUB_WORKSPACE%" "%RUNNER_TEMP%\head-msvc-${{ matrix.arch }}" testTransforms ${{ matrix.arch }}
          if errorlevel 1 exit /b %errorlevel%
          python .github\scripts\transform_regression.py --expect pass "%RUNNER_TEMP%\head-msvc-${{ matrix.arch }}\testTransforms.exe"
      - name: Build and run stream round trips
        shell: cmd
        run: |
          call .github\scripts\msvc_build.cmd "%GITHUB_WORKSPACE%" "%RUNNER_TEMP%\head-msvc-${{ matrix.arch }}" kanzi ${{ matrix.arch }}
          if errorlevel 1 exit /b %errorlevel%
          python .github\scripts\stream_roundtrip.py "%RUNNER_TEMP%\head-msvc-${{ matrix.arch }}\kanzi.exe"
Loading