Skip to content
Merged
Show file tree
Hide file tree
Changes from 39 commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
dc3222d
nvvm ir integration
abhilash1910 Aug 26, 2025
028a294
add test
abhilash1910 Aug 27, 2025
a418f4b
remove nvvm error handling from utils
abhilash1910 Sep 1, 2025
92af3dd
use version dependent nvvm inclusion
abhilash1910 Sep 1, 2025
bdd1671
fix nvvm compilation flow and test
abhilash1910 Sep 1, 2025
f4151bb
Merge branch 'main' into nvvm
abhilash1910 Sep 1, 2025
dc9a4e3
refactor
abhilash1910 Sep 3, 2025
22d18b9
fix unwanted rebase
abhilash1910 Sep 3, 2025
0f7fda4
fix core linter errors
abhilash1910 Sep 3, 2025
9ed8051
refactor tests
abhilash1910 Sep 3, 2025
bccda47
refactor
abhilash1910 Sep 3, 2025
64436c1
refactor
abhilash1910 Sep 3, 2025
58317d0
ruff format
abhilash1910 Sep 4, 2025
caf4f22
ruff format
abhilash1910 Sep 4, 2025
88237bc
revert changes to cuda_utils
abhilash1910 Sep 4, 2025
5e2e137
new line
abhilash1910 Sep 4, 2025
0301a4d
fix CI rm list import
abhilash1910 Sep 5, 2025
28e2d4b
use noqa
abhilash1910 Sep 7, 2025
af1008f
format
abhilash1910 Sep 7, 2025
a85a44f
verify and skip 110
abhilash1910 Sep 8, 2025
0be06aa
add flags and lto
abhilash1910 Sep 8, 2025
1c63a11
rename gpu-arch to arch
abhilash1910 Sep 15, 2025
cab6db0
change libnvvm version check
abhilash1910 Sep 15, 2025
3abcd38
format
abhilash1910 Sep 15, 2025
caf634c
compute 90
abhilash1910 Sep 15, 2025
64d19ab
Apply suggestions from code review
leofang Sep 15, 2025
d5d216c
Merge branch 'main' into nvvm
leofang Sep 15, 2025
c5993dc
update test
abhilash1910 Sep 16, 2025
5d5b1d3
use exception manager
abhilash1910 Sep 16, 2025
f6b5528
format
abhilash1910 Sep 16, 2025
c7fad0a
format ruff
abhilash1910 Sep 16, 2025
2e6e02b
[pre-commit.ci] auto code formatting
pre-commit-ci[bot] Sep 16, 2025
680d790
add release notes
abhilash1910 Sep 16, 2025
c55fa59
[pre-commit.ci] auto code formatting
pre-commit-ci[bot] Sep 16, 2025
2cbee7f
rectify quotes
abhilash1910 Sep 16, 2025
63e8d57
refix format
abhilash1910 Sep 16, 2025
94c2e56
refresh
abhilash1910 Sep 17, 2025
34bf2cc
[pre-commit.ci] auto code formatting
pre-commit-ci[bot] Sep 17, 2025
6b130bb
user major minor
abhilash1910 Sep 17, 2025
d96c848
fix test
leofang Sep 17, 2025
fcd7c0c
Merge branch 'main' into nvvm
leofang Sep 17, 2025
8331ecf
fix IR - again
leofang Sep 17, 2025
2fa944e
fix nvvm option handling
leofang Sep 17, 2025
4d32276
remove redundant IR & fix linter
leofang Sep 17, 2025
e5b5ea4
avoid extra copy + ensure compiled objcode loadable
leofang Sep 17, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
167 changes: 157 additions & 10 deletions cuda_core/cuda/core/experimental/_program.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from __future__ import annotations

import weakref
from contextlib import contextmanager
from dataclasses import dataclass
from typing import TYPE_CHECKING, Union
from warnings import warn
Expand All @@ -20,13 +21,87 @@
_handle_boolean_option,
check_or_create_options,
driver,
get_binding_version,
handle_return,
is_nested_sequence,
is_sequence,
nvrtc,
)


@contextmanager
def _nvvm_exception_manager(self):
"""
Taken from _linker.py
"""
try:
yield
except Exception as e:
error_log = ""
if hasattr(self, "_mnff"):
try:
nvvm = _get_nvvm_module()
logsize = nvvm.get_program_log_size(self._mnff.handle)
if logsize > 1:
log = bytearray(logsize)
nvvm.get_program_log(self._mnff.handle, log)
error_log = log.decode("utf-8", errors="backslashreplace")
except Exception:
error_log = ""
# Starting Python 3.11 we could also use Exception.add_note() for the same purpose, but
# unfortunately we are still supporting Python 3.9/3.10...
e.args = (e.args[0] + (f"\nNVVM program log: {error_log}" if error_log else ""), *e.args[1:])
raise e


_nvvm_module = None
_nvvm_import_attempted = False


def _get_nvvm_module():
"""
Handles the import of NVVM module with version and availability checks.
NVVM bindings were added in cuda-bindings 12.9.0, so we need to handle cases where:
1. cuda.bindings is not new enough (< 12.9.0)
2. libnvvm is not found in the Python environment

Returns:
The nvvm module if available and working

Raises:
RuntimeError: If NVVM is not available due to version or library issues
"""
global _nvvm_module, _nvvm_import_attempted

if _nvvm_import_attempted:
if _nvvm_module is None:
raise RuntimeError("NVVM module is not available (previous import attempt failed)")
return _nvvm_module

_nvvm_import_attempted = True

try:
version = get_binding_version()
if version < (12, 9):
raise RuntimeError(
f"NVVM bindings require cuda-bindings >= 12.9.0, but found {version[0]}.{version[1]}.x. "
"Please update cuda-bindings to use NVVM features."
)

from cuda.bindings import nvvm
from cuda.bindings._internal.nvvm import _inspect_function_pointer

if _inspect_function_pointer("__nvvmCreateProgram") == 0:
raise RuntimeError("NVVM library (libnvvm) is not available in this Python environment. ")

_nvvm_module = nvvm
return _nvvm_module

except RuntimeError as e:
_nvvm_module = None
raise e


def _process_define_macro_inner(formatted_options, macro):
if isinstance(macro, str):
formatted_options.append(f"--define-macro={macro}")
Expand Down Expand Up @@ -229,11 +304,10 @@ def __post_init__(self):

self._formatted_options = []
if self.arch is not None:
self._formatted_options.append(f"--gpu-architecture={self.arch}")
self._formatted_options.append(f"-arch={self.arch}")
else:
self._formatted_options.append(
"--gpu-architecture=sm_" + "".join(f"{i}" for i in Device().compute_capability)
)
self.arch = f"sm_{Device().arch}"
self._formatted_options.append(f"-arch={self.arch}")
if self.relocatable_device_code is not None:
self._formatted_options.append(
f"--relocatable-device-code={_handle_boolean_option(self.relocatable_device_code)}"
Expand Down Expand Up @@ -370,28 +444,33 @@ class Program:
code : Any
String of the CUDA Runtime Compilation program.
code_type : Any
String of the code type. Currently ``"ptx"`` and ``"c++"`` are supported.
String of the code type. Currently ``"ptx"``, ``"c++"``, and ``"nvvm"`` are supported.
options : ProgramOptions, optional
A ProgramOptions object to customize the compilation process.
See :obj:`ProgramOptions` for more information.
"""

class _MembersNeededForFinalize:
__slots__ = "handle"
__slots__ = "handle", "backend"

def __init__(self, program_obj, handle):
def __init__(self, program_obj, handle, backend):
self.handle = handle
self.backend = backend
weakref.finalize(program_obj, self.close)

def close(self):
if self.handle is not None:
handle_return(nvrtc.nvrtcDestroyProgram(self.handle))
if self.backend == "NVRTC":
handle_return(nvrtc.nvrtcDestroyProgram(self.handle))
elif self.backend == "NVVM":
nvvm = _get_nvvm_module()
nvvm.destroy_program(self.handle)
self.handle = None

__slots__ = ("__weakref__", "_mnff", "_backend", "_linker", "_options")

def __init__(self, code, code_type, options: ProgramOptions = None):
self._mnff = Program._MembersNeededForFinalize(self, None)
self._mnff = Program._MembersNeededForFinalize(self, None, None)

self._options = options = check_or_create_options(ProgramOptions, options, "Program options")
code_type = code_type.lower()
Expand All @@ -402,6 +481,7 @@ def __init__(self, code, code_type, options: ProgramOptions = None):
# TODO: allow tuples once NVIDIA/cuda-python#72 is resolved

self._mnff.handle = handle_return(nvrtc.nvrtcCreateProgram(code.encode(), options._name, 0, [], []))
self._mnff.backend = "NVRTC"
self._backend = "NVRTC"
self._linker = None

Expand All @@ -411,8 +491,22 @@ def __init__(self, code, code_type, options: ProgramOptions = None):
ObjectCode._init(code.encode(), code_type), options=self._translate_program_options(options)
)
self._backend = self._linker.backend

elif code_type == "nvvm":
if isinstance(code, str):
code = code.encode("utf-8")
elif not isinstance(code, (bytes, bytearray)):
raise TypeError("NVVM IR code must be provided as str, bytes, or bytearray")

nvvm = _get_nvvm_module()
self._mnff.handle = nvvm.create_program()
self._mnff.backend = "NVVM"
nvvm.add_module_to_program(self._mnff.handle, code, len(code), options._name.decode())
self._backend = "NVVM"
self._linker = None

else:
supported_code_types = ("c++", "ptx")
supported_code_types = ("c++", "ptx", "nvvm")
assert code_type not in supported_code_types, f"{code_type=}"
raise RuntimeError(f"Unsupported {code_type=} ({supported_code_types=})")

Expand All @@ -433,6 +527,33 @@ def _translate_program_options(self, options: ProgramOptions) -> LinkerOptions:
ptxas_options=options.ptxas_options,
)

def _translate_program_options_to_nvvm(self, options: ProgramOptions) -> list[str]:
"""Translate ProgramOptions to NVVM-specific compilation options."""
nvvm_options = []

assert options.arch is not None
arch = options.arch
if arch.startswith("sm_"):
arch = f"compute_{arch[3:]}"
nvvm_options.append(f"-arch={arch}")
if options.debug:
nvvm_options.append("-g")
if options.device_code_optimize is False:
nvvm_options.append("-opt=0")
elif options.device_code_optimize is True:
nvvm_options.append("-opt=3")

if options.ftz is not None:
nvvm_options.append(f"-ftz={'true' if options.ftz else 'false'}")
if options.prec_sqrt is not None:
nvvm_options.append(f"-prec-sqrt={'true' if options.prec_sqrt else 'false'}")
if options.prec_div is not None:
nvvm_options.append(f"-prec-div={'true' if options.prec_div else 'false'}")
if options.fma is not None:
nvvm_options.append(f"-fma={'true' if options.fma else 'false'}")

return nvvm_options

def close(self):
"""Destroy this program."""
if self._linker:
Expand Down Expand Up @@ -513,6 +634,32 @@ def compile(self, target_type, name_expressions=(), logs=None):

return ObjectCode._init(data, target_type, symbol_mapping=symbol_mapping, name=self._options.name)

elif self._backend == "NVVM":
if target_type not in ("ptx", "ltoir"):
raise ValueError(f'NVVM backend only supports target_type="ptx", "ltoir", got "{target_type}"')

nvvm_options = self._translate_program_options_to_nvvm(self._options)
if target_type == "ltoir" and "-gen-lto" not in nvvm_options:
nvvm_options.append("-gen-lto")
nvvm = _get_nvvm_module()
with _nvvm_exception_manager(self):
nvvm.verify_program(self._mnff.handle, len(nvvm_options), nvvm_options)
nvvm.compile_program(self._mnff.handle, len(nvvm_options), nvvm_options)

size = nvvm.get_compiled_result_size(self._mnff.handle)
data = bytearray(size)
nvvm.get_compiled_result(self._mnff.handle, data)

if logs is not None:
logsize = nvvm.get_program_log_size(self._mnff.handle)
if logsize > 1:
log = bytearray(logsize)
nvvm.get_program_log(self._mnff.handle, log)
logs.write(log.decode("utf-8", errors="backslashreplace"))

data_bytes = bytes(data)
return ObjectCode._init(data_bytes, target_type, name=self._options.name)

supported_backends = ("nvJitLink", "driver")
if self._backend not in supported_backends:
raise ValueError(f'Unsupported backend="{self._backend}" ({supported_backends=})')
Expand Down
2 changes: 1 addition & 1 deletion cuda_core/docs/source/release/0.X.Y-notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ New features

- Added :attr:`Device.arch` property that returns the compute capability as a string (e.g., '75' for CC 7.5), providing a convenient alternative to manually concatenating the compute capability tuple.
- CUDA 13.x testing support through new ``test-cu13`` dependency group.

- Added NVVM IR support to :class:`Program`. NVVM IR is now understood with ``code_type="nvvm"``.

New examples
------------
Expand Down
Loading