Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
dc3222d
nvvm ir integration
abhilash1910 Aug 26, 2025
028a294
add test
abhilash1910 Aug 27, 2025
a418f4b
remove nvvm error handling from utils
abhilash1910 Sep 1, 2025
92af3dd
use version dependent nvvm inclusion
abhilash1910 Sep 1, 2025
bdd1671
fix nvvm compilation flow and test
abhilash1910 Sep 1, 2025
f4151bb
Merge branch 'main' into nvvm
abhilash1910 Sep 1, 2025
dc9a4e3
refactor
abhilash1910 Sep 3, 2025
22d18b9
fix unwanted rebase
abhilash1910 Sep 3, 2025
0f7fda4
fix core linter errors
abhilash1910 Sep 3, 2025
9ed8051
refactor tests
abhilash1910 Sep 3, 2025
bccda47
refactor
abhilash1910 Sep 3, 2025
64436c1
refactor
abhilash1910 Sep 3, 2025
58317d0
ruff format
abhilash1910 Sep 4, 2025
caf4f22
ruff format
abhilash1910 Sep 4, 2025
88237bc
revert changes to cuda_utils
abhilash1910 Sep 4, 2025
5e2e137
new line
abhilash1910 Sep 4, 2025
0301a4d
fix CI rm list import
abhilash1910 Sep 5, 2025
28e2d4b
use noqa
abhilash1910 Sep 7, 2025
af1008f
format
abhilash1910 Sep 7, 2025
a85a44f
verify and skip 110
abhilash1910 Sep 8, 2025
0be06aa
add flags and lto
abhilash1910 Sep 8, 2025
1c63a11
rename gpu-arch to arch
abhilash1910 Sep 15, 2025
cab6db0
change libnvvm version check
abhilash1910 Sep 15, 2025
3abcd38
format
abhilash1910 Sep 15, 2025
caf634c
compute 90
abhilash1910 Sep 15, 2025
64d19ab
Apply suggestions from code review
leofang Sep 15, 2025
d5d216c
Merge branch 'main' into nvvm
leofang Sep 15, 2025
c5993dc
update test
abhilash1910 Sep 16, 2025
5d5b1d3
use exception manager
abhilash1910 Sep 16, 2025
f6b5528
format
abhilash1910 Sep 16, 2025
c7fad0a
format ruff
abhilash1910 Sep 16, 2025
2e6e02b
[pre-commit.ci] auto code formatting
pre-commit-ci[bot] Sep 16, 2025
680d790
add release notes
abhilash1910 Sep 16, 2025
c55fa59
[pre-commit.ci] auto code formatting
pre-commit-ci[bot] Sep 16, 2025
2cbee7f
rectify quotes
abhilash1910 Sep 16, 2025
63e8d57
refix format
abhilash1910 Sep 16, 2025
94c2e56
refresh
abhilash1910 Sep 17, 2025
34bf2cc
[pre-commit.ci] auto code formatting
pre-commit-ci[bot] Sep 17, 2025
6b130bb
use major minor
abhilash1910 Sep 17, 2025
d96c848
fix test
leofang Sep 17, 2025
fcd7c0c
Merge branch 'main' into nvvm
leofang Sep 17, 2025
8331ecf
fix IR - again
leofang Sep 17, 2025
2fa944e
fix nvvm option handling
leofang Sep 17, 2025
4d32276
remove redundant IR & fix linter
leofang Sep 17, 2025
e5b5ea4
avoid extra copy + ensure compiled objcode loadable
leofang Sep 17, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions cuda_core/cuda/core/experimental/_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from cuda.core.experimental._stream import Stream
from cuda.core.experimental._utils.clear_error_support import (
assert_type,
assert_type_str_or_bytes,
assert_type_str_or_bytes_like,
raise_code_path_meant_to_be_unreachable,
)
from cuda.core.experimental._utils.cuda_utils import driver, get_binding_version, handle_return, precondition
Expand Down Expand Up @@ -615,14 +615,14 @@ def _lazy_load_module(self, *args, **kwargs):
if self._handle is not None:
return
module = self._module
assert_type_str_or_bytes(module)
assert_type_str_or_bytes_like(module)
if isinstance(module, str):
if self._backend_version == "new":
self._handle = handle_return(self._loader["file"](module.encode(), [], [], 0, [], [], 0))
else: # "old" backend
self._handle = handle_return(self._loader["file"](module.encode()))
return
if isinstance(module, bytes):
if isinstance(module, (bytes, bytearray)):
if self._backend_version == "new":
self._handle = handle_return(self._loader["data"](module, [], [], 0, [], [], 0))
else: # "old" backend
Expand Down
166 changes: 156 additions & 10 deletions cuda_core/cuda/core/experimental/_program.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from __future__ import annotations

import weakref
from contextlib import contextmanager
from dataclasses import dataclass
from typing import TYPE_CHECKING, Union
from warnings import warn
Expand All @@ -20,13 +21,87 @@
_handle_boolean_option,
check_or_create_options,
driver,
get_binding_version,
handle_return,
is_nested_sequence,
is_sequence,
nvrtc,
)


@contextmanager
def _nvvm_exception_manager(self):
"""
Taken from _linker.py
"""
try:
yield
except Exception as e:
error_log = ""
if hasattr(self, "_mnff"):
try:
nvvm = _get_nvvm_module()
logsize = nvvm.get_program_log_size(self._mnff.handle)
if logsize > 1:
log = bytearray(logsize)
nvvm.get_program_log(self._mnff.handle, log)
error_log = log.decode("utf-8", errors="backslashreplace")
except Exception:
error_log = ""
# Starting Python 3.11 we could also use Exception.add_note() for the same purpose, but
# unfortunately we are still supporting Python 3.9/3.10...
e.args = (e.args[0] + (f"\nNVVM program log: {error_log}" if error_log else ""), *e.args[1:])
raise e


# Cache for the imported cuda.bindings.nvvm module; populated by
# _get_nvvm_module() on first successful import.
_nvvm_module = None
# Set to True after the first import attempt (successful or not) so a failed
# import is not retried on every call.
_nvvm_import_attempted = False


def _get_nvvm_module():
    """
    Handles the import of NVVM module with version and availability checks.
    NVVM bindings were added in cuda-bindings 12.9.0, so we need to handle cases where:
    1. cuda.bindings is not new enough (< 12.9.0)
    2. libnvvm is not found in the Python environment

    Returns:
        The nvvm module if available and working

    Raises:
        RuntimeError: If NVVM is not available due to version or library issues
    """
    global _nvvm_module, _nvvm_import_attempted

    if _nvvm_import_attempted:
        # Do not retry a failed import; fail fast with a stable message.
        if _nvvm_module is None:
            raise RuntimeError("NVVM module is not available (previous import attempt failed)")
        return _nvvm_module

    _nvvm_import_attempted = True

    try:
        version = get_binding_version()
        if version < (12, 9):
            raise RuntimeError(
                f"NVVM bindings require cuda-bindings >= 12.9.0, but found {version[0]}.{version[1]}.x. "
                "Please update cuda-bindings to use NVVM features."
            )

        from cuda.bindings import nvvm
        from cuda.bindings._internal.nvvm import _inspect_function_pointer

        # A null function pointer means the bindings are importable but
        # libnvvm itself could not be located/loaded.
        if _inspect_function_pointer("__nvvmCreateProgram") == 0:
            raise RuntimeError("NVVM library (libnvvm) is not available in this Python environment.")

        _nvvm_module = nvvm
        return _nvvm_module

    except ImportError as e:
        # Normalize import failures to the documented RuntimeError contract
        # (the original code let ImportError escape despite the docstring).
        raise RuntimeError("NVVM bindings could not be imported from cuda.bindings.") from e


def _process_define_macro_inner(formatted_options, macro):
if isinstance(macro, str):
formatted_options.append(f"--define-macro={macro}")
Expand Down Expand Up @@ -229,11 +304,10 @@ def __post_init__(self):

self._formatted_options = []
if self.arch is not None:
self._formatted_options.append(f"--gpu-architecture={self.arch}")
self._formatted_options.append(f"-arch={self.arch}")
else:
self._formatted_options.append(
"--gpu-architecture=sm_" + "".join(f"{i}" for i in Device().compute_capability)
)
self.arch = f"sm_{Device().arch}"
self._formatted_options.append(f"-arch={self.arch}")
if self.relocatable_device_code is not None:
self._formatted_options.append(
f"--relocatable-device-code={_handle_boolean_option(self.relocatable_device_code)}"
Expand Down Expand Up @@ -370,28 +444,33 @@ class Program:
code : Any
String of the CUDA Runtime Compilation program.
code_type : Any
String of the code type. Currently ``"ptx"`` and ``"c++"`` are supported.
String of the code type. Currently ``"ptx"``, ``"c++"``, and ``"nvvm"`` are supported.
options : ProgramOptions, optional
A ProgramOptions object to customize the compilation process.
See :obj:`ProgramOptions` for more information.
"""

class _MembersNeededForFinalize:
__slots__ = "handle"
__slots__ = "handle", "backend"

def __init__(self, program_obj, handle):
def __init__(self, program_obj, handle, backend):
self.handle = handle
self.backend = backend
weakref.finalize(program_obj, self.close)

def close(self):
if self.handle is not None:
handle_return(nvrtc.nvrtcDestroyProgram(self.handle))
if self.backend == "NVRTC":
handle_return(nvrtc.nvrtcDestroyProgram(self.handle))
elif self.backend == "NVVM":
nvvm = _get_nvvm_module()
nvvm.destroy_program(self.handle)
self.handle = None

__slots__ = ("__weakref__", "_mnff", "_backend", "_linker", "_options")

def __init__(self, code, code_type, options: ProgramOptions = None):
self._mnff = Program._MembersNeededForFinalize(self, None)
self._mnff = Program._MembersNeededForFinalize(self, None, None)

self._options = options = check_or_create_options(ProgramOptions, options, "Program options")
code_type = code_type.lower()
Expand All @@ -402,6 +481,7 @@ def __init__(self, code, code_type, options: ProgramOptions = None):
# TODO: allow tuples once NVIDIA/cuda-python#72 is resolved

self._mnff.handle = handle_return(nvrtc.nvrtcCreateProgram(code.encode(), options._name, 0, [], []))
self._mnff.backend = "NVRTC"
self._backend = "NVRTC"
self._linker = None

Expand All @@ -411,8 +491,22 @@ def __init__(self, code, code_type, options: ProgramOptions = None):
ObjectCode._init(code.encode(), code_type), options=self._translate_program_options(options)
)
self._backend = self._linker.backend

elif code_type == "nvvm":
if isinstance(code, str):
code = code.encode("utf-8")
elif not isinstance(code, (bytes, bytearray)):
raise TypeError("NVVM IR code must be provided as str, bytes, or bytearray")

nvvm = _get_nvvm_module()
self._mnff.handle = nvvm.create_program()
self._mnff.backend = "NVVM"
nvvm.add_module_to_program(self._mnff.handle, code, len(code), options._name.decode())
self._backend = "NVVM"
self._linker = None

else:
supported_code_types = ("c++", "ptx")
supported_code_types = ("c++", "ptx", "nvvm")
assert code_type not in supported_code_types, f"{code_type=}"
raise RuntimeError(f"Unsupported {code_type=} ({supported_code_types=})")

Expand All @@ -433,6 +527,33 @@ def _translate_program_options(self, options: ProgramOptions) -> LinkerOptions:
ptxas_options=options.ptxas_options,
)

def _translate_program_options_to_nvvm(self, options: ProgramOptions) -> list[str]:
"""Translate ProgramOptions to NVVM-specific compilation options."""
nvvm_options = []

assert options.arch is not None
arch = options.arch
if arch.startswith("sm_"):
arch = f"compute_{arch[3:]}"
nvvm_options.append(f"-arch={arch}")
if options.debug:
nvvm_options.append("-g")
if options.device_code_optimize is False:
nvvm_options.append("-opt=0")
elif options.device_code_optimize is True:
nvvm_options.append("-opt=3")
# NVVM is not consistent with NVRTC, it uses 0/1 instead...
if options.ftz is not None:
nvvm_options.append(f"-ftz={'1' if options.ftz else '0'}")
if options.prec_sqrt is not None:
nvvm_options.append(f"-prec-sqrt={'1' if options.prec_sqrt else '0'}")
if options.prec_div is not None:
nvvm_options.append(f"-prec-div={'1' if options.prec_div else '0'}")
if options.fma is not None:
nvvm_options.append(f"-fma={'1' if options.fma else '0'}")

return nvvm_options

def close(self):
"""Destroy this program."""
if self._linker:
Expand Down Expand Up @@ -513,6 +634,31 @@ def compile(self, target_type, name_expressions=(), logs=None):

return ObjectCode._init(data, target_type, symbol_mapping=symbol_mapping, name=self._options.name)

elif self._backend == "NVVM":
if target_type not in ("ptx", "ltoir"):
raise ValueError(f'NVVM backend only supports target_type="ptx", "ltoir", got "{target_type}"')

nvvm_options = self._translate_program_options_to_nvvm(self._options)
if target_type == "ltoir" and "-gen-lto" not in nvvm_options:
nvvm_options.append("-gen-lto")
nvvm = _get_nvvm_module()
with _nvvm_exception_manager(self):
nvvm.verify_program(self._mnff.handle, len(nvvm_options), nvvm_options)
nvvm.compile_program(self._mnff.handle, len(nvvm_options), nvvm_options)

size = nvvm.get_compiled_result_size(self._mnff.handle)
data = bytearray(size)
nvvm.get_compiled_result(self._mnff.handle, data)

if logs is not None:
logsize = nvvm.get_program_log_size(self._mnff.handle)
if logsize > 1:
log = bytearray(logsize)
nvvm.get_program_log(self._mnff.handle, log)
logs.write(log.decode("utf-8", errors="backslashreplace"))

return ObjectCode._init(data, target_type, name=self._options.name)

supported_backends = ("nvJitLink", "driver")
if self._backend not in supported_backends:
raise ValueError(f'Unsupported backend="{self._backend}" ({supported_backends=})')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@ def assert_type(obj, expected_type):
raise TypeError(f"Expected type {expected_type.__name__}, but got {type(obj).__name__}")


def assert_type_str_or_bytes_like(obj):
    """Ensure obj is of type str, bytes, or bytearray, else raise TypeError with a clear message."""
    if not isinstance(obj, (str, bytes, bytearray)):
        raise TypeError(f"Expected type str or bytes or bytearray, but got {type(obj).__name__}")


def raise_code_path_meant_to_be_unreachable():
Expand Down
1 change: 1 addition & 0 deletions cuda_core/docs/source/release/0.X.Y-notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ New features
- Added :attr:`Device.arch` property that returns the compute capability as a string (e.g., '75' for CC 7.5), providing a convenient alternative to manually concatenating the compute capability tuple.
- CUDA 13.x testing support through new ``test-cu13`` dependency group.
- Stream-ordered memory allocation can now be shared on Linux via :class:`DeviceMemoryResource`.
- Added NVVM IR support to :class:`Program`. NVVM IR is now understood with ``code_type="nvvm"``.


New examples
Expand Down
Loading
Loading