Skip to content
This repository was archived by the owner on Feb 24, 2026. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
c4853ec
Refactor Simplify function to handle multiple functions in IRModule
LeiWang1999 Oct 16, 2024
9a21acf
Update submodule commit reference
LeiWang1999 Oct 17, 2024
f8d046b
Add CUDA_DEVICE_ORDER environment variable to bashrc
LeiWang1999 Oct 17, 2024
c1371dd
test fix
LeiWang1999 Oct 17, 2024
416cad2
lint fix
LeiWang1999 Oct 17, 2024
9209d1e
Refactor test_general_matmul_bf16.py to use bitblas.testing.main()
LeiWang1999 Oct 17, 2024
1cf7570
Update submodule commit reference
LeiWang1999 Oct 17, 2024
5fec040
Update Ubuntu version in install scripts based on LLVM version
LeiWang1999 Oct 18, 2024
4e1a0d2
Update Ubuntu version in install scripts based on LLVM version
LeiWang1999 Oct 18, 2024
fa85f8c
Update submodule commit reference
LeiWang1999 Oct 19, 2024
429d5b5
Update submodule commit reference
LeiWang1999 Oct 19, 2024
4003509
Update submodule commit reference
LeiWang1999 Oct 20, 2024
1d86582
Merge branch 'main' of https://github.com/microsoft/BitBLAS into amd_hip
LeiWang1999 Oct 20, 2024
df3af0d
Update submodule commit reference
LeiWang1999 Oct 28, 2024
1f1e027
Merge branch 'main' of https://github.com/microsoft/BitBLAS into amd_hip
LeiWang1999 Oct 28, 2024
732dda6
Update submodule commit reference
LeiWang1999 Oct 29, 2024
ebffbfa
Merge branch 'main' of https://github.com/microsoft/BitBLAS into amd_hip
LeiWang1999 Oct 29, 2024
ff227fa
Merge branch 'main' of https://github.com/microsoft/BitBLAS into amd_hip
LeiWang1999 Nov 4, 2024
ac62936
[Dev] Update subproject commit for TVM
LeiWang1999 Nov 7, 2024
a7a239c
ignore profiler directories.
LeiWang1999 Nov 7, 2024
dcedbde
MFMA Support
LeiWang1999 Nov 7, 2024
e0b36f5
lint fix
LeiWang1999 Nov 7, 2024
fe668f9
Merge branch 'main' of https://github.com/microsoft/BitBLAS into amd_hip
LeiWang1999 Nov 7, 2024
3579c6b
MFMA Fixed.
LeiWang1999 Nov 8, 2024
e60ccd9
merge upstream
LeiWang1999 Nov 8, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion 3rdparty/tvm
Submodule tvm updated from c6be66 to 180359
6 changes: 3 additions & 3 deletions bitblas/tl/mfma_macro_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ def ldmatrix_a(self, A_local_buf, A_shared_buf, ki, thread_bindings, rk=0):
WARP_SIZE = self.WARP_SIZE
block_row_warps = self.block_row_warps
block_col_warps = self.block_col_warps
- warp_col_tiles = self.warp_col_tiles
+ warp_row_tiles = self.warp_row_tiles
warp_cols = self.warp_cols
chunk = self.chunk
micro_size_x = self.micro_size_x
Expand All @@ -171,13 +171,13 @@ def _warp_ldmatrix_a(
for local_id in T.vectorized(local_size_a):
row, col = T.meta_var(reverse_index_map(tx, local_id))
l, r = (rk * chunk + ki * micro_size_k,
- tz * warp_col_tiles + i * micro_size_x)
+ tz * warp_row_tiles + i * micro_size_x)
A_local_buf[i * local_size_a + local_id] = A_shared_buf[l + row, r + col]
else:
for i in T.serial(warp_cols):
for local_id in T.vectorized(local_size_a):
row, col = T.meta_var(reverse_index_map(tx, local_id))
- l, r = (tz * warp_col_tiles + i * micro_size_x,
+ l, r = (tz * warp_row_tiles + i * micro_size_x,
rk * chunk + ki * micro_size_k)
A_local_buf[i * local_size_a + local_id] = A_shared_buf[l + row, r + col]

Expand Down