pip install --extra-index-url https://download.pytorch.org/whl/cu116 torch==1.12.1+cu116
pip install transformers==4.22.1
pip install accelerate==0.12.0
pip install bitsandbytes==0.33.1
from transformers import pipeline
auto_map = False
load_in_8bit = True
if auto_map:
device_map = "auto"
else:
device_map = {
"transformer.wte": 0,
"transformer.wpe": 0,
"transformer.ln_f": "cpu",
"lm_head": 0,
"transformer.h.0": 0,
"transformer.h.1": "cpu",
"transformer.h.2": "cpu",
"transformer.h.3": "cpu",
"transformer.h.4": "cpu",
"transformer.h.5": "cpu",
"transformer.h.6": "cpu",
"transformer.h.7": "cpu",
"transformer.h.8": "cpu",
"transformer.h.9": "cpu",
"transformer.h.10": "cpu",
"transformer.h.11": "cpu"
}
pipe = pipeline(
model="EleutherAI/gpt-neo-125M",
max_length=32,
model_kwargs={
"device_map": device_map,
"load_in_8bit": load_in_8bit
}
)
print("\n", pipe("It was")[0]["generated_text"])
❯ python main.py
Downloading: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 1.44k/1.44k [00:00<00:00, 634kB/s]
===================================BUG REPORT===================================
Welcome to bitsandbytes. For bug reports, please submit your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
For effortless bug reporting copy-paste your error into this form: https://docs.google.com/forms/d/e/1FAIpQLScPB8emS3Thkp66nvqwmjTEgxp8Y9ufuWTzFyr9kJ5AoI47dQ/viewform?usp=sf_link
================================================================================
/home/user/test/bnb-test/.venv/lib/python3.10/site-packages/bitsandbytes/cuda_setup/paths.py:20: UserWarning: WARNING: The following directories listed in your path were found to be non-existent: {PosixPath('/home/user/.gtkrc'), PosixPath('/etc/gtk/gtkrc')}
[... lots of similar warnings about non-existent paths ...]
CUDA_SETUP: WARNING! libcudart.so not found in any environmental path. Searching /usr/local/cuda/lib64...
CUDA SETUP: CUDA runtime path found: /usr/local/cuda/lib64/libcudart.so
CUDA SETUP: Highest compute capability among GPUs detected: 8.6
CUDA SETUP: Detected CUDA version 117
CUDA SETUP: Loading binary /home/user/test/bnb-test/.venv/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda117.so...
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Traceback (most recent call last):
File "/home/user/test/bnb-test/main.py", line 37, in <module>
print("\n", pipe("It was")[0]["generated_text"])
File "/home/user/test/bnb-test/.venv/lib/python3.10/site-packages/transformers/pipelines/text_generation.py", line 176, in __call__
return super().__call__(text_inputs, **kwargs)
File "/home/user/test/bnb-test/.venv/lib/python3.10/site-packages/transformers/pipelines/base.py", line 1074, in __call__
return self.run_single(inputs, preprocess_params, forward_params, postprocess_params)
File "/home/user/test/bnb-test/.venv/lib/python3.10/site-packages/transformers/pipelines/base.py", line 1081, in run_single
model_outputs = self.forward(model_inputs, **forward_params)
File "/home/user/test/bnb-test/.venv/lib/python3.10/site-packages/transformers/pipelines/base.py", line 990, in forward
model_outputs = self._forward(model_inputs, **forward_params)
File "/home/user/test/bnb-test/.venv/lib/python3.10/site-packages/transformers/pipelines/text_generation.py", line 218, in _forward
generated_sequence = self.model.generate(input_ids=input_ids, attention_mask=attention_mask, **generate_kwargs)
File "/home/user/test/bnb-test/.venv/lib/python3.10/site-packages/torch/autograd/grad_mode.py", line 27, in decorate_context
return func(*args, **kwargs)
File "/home/user/test/bnb-test/.venv/lib/python3.10/site-packages/transformers/generation_utils.py", line 1319, in generate
return self.greedy_search(
File "/home/user/test/bnb-test/.venv/lib/python3.10/site-packages/transformers/generation_utils.py", line 1713, in greedy_search
outputs = self(
File "/home/user/test/bnb-test/.venv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "/home/user/test/bnb-test/.venv/lib/python3.10/site-packages/accelerate/hooks.py", line 148, in new_forward
output = old_forward(*args, **kwargs)
File "/home/user/test/bnb-test/.venv/lib/python3.10/site-packages/transformers/models/gpt_neo/modeling_gpt_neo.py", line 744, in forward
transformer_outputs = self.transformer(
File "/home/user/test/bnb-test/.venv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "/home/user/test/bnb-test/.venv/lib/python3.10/site-packages/transformers/models/gpt_neo/modeling_gpt_neo.py", line 623, in forward
outputs = block(
File "/home/user/test/bnb-test/.venv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "/home/user/test/bnb-test/.venv/lib/python3.10/site-packages/accelerate/hooks.py", line 148, in new_forward
output = old_forward(*args, **kwargs)
File "/home/user/test/bnb-test/.venv/lib/python3.10/site-packages/transformers/models/gpt_neo/modeling_gpt_neo.py", line 328, in forward
attn_outputs = self.attn(
File "/home/user/test/bnb-test/.venv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "/home/user/test/bnb-test/.venv/lib/python3.10/site-packages/accelerate/hooks.py", line 148, in new_forward
output = old_forward(*args, **kwargs)
File "/home/user/test/bnb-test/.venv/lib/python3.10/site-packages/transformers/models/gpt_neo/modeling_gpt_neo.py", line 280, in forward
return self.attention(
File "/home/user/test/bnb-test/.venv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "/home/user/test/bnb-test/.venv/lib/python3.10/site-packages/accelerate/hooks.py", line 148, in new_forward
output = old_forward(*args, **kwargs)
File "/home/user/test/bnb-test/.venv/lib/python3.10/site-packages/transformers/models/gpt_neo/modeling_gpt_neo.py", line 224, in forward
query = self.q_proj(hidden_states)
File "/home/user/test/bnb-test/.venv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "/home/user/test/bnb-test/.venv/lib/python3.10/site-packages/accelerate/hooks.py", line 148, in new_forward
output = old_forward(*args, **kwargs)
File "/home/user/test/bnb-test/.venv/lib/python3.10/site-packages/bitsandbytes/nn/modules.py", line 256, in forward
out = bnb.matmul(x, self.weight, bias=self.bias, state=self.state)
File "/home/user/test/bnb-test/.venv/lib/python3.10/site-packages/bitsandbytes/autograd/_functions.py", line 391, in matmul
return MatMul8bitLt.apply(A, B, out, bias, state)
File "/home/user/test/bnb-test/.venv/lib/python3.10/site-packages/bitsandbytes/autograd/_functions.py", line 254, in forward
state.CxB, state.SB = F.transform(state.CB, to_order=formatB)
File "/home/user/test/bnb-test/.venv/lib/python3.10/site-packages/bitsandbytes/functional.py", line 1604, in transform
prev_device = pre_call(A.device)
AttributeError: 'NoneType' object has no attribute 'device'
It seems like
bitsandbytescan't be used if the model is shared between GPU and CPU.I could not find any info saying that the entire model must be loaded in GPU in order to use
bitsandbytes,so I'm not sure if this is a bug or the expected behavior.
The environment setup:
The
main.pyscript:The
auto_mapandload_in_8bitcontrol the script settings.When you run the script with
auto_map = Falseandload_in_8bit = Truethen it crashes with this error:All other combinations of
auto_mapandload_in_8bitproduce no error and give thegenerated_text.