Spaces:
Runtime error
Runtime error
Commit
·
268bc7e
1
Parent(s):
a0d59c2
added disk_offload function
Browse files
app.py
CHANGED
@@ -26,6 +26,7 @@ locale.getpreferredencoding = getpreferredencoding
|
|
26 |
|
27 |
from torch import cuda, bfloat16
|
28 |
import transformers
|
|
|
29 |
|
30 |
# Model used
|
31 |
model_id = 'meta-llama/Llama-2-7b-chat-hf'
|
@@ -52,14 +53,14 @@ model_config = transformers.AutoConfig.from_pretrained(
|
|
52 |
)
|
53 |
|
54 |
# Downloading and Initializing the model
|
55 |
-
model = transformers.AutoModelForCausalLM.from_pretrained(
|
56 |
model_id,
|
57 |
trust_remote_code=True,
|
58 |
config=model_config,
|
59 |
# quantization_config=bnb_config,
|
60 |
device_map='auto',
|
61 |
token=hf_auth
|
62 |
-
)
|
63 |
|
64 |
# enable evaluation mode to allow model inference
|
65 |
model.eval()
|
|
|
26 |
|
27 |
from torch import cuda, bfloat16
|
28 |
import transformers
|
29 |
+
from accelerate import disk_offload
|
30 |
|
31 |
# Model used
|
32 |
model_id = 'meta-llama/Llama-2-7b-chat-hf'
|
|
|
53 |
)
|
54 |
|
55 |
# Downloading and Initializing the model
|
56 |
+
model = disk_offload(transformers.AutoModelForCausalLM.from_pretrained(
|
57 |
model_id,
|
58 |
trust_remote_code=True,
|
59 |
config=model_config,
|
60 |
# quantization_config=bnb_config,
|
61 |
device_map='auto',
|
62 |
token=hf_auth
|
63 |
+
))
|
64 |
|
65 |
# enable evaluation mode to allow model inference
|
66 |
model.eval()
|