Spaces:
Running
on
Zero
Running
on
Zero
Commit
·
50712a4
1
Parent(s):
7e4a8db
Revert ":wrench: TEST no quantisation"
Browse files
This reverts commit 7e4a8dba765ad62f4ba6511e489ee1b79b3a36df.
- moral_compass_demo.py +3 -3
moral_compass_demo.py
CHANGED
|
@@ -16,7 +16,7 @@ qwen_model = AutoModelForCausalLM.from_pretrained(
|
|
| 16 |
qwen_model_name,
|
| 17 |
torch_dtype=torch.bfloat16, #testing for underflow issues
|
| 18 |
device_map="auto",
|
| 19 |
-
|
| 20 |
)
|
| 21 |
|
| 22 |
qwen_generationconfig = GenerationConfig(
|
|
@@ -58,7 +58,7 @@ llama2_pipe = pipeline(
|
|
| 58 |
model=llama2_model_id,
|
| 59 |
torch_dtype=torch.bfloat16,
|
| 60 |
device_map="auto",
|
| 61 |
-
|
| 62 |
# quantization_config=quantization_config,
|
| 63 |
)
|
| 64 |
|
|
@@ -69,7 +69,7 @@ llama3_pipe = pipeline(
|
|
| 69 |
model=llama3_model_id,
|
| 70 |
torch_dtype=torch.bfloat16,
|
| 71 |
device_map="auto",
|
| 72 |
-
|
| 73 |
)
|
| 74 |
|
| 75 |
@spaces.GPU
|
|
|
|
| 16 |
qwen_model_name,
|
| 17 |
torch_dtype=torch.bfloat16, #testing for underflow issues
|
| 18 |
device_map="auto",
|
| 19 |
+
quantization_config = quantization_config,
|
| 20 |
)
|
| 21 |
|
| 22 |
qwen_generationconfig = GenerationConfig(
|
|
|
|
| 58 |
model=llama2_model_id,
|
| 59 |
torch_dtype=torch.bfloat16,
|
| 60 |
device_map="auto",
|
| 61 |
+
model_kwargs={"quantization_config": quantization_config},
|
| 62 |
# quantization_config=quantization_config,
|
| 63 |
)
|
| 64 |
|
|
|
|
| 69 |
model=llama3_model_id,
|
| 70 |
torch_dtype=torch.bfloat16,
|
| 71 |
device_map="auto",
|
| 72 |
+
model_kwargs={"quantization_config": quantization_config},
|
| 73 |
)
|
| 74 |
|
| 75 |
@spaces.GPU
|