Update README.md
Browse files
Use BF16 as compute type
README.md
CHANGED
@@ -182,7 +182,7 @@ So, the sum of 100, 520, and 60 is 680.
|
|
182 |
from transformers import AutoTokenizer, TextStreamer
|
183 |
from intel_extension_for_transformers.transformers import AutoModelForCausalLM, WeightOnlyQuantConfig
|
184 |
model_name = "Intel/neural-chat-7b-v3-1"
|
185 |
-
config = WeightOnlyQuantConfig(compute_dtype="
|
186 |
prompt = "Once upon a time, there existed a little girl,"
|
187 |
|
188 |
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
|
|
|
182 |
from transformers import AutoTokenizer, TextStreamer
|
183 |
from intel_extension_for_transformers.transformers import AutoModelForCausalLM, WeightOnlyQuantConfig
|
184 |
model_name = "Intel/neural-chat-7b-v3-1"
|
185 |
+
config = WeightOnlyQuantConfig(compute_dtype="bf16", weight_dtype="int4")
|
186 |
prompt = "Once upon a time, there existed a little girl,"
|
187 |
|
188 |
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
|