Update README.md
README.md CHANGED
@@ -39,13 +39,25 @@ To import this model with Python and run it, you can use the following code:
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
 model_id = "azhiboedova/Meta-Llama-3.1-8B-Instruct-AQLM-2Bit-1x16"
-tokenizer = AutoTokenizer.from_pretrained(model_id)
-model = AutoModelForCausalLM.from_pretrained(model_id)
 
-
-
-
-
+pipeline = transformers.pipeline(
+    "text-generation",
+    model=model_id,
+    model_kwargs={"torch_dtype": torch.bfloat16},
+    device_map="auto",
+)
+
+messages = [
+    {"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"},
+    {"role": "user", "content": "Translate English to German!: How are you?"},
+]
+
+outputs = pipeline(
+    messages,
+    max_new_tokens=256,
+)
+
+print(outputs[0]["generated_text"][-1]["content"])
 ```
 
 **Model Developers**
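Note that the snippet as committed calls `transformers.pipeline` and `torch.bfloat16` while importing only `AutoModelForCausalLM` and `AutoTokenizer`, so it will raise a `NameError` if run verbatim. A minimal self-contained version of the new usage, with the missing module-level imports added (and assuming the `aqlm` package is installed, which `transformers` requires for loading AQLM-quantized checkpoints such as this one), would look like:

```python
# Runnable sketch of the updated README snippet. The `import torch` and
# `import transformers` lines are additions here; the committed snippet
# references both modules without importing them.
import torch
import transformers

model_id = "azhiboedova/Meta-Llama-3.1-8B-Instruct-AQLM-2Bit-1x16"

# Build a chat text-generation pipeline; device_map="auto" places the
# model on available accelerators.
pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={"torch_dtype": torch.bfloat16},
    device_map="auto",
)

messages = [
    {"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"},
    {"role": "user", "content": "Translate English to German!: How are you?"},
]

outputs = pipeline(
    messages,
    max_new_tokens=256,
)

# With chat-style input, "generated_text" holds the full message list;
# the last entry is the assistant's reply.
print(outputs[0]["generated_text"][-1]["content"])
```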