---
license: apache-2.0
---

## How to use

This instruction-tuned model uses a chat template that must be applied to the input for conversational use.
The easiest way to apply it is with the tokenizer's built-in `apply_chat_template` method, as shown in the following snippet.

```python
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

model_id = "BSC-LT/salamandra7b_rag_prompt_ca-en-es"

prompt = (
    "Here is a question that you should answer based on the given context. "
    "Write a response that answers the question using only information "
    "provided in the context. Provide the answer in Spanish."
)

context = """Water boils at 100°C (212°F) at standard atmospheric pressure, which is at sea level.
However, this boiling point can vary depending on altitude and atmospheric pressure.
At higher altitudes, where atmospheric pressure is lower, water boils at a lower temperature.
For example, at 2,000 meters (about 6,600 feet) above sea level, water boils at around 93°C (199°F).
"""
instruction = "At what temperature does water boil?"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="cuda",
    torch_dtype=torch.bfloat16,
)

# Assemble the user turn: task description, retrieved context, and question.
content = f"{prompt}\n\nContext:\n{context}\n\nQuestion:\n{instruction}"
chat = [{"role": "user", "content": content}]

# Render the conversation with the model's chat template.
text = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)

# Stop generation at either the EOS token or the end-of-turn token.
eos_tokens = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|im_end|>"),
]

inputs = tokenizer.encode(text, add_special_tokens=False, return_tensors="pt")
outputs = model.generate(input_ids=inputs.to(model.device), eos_token_id=eos_tokens, max_new_tokens=200)

print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```
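
For repeated retrieval-augmented queries, the steps above can be wrapped in a small helper. The following is a minimal sketch rather than part of the official usage: the function name `answer_with_context` is illustrative, and it reuses the `tokenizer`, `model`, and `eos_tokens` objects created in the snippet above.

```python
def answer_with_context(question: str, context: str, language: str = "Spanish") -> str:
    """Build the RAG prompt, apply the chat template, and decode only the answer.

    Illustrative helper; assumes `tokenizer`, `model`, and `eos_tokens`
    from the snippet above are already defined.
    """
    task = (
        "Here is a question that you should answer based on the given context. "
        "Write a response that answers the question using only information "
        f"provided in the context. Provide the answer in {language}."
    )
    content = f"{task}\n\nContext:\n{context}\n\nQuestion:\n{question}"
    chat = [{"role": "user", "content": content}]
    text = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
    input_ids = tokenizer.encode(text, add_special_tokens=False, return_tensors="pt")
    output = model.generate(
        input_ids=input_ids.to(model.device),
        eos_token_id=eos_tokens,
        max_new_tokens=200,
    )
    # Skip the prompt tokens so only the newly generated answer is decoded.
    answer_ids = output[0][input_ids.shape[-1]:]
    return tokenizer.decode(answer_ids, skip_special_tokens=True)

print(answer_with_context("At what temperature does water boil?", context))
```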
|