Update README.md
README.md
@@ -28,20 +28,53 @@ model = LLaMAForCausalLM.from_pretrained(
model = PeftModel.from_pretrained(model, "bertin-project/bertin-alpaca-lora-7b")
```

Until `PEFT` is fully supported in Hugging Face's pipelines, for generation we can either consolidate the LoRA weights into the LLaMA model weights, or use the adapter's `generate()` method. Remember that the prompt still needs the English template:
```python
from transformers import GenerationConfig

# Load the model
model = ...

# Generate prompts from Alpaca template
def generate_prompt(instruction, input=None):
    if input:
        return f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request. # noqa: E501

### Instruction:
{instruction}

### Input:
{input}

### Response:
"""
    else:
        return f"""Below is an instruction that describes a task. Write a response that appropriately completes the request. # noqa: E501

### Instruction:
{instruction}

### Response:
"""

# Generate responses
def generate(instruction, input=None):
    prompt = generate_prompt(instruction, input)
    inputs = tokenizer(prompt, return_tensors="pt")
    input_ids = inputs["input_ids"].cuda()
    generation_output = model.generate(
        input_ids=input_ids,
        generation_config=GenerationConfig(temperature=0.2, top_p=0.75, num_beams=4),
        return_dict_in_generate=True,
        output_scores=True,
        max_new_tokens=256
    )
    for seq in generation_output.sequences:
        output = tokenizer.decode(seq)
        print("Respuesta:", output.split("### Response:")[1].strip())

generate("Escribe un correo electrónico dando la bienvenida a un nuevo empleado llamado Manolo.")
# Estimado Manolo,
#
# ¡Bienvenido a nuestro equipo! Estamos muy contentos de que hayas decidido unirse a nosotros y estamos ansiosos por comenzar a trabajar juntos.
```