versae committed
Commit a286db9
1 Parent(s): 2b0cfc7

Update README.md

Files changed (1)
  1. README.md +39 -6
README.md CHANGED
@@ -28,20 +28,53 @@ model = LLaMAForCausalLM.from_pretrained(
  model = PeftModel.from_pretrained(model, "bertin-project/bertin-alpaca-lora-7b")
  ```

- For generation, the promtp still needs the English template:
+ Until `PEFT` is fully supported in Hugging Face's pipelines, for generation we can either consolidate the LoRA weights into the LLaMA model weights or use the adapter's `generate()` method. Remember that the prompt still needs the English template:

  ```python
- from transformers import pipeline
+ from transformers import GenerationConfig

- pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
- instruction = "Escribe un correo electrónico dando la bienvenida a un nuevo empleado llamado Manolo."
- pipe.generate(f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
+ # Load the model
+ model = ...
+
+ # Generate prompts from the Alpaca template
+ def generate_prompt(instruction, input=None):
+     if input:
+         return f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request. # noqa: E501
+
+ ### Instruction:
+ {instruction}
+
+ ### Input:
+ {input}
+
+ ### Response:
+ """
+     else:
+         return f"""Below is an instruction that describes a task. Write a response that appropriately completes the request. # noqa: E501

  ### Instruction:
  {instruction}

  ### Response:
- """)
+ """
+
+ # Generate responses
+ def generate(instruction, input=None):
+     prompt = generate_prompt(instruction, input)
+     inputs = tokenizer(prompt, return_tensors="pt")
+     input_ids = inputs["input_ids"].cuda()
+     generation_output = model.generate(
+         input_ids=input_ids,
+         generation_config=GenerationConfig(temperature=0.2, top_p=0.75, num_beams=4),
+         return_dict_in_generate=True,
+         output_scores=True,
+         max_new_tokens=256
+     )
+     for seq in generation_output.sequences:
+         output = tokenizer.decode(seq)
+         print("Respuesta:", output.split("### Response:")[1].strip())
+
+ generate("Escribe un correo electrónico dando la bienvenida a un nuevo empleado llamado Manolo.")
  # Estimado Manolo,
  #
  # ¡Bienvenido a nuestro equipo! Estamos muy contentos de que hayas decidido unirse a nosotros y estamos ansiosos por comenzar a trabajar juntos.
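
For the first option the added text mentions, consolidating the LoRA weights into the LLaMA weights, here is a minimal sketch (not part of this commit). It assumes a `peft` version that provides `merge_and_unload()` and reuses the `generate_prompt` helper from the diff above; the base checkpoint path is a placeholder, and recent `transformers` releases spell the classes `LlamaForCausalLM`/`LlamaTokenizer` rather than the `LLaMA*` names used in this README.

```python
import torch
from peft import PeftModel
from transformers import LlamaForCausalLM, LlamaTokenizer, pipeline

# Placeholder path: use the same base checkpoint the README loads above.
base_model_id = "path/to/llama-7b-hf"

tokenizer = LlamaTokenizer.from_pretrained(base_model_id)
model = LlamaForCausalLM.from_pretrained(
    base_model_id,
    torch_dtype=torch.float16,
    device_map="auto",
)
model = PeftModel.from_pretrained(model, "bertin-project/bertin-alpaca-lora-7b")

# Fold the LoRA deltas into the base weights; the result is a plain
# LlamaForCausalLM usable with the standard text-generation pipeline.
model = model.merge_and_unload()

pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
prompt = generate_prompt(
    "Escribe un correo electrónico dando la bienvenida a un nuevo empleado llamado Manolo."
)
print(pipe(prompt, max_new_tokens=256)[0]["generated_text"])
```

Once merged, the adapter is baked in, so the checkpoint behaves like an ordinary LLaMA model for pipelines and saving; note that merging is generally not supported for models loaded in 8-bit.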