edwko committed on
Commit f468afa
1 Parent(s): 855e470

Update README.md

Files changed (1)
  1. README.md +24 -7
README.md CHANGED
@@ -154,23 +154,40 @@ This model uses a specific chat format for optimal performance.
 ## Usage with HuggingFace transformers
 The model can be used with HuggingFace's `transformers` library:
 ```python
+import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
-model = AutoModelForCausalLM.from_pretrained("OuteAI/Lite-Mistral-150M-v2-Instruct")
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+model = AutoModelForCausalLM.from_pretrained("OuteAI/Lite-Mistral-150M-v2-Instruct").to(device)
 tokenizer = AutoTokenizer.from_pretrained("OuteAI/Lite-Mistral-150M-v2-Instruct")
 
-def generate_response(message):
-    # Encode the formatted message as input ids
-    input_ids = tokenizer.encode(f"<s>user\n{message}</s>", return_tensors="pt")
-    output = model.generate(input_ids, max_length=100, pad_token_id=tokenizer.eos_token_id)
+def generate_response(message: str, temperature: float = 0.4, repetition_penalty: float = 1.1) -> str:
+    # Apply the chat template and convert to PyTorch tensors
+    messages = [
+        {"role": "system", "content": "You are a helpful assistant."},
+        {"role": "user", "content": message}
+    ]
+    input_ids = tokenizer.apply_chat_template(
+        messages, add_generation_prompt=True, return_tensors="pt"
+    ).to(device)
+
+    # Generate the response
+    output = model.generate(
+        input_ids,
+        max_length=512,
+        temperature=temperature,
+        repetition_penalty=repetition_penalty,
+        do_sample=True
+    )
 
     # Decode the generated output
     generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
-
     return generated_text
 
-message = "What is the capital of Spain?"
+message = "I'd like to learn about language models. Can you break down the concept for me?"
 response = generate_response(message)
+print(response)
 ```
 
 ## Risk Disclaimer
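
A note for readers comparing the two snippets: the old code hand-built the prompt string (`f"<s>user\n{message}</s>"`), while the updated code delegates formatting to the tokenizer's chat template. The rendered prompt can be inspected directly — a minimal sketch, assuming the tokenizer ships a chat template as the updated snippet requires:

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("OuteAI/Lite-Mistral-150M-v2-Instruct")

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is the capital of Spain?"},
]

# tokenize=False returns the rendered prompt string rather than token ids,
# so the expected chat format can be checked without running generation.
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt)
```

Delegating to the template this way avoids the drift that hand-built prompt strings invite whenever the model's chat format changes.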
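
One behavior of the updated snippet worth noting: `tokenizer.decode(output[0], skip_special_tokens=True)` decodes the full sequence, prompt included, so `print(response)` echoes the chat history along with the reply. A minimal sketch of a variant that decodes only the newly generated tokens — illustrative, not part of this commit:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = AutoModelForCausalLM.from_pretrained("OuteAI/Lite-Mistral-150M-v2-Instruct").to(device)
tokenizer = AutoTokenizer.from_pretrained("OuteAI/Lite-Mistral-150M-v2-Instruct")

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "I'd like to learn about language models."},
]
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(device)

output = model.generate(
    input_ids,
    max_new_tokens=256,  # caps newly generated tokens only, unlike max_length
    temperature=0.4,
    repetition_penalty=1.1,
    do_sample=True,
)

# Keep only the tokens generated after the prompt before decoding.
reply = tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True)
print(reply)
```

Swapping `max_length` for `max_new_tokens` also keeps the generation budget independent of prompt length, which matters once conversations grow.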