teknium committed
Commit
eb85cf0
1 Parent(s): b78852d

Update README.md

Files changed (1)
  1. README.md +3 -3
README.md CHANGED
@@ -176,7 +176,7 @@ In LM-Studio, simply select the ChatML Prefix on the settings side pane:
 
 # Inference Code
 
-Here is example code using HuggingFace Transformers to inference the model (note: even in 4bit, it will require more than 24GB of VRAM)
+Here is example code using HuggingFace Transformers to inference the model (note: in 4bit, it will require around 5GB of VRAM)
 
 ```python
 # Code to inference Hermes with HF Transformers
@@ -187,9 +187,9 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
 from transformers import LlamaTokenizer, MixtralForCausalLM
 import bitsandbytes, flash_attn
 
-tokenizer = LlamaTokenizer.from_pretrained('NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO', trust_remote_code=True)
+tokenizer = LlamaTokenizer.from_pretrained('NousResearch/Nous-Hermes-2-Mistral-7B-DPO', trust_remote_code=True)
 model = MixtralForCausalLM.from_pretrained(
-    "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
+    "NousResearch/Nous-Hermes-2-Mistral-7B-DPO",
     torch_dtype=torch.float16,
     device_map="auto",
     load_in_8bit=False,
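
The hunk cuts off partway through the `from_pretrained` call, so for context, here is a minimal self-contained sketch of the inference flow the updated README describes. It is an illustration under stated assumptions, not the repo's verbatim code: it swaps in the `Auto*` classes (which resolve the correct architecture for the Mistral-7B checkpoint, sidestepping the `MixtralForCausalLM` class name the diff leaves unchanged) and uses `load_in_4bit=True` to match the ~5GB VRAM note in the new text; the prompt contents and generation parameters are illustrative assumptions.

```python
# Minimal sketch (assumed, not verbatim from the repo): load
# Nous-Hermes-2-Mistral-7B-DPO in 4-bit and generate from a
# ChatML-formatted prompt. Requires bitsandbytes for 4-bit loading.
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

model_id = "NousResearch/Nous-Hermes-2-Mistral-7B-DPO"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    device_map="auto",
    load_in_4bit=True,  # bitsandbytes 4-bit quantization; ~5GB VRAM per the README note
)

# ChatML framing, matching the prompt format the README references
prompt = (
    "<|im_start|>system\n"
    "You are a helpful assistant.<|im_end|>\n"
    "<|im_start|>user\n"
    "Explain what DPO fine-tuning is in one paragraph.<|im_end|>\n"
    "<|im_start|>assistant\n"
)

inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
output = model.generate(
    **inputs,
    max_new_tokens=256,
    temperature=0.8,
    do_sample=True,
    eos_token_id=tokenizer.eos_token_id,
)
# Decode only the newly generated tokens, skipping the prompt
print(tokenizer.decode(output[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True))
```

The `<|im_start|>`/`<|im_end|>` framing corresponds to the "ChatML Prefix" the README's LM-Studio section mentions in the hunk context above.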