dfurman committed on
Commit
d3d56a0
1 Parent(s): c8c30fe

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +13 -4
README.md CHANGED
@@ -42,20 +42,29 @@ dtype: bfloat16
42
  <summary>Setup</summary>
43
 
44
  ```python
45
- !pip install -qU transformers accelerate
46
 
47
- from transformers import AutoTokenizer, AutoModelForCausalLM
 
 
 
 
48
  import torch
49
 
50
  model = "dfurman/HermesBagel-34B-v0.1"
 
 
 
 
 
 
51
 
52
  tokenizer = AutoTokenizer.from_pretrained(model)
53
-
54
  model = AutoModelForCausalLM.from_pretrained(
55
  model,
56
  torch_dtype=torch.bfloat16,
57
  device_map="auto",
58
- trust_remote_code=True,
59
  )
60
  ```
61
 
 
42
  <summary>Setup</summary>
43
 
44
  ```python
45
+ !pip install -qU transformers accelerate bitsandbytes
46
 
47
+ from transformers import (
48
+ AutoTokenizer,
49
+ AutoModelForCausalLM,
50
+ BitsAndBytesConfig
51
+ )
52
  import torch
53
 
54
  model = "dfurman/HermesBagel-34B-v0.1"
55
+ nf4_config = BitsAndBytesConfig(
56
+ load_in_4bit=True,
57
+ bnb_4bit_quant_type="nf4",
58
+ bnb_4bit_use_double_quant=True,
59
+ bnb_4bit_compute_dtype=torch.bfloat16
60
+ )
61
 
62
  tokenizer = AutoTokenizer.from_pretrained(model)
 
63
  model = AutoModelForCausalLM.from_pretrained(
64
  model,
65
  torch_dtype=torch.bfloat16,
66
  device_map="auto",
67
+ quantization_config=nf4_config,
68
  )
69
  ```
70