adamo1139 committed on
Commit
5d1cd21
1 Parent(s): d4fd13f

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +8 -2
README.md CHANGED
@@ -85,8 +85,13 @@ quant_stage:
85
  dynamic: false
86
  symmetric: true
87
  targets: ["Linear"]
 
 
 
 
 
 
88
  """
89
-
90
  model_stub = "NousResearch/Hermes-3-Llama-3.1-8B"
91
  model_name = model_stub.split("/")[-1]
92
 
@@ -99,7 +104,7 @@ model = SparseAutoModelForCausalLM.from_pretrained(
99
  )
100
  tokenizer = AutoTokenizer.from_pretrained(model_stub)
101
 
102
- output_dir = f"./{model_name}-FP8"
103
 
104
  DATASET_ID = "HuggingFaceH4/ultrachat_200k"
105
  DATASET_SPLIT = "train_sft"
@@ -140,6 +145,7 @@ oneshot(
140
  save_compressed=True,
141
  )
142
 
 
143
  ```
144
 
145
 
 
85
  dynamic: false
86
  symmetric: true
87
  targets: ["Linear"]
88
+ kv_cache_scheme:
89
+ num_bits: 8
90
+ type: float
91
+ strategy: tensor
92
+ dynamic: false
93
+ symmetric: true
94
  """
 
95
  model_stub = "NousResearch/Hermes-3-Llama-3.1-8B"
96
  model_name = model_stub.split("/")[-1]
97
 
 
104
  )
105
  tokenizer = AutoTokenizer.from_pretrained(model_stub)
106
 
107
+ output_dir = f"./{model_name}-Static-FP8-KV"
108
 
109
  DATASET_ID = "HuggingFaceH4/ultrachat_200k"
110
  DATASET_SPLIT = "train_sft"
 
145
  save_compressed=True,
146
  )
147
 
148
+
149
  ```
150
 
151