Update README.md
README.md CHANGED

@@ -85,8 +85,13 @@ quant_stage:
                         dynamic: false
                         symmetric: true
                     targets: ["Linear"]
+            kv_cache_scheme:
+                num_bits: 8
+                type: float
+                strategy: tensor
+                dynamic: false
+                symmetric: true
 """
-
 model_stub = "NousResearch/Hermes-3-Llama-3.1-8B"
 model_name = model_stub.split("/")[-1]
 
@@ -99,7 +104,7 @@ model = SparseAutoModelForCausalLM.from_pretrained(
 )
 tokenizer = AutoTokenizer.from_pretrained(model_stub)
 
-output_dir = f"./{model_name}-FP8"
+output_dir = f"./{model_name}-Static-FP8-KV"
 
 DATASET_ID = "HuggingFaceH4/ultrachat_200k"
 DATASET_SPLIT = "train_sft"
@@ -140,6 +145,7 @@ oneshot(
     save_compressed=True,
 )
 
+
 ```
 
 
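For context, below is a minimal end-to-end sketch of what the updated README snippet assembles to, inferred from the diff fragments above. The model stub, `output_dir`, dataset constants, and the `kv_cache_scheme` block come from the diff; the recipe body above `kv_cache_scheme`, the calibration constants, the preprocessing helper, and the exact `from_pretrained`/`oneshot` arguments are assumptions not shown in this change.

```python
from datasets import load_dataset
from transformers import AutoTokenizer

from llmcompressor.transformers import SparseAutoModelForCausalLM, oneshot

# The weights / input_activations groups are assumed from the surrounding
# README; only targets and kv_cache_scheme appear in the diff above.
recipe = """
quant_stage:
    quant_modifiers:
        QuantizationModifier:
            ignore: ["lm_head"]
            config_groups:
                group_0:
                    weights:
                        num_bits: 8
                        type: float
                        strategy: tensor
                        dynamic: false
                        symmetric: true
                    input_activations:
                        num_bits: 8
                        type: float
                        strategy: tensor
                        dynamic: false
                        symmetric: true
                    targets: ["Linear"]
            kv_cache_scheme:
                num_bits: 8
                type: float
                strategy: tensor
                dynamic: false
                symmetric: true
"""

model_stub = "NousResearch/Hermes-3-Llama-3.1-8B"
model_name = model_stub.split("/")[-1]

model = SparseAutoModelForCausalLM.from_pretrained(
    model_stub, torch_dtype="auto", device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained(model_stub)

output_dir = f"./{model_name}-Static-FP8-KV"

DATASET_ID = "HuggingFaceH4/ultrachat_200k"
DATASET_SPLIT = "train_sft"
NUM_CALIBRATION_SAMPLES = 512   # assumed; not visible in the diff
MAX_SEQUENCE_LENGTH = 2048      # assumed; not visible in the diff

ds = load_dataset(DATASET_ID, split=DATASET_SPLIT)
ds = ds.shuffle(seed=42).select(range(NUM_CALIBRATION_SAMPLES))


def preprocess(example):
    # Render each chat into a single string and tokenize it for calibration.
    text = tokenizer.apply_chat_template(example["messages"], tokenize=False)
    return tokenizer(
        text,
        max_length=MAX_SEQUENCE_LENGTH,
        truncation=True,
        add_special_tokens=False,
    )


ds = ds.map(preprocess, remove_columns=ds.column_names)

oneshot(
    model=model,
    dataset=ds,
    recipe=recipe,
    max_seq_length=MAX_SEQUENCE_LENGTH,
    num_calibration_samples=NUM_CALIBRATION_SAMPLES,
    output_dir=output_dir,
    save_compressed=True,
)
```

With `dynamic: false` and `strategy: tensor`, the KV cache scales are calibrated offline as a single static FP8 scale per tensor and saved with the compressed checkpoint, rather than computed at runtime, which is what the new `-Static-FP8-KV` output directory name reflects.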