fxmarty
/

llama-tiny-w-fp8-a-fp8

Safetensors

llama

quark

Model card Files and versions Community

fxmarty committed on Oct 10

Commit

c85996e

•

1 Parent(s): bd1d16e

Upload folder using huggingface_hub

Browse files

Files changed (1) hide show

config.json +41 -47

config.json CHANGED Viewed

@@ -21,54 +21,48 @@
   "pad_token_id": -1,
   "pretraining_tp": 1,
   "quantization_config": {
-    "activation_scheme": "static",
-    "kv_cache_scheme": null,
-    "library": "quark",
-    "quant_method": "fp8",
-    "quark_config": {
-      "algo_config": null,
-      "exclude": [
-        "lm_head"
-      ],
-      "export": {
-        "kv_cache_group": [],
-        "pack_method": "reorder",
-        "weight_format": "real_quantized",
-        "weight_merge_groups": null
-      },
-      "global_quant_config": {
-        "bias": null,
-        "input_tensors": {
-          "ch_axis": null,
-          "dtype": "fp8_e4m3",
-          "group_size": null,
-          "is_dynamic": false,
-          "observer_cls": "PerTensorMinMaxObserver",
-          "qscheme": "per_tensor",
-          "round_method": null,
-          "scale_type": null,
-          "symmetric": null
-        },
-        "output_tensors": null,
-        "target_device": null,
-        "weight": {
-          "ch_axis": null,
-          "dtype": "fp8_e4m3",
-          "group_size": null,
-          "is_dynamic": false,
-          "observer_cls": "PerTensorMinMaxObserver",
-          "qscheme": "per_tensor",
-          "round_method": null,
-          "scale_type": null,
-          "symmetric": null
-        }
-      },
-      "layer_quant_config": {},
-      "layer_type_quant_config": {},
       "pack_method": "reorder",
-      "quant_method": "quark",
-      "quant_mode": 1
-    }
   },
   "rms_norm_eps": 1e-06,
   "rope_scaling": null,

   "pad_token_id": -1,
   "pretraining_tp": 1,
   "quantization_config": {
+    "algo_config": null,
+    "exclude": [
+      "lm_head"
+    ],
+    "export": {
+      "kv_cache_group": [],
       "pack_method": "reorder",
+      "weight_format": "real_quantized",
+      "weight_merge_groups": null
+    },
+    "global_quant_config": {
+      "bias": null,
+      "input_tensors": {
+        "ch_axis": null,
+        "dtype": "fp8_e4m3",
+        "group_size": null,
+        "is_dynamic": false,
+        "observer_cls": "PerTensorMinMaxObserver",
+        "qscheme": "per_tensor",
+        "round_method": null,
+        "scale_type": null,
+        "symmetric": null
+      },
+      "output_tensors": null,
+      "target_device": null,
+      "weight": {
+        "ch_axis": null,
+        "dtype": "fp8_e4m3",
+        "group_size": null,
+        "is_dynamic": false,
+        "observer_cls": "PerTensorMinMaxObserver",
+        "qscheme": "per_tensor",
+        "round_method": null,
+        "scale_type": null,
+        "symmetric": null
+      }
+    },
+    "layer_quant_config": {},
+    "layer_type_quant_config": {},
+    "pack_method": "reorder",
+    "quant_method": "quark",
+    "quant_mode": 1
   },
   "rms_norm_eps": 1e-06,
   "rope_scaling": null,