alexmarques committed
Commit 67a9b67 (1 parent: c8e3503)

Upload folder using huggingface_hub

Files changed (4):
  1. config.json +3 -9
  2. generation_config.json +1 -1
  3. model.safetensors +1 -1
  4. recipe.yaml +2 -4
config.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "microsoft__Phi-3-mini-128k-instruct",
+  "_name_or_path": "/root/.cache/huggingface/hub/models--microsoft--Phi-3-mini-128k-instruct/snapshots/d548c233192db00165d842bf8edff054bb3212f8",
   "architectures": [
     "Phi3ForCausalLM"
   ],
@@ -38,13 +38,7 @@
     ],
     "kv_cache_scheme": null,
     "quant_method": "compressed-tensors",
-    "quantization_status": "frozen",
-    "sparsity_config": {
-      "format": "dense",
-      "global_sparsity": 1.2496503239511723,
-      "registry_requires_subclass": false,
-      "sparsity_structure": "unstructured"
-    }
+    "quantization_status": "frozen"
   },
   "embd_pdrop": 0.0,
   "eos_token_id": 32000,
@@ -168,7 +162,7 @@
   "sliding_window": 262144,
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
-  "transformers_version": "4.42.3",
+  "transformers_version": "4.44.0",
   "use_cache": true,
   "vocab_size": 32064
 }
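With these changes config.json advertises quant_method "compressed-tensors" and drops the stale sparsity_config block (the checkpoint is dense). As a hedged sketch, not part of this commit: a checkpoint in this format can be served with vLLM, which detects the quantization method from config.json; the sketch assumes it is run from a local checkout of this repo.

# Sketch (assumption, not from this commit): serve the compressed-tensors
# checkpoint with vLLM, which reads quant_method from config.json.
from vllm import LLM, SamplingParams

llm = LLM(model=".")  # assumed: current directory is a checkout of this repo
params = SamplingParams(temperature=0.0, max_tokens=64)
outputs = llm.generate(["Briefly explain weight-only quantization."], params)
print(outputs[0].outputs[0].text)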
generation_config.json CHANGED
@@ -7,5 +7,5 @@
     32007
   ],
   "pad_token_id": 32000,
-  "transformers_version": "4.42.3"
+  "transformers_version": "4.44.0"
 }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a94c12daa3595c34fcda567abbd4550d5351b45c5e72d52e5bb4ffca94053065
+oid sha256:21a4e5d388f3be579989e33516a3140e625c832f522f0bac935f6c94c72a2512
 size 4020365960
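model.safetensors is tracked by Git LFS, so only the pointer changes: a new sha256 object id at an unchanged size of 4020365960 bytes. A small standard-library sketch for checking a downloaded blob against this pointer (the local file path is an assumption):

# Verify a downloaded model.safetensors against the LFS pointer above.
# Expected digest and size come from the new pointer; the path is assumed.
import hashlib

EXPECTED_OID = "21a4e5d388f3be579989e33516a3140e625c832f522f0bac935f6c94c72a2512"
EXPECTED_SIZE = 4020365960

digest = hashlib.sha256()
size = 0
with open("model.safetensors", "rb") as f:  # assumed local path
    for chunk in iter(lambda: f.read(1 << 20), b""):  # 1 MiB chunks
        digest.update(chunk)
        size += len(chunk)

assert size == EXPECTED_SIZE, f"size mismatch: {size} != {EXPECTED_SIZE}"
assert digest.hexdigest() == EXPECTED_OID, "sha256 mismatch"
print("model.safetensors matches the LFS pointer")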
recipe.yaml CHANGED
@@ -4,7 +4,5 @@ quant_stage:
   sequential_update: false
   dampening_frac: 0.01
   ignore: [lm_head]
-  config_groups:
-    group_0:
-      targets: [Linear]
-      weights: {num_bits: 8, type: int, symmetric: true, strategy: channel}
+  scheme: W8A16
+  targets: Linear
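The new scheme: W8A16 preset is shorthand for the removed config_groups block: int8, symmetric, channel-wise weight quantization of all Linear layers (except lm_head), with activations kept in 16-bit. The dampening_frac and sequential_update keys match llmcompressor's GPTQModifier, so a plausible (assumed, not confirmed by this commit) way to apply the recipe is llmcompressor's one-shot path; the calibration dataset and sample counts below are likewise assumptions:

# Sketch (assumptions noted): one-shot quantization with llmcompressor using
# a recipe like the one above; dataset and sample count are placeholders.
from llmcompressor.transformers import oneshot

oneshot(
    model="microsoft/Phi-3-mini-128k-instruct",
    recipe="recipe.yaml",             # the recipe from this commit
    dataset="open_platypus",          # assumed calibration dataset
    max_seq_length=2048,              # assumed
    num_calibration_samples=512,      # assumed
    output_dir="Phi-3-mini-128k-instruct-W8A16",
)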