TheBlokeAI
/

Mixtral-tiny-GPTQ

Text Generation

text-generation-inference

Inference Endpoints

4-bit precision

Model card | Files and versions | Community

TheBloke committed on Dec 11, 2023

Commit

9f35ed9

•

1 Parent(s): 8587832

GPTQ model commit

Files changed (3) hide show

config.json +12 -25
model.safetensors +2 -2
quantize_config.json +8 -14

config.json CHANGED Viewed

@@ -20,30 +20,6 @@
     "output_router_logits": false,
     "pad_token_id": 0,
     "pretraining_tp": 1,
-    "quantization_config": {
-        "batch_size": 1,
-        "bits": 4,
-        "block_name_to_quantize": "model.layers",
-        "cache_block_outputs": true,
-        "damp_percent": 0.1,
-        "desc_act": true,
-        "exllama_config": {
-            "version": 1
-        },
-        "group_size": 128,
-        "max_input_length": null,
-        "model_seqlen": 8192,
-        "module_name_preceding_first_block": [
-            "model.embed_tokens"
-        ],
-        "pad_token_id": null,
-        "quant_method": "gptq",
-        "sym": true,
-        "tokenizer": null,
-        "true_sequential": true,
-        "use_cuda_fp16": true,
-        "use_exllama": true
-    },
     "rms_norm_eps": 1e-05,
     "rope_theta": 10000.0,
     "router_aux_loss_coef": 0.001,
@@ -52,5 +28,16 @@
     "torch_dtype": "float16",
     "transformers_version": "4.36.0.dev0",
     "use_cache": true,
-    "vocab_size": 32000
 }

     "output_router_logits": false,
     "pad_token_id": 0,
     "pretraining_tp": 1,
     "rms_norm_eps": 1e-05,
     "rope_theta": 10000.0,
     "router_aux_loss_coef": 0.001,
     "torch_dtype": "float16",
     "transformers_version": "4.36.0.dev0",
     "use_cache": true,
+    "vocab_size": 32000,
+    "quantization_config": {
+        "bits": 4,
+        "group_size": 128,
+        "damp_percent": 0.1,
+        "desc_act": true,
+        "sym": true,
+        "true_sequential": true,
+        "model_name_or_path": null,
+        "model_file_base_name": "model",
+        "quant_method": "gptq"
+    }
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2560788eeb8fca3bfd01225750d340f28532e0f817c55fc73f7e13f242db9152
-size 226044824

 version https://git-lfs.github.com/spec/v1
+oid sha256:b407869971afeac96a2f915a7a1762923f47cf6212c38a5ff5f57590f9aa7395
+size 226060152

quantize_config.json CHANGED Viewed

@@ -1,16 +1,10 @@
 {
-    "bits": [
-        4
-    ],
-    "group_size": [
-        128
-    ],
-    "damp_percent": [
-        0.1
-    ],
-    "desc_act": [
-        true
-    ],
-    "sym": true,
-    "true_sequential": true
 }

 {
+  "bits": 4,
+  "group_size": 128,
+  "damp_percent": 0.1,
+  "desc_act": true,
+  "sym": true,
+  "true_sequential": true,
+  "model_name_or_path": null,
+  "model_file_base_name": "model"
 }