Upload folder using huggingface_hub

- config.json +3 -3
- generation_config.json +1 -1
- model.safetensors +2 -2
- recipe.yaml +2 -2
config.json
CHANGED
@@ -46,7 +46,7 @@
     "quantization_status": "frozen",
     "sparsity_config": {
       "format": "dense",
-      "global_sparsity": 7.
+      "global_sparsity": 7.81941733210164,
       "registry_requires_subclass": false,
       "sparsity_structure": "unstructured"
     }
@@ -66,8 +66,8 @@
   "rope_scaling": null,
   "rope_theta": 10000.0,
   "tie_word_embeddings": false,
-  "torch_dtype": "
-  "transformers_version": "4.40.
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.40.2",
   "use_cache": true,
   "vocab_size": 32000
 }
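The updated config records a global_sparsity of roughly 7.82% for this dense, unstructured checkpoint. The exact computation used by the compression library isn't shown in this commit; the sketch below only illustrates the usual definition (the fraction of zero-valued parameters), and the repo id is a placeholder.

```python
# Minimal sketch (assumption: global sparsity == fraction of zero-valued
# parameters). "your-org/your-model" is a placeholder repo id.
import torch
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("your-org/your-model", torch_dtype=torch.bfloat16)

zeros, total = 0, 0
for _, param in model.named_parameters():
    zeros += (param == 0).sum().item()  # count exact zeros in this tensor
    total += param.numel()

print(f"global_sparsity ~= {100.0 * zeros / total:.2f}%")  # e.g. ~7.82% for this checkpoint
```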
generation_config.json
CHANGED
@@ -3,5 +3,5 @@
   "eos_token_id": 2,
   "max_length": 2048,
   "pad_token_id": 0,
-  "transformers_version": "4.40.
+  "transformers_version": "4.40.2"
 }
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:166ff7480ffea0fab716c76f7865509f0cd0d2d3f998f148ba6960aeee610287
+size 1231252716
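model.safetensors is stored via Git LFS, so the diff above only changes the pointer file: the new SHA-256 oid and byte size identify the uploaded weights. A downloaded copy can be checked against those two fields, as in this sketch (the local path is an assumption).

```python
# Minimal sketch: verify a downloaded model.safetensors against the
# oid/size recorded in the Git LFS pointer above.
import hashlib
import os

path = "model.safetensors"  # assumed local path to the downloaded file
expected_oid = "166ff7480ffea0fab716c76f7865509f0cd0d2d3f998f148ba6960aeee610287"
expected_size = 1231252716

h = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        h.update(chunk)

assert os.path.getsize(path) == expected_size, "size mismatch"
assert h.hexdigest() == expected_oid, "sha256 mismatch"
print("model.safetensors matches the LFS pointer")
```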
recipe.yaml
CHANGED
@@ -5,6 +5,6 @@ quant_stage:
       ignore: [lm_head]
       config_groups:
         group_0:
-          weights: {num_bits: 8, type: int, symmetric: true
-          input_activations: {num_bits: 8, type: int,
+          weights: {num_bits: 8, type: int, symmetric: true}
+          input_activations: {num_bits: 8, type: int, dynamic: true, symmetric: true, strategy: token}
           targets: [Linear]
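The updated recipe describes a W8A8 scheme: symmetric int8 weights plus dynamic, symmetric, per-token int8 input activations, applied to Linear layers while skipping lm_head. A minimal usage sketch for the resulting checkpoint is below; the repo id is a placeholder, and loading a compressed-tensors checkpoint this way assumes the compressed-tensors package is installed alongside transformers.

```python
# Minimal sketch: load and run the quantized checkpoint with transformers.
# "your-org/your-w8a8-model" is a placeholder repo id (assumption).
from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "your-org/your-w8a8-model"
tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForCausalLM.from_pretrained(repo_id, device_map="auto", torch_dtype="auto")

inputs = tokenizer("Hello, world!", return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```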