Commit 87e14e9 (verified) · committed by mgoin · Parent(s): 6c31251

Upload folder using huggingface_hub

config.json CHANGED
@@ -1,11 +1,61 @@
 {
-  "_name_or_path": "meta-llama/Meta-Llama-3-8B-Instruct",
+  "_name_or_path": "/home/mgoin/.cache/huggingface/hub/models--meta-llama--Meta-Llama-3-8B-Instruct/snapshots/e1945c40cd546c78e41f1151f4db032b271faeaa",
   "architectures": [
     "LlamaForCausalLM"
   ],
   "attention_bias": false,
   "attention_dropout": 0.0,
   "bos_token_id": 128000,
+  "compression_config": {
+    "config_groups": {
+      "group_0": {
+        "input_activations": {
+          "block_structure": null,
+          "dynamic": false,
+          "group_size": null,
+          "num_bits": 8,
+          "observer": "minmax",
+          "observer_kwargs": {},
+          "strategy": "tensor",
+          "symmetric": true,
+          "type": "float"
+        },
+        "output_activations": null,
+        "targets": [
+          "Linear"
+        ],
+        "weights": {
+          "block_structure": null,
+          "dynamic": false,
+          "group_size": null,
+          "num_bits": 8,
+          "observer": "minmax",
+          "observer_kwargs": {},
+          "strategy": "tensor",
+          "symmetric": true,
+          "type": "float"
+        }
+      }
+    },
+    "format": "float-quantized",
+    "global_compression_ratio": 1.459016372092587,
+    "ignore": [
+      "lm_head"
+    ],
+    "kv_cache_scheme": {
+      "block_structure": null,
+      "dynamic": false,
+      "group_size": null,
+      "num_bits": 8,
+      "observer": "minmax",
+      "observer_kwargs": {},
+      "strategy": "tensor",
+      "symmetric": true,
+      "type": "float"
+    },
+    "quant_method": "compressed-tensors",
+    "quantization_status": "frozen"
+  },
   "eos_token_id": 128009,
   "hidden_act": "silu",
   "hidden_size": 4096,
@@ -18,20 +68,12 @@
   "num_hidden_layers": 32,
   "num_key_value_heads": 8,
   "pretraining_tp": 1,
-  "quantization_config": {
-    "activation_scheme": "static",
-    "ignored_layers": [
-      "lm_head"
-    ],
-    "kv_cache_scheme": "static",
-    "quant_method": "fp8"
-  },
   "rms_norm_eps": 1e-05,
   "rope_scaling": null,
   "rope_theta": 500000.0,
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
-  "transformers_version": "4.42.4",
+  "transformers_version": "4.43.1",
   "use_cache": true,
   "vocab_size": 128256
-}
+}
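
The net effect of this change is to drop the older "quantization_config" block (quant_method "fp8" with static activation and KV-cache scales) in favor of a compressed-tensors "compression_config" that spells out the FP8 weight, input-activation, and KV-cache schemes explicitly. A minimal sketch, assuming the checkpoint has been uploaded to a Hugging Face repository, of reading that metadata back; the repo id below is a placeholder, not taken from this commit:

# Sketch: inspect the new compressed-tensors metadata in config.json.
# The repo_id is a placeholder (this commit does not name the repository).
import json

from huggingface_hub import hf_hub_download

config_path = hf_hub_download(
    repo_id="your-org/Meta-Llama-3-8B-Instruct-FP8",  # placeholder repo id
    filename="config.json",
)
with open(config_path) as f:
    config = json.load(f)

compression = config["compression_config"]
print(compression["quant_method"])   # compressed-tensors
print(compression["format"])         # float-quantized
group_0 = compression["config_groups"]["group_0"]
print(group_0["weights"]["num_bits"], group_0["weights"]["type"])  # 8 float
print(compression["kv_cache_scheme"]["type"])  # float, i.e. FP8 KV-cache scales
print(compression["ignore"])          # ['lm_head'] is left unquantized

Because quant_method is now "compressed-tensors", loaders that understand this format (recent vLLM releases, for example) can detect the FP8 scheme from this block rather than from the removed "quantization_config".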
generation_config.json CHANGED
@@ -8,5 +8,5 @@
   "max_length": 4096,
   "temperature": 0.6,
   "top_p": 0.9,
-  "transformers_version": "4.42.4"
+  "transformers_version": "4.43.1"
 }
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3984136d7c07adb442b2c081a7e78b683f53d5738417a9ec8f5dc9e140751434
-size 4997857192
+oid sha256:552b13fa34c1fe48969abf0f41ef7ef263d33ac90500652d3da65200dee40650
+size 4997861162
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:61a4a49b2165f2ce2dad57f842c92f467036f37e1ce083d9e22c40f4583814b6
-size 4083429400
+oid sha256:082eaa4b1c2fbd0007b9813f5ce2c6d29b24ee5aefcb265c0e6d91aa26e53d3d
+size 4083432614
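
Both safetensors shards are tracked through Git LFS, so the diffs above only touch the pointer files: each records the SHA-256 digest (oid) and byte size of the real shard, and both change because the shards themselves were regenerated. A small sketch, assuming the updated first shard has been downloaded locally, that re-derives those two values and compares them to the new pointer:

# Sketch: verify a downloaded shard against its Git LFS pointer (oid + size).
# The expected values are the ones introduced by this commit for
# model-00001-of-00002.safetensors; the local path is assumed.
import hashlib
import os

EXPECTED_OID = "552b13fa34c1fe48969abf0f41ef7ef263d33ac90500652d3da65200dee40650"
EXPECTED_SIZE = 4_997_861_162

def sha256_of(path, chunk_size=1 << 20):
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

shard = "model-00001-of-00002.safetensors"
assert os.path.getsize(shard) == EXPECTED_SIZE
assert sha256_of(shard) == EXPECTED_OID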
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
 {
   "metadata": {
-    "total_size": 9081202688
+    "total_size": 9081201664
   },
   "weight_map": {
     "lm_head.weight": "model-00002-of-00002.safetensors",
@@ -279,7 +279,7 @@
   "model.layers.18.self_attn.v_proj.input_scale": "model-00002-of-00002.safetensors",
   "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
   "model.layers.18.self_attn.v_proj.weight_scale": "model-00002-of-00002.safetensors",
-  "model.layers.18.self_attn.v_scale": "model-00001-of-00002.safetensors",
+  "model.layers.18.self_attn.v_scale": "model-00002-of-00002.safetensors",
   "model.layers.19.input_layernorm.weight": "model-00002-of-00002.safetensors",
   "model.layers.19.mlp.down_proj.input_scale": "model-00002-of-00002.safetensors",
   "model.layers.19.mlp.down_proj.weight": "model-00002-of-00002.safetensors",