Commit 2252808 (1 parent: 90c15db), committed by wenhuach

update to autoround format

Signed-off-by: wenhuach <wenhuach87@gmail.com>
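
This commit repackages the checkpoint from GPTQ-style metadata to AutoRound's own format, which is loaded through the auto-round integration rather than a GPTQ loader. A minimal loading sketch, assuming auto-round and a recent transformers are installed; "OPEA/<this-repo>" is a placeholder, since the full repo id is not shown on this page:

# Minimal sketch: load an AutoRound-format checkpoint.
from transformers import AutoModelForCausalLM, AutoTokenizer
from auto_round import AutoRoundConfig  # importing this registers the AutoRound format with transformers

model_id = "OPEA/<this-repo>"  # placeholder, not the real repo id
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", torch_dtype="auto")
tokenizer = AutoTokenizer.from_pretrained(model_id)

inputs = tokenizer("There is a girl who likes adventure,", return_tensors="pt").to(model.device)
print(tokenizer.decode(model.generate(**inputs, max_new_tokens=50)[0]))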

config.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "/data5/models/Meta-Llama-3.1-70B-Instruct",
+  "_name_or_path": ".",
   "architectures": [
     "LlamaForCausalLM"
   ],
@@ -26,22 +26,22 @@
   "quantization_config": {
     "amp": true,
     "autoround_version": "0.4.1",
-    "batch_size": 1,
+    "backend": "auto_round:gptq:exllamav2",
+    "batch_size": 8,
     "bits": 4,
-    "damp_percent": 0.01,
     "data_type": "int",
-    "desc_act": false,
+    "dataset": "NeelNanda/pile-10k",
     "enable_minmax_tuning": true,
     "enable_norm_bias_tuning": false,
     "enable_quanted_input": true,
     "gradient_accumulate_steps": 1,
     "group_size": 128,
-    "iters": 1,
+    "iters": 1000,
     "low_gpu_mem_usage": true,
-    "lr": 1.0,
-    "minmax_lr": 1.0,
-    "nsamples": 1,
-    "quant_method": "gptq",
+    "lr": 0.001,
+    "minmax_lr": 0.001,
+    "nsamples": 512,
+    "quant_method": "intel/auto-round",
     "scale_dtype": "torch.float16",
     "seqlen": 2048,
     "sym": true,
@@ -128,8 +128,7 @@
         "model.layers.78",
         "model.layers.79"
       ]
-    ],
-    "true_sequential": false
+    ]
   },
   "rms_norm_eps": 1e-05,
   "rope_scaling": {
@@ -141,7 +140,7 @@
   },
   "rope_theta": 500000.0,
   "tie_word_embeddings": false,
-  "torch_dtype": "float16",
+  "torch_dtype": "bfloat16",
   "transformers_version": "4.46.1",
   "use_cache": true,
   "vocab_size": 128256
quantize_config.json → quantization_config.json RENAMED
@@ -6,14 +6,14 @@
   "enable_quanted_input": true,
   "enable_minmax_tuning": true,
   "seqlen": 2048,
-  "batch_size": 1,
+  "batch_size": 8,
   "scale_dtype": "torch.float16",
-  "lr": 1.0,
-  "minmax_lr": 1.0,
+  "lr": 0.001,
+  "minmax_lr": 0.001,
   "gradient_accumulate_steps": 1,
-  "iters": 1,
+  "iters": 1000,
   "amp": true,
-  "nsamples": 1,
+  "nsamples": 512,
   "low_gpu_mem_usage": true,
   "to_quant_block_names": [
     [
@@ -100,9 +100,8 @@
     ]
   ],
   "enable_norm_bias_tuning": false,
+  "dataset": "NeelNanda/pile-10k",
   "autoround_version": "0.4.1",
-  "quant_method": "gptq",
-  "desc_act": false,
-  "true_sequential": false,
-  "damp_percent": 0.01
+  "quant_method": "intel/auto-round",
+  "backend": "auto_round:gptq:exllamav2"
 }
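
The rename from quantize_config.json (the legacy GPTQ-style name) to quantization_config.json goes along with the same key changes as in config.json. The new backend key ("auto_round:gptq:exllamav2") appears to record the default inference kernel; auto-round also lets a backend be selected explicitly at load time via AutoRoundConfig. A hedged sketch, again with a placeholder repo id:

# Sketch: explicitly select the inference backend recorded in the config.
from transformers import AutoModelForCausalLM
from auto_round import AutoRoundConfig

quantization_config = AutoRoundConfig(backend="auto_round:gptq:exllamav2")
model = AutoModelForCausalLM.from_pretrained(
    "OPEA/<this-repo>",  # placeholder, not the real repo id
    device_map="auto",
    quantization_config=quantization_config,
)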