fxmarty committed on
Commit
bd1d16e
1 Parent(s): 04c37fa

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. config.json +47 -41
config.json CHANGED
@@ -21,48 +21,54 @@
21
  "pad_token_id": -1,
22
  "pretraining_tp": 1,
23
  "quantization_config": {
24
- "algo_config": null,
25
- "exclude": [
26
- "lm_head"
27
- ],
28
- "export": {
29
- "kv_cache_group": [],
30
- "pack_method": "reorder",
31
- "weight_format": "real_quantized",
32
- "weight_merge_groups": null
33
- },
34
- "global_quant_config": {
35
- "bias": null,
36
- "input_tensors": {
37
- "ch_axis": null,
38
- "dtype": "fp8_e4m3",
39
- "group_size": null,
40
- "is_dynamic": false,
41
- "observer_cls": "PerTensorMinMaxObserver",
42
- "qscheme": "per_tensor",
43
- "round_method": null,
44
- "scale_type": null,
45
- "symmetric": null
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  },
47
- "output_tensors": null,
48
- "target_device": null,
49
- "weight": {
50
- "ch_axis": null,
51
- "dtype": "fp8_e4m3",
52
- "group_size": null,
53
- "is_dynamic": false,
54
- "observer_cls": "PerTensorMinMaxObserver",
55
- "qscheme": "per_tensor",
56
- "round_method": null,
57
- "scale_type": null,
58
- "symmetric": null
59
- }
60
- },
61
- "layer_quant_config": {},
62
- "layer_type_quant_config": {},
63
- "pack_method": "reorder",
64
- "quant_method": "quark",
65
- "quant_mode": 1
66
  },
67
  "rms_norm_eps": 1e-06,
68
  "rope_scaling": null,
 
21
  "pad_token_id": -1,
22
  "pretraining_tp": 1,
23
  "quantization_config": {
24
+ "activation_scheme": "static",
25
+ "kv_cache_scheme": null,
26
+ "library": "quark",
27
+ "quant_method": "fp8",
28
+ "quark_config": {
29
+ "algo_config": null,
30
+ "exclude": [
31
+ "lm_head"
32
+ ],
33
+ "export": {
34
+ "kv_cache_group": [],
35
+ "pack_method": "reorder",
36
+ "weight_format": "real_quantized",
37
+ "weight_merge_groups": null
38
+ },
39
+ "global_quant_config": {
40
+ "bias": null,
41
+ "input_tensors": {
42
+ "ch_axis": null,
43
+ "dtype": "fp8_e4m3",
44
+ "group_size": null,
45
+ "is_dynamic": false,
46
+ "observer_cls": "PerTensorMinMaxObserver",
47
+ "qscheme": "per_tensor",
48
+ "round_method": null,
49
+ "scale_type": null,
50
+ "symmetric": null
51
+ },
52
+ "output_tensors": null,
53
+ "target_device": null,
54
+ "weight": {
55
+ "ch_axis": null,
56
+ "dtype": "fp8_e4m3",
57
+ "group_size": null,
58
+ "is_dynamic": false,
59
+ "observer_cls": "PerTensorMinMaxObserver",
60
+ "qscheme": "per_tensor",
61
+ "round_method": null,
62
+ "scale_type": null,
63
+ "symmetric": null
64
+ }
65
  },
66
+ "layer_quant_config": {},
67
+ "layer_type_quant_config": {},
68
+ "pack_method": "reorder",
69
+ "quant_method": "quark",
70
+ "quant_mode": 1
71
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  },
73
  "rms_norm_eps": 1e-06,
74
  "rope_scaling": null,