nickfraser
committed on
Commit
•
cfd94d7
1
Parent(s):
3fea540
Added models that are fully quantized with FP8.
Browse files
- .gitattributes +4 -0
- linear_conv_fp8_sdpa_fp16_eq_bl/args.json +1 -0
- linear_conv_fp8_sdpa_fp16_eq_bl/params.safetensors +3 -0
- linear_conv_fp8_sdpa_fp16_eq_bl/quant_params.json +3 -0
- linear_conv_fp8_sdpa_fp16_eq_bl/vae.safetensors +3 -0
- linear_conv_fp8_sdpa_fp16_no_eq_bl/args.json +1 -0
- linear_conv_fp8_sdpa_fp16_no_eq_bl/params.safetensors +3 -0
- linear_conv_fp8_sdpa_fp16_no_eq_bl/quant_params.json +3 -0
- linear_conv_fp8_sdpa_fp16_no_eq_bl/vae.safetensors +3 -0
- linear_conv_fp8_sdpa_fp8_eq_bl/args.json +1 -0
- linear_conv_fp8_sdpa_fp8_eq_bl/params.safetensors +3 -0
- linear_conv_fp8_sdpa_fp8_eq_bl/quant_params.json +3 -0
- linear_conv_fp8_sdpa_fp8_eq_bl/vae.safetensors +3 -0
- linear_conv_fp8_sdpa_fp8_no_eq_bl/args.json +1 -0
- linear_conv_fp8_sdpa_fp8_no_eq_bl/params.safetensors +3 -0
- linear_conv_fp8_sdpa_fp8_no_eq_bl/quant_params.json +3 -0
- linear_conv_fp8_sdpa_fp8_no_eq_bl/vae.safetensors +3 -0
.gitattributes
CHANGED
@@ -36,3 +36,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
36 |
brevitas/quant_param.json filter=lfs diff=lfs merge=lfs -text
|
37 |
all_sym_8_calib10/quant_params.json filter=lfs diff=lfs merge=lfs -text
|
38 |
all_linear_sym_8_calib10/quant_params.json filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
36 |
brevitas/quant_param.json filter=lfs diff=lfs merge=lfs -text
|
37 |
all_sym_8_calib10/quant_params.json filter=lfs diff=lfs merge=lfs -text
|
38 |
all_linear_sym_8_calib10/quant_params.json filter=lfs diff=lfs merge=lfs -text
|
39 |
+
linear_conv_fp8_sdpa_fp16_eq_bl/quant_params.json filter=lfs diff=lfs merge=lfs -text
|
40 |
+
linear_conv_fp8_sdpa_fp16_no_eq_bl/quant_params.json filter=lfs diff=lfs merge=lfs -text
|
41 |
+
linear_conv_fp8_sdpa_fp8_eq_bl/quant_params.json filter=lfs diff=lfs merge=lfs -text
|
42 |
+
linear_conv_fp8_sdpa_fp8_no_eq_bl/quant_params.json filter=lfs diff=lfs merge=lfs -text
|
linear_conv_fp8_sdpa_fp16_eq_bl/args.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"model": "stabilityai/stable-diffusion-xl-base-1.0", "device": "cuda:2", "batch_size": 1, "prompt": 2, "calibration_prompt": 500, "calibration_prompt_path": "./captions.tsv", "checkpoint_name": "unet.ckpt", "load_checkpoint": null, "path_to_latents": "./coco/latents/latents.pt", "path_to_coco": "./coco/", "resolution": 1024, "guidance_scale": 8.0, "calibration_steps": 14.0, "output_path": ".", "quantize": true, "activation_equalization": true, "gptq": false, "bias_correction": true, "dtype": "float32", "attention_slicing": false, "export_target": "params_only", "export_weight_q_node": false, "conv_weight_bit_width": 8, "linear_weight_bit_width": 8, "conv_input_bit_width": 8, "act_eq_alpha": 0.9, "linear_input_bit_width": 8, "linear_output_bit_width": 8, "weight_param_method": "stats", "input_param_method": "stats", "input_scale_stats_op": "minmax", "input_zp_stats_op": "minmax", "weight_scale_precision": "float_scale", "input_scale_precision": "float_scale", "weight_quant_type": "sym", "input_quant_type": "sym", "weight_quant_format": "float_fnuz_e4m3", "input_quant_format": "float_fnuz_e4m3", "weight_quant_granularity": "per_channel", "input_quant_granularity": "per_tensor", "input_scale_type": "static", "weight_group_size": 16, "quantize_weight_zero_point": true, "exclude_blacklist_act_eq": true, "quantize_input_zero_point": false, "export_cpu_float32": false, "use_mlperf_inference": true, "use_negative_prompts": true, "dry_run": false, "quantize_sdp": false, "override_conv_quant_config": false, "vae_fp16_fix": true, "share_qkv_quant": true}
|
linear_conv_fp8_sdpa_fp16_eq_bl/params.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0243ea64b169f7cae6c55faf90478b403873a5d4d8078b36df97ee1bd43c3809
|
3 |
+
size 5136147144
|
linear_conv_fp8_sdpa_fp16_eq_bl/quant_params.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:72efbde246c6776d50d29044b91692327bb4c940eaa39cc9bc80b6a2c48cccb7
|
3 |
+
size 68165293
|
linear_conv_fp8_sdpa_fp16_eq_bl/vae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8539dc4b7f19a677d55f5c485c5ca746669fd687b75bff3e94d9957068bdd1d6
|
3 |
+
size 167335310
|
linear_conv_fp8_sdpa_fp16_no_eq_bl/args.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"model": "stabilityai/stable-diffusion-xl-base-1.0", "device": "cuda:0", "batch_size": 1, "prompt": 2, "calibration_prompt": 500, "calibration_prompt_path": "./captions.tsv", "checkpoint_name": "unet.ckpt", "load_checkpoint": null, "path_to_latents": "./coco/latents/latents.pt", "path_to_coco": "./coco/", "resolution": 1024, "guidance_scale": 8.0, "calibration_steps": 14.0, "output_path": ".", "quantize": true, "activation_equalization": true, "gptq": false, "bias_correction": true, "dtype": "float32", "attention_slicing": false, "export_target": "params_only", "export_weight_q_node": false, "conv_weight_bit_width": 8, "linear_weight_bit_width": 8, "conv_input_bit_width": 8, "act_eq_alpha": 0.9, "linear_input_bit_width": 8, "linear_output_bit_width": 8, "weight_param_method": "stats", "input_param_method": "stats", "input_scale_stats_op": "minmax", "input_zp_stats_op": "minmax", "weight_scale_precision": "float_scale", "input_scale_precision": "float_scale", "weight_quant_type": "sym", "input_quant_type": "sym", "weight_quant_format": "float_fnuz_e4m3", "input_quant_format": "float_fnuz_e4m3", "weight_quant_granularity": "per_channel", "input_quant_granularity": "per_tensor", "input_scale_type": "static", "weight_group_size": 16, "quantize_weight_zero_point": true, "exclude_blacklist_act_eq": false, "quantize_input_zero_point": false, "export_cpu_float32": false, "use_mlperf_inference": true, "use_negative_prompts": true, "dry_run": false, "quantize_sdp": false, "override_conv_quant_config": false, "vae_fp16_fix": true, "share_qkv_quant": true}
|
linear_conv_fp8_sdpa_fp16_no_eq_bl/params.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c0e97fe828e9d8f626d70899c4bcffc57493e25259963152fd5a8c2b2c67c97f
|
3 |
+
size 5136147144
|
linear_conv_fp8_sdpa_fp16_no_eq_bl/quant_params.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0a2b7eb9ee758a41b85021d1e3805f35b3b6abdd17b1d0b797a2127188c2dcd2
|
3 |
+
size 68767048
|
linear_conv_fp8_sdpa_fp16_no_eq_bl/vae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8539dc4b7f19a677d55f5c485c5ca746669fd687b75bff3e94d9957068bdd1d6
|
3 |
+
size 167335310
|
linear_conv_fp8_sdpa_fp8_eq_bl/args.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"model": "stabilityai/stable-diffusion-xl-base-1.0", "device": "cuda:3", "batch_size": 1, "prompt": 2, "calibration_prompt": 500, "calibration_prompt_path": "./captions.tsv", "checkpoint_name": "unet.ckpt", "load_checkpoint": null, "path_to_latents": "./coco/latents/latents.pt", "path_to_coco": "./coco/", "resolution": 1024, "guidance_scale": 8.0, "calibration_steps": 14.0, "output_path": ".", "quantize": true, "activation_equalization": true, "gptq": false, "bias_correction": true, "dtype": "float32", "attention_slicing": false, "export_target": "params_only", "export_weight_q_node": false, "conv_weight_bit_width": 8, "linear_weight_bit_width": 8, "conv_input_bit_width": 8, "act_eq_alpha": 0.9, "linear_input_bit_width": 8, "linear_output_bit_width": 8, "weight_param_method": "stats", "input_param_method": "stats", "input_scale_stats_op": "minmax", "input_zp_stats_op": "minmax", "weight_scale_precision": "float_scale", "input_scale_precision": "float_scale", "weight_quant_type": "sym", "input_quant_type": "sym", "weight_quant_format": "float_fnuz_e4m3", "input_quant_format": "float_fnuz_e4m3", "weight_quant_granularity": "per_channel", "input_quant_granularity": "per_tensor", "input_scale_type": "static", "weight_group_size": 16, "quantize_weight_zero_point": true, "exclude_blacklist_act_eq": true, "quantize_input_zero_point": false, "export_cpu_float32": false, "use_mlperf_inference": true, "use_negative_prompts": true, "dry_run": false, "quantize_sdp": true, "override_conv_quant_config": false, "vae_fp16_fix": true, "share_qkv_quant": true}
|
linear_conv_fp8_sdpa_fp8_eq_bl/params.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b56a759e9638c44da8e00e63dea1ce2e7c243009b6b2b5b1dd657be88388cca3
|
3 |
+
size 5136147144
|
linear_conv_fp8_sdpa_fp8_eq_bl/quant_params.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:504e53446d9565ebc8573134559dbf89225741015974c7bb4aae9a5a10d45ada
|
3 |
+
size 68164956
|
linear_conv_fp8_sdpa_fp8_eq_bl/vae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8539dc4b7f19a677d55f5c485c5ca746669fd687b75bff3e94d9957068bdd1d6
|
3 |
+
size 167335310
|
linear_conv_fp8_sdpa_fp8_no_eq_bl/args.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"model": "stabilityai/stable-diffusion-xl-base-1.0", "device": "cuda:1", "batch_size": 1, "prompt": 2, "calibration_prompt": 500, "calibration_prompt_path": "./captions.tsv", "checkpoint_name": "unet.ckpt", "load_checkpoint": null, "path_to_latents": "./coco/latents/latents.pt", "path_to_coco": "./coco/", "resolution": 1024, "guidance_scale": 8.0, "calibration_steps": 14.0, "output_path": ".", "quantize": true, "activation_equalization": true, "gptq": false, "bias_correction": true, "dtype": "float32", "attention_slicing": false, "export_target": "params_only", "export_weight_q_node": false, "conv_weight_bit_width": 8, "linear_weight_bit_width": 8, "conv_input_bit_width": 8, "act_eq_alpha": 0.9, "linear_input_bit_width": 8, "linear_output_bit_width": 8, "weight_param_method": "stats", "input_param_method": "stats", "input_scale_stats_op": "minmax", "input_zp_stats_op": "minmax", "weight_scale_precision": "float_scale", "input_scale_precision": "float_scale", "weight_quant_type": "sym", "input_quant_type": "sym", "weight_quant_format": "float_fnuz_e4m3", "input_quant_format": "float_fnuz_e4m3", "weight_quant_granularity": "per_channel", "input_quant_granularity": "per_tensor", "input_scale_type": "static", "weight_group_size": 16, "quantize_weight_zero_point": true, "exclude_blacklist_act_eq": false, "quantize_input_zero_point": false, "export_cpu_float32": false, "use_mlperf_inference": true, "use_negative_prompts": true, "dry_run": false, "quantize_sdp": true, "override_conv_quant_config": false, "vae_fp16_fix": true, "share_qkv_quant": true}
|
linear_conv_fp8_sdpa_fp8_no_eq_bl/params.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d1c5dfd7e6bb99d53f53ea9abb732853c4093e09e1d6c7439e3640337c69799e
|
3 |
+
size 5136147144
|
linear_conv_fp8_sdpa_fp8_no_eq_bl/quant_params.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b895ed3ef893f203fa28e076f9d9bc9c2097a551e8b78d90c6f440ffaaf064de
|
3 |
+
size 68766642
|
linear_conv_fp8_sdpa_fp8_no_eq_bl/vae.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8539dc4b7f19a677d55f5c485c5ca746669fd687b75bff3e94d9957068bdd1d6
|
3 |
+
size 167335310
|