nickfraser
committed on
Commit
•
fa0155f
1
Parent(s):
cfd94d7
Updated sdpa fp8 models
Browse files
linear_conv_fp8_sdpa_fp8_eq_bl/args.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"model": "stabilityai/stable-diffusion-xl-base-1.0", "device": "cuda:3", "batch_size": 1, "prompt": 2, "calibration_prompt": 500, "calibration_prompt_path": "./captions.tsv", "checkpoint_name": "unet.ckpt", "load_checkpoint": null, "path_to_latents": "./coco/latents/latents.pt", "path_to_coco": "./coco/", "resolution": 1024, "guidance_scale": 8.0, "calibration_steps": 14
|
|
|
1 |
+
{"model": "stabilityai/stable-diffusion-xl-base-1.0", "device": "cuda:3", "batch_size": 1, "prompt": 2, "calibration_prompt": 500, "calibration_prompt_path": "./captions.tsv", "checkpoint_name": "unet.ckpt", "load_checkpoint": null, "path_to_latents": "./coco/latents/latents.pt", "path_to_coco": "./coco/", "resolution": 1024, "guidance_scale": 8.0, "calibration_steps": 14, "output_path": ".", "quantize": true, "activation_equalization": true, "gptq": false, "bias_correction": true, "dtype": "float32", "attention_slicing": false, "export_target": "params_only", "export_weight_q_node": false, "conv_weight_bit_width": 8, "linear_weight_bit_width": 8, "conv_input_bit_width": 8, "act_eq_alpha": 0.9, "linear_input_bit_width": 8, "weight_param_method": "stats", "input_param_method": "stats", "input_scale_stats_op": "minmax", "input_zp_stats_op": "minmax", "weight_scale_precision": "float_scale", "input_scale_precision": "float_scale", "weight_quant_type": "sym", "input_quant_type": "sym", "weight_quant_format": "float_fnuz_e4m3", "input_quant_format": "float_fnuz_e4m3", "weight_quant_granularity": "per_channel", "input_quant_granularity": "per_tensor", "input_scale_type": "static", "weight_group_size": 16, "sdpa_bit_width": 8, "sdpa_param_method": "stats", "sdpa_scale_stats_op": "minmax", "sdpa_zp_stats_op": "minmax", "sdpa_scale_precision": "float_scale", "sdpa_quant_type": "sym", "sdpa_quant_format": "float_fnuz_e4m3", "sdpa_quant_granularity": "per_tensor", "sdpa_scale_type": "static", "quant_blacklist": ["time_emb", "conv_in", "conv_out"], "quantize_weight_zero_point": true, "exclude_blacklist_act_eq": true, "quantize_input_zero_point": false, "quantize_sdpa_zero_point": false, "export_cpu_float32": false, "use_mlperf_inference": true, "use_negative_prompts": true, "dry_run": false, "override_conv_quant_config": false, "vae_fp16_fix": true, "share_qkv_quant": true}
|
linear_conv_fp8_sdpa_fp8_eq_bl/quant_params.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b69b47b50cdf458095187e5ee76886b04341254199cc216af744e8f96a18041f
|
3 |
+
size 68290748
|
linear_conv_fp8_sdpa_fp8_no_eq_bl/args.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"model": "stabilityai/stable-diffusion-xl-base-1.0", "device": "cuda:1", "batch_size": 1, "prompt": 2, "calibration_prompt": 500, "calibration_prompt_path": "./captions.tsv", "checkpoint_name": "unet.ckpt", "load_checkpoint": null, "path_to_latents": "./coco/latents/latents.pt", "path_to_coco": "./coco/", "resolution": 1024, "guidance_scale": 8.0, "calibration_steps": 14
|
|
|
1 |
+
{"model": "stabilityai/stable-diffusion-xl-base-1.0", "device": "cuda:1", "batch_size": 1, "prompt": 2, "calibration_prompt": 500, "calibration_prompt_path": "./captions.tsv", "checkpoint_name": "unet.ckpt", "load_checkpoint": null, "path_to_latents": "./coco/latents/latents.pt", "path_to_coco": "./coco/", "resolution": 1024, "guidance_scale": 8.0, "calibration_steps": 14, "output_path": ".", "quantize": true, "activation_equalization": true, "gptq": false, "bias_correction": true, "dtype": "float32", "attention_slicing": false, "export_target": "params_only", "export_weight_q_node": false, "conv_weight_bit_width": 8, "linear_weight_bit_width": 8, "conv_input_bit_width": 8, "act_eq_alpha": 0.9, "linear_input_bit_width": 8, "weight_param_method": "stats", "input_param_method": "stats", "input_scale_stats_op": "minmax", "input_zp_stats_op": "minmax", "weight_scale_precision": "float_scale", "input_scale_precision": "float_scale", "weight_quant_type": "sym", "input_quant_type": "sym", "weight_quant_format": "float_fnuz_e4m3", "input_quant_format": "float_fnuz_e4m3", "weight_quant_granularity": "per_channel", "input_quant_granularity": "per_tensor", "input_scale_type": "static", "weight_group_size": 16, "sdpa_bit_width": 8, "sdpa_param_method": "stats", "sdpa_scale_stats_op": "minmax", "sdpa_zp_stats_op": "minmax", "sdpa_scale_precision": "float_scale", "sdpa_quant_type": "sym", "sdpa_quant_format": "float_fnuz_e4m3", "sdpa_quant_granularity": "per_tensor", "sdpa_scale_type": "static", "quant_blacklist": ["time_emb", "conv_in", "conv_out"], "quantize_weight_zero_point": true, "exclude_blacklist_act_eq": false, "quantize_input_zero_point": false, "quantize_sdpa_zero_point": false, "export_cpu_float32": false, "use_mlperf_inference": true, "use_negative_prompts": true, "dry_run": false, "override_conv_quant_config": false, "vae_fp16_fix": true, "share_qkv_quant": true}
|
linear_conv_fp8_sdpa_fp8_no_eq_bl/quant_params.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8d2a0d8d72db3b1de02c0a8afade8da52544015efa10a8aeefcc4bafd9ac5afe
|
3 |
+
size 68892987
|