{
"api_key": null,
"verify_url": "http://johnrachwan.pythonanywhere.com",
"smash_config": {
"pruners": "None",
"pruning_ratio": 0.0,
"factorizers": "None",
"quantizers": "['quanto']",
"weight_quantization_bits": "int8",
"output_deviation": 0.005,
"compilers": "None",
"static_batch": true,
"static_shape": true,
"controlnet": "None",
"unet_dim": 4,
"device": "cuda",
"cache_dir": "/ceph/hdd/staff/charpent/.cache/modelsy76fk9el",
"batch_size": 1,
"model_name": "davidkim205/Ko-Llama-3-8B-Instruct",
"task": "text_text_generation",
"max_batch_size": 1,
"qtype_weight": "torch.qint8",
"qtype_activation": "torch.quint8",
"qobserver": "<class 'torch.ao.quantization.observer.MinMaxObserver'>",
"qscheme": "torch.per_tensor_symmetric",
"qconfig": "x86",
"group_size": 128,
"damp_percent": 0.1,
"save_load_fn": "torch"
}
}