{ "api_key": null, "verify_url": "http://johnrachwan.pythonanywhere.com", "smash_config": { "pruners": "None", "pruning_ratio": 0.0, "factorizers": "None", "quantizers": "['quanto']", "weight_quantization_bits": "float8", "output_deviation": 0.005, "compilers": "None", "static_batch": true, "static_shape": true, "controlnet": "None", "unet_dim": 4, "device": "cuda", "cache_dir": "/ceph/hdd/staff/charpent/.cache/models_abnk3h4", "batch_size": 1, "model_name": "NCSOFT/Llama-3-OffsetBias-8B", "task": "text_text_generation", "max_batch_size": 1, "qtype_weight": "torch.qint8", "qtype_activation": "torch.quint8", "qobserver": "", "qscheme": "torch.per_tensor_symmetric", "qconfig": "x86", "group_size": 128, "damp_percent": 0.1, "save_load_fn": "torch" } }