{ "api_key": null, "verify_url": "http://johnrachwan.pythonanywhere.com", "smash_config": { "pruners": "None", "pruning_ratio": 0.0, "factorizers": "None", "quantizers": "['llm-int8']", "weight_quantization_bits": 4, "output_deviation": 0.005, "compilers": "None", "static_batch": true, "static_shape": true, "controlnet": "None", "unet_dim": 4, "device": "cuda", "cache_dir": "/ceph/hdd/staff/charpent/.cache/models3e0zcli_", "batch_size": 1, "model_name": "unsloth/llama-3-8b-Instruct", "task": "text_text_generation", "max_batch_size": 1, "qtype_weight": "torch.qint8", "qtype_activation": "torch.quint8", "qobserver": "", "qscheme": "torch.per_tensor_symmetric", "qconfig": "x86", "group_size": 128, "damp_percent": 0.1, "save_load_fn": "bitsandbytes" } }