{ "api_key": null, "verify_url": "http://johnrachwan.pythonanywhere.com", "smash_config": { "pruners": "[]", "pruning_ratio": 0.0, "factorizers": "[]", "quantizers": "['gptq']", "n_quantization_bits": 4, "output_deviation": 0.005, "compilers": "[]", "static_batch": true, "static_shape": true, "controlnet": "None", "unet_dim": 4, "device": "cuda", "cache_dir": "/ceph/hdd/staff/charpent/.cache/modelsdqetji2r", "batch_size": 1, "model_name": "kittn/mistral-7B-v0.1-hf", "max_batch_size": 1, "save_dir": "/ceph/hdd/staff/charpent/.cache/modelsbhuxvpss", "qtype_weight": "torch.qint8", "qtype_activation": "torch.quint8", "qobserver": "", "qscheme": "torch.per_tensor_symmetric", "qconfig": "x86", "group_size": 128, "damp_percent": 0.1, "save_load_fn": "hf-gptq" } }