{ "api_key": null, "verify_url": "http://johnrachwan.pythonanywhere.com", "smash_config": { "pruners": "[]", "factorizers": "[]", "quantizers": "['llm-int8']", "compilers": "[]", "task": "text_text_generation", "device": "cuda", "cache_dir": "/ceph/hdd/staff/charpent/.cache/models48vm_y7x", "batch_size": 1, "model_name": "42dot/42dot_LLM-PLM-1.3B", "pruning_ratio": 0.0, "n_quantization_bits": 4, "output_deviation": 0.01, "max_batch_size": 1, "qtype_weight": "torch.qint8", "qtype_activation": "torch.quint8", "qobserver": "", "qscheme": "torch.per_tensor_symmetric", "qconfig": "x86", "group_size": 128, "damp_percent": 0.1, "save_load_fn": "bitsandbytes" } }