{
  "api_key": null,
  "verify_url": "http://johnrachwan.pythonanywhere.com",
  "smash_config": {
    "pruners": "None",
    "factorizers": "None",
    "quantizers": "['gptq']",
    "compilers": "None",
    "task": "text_text_generation",
    "device": "cuda",
    "cache_dir": "/ceph/hdd/staff/charpent/.cache/modelshh4nlu_0",
    "batch_size": 1,
    "model_name": "princeton-nlp/Sheared-LLaMA-1.3B",
    "pruning_ratio": 0.0,
    "n_quantization_bits": 8,
    "output_deviation": 0.005,
    "max_batch_size": 1,
    "qtype_weight": "torch.qint8",
    "qtype_activation": "torch.quint8",
    "qobserver": "<class 'torch.ao.quantization.observer.MinMaxObserver'>",
    "qscheme": "torch.per_tensor_symmetric",
    "qconfig": "x86",
    "group_size": 128,
    "damp_percent": 0.1,
    "save_load_fn": "hf-gptq"
  }
}