GPTQModel doesn't support the vLLM backend for this model.
#4 · opened by Anditty
import torch
from gptqmodel import GPTQModel, BACKEND

model = GPTQModel.from_quantized(
    model_name,
    trust_remote_code=True,
    device_map="sequential",
    max_memory=max_memory,
    torch_dtype=torch.float16,
    backend=BACKEND.VLLM,
)
[rank0]: assert self.quant_method is not None
[rank0]: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
[rank0]: AssertionError
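The assertion appears to come from vLLM's model loader: it could not map this model's quantization config to a quantization method it supports, so `self.quant_method` stays `None` and the load aborts. A minimal sketch of a possible workaround, assuming only the vLLM backend is unsupported and the failure surfaces as the AssertionError shown above (`model_name` and `max_memory` are placeholders for the values used in the original call):

import torch
from gptqmodel import GPTQModel, BACKEND

model_name = "path/to/quantized-model"  # placeholder: the repo id used above
max_memory = None                       # placeholder: per-device memory map, if any

try:
    # First attempt: the vLLM backend, which fails for this model.
    model = GPTQModel.from_quantized(
        model_name,
        trust_remote_code=True,
        device_map="sequential",
        max_memory=max_memory,
        torch_dtype=torch.float16,
        backend=BACKEND.VLLM,
    )
except AssertionError:
    # vLLM could not resolve a quant_method for this model's quantization
    # config; retry with GPTQModel's automatic kernel selection instead.
    model = GPTQModel.from_quantized(
        model_name,
        trust_remote_code=True,
        device_map="sequential",
        max_memory=max_memory,
        torch_dtype=torch.float16,
        backend=BACKEND.AUTO,
    )

If the `BACKEND.AUTO` path loads cleanly, the problem is isolated to the vLLM integration rather than the quantized weights themselves.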