xuxw98 committed
Commit
c8ac827
1 Parent(s): 2b6f2c0

Update lit_llama/utils.py

Files changed (1)
  1. lit_llama/utils.py +3 -3
lit_llama/utils.py CHANGED
@@ -89,13 +89,13 @@ class EmptyInitOnDevice(torch.overrides.TorchFunctionMode):
         if self.quantization_mode == 'llm.int8':
             if device.type != "cuda":
                 raise ValueError("Quantization is only supported on the GPU.")
-            from lit_llama.quantization import Linear8bitLt
+            from .quantization import Linear8bitLt
             self.quantized_linear_cls = Linear8bitLt
         elif self.quantization_mode == 'gptq.int4':
-            from lit_llama.quantization import ColBlockQuantizedLinear
+            from .quantization import ColBlockQuantizedLinear
             self.quantized_linear_cls = functools.partial(ColBlockQuantizedLinear, bits=4, tile_cols=-1)
         elif self.quantization_mode == 'gptq.int8':
-            from lit_llama.quantization import ColBlockQuantizedLinear
+            from .quantization import ColBlockQuantizedLinear
             self.quantized_linear_cls = functools.partial(ColBlockQuantizedLinear, bits=8, tile_cols=-1)
         elif self.quantization_mode is not None:
             raise RuntimeError(f"unknown quantization mode {self.quantization_mode}")
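
The change replaces the absolute imports (from lit_llama.quantization import ...) with package-relative ones (from .quantization import ...), so the quantization helpers resolve through the containing package rather than a hard-coded top-level name. A minimal sketch of how the affected code path might be exercised follows; it assumes EmptyInitOnDevice accepts device and quantization_mode keyword arguments (the only names visible in the diff), and the model import and constructor are hypothetical:

import torch

from lit_llama.model import LLaMA  # hypothetical import path for the model class
from lit_llama.utils import EmptyInitOnDevice

# Per the diff, 'llm.int8' requires a CUDA device; a ValueError is raised otherwise.
with EmptyInitOnDevice(device=torch.device("cuda"), quantization_mode="llm.int8"):
    # Inside this TorchFunctionMode, the quantized linear class selected above
    # (Linear8bitLt for 'llm.int8') is resolved via the now-relative import.
    model = LLaMA.from_name("7B")  # hypothetical model constructor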