zxdu20 committed on
Commit
3218e92
1 Parent(s): 216185d

Remove assert in load_cpu_kernel

Browse files
Files changed (1) hide show
  1. quantization.py +2 -4
quantization.py CHANGED
@@ -442,7 +442,6 @@ class QuantizedEmbedding(Embedding): # TODO: backward, check empty_init
442
  def load_cpu_kernel(**kwargs):
443
  global cpu_kernels
444
  cpu_kernels = CPUKernel(**kwargs)
445
- assert cpu_kernels.load
446
 
447
 
448
  def quantize(model, weight_bit_width, use_quantization_cache=False, empty_init=False, **kwargs):
@@ -453,9 +452,8 @@ def quantize(model, weight_bit_width, use_quantization_cache=False, empty_init=F
453
  dense_h_to_4h_quantization_cache = None
454
  dense_4h_to_h_quantization_cache = None
455
 
456
- try:
457
- load_cpu_kernel(**kwargs)
458
- except:
459
  if kernels is None: # CUDA kernels failed
460
  print("Cannot load cpu or cuda kernel, quantization failed:")
461
  assert kernels is not None
442
  def load_cpu_kernel(**kwargs):
443
  global cpu_kernels
444
  cpu_kernels = CPUKernel(**kwargs)
 
445
 
446
 
447
  def quantize(model, weight_bit_width, use_quantization_cache=False, empty_init=False, **kwargs):
452
  dense_h_to_4h_quantization_cache = None
453
  dense_4h_to_h_quantization_cache = None
454
 
455
+ load_cpu_kernel(**kwargs)
456
+ if not cpu_kernels.load:
 
457
  if kernels is None: # CUDA kernels failed
458
  print("Cannot load cpu or cuda kernel, quantization failed:")
459
  assert kernels is not None