Alikestocode committed
Commit b2bf767 · Parent: 2ddfeca

Clarify LLM Compressor optional status - vLLM has native AWQ support


- LLM Compressor is only needed for quantizing models, not loading pre-quantized AWQ
- vLLM can load AWQ models natively without llm-compressor
- Update import logic to try both package names
- Comment out llmcompressor from requirements (optional dependency)
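
For context on the second point above: loading an already-AWQ-quantized checkpoint needs only vLLM itself. A minimal sketch (the model id is a placeholder, not the checkpoint this Space actually serves):

from vllm import LLM, SamplingParams

# Placeholder id for a pre-quantized AWQ checkpoint.
llm = LLM(model="some-org/some-model-AWQ", quantization="awq")

params = SamplingParams(temperature=0.7, max_tokens=64)
outputs = llm.generate(["Hello!"], params)
print(outputs[0].outputs[0].text)

No llm-compressor import appears anywhere in that path, which is why the dependency can be made optional.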

Files changed (2):
  1. app.py +24 -4
  2. requirements.txt +3 -1
app.py CHANGED
@@ -36,14 +36,34 @@ except ImportError:
     SamplingParams = None
     print("Warning: vLLM not available, falling back to Transformers")
 
-# Try to import LLM Compressor (for quantization)
+# Try to import LLM Compressor (for quantization - optional, vLLM has native AWQ support)
+# Note: llm-compressor is only needed for quantizing models, not for loading pre-quantized AWQ models
+# vLLM can load AWQ models natively without llm-compressor
 try:
-    from llmcompressor import oneshot
-    from llmcompressor.modifiers.quantization import AWQModifier
+    # Try both package names (llm-compressor and llmcompressor)
+    try:
+        from llmcompressor import oneshot
+        from llmcompressor.modifiers.quantization import AWQModifier
+    except ImportError:
+        # Try alternative package name
+        import sys
+        import subprocess
+        # Package might be named llm-compressor (with hyphen)
+        try:
+            import importlib.util
+            spec = importlib.util.find_spec("llm_compressor")
+            if spec is None:
+                raise ImportError("llm-compressor not found")
+            from llm_compressor import oneshot
+            from llm_compressor.modifiers.quantization import AWQModifier
+        except ImportError:
+            raise ImportError("Neither llmcompressor nor llm-compressor found")
     LLM_COMPRESSOR_AVAILABLE = True
+    print("Info: LLM Compressor available (for quantizing models)")
 except ImportError:
     LLM_COMPRESSOR_AVAILABLE = False
-    print("Warning: LLM Compressor not available (models should be pre-quantized)")
+    # This is fine - vLLM has native AWQ support, so we don't need llm-compressor for loading
+    print("Info: LLM Compressor not available (not needed - vLLM has native AWQ support for pre-quantized models)")
 
 # Try to import AWQ (deprecated, but kept for fallback compatibility)
 # Note: AutoAWQ is deprecated; vLLM handles AWQ natively via llm-compressor
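
For reference, the path these imports enable is one-shot AWQ quantization of a full-precision checkpoint, producing exactly the kind of pre-quantized model vLLM then loads natively. A rough sketch following llm-compressor's published examples; the model id is a placeholder, and the oneshot/AWQModifier arguments should be verified against the installed version:

from llmcompressor import oneshot
from llmcompressor.modifiers.quantization import AWQModifier
from transformers import AutoModelForCausalLM

MODEL_ID = "some-org/some-model"  # placeholder full-precision checkpoint

model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto")

# 4-bit weight-only AWQ: quantize the Linear layers, skip the output head.
recipe = [AWQModifier(ignore=["lm_head"], scheme="W4A16_ASYM", targets=["Linear"])]

oneshot(
    model=model,
    dataset="open_platypus",        # calibration dataset used in upstream examples
    recipe=recipe,
    max_seq_length=512,
    num_calibration_samples=256,
)

# Save in compressed form; the result loads in vLLM without llm-compressor installed.
model.save_pretrained("some-model-awq", save_compressed=True)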
requirements.txt CHANGED
@@ -8,7 +8,9 @@ spaces
 sentencepiece
 accelerate
 vllm>=0.6.0
-llmcompressor>=0.1.0
+# llm-compressor is optional - only needed for quantizing models, not loading pre-quantized AWQ
+# vLLM has native AWQ support built-in
+# llmcompressor>=0.1.0  # Commented out - not needed for loading pre-quantized models
 autoawq
 flash-attn>=2.5.0
 timm
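
If the Space later needs to quantize models itself rather than serve pre-quantized ones, the dependency can be restored by uncommenting llmcompressor>=0.1.0; that PyPI package installs the llmcompressor module, which is the first import path the updated app.py tries.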