Commit b2bf767
Parent(s): 2ddfeca
Clarify LLM Compressor optional status - vLLM has native AWQ support
- LLM Compressor is only needed for quantizing models, not for loading pre-quantized AWQ checkpoints
- vLLM can load AWQ models natively without llm-compressor (see the sketch after the file list below)
- Update the import logic to try both package names
- Comment out llmcompressor in requirements.txt (optional dependency)
- app.py +24 -4
- requirements.txt +3 -1
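The key claim here — that vLLM serves pre-quantized AWQ checkpoints on its own — can be sketched in a few lines. This is an illustrative snippet, not code from this Space; the checkpoint name is a placeholder, and recent vLLM versions can usually infer the quantization method from the checkpoint config:

```python
# Minimal sketch: serving a pre-quantized AWQ model with vLLM only,
# no llm-compressor installed. The model id is a placeholder.
from vllm import LLM, SamplingParams

llm = LLM(model="TheBloke/Llama-2-7B-AWQ", quantization="awq")
params = SamplingParams(temperature=0.7, max_tokens=64)
outputs = llm.generate(["Explain AWQ in one sentence."], params)
print(outputs[0].outputs[0].text)
```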
app.py CHANGED

```diff
@@ -36,14 +36,34 @@ except ImportError:
     SamplingParams = None
     print("Warning: vLLM not available, falling back to Transformers")
 
-# Try to import LLM Compressor (for quantization)
+# Try to import LLM Compressor (for quantization - optional, vLLM has native AWQ support)
+# Note: llm-compressor is only needed for quantizing models, not for loading pre-quantized AWQ models
+# vLLM can load AWQ models natively without llm-compressor
 try:
-    from llmcompressor import oneshot
-    from llmcompressor.modifiers.quantization import AWQModifier
+    # Try both package names (llm-compressor and llmcompressor)
+    try:
+        from llmcompressor import oneshot
+        from llmcompressor.modifiers.quantization import AWQModifier
+    except ImportError:
+        # Try alternative package name
+        import sys
+        import subprocess
+        # Package might be named llm-compressor (with hyphen)
+        try:
+            import importlib.util
+            spec = importlib.util.find_spec("llm_compressor")
+            if spec is None:
+                raise ImportError("llm-compressor not found")
+            from llm_compressor import oneshot
+            from llm_compressor.modifiers.quantization import AWQModifier
+        except ImportError:
+            raise ImportError("Neither llmcompressor nor llm-compressor found")
     LLM_COMPRESSOR_AVAILABLE = True
+    print("Info: LLM Compressor available (for quantizing models)")
 except ImportError:
     LLM_COMPRESSOR_AVAILABLE = False
-    print("Warning: LLM Compressor not available")
+    # This is fine - vLLM has native AWQ support, so we don't need llm-compressor for loading
+    print("Info: LLM Compressor not available (not needed - vLLM has native AWQ support for pre-quantized models)")
 
 # Try to import AWQ (deprecated, but kept for fallback compatibility)
 # Note: AutoAWQ is deprecated; vLLM handles AWQ natively via llm-compressor
```
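Since the import block above only ever uses `oneshot` and `AWQModifier` when a model is being quantized, a hedged sketch of that path may help. It reuses the same import path the diff does; everything else below (model id, calibration dataset, output directory, sample counts) is an illustrative assumption, not a value taken from app.py:

```python
# Hedged sketch of the quantize-a-model path gated by LLM_COMPRESSOR_AVAILABLE,
# using the entry points imported in the diff. All concrete values are placeholders.
from llmcompressor import oneshot
from llmcompressor.modifiers.quantization import AWQModifier

recipe = [AWQModifier(targets=["Linear"], scheme="W4A16", ignore=["lm_head"])]

oneshot(
    model="facebook/opt-125m",     # placeholder model to quantize
    dataset="open_platypus",       # calibration dataset (assumption)
    recipe=recipe,
    output_dir="opt-125m-awq",     # placeholder output path
    max_seq_length=512,
    num_calibration_samples=64,
)
```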
requirements.txt CHANGED

```diff
@@ -8,7 +8,9 @@ spaces
 sentencepiece
 accelerate
 vllm>=0.6.0
-llmcompressor>=0.1.0
+# llm-compressor is optional - only needed for quantizing models, not loading pre-quantized AWQ
+# vLLM has native AWQ support built-in
+# llmcompressor>=0.1.0  # Commented out - not needed for loading pre-quantized models
 autoawq
 flash-attn>=2.5.0
 timm
```
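With the requirement commented out, the app has to fail soft when someone tries to quantize. A minimal, self-contained sketch of that optional-dependency pattern follows; the probe mirrors the `find_spec` check in the diff, and `maybe_quantize` is a hypothetical helper, not a function in app.py:

```python
# Self-contained sketch of the optional-dependency pattern: probe once,
# then gate the feature on the flag. maybe_quantize is hypothetical.
import importlib.util

LLM_COMPRESSOR_AVAILABLE = importlib.util.find_spec("llmcompressor") is not None

def maybe_quantize(model_id: str) -> None:
    if not LLM_COMPRESSOR_AVAILABLE:
        # Loading pre-quantized AWQ models still works; only quantizing needs this.
        raise RuntimeError("Install it with `pip install llmcompressor` to quantize models")
    from llmcompressor import oneshot  # lazy import, only when actually quantizing
    ...
```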