Commit b2bf767
Parent(s): 2ddfeca
Clarify LLM Compressor optional status - vLLM has native AWQ support
- LLM Compressor is only needed for quantizing models, not for loading pre-quantized AWQ checkpoints
- vLLM can load AWQ models natively without llm-compressor (see the sketch after the file list below)
- Update the import logic to try both package names
- Comment out llmcompressor in requirements.txt (optional dependency)
- app.py +24 -4
- requirements.txt +3 -1
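The key claim here — that vLLM serves pre-quantized AWQ checkpoints on its own — can be sketched in a few lines. This is an illustrative snippet, not code from this Space; the checkpoint name is a placeholder, and recent vLLM versions can usually infer the quantization method from the checkpoint config:

```python
# Minimal sketch: serving a pre-quantized AWQ model with vLLM only,
# no llm-compressor installed. The model id is a placeholder.
from vllm import LLM, SamplingParams

llm = LLM(model="TheBloke/Llama-2-7B-AWQ", quantization="awq")
params = SamplingParams(temperature=0.7, max_tokens=64)
outputs = llm.generate(["Explain AWQ in one sentence."], params)
print(outputs[0].outputs[0].text)
```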
app.py CHANGED

```diff
@@ -36,14 +36,34 @@ except ImportError:
     SamplingParams = None
     print("Warning: vLLM not available, falling back to Transformers")
 
-# Try to import LLM Compressor (for quantization)
+# Try to import LLM Compressor (for quantization - optional, vLLM has native AWQ support)
+# Note: llm-compressor is only needed for quantizing models, not for loading pre-quantized AWQ models
+# vLLM can load AWQ models natively without llm-compressor
 try:
-    from llmcompressor import oneshot
-    from llmcompressor.modifiers.quantization import AWQModifier
+    # Try both package names (llm-compressor and llmcompressor)
+    try:
+        from llmcompressor import oneshot
+        from llmcompressor.modifiers.quantization import AWQModifier
+    except ImportError:
+        # Try alternative package name
+        import sys
+        import subprocess
+        # Package might be named llm-compressor (with hyphen)
+        try:
+            import importlib.util
+            spec = importlib.util.find_spec("llm_compressor")
+            if spec is None:
+                raise ImportError("llm-compressor not found")
+            from llm_compressor import oneshot
+            from llm_compressor.modifiers.quantization import AWQModifier
+        except ImportError:
+            raise ImportError("Neither llmcompressor nor llm-compressor found")
     LLM_COMPRESSOR_AVAILABLE = True
+    print("Info: LLM Compressor available (for quantizing models)")
 except ImportError:
     LLM_COMPRESSOR_AVAILABLE = False
-    print("Warning: LLM Compressor not available")
+    # This is fine - vLLM has native AWQ support, so we don't need llm-compressor for loading
+    print("Info: LLM Compressor not available (not needed - vLLM has native AWQ support for pre-quantized models)")
 
 # Try to import AWQ (deprecated, but kept for fallback compatibility)
 # Note: AutoAWQ is deprecated; vLLM handles AWQ natively via llm-compressor
```
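Since the import block above only ever uses `oneshot` and `AWQModifier` when a model is being quantized, a hedged sketch of that path may help. It reuses the same import path the diff does; everything else below (model id, calibration dataset, output directory, sample counts) is an illustrative assumption, not a value taken from app.py:

```python
# Hedged sketch of the quantize-a-model path gated by LLM_COMPRESSOR_AVAILABLE,
# using the entry points imported in the diff. All concrete values are placeholders.
from llmcompressor import oneshot
from llmcompressor.modifiers.quantization import AWQModifier

recipe = [AWQModifier(targets=["Linear"], scheme="W4A16", ignore=["lm_head"])]

oneshot(
    model="facebook/opt-125m",     # placeholder model to quantize
    dataset="open_platypus",       # calibration dataset (assumption)
    recipe=recipe,
    output_dir="opt-125m-awq",     # placeholder output path
    max_seq_length=512,
    num_calibration_samples=64,
)
```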
requirements.txt CHANGED

```diff
@@ -8,7 +8,9 @@ spaces
 sentencepiece
 accelerate
 vllm>=0.6.0
-llmcompressor>=0.1.0
+# llm-compressor is optional - only needed for quantizing models, not loading pre-quantized AWQ
+# vLLM has native AWQ support built-in
+# llmcompressor>=0.1.0  # Commented out - not needed for loading pre-quantized models
 autoawq
 flash-attn>=2.5.0
 timm
```
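With the requirement commented out, the app has to fail soft when someone tries to quantize. A minimal, self-contained sketch of that optional-dependency pattern follows; the probe mirrors the `find_spec` check in the diff, and `maybe_quantize` is a hypothetical helper, not a function in app.py:

```python
# Self-contained sketch of the optional-dependency pattern: probe once,
# then gate the feature on the flag. maybe_quantize is hypothetical.
import importlib.util

LLM_COMPRESSOR_AVAILABLE = importlib.util.find_spec("llmcompressor") is not None

def maybe_quantize(model_id: str) -> None:
    if not LLM_COMPRESSOR_AVAILABLE:
        # Loading pre-quantized AWQ models still works; only quantizing needs this.
        raise RuntimeError("Install it with `pip install llmcompressor` to quantize models")
    from llmcompressor import oneshot  # lazy import, only when actually quantizing
    ...
```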