#!/usr/bin/env python
"""
Final verification test after implementing the AWQ incompatibility check for Qwen2.5-VL models
"""
from app import get_quantization_recipe
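
# get_quantization_recipe(method, model_architecture) is expected to return a
# list of modifier objects (the tests below read recipe[0].ignore and
# recipe[0].sequential_targets) and to raise ValueError for unsupported
# combinations such as AWQ with Qwen2.5-VL.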


def test_qwen2_5_vl_compatible_methods():
    """
    Test all methods that should work with Qwen2.5-VL models.
    """
    print("Testing quantization methods compatible with Qwen2.5-VL models...")
    # Methods that should work
    compatible_methods = ["GPTQ", "W4A16", "W8A16", "W8A8_INT8", "W8A8_FP8", "FP8"]
    all_passed = True
    for method in compatible_methods:
        try:
            recipe = get_quantization_recipe(method, "Qwen2_5_VLForConditionalGeneration")
            print(f"✓ {method} works with Qwen2_5_VLForConditionalGeneration")
            if hasattr(recipe[0], 'sequential_targets') and recipe[0].sequential_targets:
                print(f"  - Uses sequential onloading: {recipe[0].sequential_targets}")
            print(f"  - Ignore patterns: {recipe[0].ignore}")
        except Exception as e:
            print(f"✗ {method} failed: {e}")
            all_passed = False
    return all_passed


def test_awq_incompatibility():
    """
    Test that AWQ properly fails for Qwen2.5-VL models.
    """
    print("\nTesting AWQ incompatibility with Qwen2.5-VL models...")
    try:
        get_quantization_recipe("AWQ", "Qwen2_5_VLForConditionalGeneration")
        print("✗ AWQ unexpectedly succeeded for Qwen2.5-VL (should have failed)")
        return False
    except ValueError as e:
        if "not compatible" in str(e) and "rotary positional embeddings" in str(e):
            print(f"✓ AWQ properly fails for Qwen2.5-VL: {e}")
            return True
        else:
            print(f"✗ AWQ failed, but with the wrong error: {e}")
            return False


def test_awq_still_works_for_llama():
    """
    Test that AWQ still works for Llama models.
    """
    print("\nTesting AWQ still works for Llama models...")
    try:
        recipe = get_quantization_recipe("AWQ", "LlamaForCausalLM")
        print("✓ AWQ still works for LlamaForCausalLM")
        print(f"  - Ignore patterns: {recipe[0].ignore}")
        return True
    except Exception as e:
        print(f"✗ AWQ failed for LlamaForCausalLM: {e}")
        return False


def test_target_model():
    """
    Test with the specific target model.
    """
    print("\nTesting with target model architecture: Qwen2_5_VLForConditionalGeneration")
    # All methods except AWQ should work
    methods = ["GPTQ", "W4A16", "W8A16", "W8A8_INT8", "W8A8_FP8", "FP8"]
    success_count = 0
    for method in methods:
        try:
            get_quantization_recipe(method, "Qwen2_5_VLForConditionalGeneration")
            success_count += 1
        except Exception as e:
            print(f"Method {method} failed: {e}")
    print(f"✓ {success_count}/{len(methods)} methods work for target model")
    return success_count == len(methods)


if __name__ == "__main__":
    print("Final verification after fixing the AWQ incompatibility issue\n")
    test1 = test_qwen2_5_vl_compatible_methods()
    test2 = test_awq_incompatibility()
    test3 = test_awq_still_works_for_llama()
    test4 = test_target_model()

    print(f"\n{'='*60}")
    if test1 and test2 and test3 and test4:
        print("✅ ALL TESTS PASSED")
        print("\nSOLUTION SUMMARY:")
        print("• AWQ is now properly blocked for Qwen2.5-VL models due to incompatibility")
        print("• All other methods (GPTQ, W4A16, W8A16, W8A8_INT8, W8A8_FP8, FP8) work for Qwen2.5-VL")
        print("• AWQ still works for Llama models as expected")
        print("• Sequential onloading is preserved for memory efficiency")
        print("• Users will get clear error messages when trying incompatible methods")
    else:
        print("❌ SOME TESTS FAILED")
    print(f"{'='*60}")