#!/usr/bin/env python
"""
Final verification test after implementing proper AWQ incompatibility
with Qwen2.5-VL models.

Each ``test_*`` function prints a human-readable report and returns a bool
(``True`` on success) instead of asserting. When the file is run as a
script, the process exit status is 0 only if every check returned ``True``.
"""
import sys

from app import get_quantization_recipe

# Architecture name of the vision-language model under test.
QWEN_VL_ARCH = "Qwen2_5_VLForConditionalGeneration"

# Every quantization method except AWQ; these are expected to yield a recipe
# for QWEN_VL_ARCH.
COMPATIBLE_METHODS = ("GPTQ", "W4A16", "W8A16", "W8A8_INT8", "W8A8_FP8", "FP8")


def test_qwen2_5_vl_compatible_methods() -> bool:
    """
    Test all methods that should work with Qwen2.5-VL models.

    Returns:
        True if a recipe could be built (and inspected) for every method in
        COMPATIBLE_METHODS, False if any of them raised.
    """
    print("Testing quantization methods compatible with Qwen2.5-VL models...")

    all_passed = True
    for method in COMPATIBLE_METHODS:
        # The attribute reads stay inside the ``try`` on purpose: a recipe
        # whose first entry cannot be inspected counts as a failure too.
        try:
            recipe = get_quantization_recipe(method, QWEN_VL_ARCH)
            print(f"✓ {method} works with {QWEN_VL_ARCH}")
            first = recipe[0]
            if getattr(first, "sequential_targets", None):
                print(f" - Uses sequential onloading: {first.sequential_targets}")
            # NOTE(review): the source's indentation was lost; this line is
            # assumed to run unconditionally, like the Llama test — confirm.
            print(f" - Ignore patterns: {first.ignore}")
        except Exception as e:
            print(f"✗ {method} failed: {e}")
            all_passed = False

    return all_passed


def test_awq_incompatibility() -> bool:
    """
    Test that AWQ properly fails for Qwen2.5-VL models.

    Returns:
        True only if requesting an AWQ recipe raises ``ValueError`` whose
        message contains both "not compatible" and
        "rotary positional embeddings"; False in every other case.
    """
    print("\nTesting AWQ incompatibility with Qwen2.5-VL models...")

    try:
        get_quantization_recipe("AWQ", QWEN_VL_ARCH)
    except ValueError as e:
        message = str(e)
        if "not compatible" in message and "rotary positional embeddings" in message:
            print(f"✓ AWQ properly fails for Qwen2.5-VL: {e}")
            return True
        print(f"✗ AWQ failed but with wrong error: {e}")
        return False
    except Exception as e:
        # Any other exception type is a failed check, not a reason to abort
        # the whole run before the summary is printed.
        print(f"✗ AWQ failed with unexpected {type(e).__name__}: {e}")
        return False

    print("✗ AWQ unexpectedly succeeded for Qwen2.5-VL (should have failed)")
    return False


def test_awq_still_works_for_llama() -> bool:
    """
    Test that AWQ still works for Llama models.

    Returns:
        True if an AWQ recipe is built for ``LlamaForCausalLM`` and its first
        entry exposes ``ignore``; False if anything raises.
    """
    print("\nTesting AWQ still works for Llama models...")

    try:
        recipe = get_quantization_recipe("AWQ", "LlamaForCausalLM")
        print("✓ AWQ still works for LlamaForCausalLM")
        print(f" - Ignore patterns: {recipe[0].ignore}")
    except Exception as e:
        print(f"✗ AWQ failed for LlamaForCausalLM: {e}")
        return False
    return True


def test_target_model() -> bool:
    """
    Test with the specific target model.

    Counts how many of COMPATIBLE_METHODS produce a recipe for QWEN_VL_ARCH
    without raising.

    Returns:
        True if every method succeeded, False otherwise.
    """
    print(f"\nTesting with target model architecture: {QWEN_VL_ARCH}")

    success_count = 0
    for method in COMPATIBLE_METHODS:
        try:
            get_quantization_recipe(method, QWEN_VL_ARCH)
        except Exception as e:
            print(f"Method {method} failed: {e}")
        else:
            success_count += 1

    print(f"✓ {success_count}/{len(COMPATIBLE_METHODS)} methods work for target model")
    return success_count == len(COMPATIBLE_METHODS)


def main() -> int:
    """
    Run every check, print the summary banner, and return an exit status.

    Returns:
        0 if all checks passed, 1 otherwise.
    """
    print("Final verification after fixing AWQ incompatibility issue\n")

    # Build a list (not a generator) so every check runs and reports even
    # after an earlier one has failed.
    results = [
        test_qwen2_5_vl_compatible_methods(),
        test_awq_incompatibility(),
        test_awq_still_works_for_llama(),
        test_target_model(),
    ]
    passed = all(results)

    print(f"\n{'='*60}")
    if passed:
        print("✅ ALL TESTS PASSED")
        print("\nSOLUTION SUMMARY:")
        print("• AWQ is now properly blocked for Qwen2.5-VL models due to incompatibility")
        print("• All other methods (GPTQ, W4A16, W8A16, W8A8_INT8, W8A8_FP8, FP8) work for Qwen2.5-VL")
        print("• AWQ still works for Llama models as expected")
        print("• Sequential onloading is preserved for memory efficiency")
        print("• Users will get clear error messages when trying incompatible methods")
    else:
        print("❌ SOME TESTS FAILED")
    print(f"{'='*60}")

    return 0 if passed else 1


if __name__ == "__main__":
    # Propagate the outcome to the shell so CI can detect failures.
    sys.exit(main())