Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python | |
| """ | |
| Final verification test after implementing proper AWQ incompatibility with Qwen2.5-VL models | |
| """ | |
| from app import get_quantization_recipe | |
def test_qwen2_5_vl_compatible_methods():
    """
    Test all methods that should work with Qwen2.5-VL models.

    Calls ``get_quantization_recipe`` once per method for the
    ``Qwen2_5_VLForConditionalGeneration`` architecture and prints one
    pass/fail line per method, plus details read from the first recipe entry.

    Returns:
        bool: True if no method raised an exception, False otherwise.
    """
    print("Testing quantization methods compatible with Qwen2.5-VL models...")
    architecture = "Qwen2_5_VLForConditionalGeneration"
    # Methods that should work
    compatible_methods = ["GPTQ", "W4A16", "W8A16", "W8A8_INT8", "W8A8_FP8", "FP8"]
    all_passed = True
    for method in compatible_methods:
        # Broad catch is deliberate: this is a reporting harness, and any
        # failure (including a malformed recipe) should be printed and counted
        # rather than abort the remaining methods.
        try:
            recipe = get_quantization_recipe(method, architecture)
            print(f"✓ {method} works with {architecture}")
            first_entry = recipe[0]
            sequential_targets = getattr(first_entry, "sequential_targets", None)
            if sequential_targets:
                print(f" - Uses sequential onloading: {sequential_targets}")
            print(f" - Ignore patterns: {first_entry.ignore}")
        except Exception as e:
            print(f"✗ {method} failed: {e}")
            all_passed = False
    return all_passed
def test_awq_incompatibility():
    """
    Test that AWQ properly fails for Qwen2.5-VL models.

    The check passes only when ``get_quantization_recipe`` raises a
    ``ValueError`` whose message contains both "not compatible" and
    "rotary positional embeddings".

    Returns:
        bool: True if the expected error was raised, False if the call
        succeeded, raised a ValueError with a different message, or raised
        any other exception type.
    """
    print("\nTesting AWQ incompatibility with Qwen2.5-VL models...")
    try:
        get_quantization_recipe("AWQ", "Qwen2_5_VLForConditionalGeneration")
    except ValueError as e:
        message = str(e)
        if "not compatible" in message and "rotary positional embeddings" in message:
            print(f"✓ AWQ properly fails for Qwen2.5-VL: {e}")
            return True
        print(f"✗ AWQ failed but with wrong error: {e}")
        return False
    except Exception as e:
        # Report unexpected exception types as a failed check instead of
        # letting them abort the whole verification run.
        print(f"✗ AWQ failed but with wrong error type {type(e).__name__}: {e}")
        return False
    print("✗ AWQ unexpectedly succeeded for Qwen2.5-VL (should have failed)")
    return False
def test_awq_still_works_for_llama():
    """
    Test that AWQ still works for Llama models.

    Requests an AWQ recipe for ``LlamaForCausalLM`` and prints the ``ignore``
    attribute of the first recipe entry.

    Returns:
        bool: True if the recipe was built and inspected without raising,
        False otherwise.
    """
    print("\nTesting AWQ still works for Llama models...")
    # Broad catch is deliberate: any failure is reported as a failed check.
    try:
        recipe = get_quantization_recipe("AWQ", "LlamaForCausalLM")
        print("✓ AWQ still works for LlamaForCausalLM")
        print(f" - Ignore patterns: {recipe[0].ignore}")
        return True
    except Exception as e:
        print(f"✗ AWQ failed for LlamaForCausalLM: {e}")
        return False
def test_target_model():
    """
    Test with the specific target model architecture.

    Counts how many of the non-AWQ methods yield a recipe for
    ``Qwen2_5_VLForConditionalGeneration`` without raising, printing each
    failure and a final ``passed/total`` summary line.

    Returns:
        bool: True if every listed method succeeded, False otherwise.
    """
    print("\nTesting with target model architecture: Qwen2_5_VLForConditionalGeneration")
    # All methods except AWQ should work
    methods = ["GPTQ", "W4A16", "W8A16", "W8A8_INT8", "W8A8_FP8", "FP8"]
    success_count = 0
    for method in methods:
        # Broad catch is deliberate: keep going so the summary covers all methods.
        try:
            get_quantization_recipe(method, "Qwen2_5_VLForConditionalGeneration")
        except Exception as e:
            print(f"Method {method} failed: {e}")
        else:
            success_count += 1
    all_succeeded = success_count == len(methods)
    # Only show the success mark when every method actually worked.
    marker = "✓" if all_succeeded else "✗"
    print(f"{marker} {success_count}/{len(methods)} methods work for target model")
    return all_succeeded
if __name__ == "__main__":
    # Script entry point: run all four checks, print a summary banner, and
    # exit with a non-zero status if any check failed.
    print("Final verification after fixing AWQ incompatibility issue\n")
    # Run every check unconditionally so each one prints its own report.
    test1 = test_qwen2_5_vl_compatible_methods()
    test2 = test_awq_incompatibility()
    test3 = test_awq_still_works_for_llama()
    test4 = test_target_model()
    all_ok = test1 and test2 and test3 and test4

    separator = "=" * 60
    print(f"\n{separator}")
    if all_ok:
        print("✓ ALL TESTS PASSED")
        print("\nSOLUTION SUMMARY:")
        print("• AWQ is now properly blocked for Qwen2.5-VL models due to incompatibility")
        print("• All other methods (GPTQ, W4A16, W8A16, W8A8_INT8, W8A8_FP8, FP8) work for Qwen2.5-VL")
        print("• AWQ still works for Llama models as expected")
        print("• Sequential onloading is preserved for memory efficiency")
        print("• Users will get clear error messages when trying incompatible methods")
    else:
        print("✗ SOME TESTS FAILED")
    print(separator)
    if not all_ok:
        # Signal failure to shells and CI; a successful run exits normally.
        raise SystemExit(1)