Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python | |
| """ | |
| Final test to confirm the original issue is resolved: | |
| GPTQ quantization is not supported for Qwen2_5_VLForConditionalGeneration architecture | |
| """ | |
| from app import get_quantization_recipe | |
def test_original_issue_fixed():
    """
    Confirm the originally reported error is fixed.

    The original error was:
    GPTQ quantization is not supported for Qwen2_5_VLForConditionalGeneration architecture

    Returns:
        bool: True when GPTQ, the other original methods (AWQ, FP8), and the
        new methods (W4A16, W8A16, W8A8_INT8, W8A8_FP8) all produce a recipe
        for the Qwen2_5_VLForConditionalGeneration architecture.
    """
    print("Testing the original issue that was reported...")
    print("Original error: GPTQ quantization is not supported for Qwen2_5_VLForConditionalGeneration architecture")
    print()
    # Test the original problematic case
    try:
        recipe = get_quantization_recipe("GPTQ", "Qwen2_5_VLForConditionalGeneration")
        print("β GPTQ quantization recipe created successfully for Qwen2_5_VLForConditionalGeneration")
        print(f" Recipe: {recipe}")
        # NOTE(review): source indentation was lost; both detail prints are
        # assumed to be gated on sequential_targets being set — confirm.
        if hasattr(recipe[0], 'sequential_targets') and recipe[0].sequential_targets:
            print(f" Uses sequential onloading: {recipe[0].sequential_targets}")
            print(f" Ignores visual components: {recipe[0].ignore}")
        success_gptq = True
    except Exception as e:
        print(f"β GPTQ still fails: {e}")
        success_gptq = False
    print()
    # Test other methods that were also problematic
    other_methods = ["AWQ", "FP8"]
    success_others = True
    for method in other_methods:
        try:
            recipe = get_quantization_recipe(method, "Qwen2_5_VLForConditionalGeneration")
            print(f"β {method} quantization recipe created successfully for Qwen2_5_VLForConditionalGeneration")
            if hasattr(recipe[0], 'sequential_targets') and recipe[0].sequential_targets:
                print(f" Uses sequential onloading: {recipe[0].sequential_targets}")
            # (removed the no-op `success_others = success_others and True`)
        except Exception as e:
            print(f"β {method} still fails: {e}")
            success_others = False
    print()
    # Test new methods for Qwen2.5-VL
    new_methods = ["W4A16", "W8A16", "W8A8_INT8", "W8A8_FP8"]
    success_new = True
    for method in new_methods:
        try:
            # Only success/failure matters here; the recipe itself is unused.
            get_quantization_recipe(method, "Qwen2_5_VLForConditionalGeneration")
            print(f"β {method} quantization recipe created successfully for Qwen2_5_VLForConditionalGeneration")
        except Exception as e:
            print(f"β {method} fails: {e}")
            success_new = False
    print()
    if success_gptq and success_others and success_new:
        print("π SUCCESS: The original issue has been completely resolved!")
        print(" - GPTQ now works for Qwen2_5_VLForConditionalGeneration")
        print(" - AWQ now works for Qwen2_5_VLForConditionalGeneration")
        print(" - FP8 now works for Qwen2_5_VLForConditionalGeneration")
        print(" - New methods (W4A16, W8A16, W8A8_INT8, W8A8_FP8) also work!")
        print(" - Sequential onloading is used for memory efficiency")
        print(" - Visual components are properly ignored during quantization")
        return True
    else:
        print("β FAILURE: Some issues remain")
        return False
def test_specific_model():
    """
    Exercise every supported quantization method against the architecture of
    huihui-ai/Huihui-Fara-7B-abliterated (Qwen2_5_VLForConditionalGeneration).

    Returns:
        bool: True when every method yields a recipe without raising.
    """
    print("\n" + "="*60)
    print("Testing with the specific model: huihui-ai/Huihui-Fara-7B-abliterated")
    print("(This model has architecture: Qwen2_5_VLForConditionalGeneration)")
    print("="*60)
    # All the methods that should now work for this model
    methods = ["GPTQ", "AWQ", "FP8", "W4A16", "W8A16", "W8A8_INT8", "W8A8_FP8"]
    success = True
    for method in methods:
        try:
            # Only success/failure matters here; the returned recipe is unused,
            # so the previous `recipe = ...` binding was dropped.
            get_quantization_recipe(method, "Qwen2_5_VLForConditionalGeneration")
            print(f"β {method}: OK")
        except Exception as e:
            print(f"β {method}: FAILED - {e}")
            success = False
    if success:
        print(f"\nπ All {len(methods)} quantization methods now work for the target model!")
        print("Users can now quantize huihui-ai/Huihui-Fara-7B-abliterated with any of these methods.")
    else:
        print("\nβ Some methods still don't work for the target model.")
    return success
if __name__ == "__main__":
    # Run both checks, then print a combined summary banner.
    print("Testing resolution of the original quantization issue...\n")
    issue_fixed = test_original_issue_fixed()
    model_specific = test_specific_model()
    separator = "=" * 60
    print("\n" + separator)
    if issue_fixed and model_specific:
        print("β ALL TESTS PASSED - The issue is completely resolved!")
        print("\nThe Hugging Face Space now supports:")
        print(" β’ All original methods: GPTQ, AWQ, FP8")
        print(" β’ New methods: W4A16, W8A16, W8A8_INT8, W8A8_FP8")
        print(" β’ Sequential onloading for memory efficiency")
        print(" β’ Proper handling of Qwen2.5-VL visual components")
        print(" β’ All methods work with Qwen2_5_VLForConditionalGeneration models")
    else:
        print("β SOME TESTS FAILED - Issue may not be completely resolved")
    print(separator)