llm-compressor-my-repo / test_final_verification.py
n00b001's picture
save
d95ff5b unverified
#!/usr/bin/env python
"""
Final test to confirm the original issue is resolved:
GPTQ quantization is not supported for Qwen2_5_VLForConditionalGeneration architecture
"""
from app import get_quantization_recipe
def test_original_issue_fixed():
"""
Test to confirm the original error is fixed.
The original error was:
GPTQ quantization is not supported for Qwen2_5_VLForConditionalGeneration architecture
"""
print("Testing the original issue that was reported...")
print("Original error: GPTQ quantization is not supported for Qwen2_5_VLForConditionalGeneration architecture")
print()
# Test the original problematic case
try:
recipe = get_quantization_recipe("GPTQ", "Qwen2_5_VLForConditionalGeneration")
print("βœ“ GPTQ quantization recipe created successfully for Qwen2_5_VLForConditionalGeneration")
print(f" Recipe: {recipe}")
if hasattr(recipe[0], 'sequential_targets') and recipe[0].sequential_targets:
print(f" Uses sequential onloading: {recipe[0].sequential_targets}")
print(f" Ignores visual components: {recipe[0].ignore}")
success_gptq = True
except Exception as e:
print(f"βœ— GPTQ still fails: {e}")
success_gptq = False
print()
# Test other methods that were also problematic
other_methods = ["AWQ", "FP8"]
success_others = True
for method in other_methods:
try:
recipe = get_quantization_recipe(method, "Qwen2_5_VLForConditionalGeneration")
print(f"βœ“ {method} quantization recipe created successfully for Qwen2_5_VLForConditionalGeneration")
if hasattr(recipe[0], 'sequential_targets') and recipe[0].sequential_targets:
print(f" Uses sequential onloading: {recipe[0].sequential_targets}")
success_others = success_others and True
except Exception as e:
print(f"βœ— {method} still fails: {e}")
success_others = False
print()
# Test new methods for Qwen2.5-VL
new_methods = ["W4A16", "W8A16", "W8A8_INT8", "W8A8_FP8"]
success_new = True
for method in new_methods:
try:
recipe = get_quantization_recipe(method, "Qwen2_5_VLForConditionalGeneration")
print(f"βœ“ {method} quantization recipe created successfully for Qwen2_5_VLForConditionalGeneration")
success_new = success_new and True
except Exception as e:
print(f"βœ— {method} fails: {e}")
success_new = False
print()
if success_gptq and success_others and success_new:
print("πŸŽ‰ SUCCESS: The original issue has been completely resolved!")
print(" - GPTQ now works for Qwen2_5_VLForConditionalGeneration")
print(" - AWQ now works for Qwen2_5_VLForConditionalGeneration")
print(" - FP8 now works for Qwen2_5_VLForConditionalGeneration")
print(" - New methods (W4A16, W8A16, W8A8_INT8, W8A8_FP8) also work!")
print(" - Sequential onloading is used for memory efficiency")
print(" - Visual components are properly ignored during quantization")
return True
else:
print("❌ FAILURE: Some issues remain")
return False
def test_specific_model():
"""
Test with the specific model mentioned: huihui-ai/Huihui-Fara-7B-abliterated
"""
print("\n" + "="*60)
print("Testing with the specific model: huihui-ai/Huihui-Fara-7B-abliterated")
print("(This model has architecture: Qwen2_5_VLForConditionalGeneration)")
print("="*60)
# All the methods that should now work for this model
methods = ["GPTQ", "AWQ", "FP8", "W4A16", "W8A16", "W8A8_INT8", "W8A8_FP8"]
success = True
for method in methods:
try:
recipe = get_quantization_recipe(method, "Qwen2_5_VLForConditionalGeneration")
print(f"βœ“ {method}: OK")
except Exception as e:
print(f"βœ— {method}: FAILED - {e}")
success = False
if success:
print(f"\nπŸŽ‰ All {len(methods)} quantization methods now work for the target model!")
print("Users can now quantize huihui-ai/Huihui-Fara-7B-abliterated with any of these methods.")
else:
print("\n❌ Some methods still don't work for the target model.")
return success
if __name__ == "__main__":
print("Testing resolution of the original quantization issue...\n")
issue_fixed = test_original_issue_fixed()
model_specific = test_specific_model()
print("\n" + "="*60)
if issue_fixed and model_specific:
print("βœ… ALL TESTS PASSED - The issue is completely resolved!")
print("\nThe Hugging Face Space now supports:")
print(" β€’ All original methods: GPTQ, AWQ, FP8")
print(" β€’ New methods: W4A16, W8A16, W8A8_INT8, W8A8_FP8")
print(" β€’ Sequential onloading for memory efficiency")
print(" β€’ Proper handling of Qwen2.5-VL visual components")
print(" β€’ All methods work with Qwen2_5_VLForConditionalGeneration models")
else:
print("❌ SOME TESTS FAILED - Issue may not be completely resolved")
print("="*60)