Spaces:

n00b001
/

llm-compressor-my-repo

Sleeping

App Files Files Community

llm-compressor-my-repo / test_final_verification.py

n00b001

save

d95ff5b unverified 14 days ago

raw

history blame contribute delete

5.23 kB

	#!/usr/bin/env python
	"""
	Final test to confirm the original issue is resolved:
	GPTQ quantization is not supported for Qwen2_5_VLForConditionalGeneration architecture
	"""

	from app import get_quantization_recipe

	def test_original_issue_fixed():
	"""
	Test to confirm the original error is fixed.
	The original error was:
	GPTQ quantization is not supported for Qwen2_5_VLForConditionalGeneration architecture
	"""
	print("Testing the original issue that was reported...")
	print("Original error: GPTQ quantization is not supported for Qwen2_5_VLForConditionalGeneration architecture")
	print()

	# Test the original problematic case
	try:
	recipe = get_quantization_recipe("GPTQ", "Qwen2_5_VLForConditionalGeneration")
	print("✓ GPTQ quantization recipe created successfully for Qwen2_5_VLForConditionalGeneration")
	print(f" Recipe: {recipe}")
	if hasattr(recipe[0], 'sequential_targets') and recipe[0].sequential_targets:
	print(f" Uses sequential onloading: {recipe[0].sequential_targets}")
	print(f" Ignores visual components: {recipe[0].ignore}")
	success_gptq = True
	except Exception as e:
	print(f"✗ GPTQ still fails: {e}")
	success_gptq = False

	print()

	# Test other methods that were also problematic
	other_methods = ["AWQ", "FP8"]
	success_others = True
	for method in other_methods:
	try:
	recipe = get_quantization_recipe(method, "Qwen2_5_VLForConditionalGeneration")
	print(f"✓ {method} quantization recipe created successfully for Qwen2_5_VLForConditionalGeneration")
	if hasattr(recipe[0], 'sequential_targets') and recipe[0].sequential_targets:
	print(f" Uses sequential onloading: {recipe[0].sequential_targets}")
	success_others = success_others and True
	except Exception as e:
	print(f"✗ {method} still fails: {e}")
	success_others = False

	print()

	# Test new methods for Qwen2.5-VL
	new_methods = ["W4A16", "W8A16", "W8A8_INT8", "W8A8_FP8"]
	success_new = True
	for method in new_methods:
	try:
	recipe = get_quantization_recipe(method, "Qwen2_5_VLForConditionalGeneration")
	print(f"✓ {method} quantization recipe created successfully for Qwen2_5_VLForConditionalGeneration")
	success_new = success_new and True
	except Exception as e:
	print(f"✗ {method} fails: {e}")
	success_new = False

	print()

	if success_gptq and success_others and success_new:
	print("🎉 SUCCESS: The original issue has been completely resolved!")
	print(" - GPTQ now works for Qwen2_5_VLForConditionalGeneration")
	print(" - AWQ now works for Qwen2_5_VLForConditionalGeneration")
	print(" - FP8 now works for Qwen2_5_VLForConditionalGeneration")
	print(" - New methods (W4A16, W8A16, W8A8_INT8, W8A8_FP8) also work!")
	print(" - Sequential onloading is used for memory efficiency")
	print(" - Visual components are properly ignored during quantization")
	return True
	else:
	print("❌ FAILURE: Some issues remain")
	return False

	def test_specific_model():
	"""
	Test with the specific model mentioned: huihui-ai/Huihui-Fara-7B-abliterated
	"""
	print("\n" + "="*60)
	print("Testing with the specific model: huihui-ai/Huihui-Fara-7B-abliterated")
	print("(This model has architecture: Qwen2_5_VLForConditionalGeneration)")
	print("="*60)

	# All the methods that should now work for this model
	methods = ["GPTQ", "AWQ", "FP8", "W4A16", "W8A16", "W8A8_INT8", "W8A8_FP8"]

	success = True
	for method in methods:
	try:
	recipe = get_quantization_recipe(method, "Qwen2_5_VLForConditionalGeneration")
	print(f"✓ {method}: OK")
	except Exception as e:
	print(f"✗ {method}: FAILED - {e}")
	success = False

	if success:
	print(f"\n🎉 All {len(methods)} quantization methods now work for the target model!")
	print("Users can now quantize huihui-ai/Huihui-Fara-7B-abliterated with any of these methods.")
	else:
	print("\n❌ Some methods still don't work for the target model.")

	return success

	if __name__ == "__main__":
	print("Testing resolution of the original quantization issue...\n")

	issue_fixed = test_original_issue_fixed()
	model_specific = test_specific_model()

	print("\n" + "="*60)
	if issue_fixed and model_specific:
	print("✅ ALL TESTS PASSED - The issue is completely resolved!")
	print("\nThe Hugging Face Space now supports:")
	print(" • All original methods: GPTQ, AWQ, FP8")
	print(" • New methods: W4A16, W8A16, W8A8_INT8, W8A8_FP8")
	print(" • Sequential onloading for memory efficiency")
	print(" • Proper handling of Qwen2.5-VL visual components")
	print(" • All methods work with Qwen2_5_VLForConditionalGeneration models")
	else:
	print("❌ SOME TESTS FAILED - Issue may not be completely resolved")
	print("="*60)