#!/usr/bin/env python
"""
Final verification test after implementing the AWQ incompatibility check for Qwen2.5-VL models
"""
from app import get_quantization_recipe
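
# get_quantization_recipe(method, model_architecture) is expected to return a
# list of modifier objects (the tests below read recipe[0].ignore and
# recipe[0].sequential_targets) and to raise ValueError for unsupported
# combinations such as AWQ with Qwen2.5-VL.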


def test_qwen2_5_vl_compatible_methods():
    """
    Test all methods that should work with Qwen2.5-VL models.
    """
    print("Testing quantization methods compatible with Qwen2.5-VL models...")
    # Methods that should work
    compatible_methods = ["GPTQ", "W4A16", "W8A16", "W8A8_INT8", "W8A8_FP8", "FP8"]
    all_passed = True
    for method in compatible_methods:
        try:
            recipe = get_quantization_recipe(method, "Qwen2_5_VLForConditionalGeneration")
            print(f"✓ {method} works with Qwen2_5_VLForConditionalGeneration")
            if hasattr(recipe[0], 'sequential_targets') and recipe[0].sequential_targets:
                print(f"  - Uses sequential onloading: {recipe[0].sequential_targets}")
            print(f"  - Ignore patterns: {recipe[0].ignore}")
        except Exception as e:
            print(f"✗ {method} failed: {e}")
            all_passed = False
    return all_passed


def test_awq_incompatibility():
    """
    Test that AWQ properly fails for Qwen2.5-VL models.
    """
    print("\nTesting AWQ incompatibility with Qwen2.5-VL models...")
    try:
        get_quantization_recipe("AWQ", "Qwen2_5_VLForConditionalGeneration")
        print("✗ AWQ unexpectedly succeeded for Qwen2.5-VL (should have failed)")
        return False
    except ValueError as e:
        if "not compatible" in str(e) and "rotary positional embeddings" in str(e):
            print(f"✓ AWQ properly fails for Qwen2.5-VL: {e}")
            return True
        else:
            print(f"✗ AWQ failed, but with the wrong error: {e}")
            return False


def test_awq_still_works_for_llama():
    """
    Test that AWQ still works for Llama models.
    """
    print("\nTesting AWQ still works for Llama models...")
    try:
        recipe = get_quantization_recipe("AWQ", "LlamaForCausalLM")
        print("✓ AWQ still works for LlamaForCausalLM")
        print(f"  - Ignore patterns: {recipe[0].ignore}")
        return True
    except Exception as e:
        print(f"✗ AWQ failed for LlamaForCausalLM: {e}")
        return False


def test_target_model():
    """
    Test with the specific target model.
    """
    print("\nTesting with target model architecture: Qwen2_5_VLForConditionalGeneration")
    # All methods except AWQ should work
    methods = ["GPTQ", "W4A16", "W8A16", "W8A8_INT8", "W8A8_FP8", "FP8"]
    success_count = 0
    for method in methods:
        try:
            get_quantization_recipe(method, "Qwen2_5_VLForConditionalGeneration")
            success_count += 1
        except Exception as e:
            print(f"Method {method} failed: {e}")
    print(f"✓ {success_count}/{len(methods)} methods work for target model")
    return success_count == len(methods)


if __name__ == "__main__":
    print("Final verification after fixing the AWQ incompatibility issue\n")
    test1 = test_qwen2_5_vl_compatible_methods()
    test2 = test_awq_incompatibility()
    test3 = test_awq_still_works_for_llama()
    test4 = test_target_model()

    print(f"\n{'='*60}")
    if test1 and test2 and test3 and test4:
        print("✅ ALL TESTS PASSED")
        print("\nSOLUTION SUMMARY:")
        print("• AWQ is now properly blocked for Qwen2.5-VL models due to incompatibility")
        print("• All other methods (GPTQ, W4A16, W8A16, W8A8_INT8, W8A8_FP8, FP8) work for Qwen2.5-VL")
        print("• AWQ still works for Llama models as expected")
        print("• Sequential onloading is preserved for memory efficiency")
        print("• Users will get clear error messages when trying incompatible methods")
    else:
        print("❌ SOME TESTS FAILED")
    print(f"{'='*60}")