Spaces:

danishjameel003
/

newtestingdanish

Sleeping

App Files Files Community

newtestingdanish / test_enhanced_feedback.py

aghaai

Fresh commit of all updated files

459923e about 2 months ago

raw

history blame contribute delete

15.2 kB

	#!/usr/bin/env python3
	"""
	Test script for the enhanced Feedback system.
	Demonstrates all new features while maintaining backward compatibility.
	"""

	import json
	import logging
	from Feedback import Grader

	# Configure the root logger: INFO and above, each record prefixed with a
	# timestamp and its level name.
	logging.basicConfig(
	    format='%(asctime)s - %(levelname)s - %(message)s',
	    level=logging.INFO,
	)
	logger = logging.getLogger(__name__)

	def test_basic_functionality():
	    """Grade a short essay with a ``Grader`` built from an API key only.

	    Prints the overall score, the number of sections and whether the
	    returned feedback contains a ``chunk_analysis`` entry. Exceptions raised
	    while grading are caught and reported on stdout instead of propagating.
	    """
	    divider = "=" * 60
	    print("\n" + divider)
	    print("TEST 1: Basic Functionality (Backward Compatibility)")
	    print(divider)

	    # No config argument: the grader runs with whatever defaults it defines.
	    grader = Grader(api_key="your-api-key-here")

	    training_context = "Evaluate this essay on climate change."

	    # Short input; the test expects it to be graded without chunking.
	    short_essay = """
	Climate change is a serious global issue. The burning of fossil fuels releases
	greenhouse gases that trap heat in the atmosphere. This causes global temperatures
	to rise, leading to melting ice caps and extreme weather events. We must take
	action to reduce our carbon footprint and transition to renewable energy sources.
	"""

	    try:
	        result = grader.grade_answer_with_gpt(short_essay, training_context)
	        print("✅ Basic functionality works!")

	        overall = result.get('overall_score', 'N/A')
	        print(f"Overall score: {overall}")

	        section_total = len(result.get('sections', []))
	        print(f"Number of sections: {section_total}")

	        # A 'chunk_analysis' key signals that the essay was split up.
	        if 'chunk_analysis' not in result:
	            print("No chunking used (as expected for short essay)")
	        else:
	            chunk_total = result['chunk_analysis']['total_chunks']
	            print(f"Chunking was used: {chunk_total} chunks")

	    except Exception as err:
	        print(f"❌ Basic functionality failed: {err}")

	def test_chunking_functionality():
	    """Validate and grade a multi-paragraph essay with chunking enabled.

	    Prints the result of ``validate_essay_length``, then the overall score
	    and, when present in the feedback, the ``chunk_analysis`` and
	    ``token_info`` details followed by one line per section. Exceptions
	    raised inside the ``try`` block are caught and reported on stdout.
	    """

	    def show(mapping, fields):
	        # Print one " - label: value" line per (label, key) pair, in order.
	        for label, key in fields:
	            print(f" - {label}: {mapping[key]}")

	    divider = "=" * 60
	    print("\n" + divider)
	    print("TEST 2: Chunking Functionality")
	    print(divider)

	    # Grader configured with chunking explicitly switched on.
	    grader = Grader(
	        api_key="your-api-key-here",
	        config={
	            'enable_chunking': True,
	            'max_chunk_tokens': 4000,
	            'enable_validation': True,
	            'warn_on_truncation': True,
	        },
	    )

	    training_context = "Evaluate this comprehensive essay on climate change, considering its structure, arguments, and evidence."

	    # Outline plus seven paragraphs used as the "long" input.
	    long_essay = """
	OUTLINE:
	1. Introduction
	2. Historical Context
	3. Current Challenges
	4. Future Implications
	5. Conclusion

	ESSAY:

	Climate change represents one of the most pressing challenges facing humanity in the 21st century.
	The scientific consensus is clear: human activities, particularly the burning of fossil fuels,
	have significantly contributed to the warming of our planet. This essay will explore the historical
	context of climate change, examine current challenges, and discuss future implications for our world.

	Historically, the Industrial Revolution marked the beginning of significant human impact on the
	Earth's climate. The widespread use of coal, oil, and natural gas for energy production has led
	to unprecedented levels of carbon dioxide and other greenhouse gases in the atmosphere. These
	gases trap heat from the sun, creating what scientists call the greenhouse effect. While this
	effect is natural and necessary for life on Earth, human activities have intensified it to
	dangerous levels.

	The current challenges posed by climate change are multifaceted and far-reaching. Rising global
	temperatures have led to the melting of polar ice caps and glaciers, contributing to sea level
	rise that threatens coastal communities worldwide. Extreme weather events, including hurricanes,
	droughts, and heatwaves, have become more frequent and intense. These changes affect not only
	the environment but also human health, agriculture, and economic stability.

	Furthermore, climate change exacerbates existing social and economic inequalities. Developing
	nations, which have contributed least to the problem, often bear the brunt of climate impacts.
	Small island nations face existential threats from rising sea levels, while agricultural
	communities struggle with changing weather patterns that affect crop yields. The economic costs
	of climate change are staggering, with estimates suggesting trillions of dollars in potential
	damages if action is not taken.

	Looking to the future, the implications of climate change become even more concerning if current
	trends continue. Scientists warn that we may reach critical tipping points, such as the collapse
	of major ice sheets or the release of methane from thawing permafrost, which could accelerate
	warming beyond our ability to control. The window for meaningful action is closing rapidly,
	making immediate and coordinated global response essential.

	However, there is reason for hope. Renewable energy technologies, such as solar and wind power,
	have become increasingly cost-effective and widespread. Many countries have committed to reducing
	their greenhouse gas emissions through international agreements like the Paris Climate Accord.
	Individual actions, from reducing energy consumption to supporting sustainable practices, can
	collectively make a significant difference.

	In conclusion, climate change represents a complex challenge that requires immediate attention
	and coordinated action at all levels of society. While the challenges are significant, the
	solutions are within our reach. Through technological innovation, policy changes, and individual
	commitment, we can work toward a more sustainable future. The time to act is now, as the
	consequences of inaction will only become more severe with each passing year.
	"""

	    try:
	        # Length report first, before any grading call is made.
	        length_analysis = grader.validate_essay_length(long_essay)
	        print("Essay analysis:")
	        show(length_analysis, (
	            ("Tokens", 'token_count'),
	            ("Words", 'word_count'),
	            ("Chunking needed", 'chunking_needed'),
	            ("Recommendation", 'processing_recommendation'),
	        ))

	        feedback = grader.grade_answer_with_gpt(long_essay, training_context)

	        print("\n✅ Chunking functionality works!")
	        overall = feedback.get('overall_score', 'N/A')
	        print(f"Overall score: {overall}")

	        # Chunk bookkeeping is optional in the feedback.
	        if 'chunk_analysis' in feedback:
	            print("Chunking details:")
	            show(feedback['chunk_analysis'], (
	                ("Total chunks", 'total_chunks'),
	                ("Chunks processed", 'chunks_processed'),
	                ("Aggregation method", 'aggregation_method'),
	            ))

	        # Token bookkeeping is optional as well.
	        if 'token_info' in feedback:
	            token_info = feedback['token_info']
	            print("Token processing:")
	            show(token_info, (
	                ("Original tokens", 'original_tokens'),
	                ("Processed tokens", 'processed_tokens'),
	                ("Was truncated", 'was_truncated'),
	            ))
	            if 'chunked_processing' in token_info:
	                show(token_info, (("Chunked processing", 'chunked_processing'),))

	        print("\nSection scores:")
	        for entry in feedback.get('sections', []):
	            name = entry['name']
	            score = entry['score']
	            issues = entry['issues_count']
	            print(f" - {name}: {score}% ({issues} issues)")

	    except Exception as err:
	        print(f"❌ Chunking functionality failed: {err}")

	def test_question_specific_grading():
	    """Grade an essay against an explicit question.

	    Calls ``grade_answer_with_question`` and prints the overall score plus,
	    when the feedback has a ``question_specific_feedback`` entry, the
	    question, its relevance score and the counts of covered and missing
	    aspects. Exceptions raised while grading are caught and printed.
	    """
	    divider = "=" * 60
	    print("\n" + divider)
	    print("TEST 3: Question-Specific Grading")
	    print(divider)

	    grader = Grader(api_key="your-api-key-here")

	    question = "What are the main causes and effects of climate change?"

	    essay = """
	Climate change is caused by human activities like burning fossil fuels and deforestation.
	These activities release greenhouse gases that trap heat in the atmosphere, causing global
	temperatures to rise. The effects include melting ice caps, rising sea levels, and more
	extreme weather events. Solutions include transitioning to renewable energy, reducing
	emissions, and protecting forests.
	"""

	    try:
	        result = grader.grade_answer_with_question(essay, question)

	        print("✅ Question-specific grading works!")
	        overall = result.get('overall_score', 'N/A')
	        print(f"Overall score: {overall}")

	        # The question-level breakdown is only reported when provided.
	        if 'question_specific_feedback' in result:
	            details = result['question_specific_feedback']

	            asked = details.get('question', 'N/A')
	            print(f"Question: {asked}")

	            relevance = details.get('question_relevance_score', 'N/A')
	            print(f"Question relevance score: {relevance}%")

	            covered_total = len(details.get('covered_aspects', []))
	            print(f"Covered aspects: {covered_total}")

	            missing_total = len(details.get('missing_aspects', []))
	            print(f"Missing aspects: {missing_total}")

	    except Exception as err:
	        print(f"❌ Question-specific grading failed: {err}")

	def test_configuration_management():
	    """Exercise the grader's configuration accessors.

	    Builds a ``Grader`` with a custom config, prints the ``configuration``
	    and ``capabilities`` entries of ``get_processing_stats()``, then calls
	    ``update_config`` and ``reset_to_defaults`` and prints the resulting
	    ``max_chunk_tokens`` after each. Exceptions are caught and printed.
	    """
	    divider = "=" * 60
	    print("\n" + divider)
	    print("TEST 4: Configuration Management")
	    print(divider)

	    # Custom starting configuration handed to the grader.
	    grader = Grader(
	        api_key="your-api-key-here",
	        config={
	            'enable_chunking': True,
	            'max_chunk_tokens': 3000,
	            'enable_granular_feedback': False,
	            'enable_validation': True,
	            'warn_on_truncation': True,
	        },
	    )

	    try:
	        stats = grader.get_processing_stats()
	        print("✅ Configuration management works!")

	        # Dump both stat groups using the same " - key: value" layout.
	        for heading, group in (
	            ("Current configuration:", 'configuration'),
	            ("\nCapabilities:", 'capabilities'),
	        ):
	            print(heading)
	            for name, setting in stats[group].items():
	                print(f" - {name}: {setting}")

	        # Change a single setting and read it back.
	        print("\nUpdating configuration...")
	        grader.update_config({'max_chunk_tokens': 5000})
	        after_update = grader.get_processing_stats()
	        updated_limit = after_update['configuration']['max_chunk_tokens']
	        print(f"Updated max_chunk_tokens: {updated_limit}")

	        # Reset and read the same setting back once more.
	        print("\nResetting to defaults...")
	        grader.reset_to_defaults()
	        after_reset = grader.get_processing_stats()
	        default_limit = after_reset['configuration']['max_chunk_tokens']
	        print(f"Default max_chunk_tokens: {default_limit}")

	    except Exception as err:
	        print(f"❌ Configuration management failed: {err}")

	def test_validation_and_error_recovery():
	    """Grade a two-sentence essay and check the section names returned.

	    Prints the overall score and compares the ``name`` of every returned
	    section against a fixed list of eight expected categories, printing the
	    ones that are absent. Exceptions are caught and printed.
	    """
	    divider = "=" * 60
	    print("\n" + divider)
	    print("TEST 5: Validation and Error Recovery")
	    print(divider)

	    grader = Grader(
	        api_key="your-api-key-here",
	        config={
	            'enable_validation': True,
	            'enable_enhanced_logging': True,
	            'log_missing_categories': True,
	        },
	    )

	    # Deliberately tiny input.
	    short_essay = "Climate change is bad. We should fix it."

	    # Category names the feedback is expected to contain.
	    required_categories = (
	        'Grammar & Punctuation',
	        'Vocabulary Usage',
	        'Sentence Structure',
	        'Content Relevance & Depth',
	        'Argument Development',
	        'Evidence & Citations',
	        'Structure & Organization',
	        'Conclusion Quality',
	    )

	    try:
	        result = grader.grade_answer_with_gpt(short_essay, "Evaluate this essay.")

	        print("✅ Validation and error recovery works!")
	        overall = result.get('overall_score', 'N/A')
	        print(f"Overall score: {overall}")

	        # Collect the names actually returned, then list what is absent.
	        returned_names = []
	        for entry in result.get('sections', []):
	            returned_names.append(entry['name'])

	        absent = []
	        for category in required_categories:
	            if category not in returned_names:
	                absent.append(category)

	        if not absent:
	            print("✅ All required categories present")
	        else:
	            print(f"⚠️ Missing categories: {absent}")

	    except Exception as err:
	        print(f"❌ Validation and error recovery failed: {err}")

	def test_essay_length_validation():
	    """Run ``validate_essay_length`` on three essays of increasing size.

	    For each essay the token count, word count, chunking flag and
	    recommendation are printed, followed by the warnings when that entry is
	    truthy. Exceptions are caught and printed.
	    """
	    divider = "=" * 60
	    print("\n" + divider)
	    print("TEST 6: Essay Length Validation")
	    print(divider)

	    grader = Grader(api_key="your-api-key-here")

	    # (essay text, label) pairs; the last one repeats a sentence 100 times.
	    samples = [
	        ("Very short essay.", "Short essay test"),
	        ("This is a medium length essay with several sentences. It contains enough content to test the system properly.", "Medium essay test"),
	        ("This is a very long essay. " * 100, "Long essay test"),
	    ]

	    # Report lines, in output order, as (label, key in the analysis).
	    report_fields = (
	        ("Tokens", 'token_count'),
	        ("Words", 'word_count'),
	        ("Chunking needed", 'chunking_needed'),
	        ("Recommendation", 'processing_recommendation'),
	    )

	    try:
	        for text, label in samples:
	            print(f"\nTesting {label}:")
	            report = grader.validate_essay_length(text)

	            for caption, key in report_fields:
	                print(f" - {caption}: {report[key]}")

	            warnings_found = report['warnings']
	            if warnings_found:
	                print(f" - Warnings: {warnings_found}")

	        print("\n✅ Essay length validation works!")

	    except Exception as err:
	        print(f"❌ Essay length validation failed: {err}")

	def main():
	    """Run all six demo tests in order and print a summary.

	    Every test is invoked inside its own ``try`` block. Each test builds its
	    ``Grader`` before entering its own ``try``, so an exception can escape a
	    test; when that happens it is reported here and the remaining tests
	    still run instead of the whole suite being aborted.

	    The individual tests catch and print their own grading errors, so this
	    function cannot tell whether a test that returned normally succeeded.
	    The closing banner therefore only states that the tests completed and
	    points the reader at the per-test output.

	    Returns:
	        int: ``0`` if no test raised, ``1`` if at least one did.
	    """
	    divider = "=" * 60
	    print("Enhanced Feedback System - Test Suite")
	    print(divider)
	    print("This test suite demonstrates all enhanced features while")
	    print("maintaining backward compatibility with existing code.")
	    print(divider)

	    # Note: Replace "your-api-key-here" with actual API key for real testing
	    print("\n⚠️ NOTE: Replace 'your-api-key-here' with actual OpenAI API key for real testing")
	    print(" The tests will show the structure but won't make actual API calls without a valid key.\n")

	    tests = (
	        test_basic_functionality,
	        test_chunking_functionality,
	        test_question_specific_grading,
	        test_configuration_management,
	        test_validation_and_error_recovery,
	        test_essay_length_validation,
	    )

	    crashed = []
	    for test in tests:
	        try:
	            test()
	        except Exception as e:
	            # Top-level boundary: record the crash and keep going so one
	            # broken test does not hide the results of the others.
	            crashed.append(test.__name__)
	            print(f"\n❌ {test.__name__} raised {type(e).__name__}: {e}")

	    print("\n" + divider)
	    if crashed:
	        print(f"❌ Test suite failed: {len(crashed)} test(s) raised: {', '.join(crashed)}")
	        print(divider)
	        return 1

	    print("✅ ALL TESTS COMPLETED")
	    print(divider)
	    # No blanket success claim: tests report their own failures with ❌.
	    print("Review the per-test output above; any ❌ line marks a test that reported a failure.")
	    return 0


	if __name__ == "__main__":
	    # Use main()'s return value as the process exit status.
	    raise SystemExit(main())