|
|
|
|
|
"""
|
|
|
Quick test for the enhanced quality scoring system
|
|
|
"""
|
|
|
|
|
|
import sys
|
|
|
import os
|
|
|
|
|
|
|
|
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
|
|
|
|
from app import (
|
|
|
calculate_quality_score,
|
|
|
generate_comprehensive_quality_report,
|
|
|
suggest_quality_improvements
|
|
|
)
|
|
|
|
|
|
def test_quality_scoring():
    """Score one hard-coded sample conversion and print everything produced.

    Builds three fixed dictionaries (source DOCX description, PDF validation
    outcome, post-processing outcome), feeds them to
    ``calculate_quality_score``, ``generate_comprehensive_quality_report`` and
    ``suggest_quality_improvements`` in that order, and prints each result.

    Returns:
        The value returned by ``calculate_quality_score`` for the sample data.
    """
    print("🧪 Testing Enhanced Quality Scoring System")
    print("=" * 50)

    # Description of the source document.
    sample_docx = {
        "text_content_length": 1573,
        "font_families": {"Arial"},
        "has_tables": True,
        "has_images": True,
        "rtl_content_detected": True,
        "placeholder_count": 9,
        "has_textboxes": False,
        "has_smartart": False,
        "has_complex_shapes": False,
        "table_structure_issues": ["Complex cell merging detected"],
    }

    # Outcome of validating the generated PDF.
    sample_pdf = {
        "file_size_mb": 0.12,
        "file_exists": True,
        "size_reasonable": True,
        "warnings": [],
        "success_metrics": [
            "PDF file size is reasonable",
            "Document contains tables - formatting preservation critical",
            "Document contains images - quality preservation applied",
            "Font substitution applied for 1 font families",
        ],
    }

    # Outcome of the post-processing pass.
    sample_post = {
        "pages_processed": 1,
        "placeholders_verified": 9,
        "tables_verified": 1,
        "arabic_text_verified": 150,
        "layout_issues_fixed": 0,
        "warnings": [],
        "success_metrics": [
            "All 9 placeholders preserved",
            "Arabic RTL text verified: 150 characters",
            "Table structure preserved",
        ],
    }

    # The same three inputs are handed to every helper below.
    inputs = (sample_docx, sample_pdf, sample_post)

    score = calculate_quality_score(*inputs)
    print(f"🏆 Enhanced Quality Score: {score:.1f}%")

    report = generate_comprehensive_quality_report(*inputs)
    print("\n📋 Enhanced Quality Report:")
    print(report)

    # The suggestion helper additionally receives the computed score.
    tips = suggest_quality_improvements(*inputs, score)
    print("\n💡 Improvement Suggestions:")
    for tip in tips:
        print(tip)

    return score
|
|
|
|
|
|
def test_different_scenarios():
    """Score two hard-coded scenarios and print a rating label for each.

    For every scenario the name is printed, ``calculate_quality_score`` is
    called with the scenario's three dictionaries, and the score is printed
    along with a label chosen by threshold: >= 95, >= 85, >= 75, >= 65, or
    the fallback label for anything lower.
    """
    rule = "=" * 50
    print("\n" + rule)
    print("🔬 Testing Different Quality Scenarios")
    print(rule)

    # Rating bands, checked from the highest threshold down; first match wins.
    rating_bands = (
        (95, " Result: 🌟 EXCELLENT"),
        (85, " Result: ✅ VERY GOOD"),
        (75, " Result: 👍 GOOD"),
        (65, " Result: ⚠️ FAIR"),
    )
    lowest_rating = " Result: ❌ NEEDS IMPROVEMENT"

    cases = (
        {
            "name": "Perfect Conversion",
            "docx_info": {
                "text_content_length": 1000,
                "font_families": {"Arial"},
                "has_tables": True,
                "has_images": False,
                "rtl_content_detected": True,
                "placeholder_count": 5,
                "has_textboxes": False,
                "has_smartart": False,
                "has_complex_shapes": False,
                "table_structure_issues": [],
            },
            "pdf_validation": {
                "file_size_mb": 0.5,
                "warnings": [],
                "success_metrics": ["Perfect conversion", "All elements preserved"],
            },
            "post_process_results": {
                "pages_processed": 1,
                "placeholders_verified": 5,
                "tables_verified": 1,
                "arabic_text_verified": 200,
                "warnings": [],
                "success_metrics": ["All placeholders preserved", "Arabic text verified"],
            },
        },
        {
            "name": "Complex Document with Issues",
            "docx_info": {
                "text_content_length": 5000,
                "font_families": {"Arial", "Traditional Arabic"},
                "has_tables": True,
                "has_images": True,
                "rtl_content_detected": True,
                "placeholder_count": 10,
                "has_textboxes": True,
                "has_smartart": True,
                "has_complex_shapes": True,
                "table_structure_issues": ["Nested tables", "Complex merging"],
            },
            "pdf_validation": {
                "file_size_mb": 2.5,
                "warnings": ["Large file size"],
                "success_metrics": ["Basic conversion completed"],
            },
            "post_process_results": {
                "pages_processed": 3,
                "placeholders_verified": 8,
                "tables_verified": 2,
                "arabic_text_verified": 500,
                "warnings": ["Some layout issues detected"],
                "success_metrics": ["Most elements preserved"],
            },
        },
    )

    for case in cases:
        print(f"\n📊 Scenario: {case['name']}")
        score = calculate_quality_score(
            case["docx_info"],
            case["pdf_validation"],
            case["post_process_results"],
        )
        print(f" Quality Score: {score:.1f}%")

        # Pick the first band whose threshold the score reaches.
        verdict = next(
            (label for floor, label in rating_bands if score >= floor),
            lowest_rating,
        )
        print(verdict)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
|
|
|
actual_score = test_quality_scoring()
|
|
|
|
|
|
|
|
|
test_different_scenarios()
|
|
|
|
|
|
print(f"\n" + "=" * 50)
|
|
|
print(f"🎯 SUMMARY")
|
|
|
print(f"=" * 50)
|
|
|
print(f"Your document achieved: {actual_score:.1f}%")
|
|
|
|
|
|
if actual_score >= 90:
|
|
|
print("🌟 Excellent quality! The enhanced system is working perfectly.")
|
|
|
elif actual_score >= 80:
|
|
|
print("✅ Good quality! Minor improvements applied successfully.")
|
|
|
elif actual_score >= 70:
|
|
|
print("👍 Acceptable quality. The system detected and addressed issues.")
|
|
|
else:
|
|
|
print("⚠️ Quality needs improvement. The system provided detailed suggestions.")
|
|
|
|
|
|
print(f"\n💡 The enhanced quality scoring system now provides:")
|
|
|
print(f" • More accurate quality assessment")
|
|
|
print(f" • Detailed improvement suggestions")
|
|
|
print(f" • Better handling of complex documents")
|
|
|
print(f" • Comprehensive quality reports")
|
|
|
|