"""Test the new filename formatting""" import os import sys import datetime import inspect # Add the project root to the path so we can import modules sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) # Import the main utils.py file directly import utils as root_utils print(f"Imported utils from: {root_utils.__file__}") print("Current create_descriptive_filename implementation:") print(inspect.getsource(root_utils.create_descriptive_filename)) def main(): """Test the filename formatting""" # Sample inputs sample_files = [ "handwritten-letter.jpg", "magician-or-bottle-cungerer.jpg", "baldwin_15th_north.jpg", "harpers.pdf", "recipe.jpg" ] # Sample OCR results for testing sample_results = [ { "detected_document_type": "handwritten", "topics": ["Letter", "Handwritten", "19th Century", "Personal Correspondence"] }, { "topics": ["Newspaper", "Print", "19th Century", "Illustration", "Advertisement"] }, { "detected_document_type": "letter", "topics": ["Correspondence", "Early Modern", "English Language"] }, { "detected_document_type": "magazine", "topics": ["Publication", "Late 19th Century", "Magazine", "Historical"] }, { "detected_document_type": "recipe", "topics": ["Food", "Culinary", "Historical", "Instruction"] } ] print("\nIMPROVED FILENAME FORMATTING TEST") print("=" * 50) # Format current date manually current_date = datetime.datetime.now().strftime("%b %d, %Y") print(f"Current date for filenames: {current_date}") print("\nBEFORE vs AFTER Examples:\n") for i, (original_file, result) in enumerate(zip(sample_files, sample_results)): # Get file extension from original file file_ext = os.path.splitext(original_file)[1] # Generate the old style filename manually original_name = os.path.splitext(original_file)[0] doc_type_tag = "" if 'detected_document_type' in result: doc_type = result['detected_document_type'].lower() doc_type_tag = f"_{doc_type.replace(' ', '_')}" elif 'topics' in result and result['topics']: doc_type_tag = f"_{result['topics'][0].lower().replace(' ', '_')}" period_tag = "" if 'topics' in result and result['topics']: for tag in result['topics']: if "century" in tag.lower() or "pre-" in tag.lower() or "era" in tag.lower(): period_tag = f"_{tag.lower().replace(' ', '_')}" break old_filename = f"{original_name}{doc_type_tag}{period_tag}{file_ext}" # Generate the new descriptive filename with our improved formatter new_filename = root_utils.create_descriptive_filename(original_file, result, file_ext) print(f"Example {i+1}:") print(f" Original: {original_file}") print(f" Old Format: {old_filename}") print(f" New Format: {new_filename}") print() if __name__ == "__main__": main()