"""Command-line tool that extracts cleaned ``description`` fields from JSON files."""

import argparse
import json
import re
from pathlib import Path

from utils.file_processor import FileProcessor, ProcessorOptions

# Compiled once at import time so every processed file reuses the same patterns.
_HTML_TAG_RE = re.compile(r'<[^>]+>')
_WHITESPACE_RE = re.compile(r'\s+')


class DescriptionExtractProcessor(FileProcessor):
    """File processor that reduces a JSON document to its cleaned description."""

    def process_content(self, content: str) -> str:
        """Return the cleaned ``description`` value found in a JSON document.

        ``content`` is parsed as JSON. The top-level ``description`` value has
        ``<...>`` tag-like spans removed, runs of whitespace collapsed to a
        single space, and leading/trailing whitespace stripped.

        Args:
            content: Text expected to contain a JSON document.

        Returns:
            The cleaned description. An empty string is returned when the
            content is not valid JSON, when the top-level value is not a JSON
            object, or when ``description`` is absent, ``null`` or not a
            string.
        """
        # Keep the try body minimal: only the parse can raise JSONDecodeError.
        try:
            data = json.loads(content)
        except json.JSONDecodeError as e:
            self.logger.error(f"Failed to parse JSON: {e}")
            return ""

        # Valid JSON may be a list, string, number or null; none of those
        # support .get(), so bail out instead of raising AttributeError.
        if not isinstance(data, dict):
            self.logger.error(
                f"Expected a JSON object at the top level, got {type(data).__name__}"
            )
            return ""

        description = data.get('description', '')
        if description is None:
            # An explicit JSON null is treated the same as a missing key.
            return ""
        if not isinstance(description, str):
            # re.sub would raise TypeError on numbers, lists or objects.
            self.logger.error(
                f"Expected 'description' to be a string, got {type(description).__name__}"
            )
            return ""

        # Clean the description
        description = _HTML_TAG_RE.sub('', description)  # Remove HTML
        description = _WHITESPACE_RE.sub(' ', description)  # Normalize whitespace
        return description.strip()


def main() -> None:
    """Parse command-line arguments and process the chosen directory.

    Builds a ``ProcessorOptions`` with ``recursive=True``, ``dry_run=False``,
    ``debug`` taken from ``--verbose`` and ``file_extensions={'.json'}``, then
    hands the directory given by ``--directory`` (default ``.``) to
    ``DescriptionExtractProcessor.process_directory``.
    """
    parser = argparse.ArgumentParser(description='Extract descriptions from JSON files')
    parser.add_argument('-d', '--directory', default='.', help='Directory to process')
    parser.add_argument('-v', '--verbose', action='store_true', help='Enable verbose logging')
    args = parser.parse_args()

    options = ProcessorOptions(
        recursive=True,
        dry_run=False,
        debug=args.verbose,
        file_extensions={'.json'},
    )

    processor = DescriptionExtractProcessor(options)
    processor.process_directory(Path(args.directory))


if __name__ == "__main__":
    main()