"""
helper.py

Utility functions for text processing and data cleaning.
"""

import re

# One or more whitespace characters other than "\n" (spaces, tabs, "\r", ...).
_HORIZONTAL_WS_RE = re.compile(r'[^\S\n]+')

# A newline followed by one or more further lines that are empty or contain
# only horizontal whitespace, i.e. a run of visually blank lines.
_BLANK_LINES_RE = re.compile(r'\n(?:[^\S\n]*\n)+')


def clean_text_whitespace(text: str) -> str:
    """
    Clean up text by normalizing whitespace and newlines.

    Processing steps, in order:

    1. Windows line endings ("\\r\\n") are converted to "\\n".
    2. Every run of non-newline whitespace (spaces, tabs, ...) becomes a
       single space.
    3. Runs of consecutive newlines collapse to a single newline. Lines
       that contain only whitespace count as blank, so "a\\n \\nb" becomes
       "a\\nb".
    4. Leading and trailing whitespace (including newlines) is stripped.

    Args:
        text (str): Input text string to clean. Falsy values (``None``,
            ``""``) and non-string values are returned unchanged.

    Returns:
        str: Cleaned text with normalized whitespace and newlines, or the
        original value when it is falsy or not a string.
    """
    if not text or not isinstance(text, str):
        return text

    # Treat CRLF as a plain newline. Otherwise the "\r" would be handled as
    # horizontal whitespace below and leave a stray space before every "\n".
    text = text.replace('\r\n', '\n')

    # Replace multiple whitespace characters (spaces, tabs) with a single
    # space; newlines are deliberately left alone here.
    text = _HORIZONTAL_WS_RE.sub(' ', text)

    # Replace multiple consecutive newlines with a single newline, skipping
    # over whitespace-only lines that would otherwise break up the run.
    text = _BLANK_LINES_RE.sub('\n', text)

    # Strip leading and trailing whitespace.
    return text.strip()