"""Helpers that turn raw uploaded file bytes into plain text."""

import logging
from io import BytesIO

from PyPDF2 import PdfReader

logger = logging.getLogger(__name__)


def parse_pdf(file_content: bytes) -> str:
    """Return the concatenated text of every page in a PDF.

    Args:
        file_content: The raw bytes of the PDF document.

    Returns:
        The text extracted from each page, joined in page order with no
        separator. Pages for which extraction yields nothing are skipped.
        An empty string is returned if the document cannot be read.
    """
    try:
        reader = PdfReader(BytesIO(file_content))
        page_texts = (page.extract_text() for page in reader.pages)
        # Skip falsy results (None or "") so the join never sees a non-string.
        return "".join(chunk for chunk in page_texts if chunk)
    except Exception as e:
        # Best-effort boundary: any failure while reading the PDF is logged
        # (with traceback) and reported to the caller as "no text".
        logger.exception("PDF parse error: %s", e)
        return ""


def parse_text(file_content: bytes) -> str:
    """Decode raw bytes to text, preferring UTF-8 and falling back to Latin-1.

    Args:
        file_content: The raw bytes of the text document.

    Returns:
        The decoded string. UTF-8 is tried first; if the bytes are not valid
        UTF-8 they are decoded as Latin-1 instead.
    """
    try:
        return file_content.decode("utf-8")
    except UnicodeDecodeError:
        # Latin-1 maps every possible byte value to a code point, so this
        # fallback cannot raise a decoding error and needs no further guard.
        return file_content.decode("latin1")