import os

import pdfplumber
from docx import Document


def parse_file(file_path) -> str:
    """Return the text content of a ``.txt``, ``.docx`` or ``.pdf`` file.

    The format is chosen purely from the file extension (compared
    case-insensitively); the file content is not sniffed.

    Args:
        file_path: Path of the file to read.

    Returns:
        * ``.txt``  -- the whole file decoded as UTF-8, unmodified.
        * ``.docx`` -- the text of every entry in ``doc.paragraphs``,
          joined with newlines.
        * ``.pdf``  -- the extracted text of every page, joined with
          newlines, with leading/trailing whitespace stripped.
        * anything else -- the literal string ``"Unsupported file format."``
          (no exception is raised, so callers must compare against it).

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If a ``.txt`` file is not valid UTF-8.
    """
    ext = os.path.splitext(file_path)[-1].lower()

    if ext == ".txt":
        with open(file_path, "r", encoding="utf-8") as f:
            return f.read()

    if ext == ".docx":
        doc = Document(file_path)
        return "\n".join(para.text for para in doc.paragraphs)

    if ext == ".pdf":
        with pdfplumber.open(file_path) as pdf:
            # Guard against extract_text() giving back None for a page with
            # no extractable text (e.g. a scanned image); concatenating None
            # with a string would raise TypeError and abort the whole file.
            page_texts = [page.extract_text() or "" for page in pdf.pages]
        # Joining then stripping yields the same result as appending "\n"
        # after every page and stripping, without repeated concatenation.
        return "\n".join(page_texts).strip()

    # Kept as a return value (not an exception) for backward compatibility.
    return "Unsupported file format."