final_year / ingestion /docx_reader.py
jayasrees's picture
first commit
9d21edd
raw
history blame contribute delete
144 Bytes
from docx import Document
def extract_text_from_docx(path):
    """Return the paragraph text of a .docx file as one newline-joined string.

    Args:
        path: Location of the document; passed straight to ``Document``.

    Returns:
        The ``text`` of each entry in the document's ``paragraphs``, in
        order, separated by ``"\n"``. Only ``paragraphs`` is read here.
    """
    paragraphs = Document(path).paragraphs
    lines = [para.text for para in paragraphs]
    return "\n".join(lines)