Spaces:
No application file
No application file
File size: 581 Bytes
2875866 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 |
import PyPDF2
from docx import Document
def extract_text_from_file(uploaded_file) -> str:
    """Extract plain text from an uploaded PDF or DOCX file.

    The parser is selected by the MIME type found in ``uploaded_file.type``.

    Args:
        uploaded_file: File-like object exposing a ``type`` attribute that
            holds the MIME type. The object itself is handed directly to
            ``PyPDF2.PdfReader`` or ``docx.Document``.

    Returns:
        The page texts (PDF) or paragraph texts (DOCX) joined with newlines,
        or an empty string when the MIME type is neither PDF nor DOCX.
    """
    mime_type = uploaded_file.type

    if mime_type == "application/pdf":
        pdf_reader = PyPDF2.PdfReader(uploaded_file)
        # A page with no extractable text (e.g. a scanned image) may yield an
        # empty or None result depending on the PyPDF2 release; coerce it to
        # "" so str.join never receives a non-string item.
        return "\n".join(page.extract_text() or "" for page in pdf_reader.pages)

    if mime_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
        doc = Document(uploaded_file)
        return "\n".join(para.text for para in doc.paragraphs)

    # Unsupported MIME type: keep the existing contract of returning "".
    return ""