Spaces:
Sleeping
Sleeping
File size: 572 Bytes
4254fda |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 |
from PyPDF2 import PdfReader
import docx
def extract_text_from_files(files):
    """Concatenate the text content of PDF, DOCX and TXT files.

    Each file is dispatched on the extension of its ``name`` attribute
    (compared case-insensitively):

    * ``.pdf``  -- read with ``PdfReader``; one chunk per page.
    * ``.docx`` -- read with ``docx.Document``; one chunk per paragraph.
    * ``.txt``  -- ``file.read()``; bytes are decoded as UTF-8.

    Files with any other extension are skipped silently.

    Args:
        files: Iterable of file-like objects. Each must expose a ``name``
            string; ``.txt`` entries must also provide ``read()``.

    Returns:
        A single string in which every extracted chunk (page, paragraph
        or whole text file) is followed by a newline. Empty string when
        nothing was extracted.

    Raises:
        UnicodeDecodeError: If a ``.txt`` file yields bytes that are not
            valid UTF-8.
    """
    chunks = []
    for file in files:
        # Lower-case once so that "REPORT.PDF" is treated like "report.pdf".
        filename = file.name.lower()
        if filename.endswith(".pdf"):
            reader = PdfReader(file)
            # Guard against a page yielding None instead of a string
            # (e.g. image-only pages with some extractor versions), which
            # would otherwise break the concatenation below.
            chunks.extend((page.extract_text() or "") for page in reader.pages)
        elif filename.endswith(".docx"):
            doc = docx.Document(file)
            chunks.extend(para.text for para in doc.paragraphs)
        elif filename.endswith(".txt"):
            data = file.read()
            # Handles opened in text mode already return str; only decode
            # when raw bytes come back.
            if isinstance(data, bytes):
                data = data.decode("utf-8")
            chunks.append(data)
    # Single join instead of repeated "+=" keeps assembly linear in size.
    return "".join(chunk + "\n" for chunk in chunks)