LaurentTRIPIED's picture
Update app.py
ce2d47f verified
raw
history blame
485 Bytes
import PyPDF2
def extract_text_from_pdf(pdf_path):
    """Return the extracted text of each page of a PDF file.

    Args:
        pdf_path: Path of the PDF file; it is opened in binary mode.

    Returns:
        A list with one entry per page, in the order the reader exposes
        them, each entry being the result of ``page.extract_text()``.
    """
    with open(pdf_path, 'rb') as stream:
        reader = PyPDF2.PdfReader(stream)
        # Extraction must finish while the underlying file is still open.
        pages_text = [page.extract_text() for page in reader.pages]
    return pages_text
# Example usage: extract the text of the PDF below and print it page by page,
# with pages numbered from 1 and followed by a 100-character '=' rule.
pdf_path = 'data/07-VF2_UDM_Oneframe_A4-2023.pdf'
pdf_text = extract_text_from_pdf(pdf_path)
for page_num, page_text in enumerate(pdf_text, 1):
    print(f"Page {page_num}:\n{page_text}\n{'='*100}\n")