|
import fitz |
|
from io import BytesIO |
|
import streamlit as st |
|
|
|
def ExtractPDFText(pdf):
    """Extract the plain text of every page of an uploaded PDF.

    Args:
        pdf: A binary file-like object exposing ``read()`` whose bytes are
            a PDF document (the script passes the value returned by
            ``st.file_uploader``).

    Returns:
        The text of all pages concatenated in page order. If opening or
        reading the document fails, the error is reported through
        ``st.error`` and the text collected before the failure (possibly
        an empty string) is returned.
    """
    pdf_bytes = pdf.read()

    # Collect per-page text and join once at the end instead of growing a
    # string with ``+=``. Keeping the list outside the ``try`` preserves the
    # pages gathered before any mid-document failure.
    page_texts = []

    try:
        # Open straight from memory. The explicit ``filetype`` tells PyMuPDF
        # how to parse the stream without inventing a placeholder file name,
        # and the context manager closes the document even on error.
        with fitz.open(stream=pdf_bytes, filetype="pdf") as pdf_document:
            for page in pdf_document:
                page_texts.append(page.get_text())
    except Exception as e:
        # UI boundary: surface the problem to the user rather than crashing
        # the Streamlit script.
        st.error(f"Error extracting text from PDF: {e}")

    return "".join(page_texts)
|
|
|
|
|
# Script entry point: show an upload widget limited to the "pdf" type and,
# once a file has been provided, extract its text.
pdf = st.file_uploader("Upload a PDF file", type=["pdf"])

if pdf:
    # Pull the text out of the uploaded file before emitting the heading.
    text_content = ExtractPDFText(pdf)

    # NOTE(review): only the heading is written in the visible code;
    # ``text_content`` itself is not rendered here — confirm it is displayed
    # further down the file.
    st.write("PDF Content:")
|
|
|
|