File size: 892 Bytes
d20782d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import fitz
from io import BytesIO
import streamlit as st

def ExtractPDFText(pdf) -> str:
    """Extract the plain text of every page of an uploaded PDF.

    Args:
        pdf: A binary file-like object exposing ``read()`` that returns the
            raw PDF bytes (for example the object returned by
            ``st.file_uploader``).

    Returns:
        The text of all pages concatenated in page order. If opening or
        reading the document fails, the error is reported through
        ``st.error`` and whatever text was gathered before the failure
        (possibly the empty string) is returned.
    """
    pdf_bytes = pdf.read()
    page_texts = []
    # Bound before the ``try`` so the cleanup in ``finally`` can test the
    # handle directly instead of probing ``locals()``.
    pdf_document = None

    try:
        # Open the PDF from memory; ``filetype`` tells PyMuPDF how to parse
        # the stream, so no placeholder filename is needed.
        pdf_document = fitz.open(stream=pdf_bytes, filetype="pdf")

        # Collect the text page by page; the pieces are joined once at the
        # end rather than growing a string with ``+=``.
        for page in pdf_document:
            page_texts.append(page.get_text())

    except Exception as e:
        # UI boundary: show the problem to the user instead of crashing the
        # app, and fall through to return the text collected so far.
        st.error(f"Error extracting text from PDF: {e}")

    finally:
        # Close the document only if it was actually opened.
        if pdf_document is not None:
            pdf_document.close()

    return "".join(page_texts)

# Demo page: offer a PDF upload widget and run the extractor on the result.
pdf = st.file_uploader(
    label="Upload a PDF file",
    type=["pdf"],
)

# The uploader yields None until the user has picked a file.
if pdf is not None:
    text_content = ExtractPDFText(pdf)
    st.write("PDF Content:")
    # Rendering of the extracted text is currently switched off:
    # st.write(text_content)