|
import io
import os
import tempfile

import PyPDF2
import streamlit as st
|
|
|
def extract_text_from_pdf(uploaded_file):
    """Return the text of every page of an uploaded PDF, in page order.

    Args:
        uploaded_file: Binary file-like object exposing ``read()`` that
            yields the raw PDF bytes (``main`` passes the value returned by
            ``st.file_uploader``).

    Returns:
        One string made of each page's extracted text joined with no
        separator. A page that yields no text contributes an empty string.
    """
    # Parse straight from memory: ``read()`` has already loaded the whole
    # document, and PdfReader accepts any seekable binary stream. Going
    # through a ``delete=False`` temporary file would leave an orphaned copy
    # of every uploaded PDF on disk unless it were explicitly removed.
    pdf_stream = io.BytesIO(uploaded_file.read())
    reader = PyPDF2.PdfReader(pdf_stream)

    # ``or ""`` keeps the join safe if a page's extraction yields a
    # non-string/empty result instead of text.
    return "".join(page.extract_text() or "" for page in reader.pages)
|
|
|
def main():
    """Render the Streamlit page: upload a PDF, show its details, extract text.

    Nothing below the uploader is drawn until a file has been selected; the
    extraction itself only runs when the "Extract Text" button is pressed.
    """
    st.title("PDF Text Extractor")
    st.write("Upload your pdf:")

    uploaded_file = st.file_uploader("Select PDF file", type=["pdf"])
    if uploaded_file is None:
        # No file chosen yet -- nothing more to show on this run.
        return

    # Echo basic details of the selected file back to the user.
    st.write("PDF file Upload:", uploaded_file.name)
    st.write("File size:", uploaded_file.size)

    extract_requested = st.button("Extract Text")
    if not extract_requested:
        return

    # Extract first, then print the heading followed by the text itself.
    extracted = extract_text_from_pdf(uploaded_file)
    st.write("Extracted Text:")
    st.write(extracted)
|
|
|
# Run the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
|