"""Streamlit app: upload a PDF, extract each page's text, and offer it as JSON."""

import json
from pathlib import Path

from PyPDF2 import PdfReader
import streamlit as st


def _json_filename(pdf_name):
    """Return the ``.json`` file name that corresponds to *pdf_name*.

    Only the final path component is kept, so directory parts in a
    client-supplied name cannot influence where the file is written.
    The last extension is swapped whatever its case (``SCAN.PDF`` ->
    ``SCAN.json``), and ``.pdf`` occurring elsewhere in the name is left
    alone. Falls back to ``output.json`` when no usable name remains.
    """
    base_name = Path(pdf_name).name
    if not base_name:
        return "output.json"
    return Path(base_name).with_suffix(".json").name


def _extract_pages(reader):
    """Return ``{"pages": [...]}`` with a 1-based number and text per page."""
    return {
        "pages": [
            {"page_number": number, "text": page.extract_text()}
            for number, page in enumerate(reader.pages, start=1)
        ]
    }


# Streamlit App
st.title("PDF to JSON Converter")

# Upload PDF File
uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])

if uploaded_file is not None:
    st.write("Processing your file...")

    # Read the PDF file. This is the top-level UI boundary: a damaged or
    # encrypted upload should produce a readable message, not a traceback.
    try:
        pdf_content = _extract_pages(PdfReader(uploaded_file))
    except Exception as exc:
        st.error(f"Could not read the PDF file: {exc}")
        st.stop()

    # Display JSON preview
    st.write("Extracted Content:")
    st.json(pdf_content)

    # Serialize once; the same text is saved to disk and offered for download.
    json_text = json.dumps(pdf_content, ensure_ascii=False, indent=4)

    # Save JSON file
    json_filename = _json_filename(uploaded_file.name)
    with open(json_filename, "w", encoding="utf-8") as json_file:
        json_file.write(json_text)

    st.success("File converted to JSON! Download your file below.")
    st.download_button(
        "Download JSON",
        data=json_text,
        file_name=json_filename,
        mime="application/json",
    )