# kinely's picture
# Update app.py
# 7a47555 verified
import streamlit as st
import json
from PyPDF2 import PdfReader
def pdf_to_json(pdf_path, json_path):
    """Extract the text of every page of a PDF and save it as a JSON file.

    The written document has the shape
    ``{"pages": [{"page_number": 1, "text": "..."}, ...]}``: page numbers
    start at 1 and each page's text has leading/trailing whitespace removed.

    Args:
        pdf_path: Path of the PDF file to read.
        json_path: Path of the JSON file to write; opened in ``"w"`` mode,
            so an existing file is overwritten.

    Returns:
        ``json_path``, unchanged, so the caller can reuse it directly.
    """
    reader = PdfReader(pdf_path)

    # `or ""` keeps .strip() from ever being called on None, so a page
    # without extractable text becomes an empty string instead of aborting
    # the whole conversion.
    pages = [
        {"page_number": number, "text": (page.extract_text() or "").strip()}
        for number, page in enumerate(reader.pages, start=1)
    ]
    data = {"pages": pages}

    # ensure_ascii=False keeps non-ASCII characters readable in the output
    # rather than escaping them as \uXXXX sequences.
    with open(json_path, "w", encoding="utf-8") as json_file:
        json.dump(data, json_file, indent=4, ensure_ascii=False)

    return json_path
# Streamlit interface: a single button that converts a fixed PDF to JSON
# and then offers the generated file for download.
st.title("DATA LAW PDF to DATA LAW JSON Converter")

# Hard-coded input/output locations (replace pdf_path with your actual PDF path).
pdf_path = "data law.pdf"
json_path = "output.json"

if st.button("Convert PDF to JSON"):
    # Conversion, read-back and the download widget all stay inside the try
    # so that any failure is reported in the page via st.error.
    try:
        result = pdf_to_json(pdf_path, json_path)
        st.success(f"JSON file created successfully: {result}")

        # Load the freshly written JSON so it can be served to the browser.
        with open(result, "r", encoding="utf-8") as json_file:
            json_text = json_file.read()

        st.download_button(
            label="Download JSON",
            data=json_text,
            file_name="output.json",
            mime="application/json",
        )
    except Exception as e:
        st.error(f"An error occurred: {e}")