# Spaces: Sleeping
import streamlit as st | |
import json | |
from PyPDF2 import PdfReader | |
def pdf_to_json(pdf_path, json_path):
    """Extract the text of every page of a PDF and save it as a JSON file.

    The written document has the shape
    ``{"pages": [{"page_number": 1, "text": "..."}, ...]}``: page numbers
    start at 1 and each page's text has surrounding whitespace stripped.

    Args:
        pdf_path: Location of the PDF, handed straight to ``PdfReader``.
        json_path: Location of the JSON file to write (opened in "w" mode,
            so an existing file is overwritten).

    Returns:
        ``json_path``, unchanged, so callers can chain on the output file.
    """
    reader = PdfReader(pdf_path)

    # Fall back to "" when extract_text() gives back nothing (e.g. None for a
    # page without a text layer in some PyPDF2 versions), so that .strip()
    # cannot raise and abort the conversion of the remaining pages.
    pages = [
        {"page_number": number, "text": (page.extract_text() or "").strip()}
        for number, page in enumerate(reader.pages, start=1)
    ]

    # ensure_ascii=False keeps non-ASCII characters readable in the output;
    # the file is therefore written explicitly as UTF-8.
    with open(json_path, "w", encoding="utf-8") as json_file:
        json.dump({"pages": pages}, json_file, indent=4, ensure_ascii=False)

    return json_path
# Streamlit interface: a page title and a single button that runs the
# PDF-to-JSON conversion and then offers the result for download.
st.title("DATA LAW PDF to DATA LAW JSON Converter")

# Input and output locations are fixed here; edit them to point at your files.
pdf_path = "data law.pdf"
json_path = "output.json"

convert_clicked = st.button("Convert PDF to JSON")
if convert_clicked:
    try:
        result = pdf_to_json(pdf_path, json_path)
        st.success(f"JSON file created successfully: {result}")

        # Read the generated file back so its contents can be served by the
        # download widget.
        with open(result, "r", encoding="utf-8") as json_file:
            json_text = json_file.read()

        st.download_button(
            label="Download JSON",
            data=json_text,
            file_name="output.json",
            mime="application/json",
        )
    except Exception as e:
        # Top-level UI boundary: report any failure to the user instead of
        # letting the script crash.
        st.error(f"An error occurred: {e}")