Spaces:

kinely
/

datalaw.pdf-datalaw.json

Sleeping

Update app.py

7a47555 verified 2 months ago

1.24 kB

	import streamlit as st
	import json
	from PyPDF2 import PdfReader

	def pdf_to_json(pdf_path, json_path):
	    """Dump the text of every page of a PDF into a JSON file.

	    The output document has a single top-level key, ``"pages"``, holding
	    one entry per page in reading order. Each entry carries the 1-based
	    ``"page_number"`` and the page's extracted ``"text"`` with leading and
	    trailing whitespace removed.

	    Args:
	        pdf_path: Location of the PDF to read; handed to ``PdfReader``.
	        json_path: Location of the JSON file to create or overwrite.

	    Returns:
	        ``json_path``, unchanged, so callers can chain on the output file.
	    """
	    # Page numbers are human-facing, so counting starts at 1.
	    pages = [
	        {"page_number": number, "text": page.extract_text().strip()}
	        for number, page in enumerate(PdfReader(pdf_path).pages, start=1)
	    ]

	    # ensure_ascii=False keeps non-ASCII characters readable in the file,
	    # which is why the file is opened explicitly as UTF-8.
	    with open(json_path, "w", encoding="utf-8") as out:
	        json.dump({"pages": pages}, out, indent=4, ensure_ascii=False)

	    return json_path

	# --- Streamlit page ---------------------------------------------------
	st.title("DATA LAW PDF to DATA LAW JSON Converter")

	# Input and output locations are fixed; point pdf_path at the real PDF.
	pdf_path = "data law.pdf"
	json_path = "output.json"

	# The conversion runs only on the script run triggered by the button click.
	convert_clicked = st.button("Convert PDF to JSON")
	if convert_clicked:
	    try:
	        result = pdf_to_json(pdf_path, json_path)
	        st.success(f"JSON file created successfully: {result}")

	        # Read the freshly written file back so it can be offered for download.
	        with open(result, "r", encoding="utf-8") as generated:
	            json_text = generated.read()
	        st.download_button(
	            label="Download JSON",
	            data=json_text,
	            file_name="output.json",
	            mime="application/json",
	        )
	    except Exception as e:
	        # Any failure (conversion, file access, widget creation) is shown
	        # in the page instead of surfacing as an unhandled exception.
	        st.error(f"An error occurred: {e}")