Spaces:

Chemically-motivated
/

pdf_to_json_converter

Sleeping

Update app.py

5017c6b verified about 1 month ago

1.42 kB

	import json
	import streamlit as st
	from PyPDF2 import PdfReader
	from transformers import pipeline

	# Hugging Face Hub identifier of the model handed to the pipeline.
	model_name = "Canstralian/RabbitRedux"

	# Prompt for the one-off example generation performed when the model loads.
	input_text = "Provide an example of secure Python coding practices."


	@st.cache_resource
	def _load_pipeline(name):
	    """Build the text2text-generation pipeline once and reuse it.

	    Streamlit re-executes this script on every user interaction. Without
	    caching, each rerun would rebuild the pipeline and repeat the example
	    generation below. ``st.cache_resource`` keeps a single pipeline object
	    per distinct ``name`` and returns it on later reruns.

	    Args:
	        name: Model identifier passed to ``transformers.pipeline``.

	    Returns:
	        The pipeline object created for ``name``.
	    """
	    loaded = pipeline("text2text-generation", model=name)
	    # Example usage: runs only when the pipeline is first built, so the
	    # sample output still reaches stdout without costing a generation on
	    # every rerun.
	    print(loaded(input_text))
	    return loaded


	# Shared pipeline used by convert_to_json().
	nlp_pipeline = _load_pipeline(model_name)

	def process_pdf(file):
	    """Return the text of every page of a PDF, concatenated in page order.

	    Args:
	        file: Whatever ``PdfReader`` accepts; this app passes the object
	            returned by ``st.file_uploader``.

	    Returns:
	        A single string holding the extracted text of all pages, with no
	        separator inserted between pages. Empty when no page yields text.
	    """
	    reader = PdfReader(file)
	    # ``or ""`` keeps the join from raising TypeError should a page yield
	    # None instead of a string.
	    # NOTE(review): defensive only; confirm whether the pinned PyPDF2
	    # version can actually return None here.
	    return "".join(page.extract_text() or "" for page in reader.pages)

	def convert_to_json(text):
	    """Feed ``text`` to the module-level pipeline and return its output text.

	    Args:
	        text: Input string handed to ``nlp_pipeline`` unchanged.

	    Returns:
	        The ``generated_text`` entry of the first result the pipeline
	        returns. Nothing in this function checks that the value is
	        well-formed JSON.
	    """
	    generations = nlp_pipeline(text)
	    first_generation = generations[0]
	    return first_generation["generated_text"]

	st.title("PDF to JSON Converter")

	uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])

	if uploaded_file is not None:
	    st.write("Processing your file...")

	    # Extract text from the PDF
	    pdf_text = process_pdf(uploaded_file)

	    if not pdf_text.strip():
	        # Nothing to convert: skip the model call instead of sending it an
	        # empty prompt.
	        st.warning("No extractable text was found in this PDF.")
	    else:
	        # Convert the extracted text to JSON using the Hugging Face model
	        json_output = convert_to_json(pdf_text)

	        # The model returns free-form text, so it may not parse as JSON.
	        # Report that to the user rather than crashing the app.
	        try:
	            parsed_json = json.loads(json_output)
	        except json.JSONDecodeError as err:
	            st.error(f"The model output is not valid JSON: {err}")
	            st.text(json_output)
	        else:
	            # Display the JSON output
	            st.write("Converted JSON:")
	            st.json(parsed_json)

	            # Swap only the final extension. str.replace would rewrite
	            # every ".pdf" inside the name and would miss ".PDF".
	            json_filename = uploaded_file.name.rsplit(".", 1)[0] + ".json"

	            # Provide a download link for the JSON file
	            st.download_button(
	                label="Download JSON",
	                data=json_output,
	                file_name=json_filename,
	                mime="application/json",
	            )