Spaces:

Chemically-motivated
/

pdf_to_json_converter

Running

File size: 1,318 Bytes

97579c6

import json
from pathlib import Path

import streamlit as st
from PyPDF2 import PdfReader
from transformers import pipeline

# Initialize the Hugging Face model pipeline
model_name = "your-huggingface-model-name"  # Replace with your model's name


@st.cache_resource
def _load_pipeline(name):
    """Build the text2text-generation pipeline for the model called *name*.

    Streamlit re-executes this script on every user interaction; caching the
    pipeline as a resource means the model is constructed once per server
    process instead of on each rerun.
    """
    return pipeline("text2text-generation", model=name)


# Module-level handle used by convert_to_json().
nlp_pipeline = _load_pipeline(model_name)

def process_pdf(file):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        file: Passed unchanged to ``PdfReader``; the caller in this script
            supplies the object returned by ``st.file_uploader``.

    Returns:
        A single string made of each page's extracted text with no separator
        between pages. A page that yields no text contributes nothing.
    """
    reader = PdfReader(file)
    # `or ""` is defensive: if a page's extract_text() ever yields None
    # instead of a string, treat it as empty rather than raising TypeError.
    return "".join(page.extract_text() or "" for page in reader.pages)

def convert_to_json(text):
    """Feed *text* to the module-level ``nlp_pipeline`` and return its output.

    Only the ``'generated_text'`` field of the first pipeline result is
    returned. Nothing here checks that the string is valid JSON; that is
    left to the caller.
    """
    generations = nlp_pipeline(text)
    first_generation = generations[0]
    return first_generation['generated_text']

# --- Streamlit page: upload a PDF, run it through the model, show the result ---
st.title("PDF to JSON Converter")

uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])

if uploaded_file is not None:
    st.write("Processing your file...")

    # Extract text from the PDF
    pdf_text = process_pdf(uploaded_file)

    if not pdf_text.strip():
        # Nothing to send to the model (e.g. no extractable text in the PDF).
        st.warning("No extractable text was found in this PDF.")
    else:
        # Convert the extracted text to JSON using the Hugging Face model
        json_output = convert_to_json(pdf_text)

        # The model output is free-form text, so parsing can fail; report the
        # problem in the page instead of crashing with a traceback.
        try:
            parsed_json = json.loads(json_output)
        except json.JSONDecodeError as err:
            st.error(f"The model output is not valid JSON: {err}")
            st.write("Raw model output:")
            st.text(json_output)
        else:
            # Display the JSON output
            st.write("Converted JSON:")
            st.json(parsed_json)

            # Swap only the final extension (whatever its letter case) for
            # ".json"; str.replace would touch every ".pdf" in the name and
            # miss ".PDF".
            json_filename = Path(uploaded_file.name).with_suffix(".json").name

            # Provide a download link for the JSON file
            st.download_button(
                label="Download JSON",
                data=json_output,
                file_name=json_filename,
                mime="application/json",
            )