File size: 1,318 Bytes
97579c6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
import json
import streamlit as st
from PyPDF2 import PdfReader
from transformers import pipeline
# Initialize the Hugging Face model pipeline
model_name = "your-huggingface-model-name"  # Replace with your model's name


@st.cache_resource
def _load_pipeline(name):
    """Build the text2text-generation pipeline for the model called *name*.

    Streamlit re-executes this script from the top on every user
    interaction. ``st.cache_resource`` keeps the object returned here and
    hands it back on later reruns, so the model is not rebuilt each time a
    widget changes.

    Args:
        name: Model identifier passed to ``pipeline`` as ``model``.

    Returns:
        The pipeline object created by ``transformers.pipeline``.
    """
    return pipeline("text2text-generation", model=name)


nlp_pipeline = _load_pipeline(model_name)
def process_pdf(file) -> str:
    """Extract the text of every page of a PDF and return it as one string.

    Args:
        file: Whatever ``PdfReader`` accepts as its source; this app passes
            the object returned by the Streamlit file uploader.

    Returns:
        The text of all pages concatenated in page order, with no separator
        inserted between pages. Empty when no page yields any text.
    """
    reader = PdfReader(file)
    # ``or ""`` is a defensive guard: if a page's extraction yields nothing
    # (None or an empty string) it contributes no text instead of raising a
    # TypeError during concatenation.
    return "".join(page.extract_text() or "" for page in reader.pages)
def convert_to_json(text):
    """Run *text* through the module-level ``nlp_pipeline`` and return its output.

    The pipeline's result is indexed at position 0 and that entry's
    ``generated_text`` value is returned unchanged; nothing here parses or
    validates it as JSON.
    """
    outputs = nlp_pipeline(text)
    first_output = outputs[0]
    return first_output["generated_text"]
# --- Streamlit page: upload a PDF, convert it, show and offer the JSON ---
st.title("PDF to JSON Converter")

uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])

if uploaded_file is not None:
    st.write("Processing your file...")

    # Extract text from the PDF
    pdf_text = process_pdf(uploaded_file)

    if not pdf_text.strip():
        # Nothing to convert (for example, a PDF without any extractable
        # text); skip the model call rather than feeding it an empty prompt.
        st.warning("No extractable text was found in this PDF.")
    else:
        # Convert the extracted text to JSON using the Hugging Face model
        json_output = convert_to_json(pdf_text)

        # The model returns free-form text, so it may not be valid JSON.
        # Parse it explicitly and report a failure instead of crashing.
        try:
            parsed_output = json.loads(json_output)
        except json.JSONDecodeError as err:
            st.error(f"The model output is not valid JSON: {err}")
            st.text(json_output)
        else:
            # Display the JSON output
            st.write("Converted JSON:")
            st.json(parsed_output)

            # Swap only a trailing ".pdf" (any letter case) for ".json";
            # a plain str.replace would also rewrite ".pdf" in the middle of
            # the name and would leave an upper-case ".PDF" untouched.
            base, dot, extension = uploaded_file.name.rpartition(".")
            json_filename = (
                f"{base}.json"
                if dot and extension.lower() == "pdf"
                else f"{uploaded_file.name}.json"
            )

            # Provide a download link for the JSON file
            st.download_button(
                label="Download JSON",
                data=json_output,
                file_name=json_filename,
                mime="application/json",
            )
|