|
import json
from pathlib import PurePath

import streamlit as st
from PyPDF2 import PdfReader
from transformers import pipeline
|
|
|
|
|
# Hugging Face model identifier passed to the text2text-generation pipeline.
model_name = "Canstralian/RabbitRedux"


@st.cache_resource
def _load_pipeline(name):
    """Build the text2text-generation pipeline for the model *name*.

    Streamlit re-executes this script on every widget interaction;
    ``st.cache_resource`` keeps the constructed pipeline around so the
    model is not rebuilt on each rerun.
    """
    return pipeline("text2text-generation", model=name)


# Shared pipeline object; convert_to_json() calls it on the extracted text.
nlp_pipeline = _load_pipeline(model_name)

# Smoke test: run one fixed prompt through the pipeline and print the raw
# result to stdout.
# NOTE(review): this inference still runs on every execution of the script;
# consider removing it once the model has been verified.
input_text = "Provide an example of secure Python coding practices."
output = nlp_pipeline(input_text)
print(output)
|
|
|
def process_pdf(file):
    """Extract the text of every page of a PDF and return it as one string.

    Args:
        file: The PDF source handed to ``PdfReader`` (the Streamlit upload
            in this script).

    Returns:
        The text of all pages concatenated in page order. A page whose
        extraction yields nothing contributes an empty string.
    """
    reader = PdfReader(file)
    # `or ""` keeps a None result from a page out of the join, where it
    # would otherwise raise TypeError; join also avoids repeated `+=`.
    return "".join(page.extract_text() or "" for page in reader.pages)
|
|
|
def convert_to_json(text):
    """Run *text* through the module-level pipeline and return its output.

    Args:
        text: The input string passed to ``nlp_pipeline``.

    Returns:
        The ``'generated_text'`` value of the first result the pipeline
        returns.
    """
    generations = nlp_pipeline(text)
    first_generation = generations[0]
    return first_generation['generated_text']
|
|
|
# --- Streamlit page: upload a PDF, convert it, show and offer the JSON ---
st.title("PDF to JSON Converter")

uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])

if uploaded_file is not None:
    st.write("Processing your file...")

    # Extract the raw text, then let the model turn it into a JSON string.
    pdf_text = process_pdf(uploaded_file)
    json_output = convert_to_json(pdf_text)

    st.write("Converted JSON:")
    try:
        parsed_output = json.loads(json_output)
    except json.JSONDecodeError as exc:
        # The model returns free-form text, so it may not be valid JSON.
        # Report that and show the raw output instead of crashing the app.
        st.error(f"The model output is not valid JSON: {exc}")
        st.text(json_output)
    else:
        st.json(parsed_output)

        # Swap only the final extension. str.replace would rewrite every
        # ".pdf" occurrence in the name and miss upper-case ".PDF".
        json_filename = PurePath(uploaded_file.name).with_suffix(".json").name
        st.download_button(
            label="Download JSON",
            data=json_output,
            file_name=json_filename,
            mime="application/json",
        )
|
|