Spaces:
Runtime error
Runtime error
File size: 1,946 Bytes
4baa579 a653348 d239c1e 4baa579 1fb96d0 d239c1e 4baa579 d239c1e 1fb96d0 a653348 d239c1e 1fb96d0 d239c1e a653348 d239c1e 4baa579 d239c1e a653348 d239c1e a653348 445e71a 1fb96d0 d239c1e a653348 1fb96d0 a653348 39111d4 a653348 d239c1e 39111d4 d239c1e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
import streamlit as st
import fitz # PyMuPDF
from transformers import T5Tokenizer, T5ForConditionalGeneration
from transformers import pipeline
# Model and tokenizer loading
checkpoint = "./model/LaMini-Flan-T5-248M"


@st.cache_resource(show_spinner=False)
def _load_model(path):
    """Load the T5 tokenizer and model stored at *path*.

    Streamlit re-executes this script on every widget interaction. Caching
    the loaded objects keeps one tokenizer/model pair alive for the server
    process instead of re-reading the weights on each rerun.

    The spinner is disabled so this call emits no UI element ahead of the
    st.set_page_config() call made later in the script.

    Args:
        path: Directory (or model identifier) passed to ``from_pretrained``.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    return (
        T5Tokenizer.from_pretrained(path),
        T5ForConditionalGeneration.from_pretrained(path),
    )


tokenizer, base_model = _load_model(checkpoint)
# LLM pipeline
def llm_pipeline(pdf_contents, max_length=500, min_length=50):
    """Summarize the text of a PDF using the module-level model and tokenizer.

    Args:
        pdf_contents: Raw PDF data, handed to ``fitz.open`` as an in-memory
            stream.
        max_length: Maximum summary length, forwarded to the summarization
            pipeline.
        min_length: Minimum summary length, forwarded to the summarization
            pipeline.

    Returns:
        The generated summary text, or an empty string when the PDF has no
        extractable text (e.g. a scanned document without a text layer).
    """
    # Extract text from the PDF contents. The context manager closes the
    # document even if extraction raises part-way through.
    with fitz.open(stream=pdf_contents, filetype="pdf") as pdf_document:
        pdf_text = "".join(page.get_text() for page in pdf_document)

    # Nothing to summarize: avoid asking the model to summarize an empty prompt.
    if not pdf_text.strip():
        return ""

    # Use the pipeline to generate the summary
    pipe_sum = pipeline(
        'summarization',
        model=base_model,
        tokenizer=tokenizer,
        max_length=max_length,
        min_length=min_length,
    )
    # Truncate the input to the tokenizer's maximum length so a long PDF is
    # not tokenized far past what the model is meant to handle. When
    # truncation kicks in, only the leading part of the document is summarized.
    result = pipe_sum(pdf_text, truncation=True)
    return result[0]['summary_text']
# Streamlit code
# Configure the page to use the "wide" layout.
# NOTE(review): Streamlit documents set_page_config as needing to run before
# other st.* UI calls -- keep it above main(); confirm for the installed version.
st.set_page_config(layout="wide")
def main():
    """Render the app: upload a PDF, choose summary lengths, show the summary.

    Nothing is returned. All results and errors are reported through
    Streamlit widgets.
    """
    st.title("Document Summarization App using a Smaller Model")

    # Button to upload a PDF file
    uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
    if uploaded_file is None:
        return

    max_length = st.slider("Maximum Summary Length", min_value=50, max_value=1000, step=20, value=500)
    min_length = st.slider("Minimum Summary Length", min_value=10, max_value=500, step=10, value=50)

    if not st.button("Summarize"):
        return

    # Check if the uploaded file is a PDF
    if uploaded_file.type != "application/pdf":
        st.error("Please upload a valid PDF file.")
        return

    # The two sliders are independent, so the minimum can be dragged above
    # the maximum; reject that combination before calling the model.
    if min_length > max_length:
        st.error("Minimum summary length cannot exceed the maximum summary length.")
        return

    try:
        with st.spinner("Summarizing..."):
            # getvalue() returns the whole buffer regardless of the current
            # read position, unlike read().
            summary = llm_pipeline(uploaded_file.getvalue(), max_length, min_length)
    except Exception as exc:  # UI boundary: report the failure instead of a raw traceback
        st.error(f"Could not summarize this PDF: {exc}")
        return

    if not summary:
        st.warning("No summary could be produced from this PDF.")
        return

    # Display the summary
    st.info("Summarization Complete")
    st.success(summary)
# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|