# Spaces: Runtime error
import streamlit as st | |
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
from langchain.document_loaders import PyPDFLoader | |
from transformers import T5Tokenizer, T5ForConditionalGeneration | |
from transformers import pipeline | |
import base64 | |
from huggingface_hub import login | |
import torch | |
import fitz # PyMuPDF | |
# Model and tokenizer loading
checkpoint = "MBZUAI/LaMini-Flan-T5-248M"
# Alternative checkpoint: "google/flan-t5-base"


@st.cache_resource(show_spinner=False)
def _load_model(name):
    """Load the tokenizer and T5 model for the checkpoint *name*.

    Streamlit re-executes this script on every widget interaction; caching
    the loaded objects keeps the checkpoint from being reloaded on each rerun.
    The spinner is disabled so that no Streamlit element is rendered before
    ``st.set_page_config`` is called further down in this file.

    Args:
        name: Hugging Face model identifier passed to ``from_pretrained``.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    loaded_tokenizer = T5Tokenizer.from_pretrained(name)
    loaded_model = T5ForConditionalGeneration.from_pretrained(
        name, device_map='auto', torch_dtype=torch.float32
    )
    return loaded_tokenizer, loaded_model


tokenizer, base_model = _load_model(checkpoint)
# LLM pipeline
def llm_pipeline(pdf_contents):
    """Summarize the text of a PDF supplied as raw bytes.

    Args:
        pdf_contents: The PDF file's bytes; opened by PyMuPDF as an
            in-memory stream.

    Returns:
        The summary text produced by the summarization pipeline, or a short
        notice when no text could be extracted from the PDF.
    """
    # Extract text from the PDF contents. The context manager closes the
    # document even if text extraction raises.
    with fitz.open(stream=pdf_contents, filetype="pdf") as pdf_document:
        pdf_text = "".join(page.get_text() for page in pdf_document)

    # Image-only or empty PDFs yield no text; do not feed an empty prompt
    # to the model.
    if not pdf_text.strip():
        return "No extractable text was found in the PDF."

    # Use the pipeline to generate the summary.
    # NOTE(review): the whole document is passed in a single call; long PDFs
    # may exceed the model's input window -- consider chunking or truncation.
    pipe_sum = pipeline(
        'summarization',
        model=base_model,
        tokenizer=tokenizer,
        max_length=500,
        min_length=50,
    )
    result = pipe_sum(pdf_text)
    return result[0]['summary_text']
# Streamlit page configuration (wide layout)
st.set_page_config(layout="wide")
def main():
    """Render the upload form and display a summary of the uploaded PDF.

    Nothing is summarized until a PDF has been uploaded and the
    "Summarize" button has been pressed.
    """
    st.title("Document Summarization App using Language Model")
    uploaded_file = st.file_uploader("Upload your PDF file", type=['pdf'])

    if uploaded_file is None:
        return
    if not st.button("Summarize"):
        return

    # getvalue() returns the entire buffer regardless of the current read
    # position, so a repeated click on the same upload never yields b"".
    summary = llm_pipeline(uploaded_file.getvalue())

    # Display the summary
    st.info("Summarization Complete")
    st.success(summary)


if __name__ == "__main__":
    main()