Spaces:

aps19
/

doc_summaryLLM

Runtime error

App Files Community

aps19 committed on Sep 20, 2023

Commit

4baa579

1 Parent(s): 1a19e68

lint correction

Browse files

Files changed (1) hide show

app.py +11 -25

app.py CHANGED Viewed

@@ -1,29 +1,14 @@
-import streamlit as st
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain.document_loaders import PyPDFLoader
 from transformers import T5Tokenizer, T5ForConditionalGeneration
 from transformers import pipeline
-import base64
-from huggingface_hub import login
-import torch
-import fitz  # PyMuPDF
-# model and tokenizer loading
-checkpoint = "MBZUAI/LaMini-Flan-T5-248M"
-# checkpoint = "google/flan-t5-base"
 tokenizer = T5Tokenizer.from_pretrained(checkpoint)
-base_model = T5ForConditionalGeneration.from_pretrained(checkpoint, device_map='auto', torch_dtype=torch.float32)
 # LLM pipeline
-def llm_pipeline(pdf_contents):
-    # Extract text from the PDF contents
-    pdf_document = fitz.open(stream=pdf_contents, filetype="pdf")
-    pdf_text = ""
-    for page_num in range(pdf_document.page_count):
-        page = pdf_document.load_page(page_num)
-        pdf_text += page.get_text()
     # Use the pipeline to generate the summary
     pipe_sum = pipeline(
         'summarization',
@@ -33,7 +18,7 @@ def llm_pipeline(pdf_contents):
         min_length=50
     )
-    result = pipe_sum(pdf_text)
     summary = result[0]['summary_text']
     return summary
@@ -41,13 +26,14 @@ def llm_pipeline(pdf_contents):
 st.set_page_config(layout="wide")
 def main():
-    st.title("Document Summarization App using Language Model")
-    uploaded_file = st.file_uploader("Upload your PDF file", type=['pdf'])
-    if uploaded_file is not None:
         if st.button("Summarize"):
-            summary = llm_pipeline(uploaded_file.read())
             # Display the summary
             st.info("Summarization Complete")

+import streamlit as st
 from transformers import T5Tokenizer, T5ForConditionalGeneration
 from transformers import pipeline
+# Model and tokenizer loading
+checkpoint = "t5-small"  # Use the smaller "t5-small" model
 tokenizer = T5Tokenizer.from_pretrained(checkpoint)
+base_model = T5ForConditionalGeneration.from_pretrained(checkpoint)
 # LLM pipeline
+def llm_pipeline(text):
     # Use the pipeline to generate the summary
     pipe_sum = pipeline(
         'summarization',
         min_length=50
     )
+    result = pipe_sum(text)
     summary = result[0]['summary_text']
     return summary
 st.set_page_config(layout="wide")
 def main():
+    st.title("Document Summarization App using a Smaller Model")
+    # Text input area
+    uploaded_text = st.text_area("Paste your document text here:")
+    if uploaded_text:
         if st.button("Summarize"):
+            summary = llm_pipeline(uploaded_text)
             # Display the summary
             st.info("Summarization Complete")