aps19 committed on
Commit
4baa579
1 Parent(s): 1a19e68

lint correction

Browse files
Files changed (1) hide show
  1. app.py +11 -25
app.py CHANGED
@@ -1,29 +1,14 @@
1
- import streamlit as st
2
- from langchain.text_splitter import RecursiveCharacterTextSplitter
3
- from langchain.document_loaders import PyPDFLoader
4
  from transformers import T5Tokenizer, T5ForConditionalGeneration
5
  from transformers import pipeline
6
- import base64
7
- from huggingface_hub import login
8
- import torch
9
- import fitz # PyMuPDF
10
 
11
-
12
- # model and tokenizer loading
13
- checkpoint = "MBZUAI/LaMini-Flan-T5-248M"
14
- # checkpoint = "google/flan-t5-base"
15
  tokenizer = T5Tokenizer.from_pretrained(checkpoint)
16
- base_model = T5ForConditionalGeneration.from_pretrained(checkpoint, device_map='auto', torch_dtype=torch.float32)
17
 
18
  # LLM pipeline
19
- def llm_pipeline(pdf_contents):
20
- # Extract text from the PDF contents
21
- pdf_document = fitz.open(stream=pdf_contents, filetype="pdf")
22
- pdf_text = ""
23
- for page_num in range(pdf_document.page_count):
24
- page = pdf_document.load_page(page_num)
25
- pdf_text += page.get_text()
26
-
27
  # Use the pipeline to generate the summary
28
  pipe_sum = pipeline(
29
  'summarization',
@@ -33,7 +18,7 @@ def llm_pipeline(pdf_contents):
33
  min_length=50
34
  )
35
 
36
- result = pipe_sum(pdf_text)
37
  summary = result[0]['summary_text']
38
  return summary
39
 
@@ -41,13 +26,14 @@ def llm_pipeline(pdf_contents):
41
  st.set_page_config(layout="wide")
42
 
43
  def main():
44
- st.title("Document Summarization App using Language Model")
45
 
46
- uploaded_file = st.file_uploader("Upload your PDF file", type=['pdf'])
 
47
 
48
- if uploaded_file is not None:
49
  if st.button("Summarize"):
50
- summary = llm_pipeline(uploaded_file.read())
51
 
52
  # Display the summary
53
  st.info("Summarization Complete")
 
1
+ import streamlit as st
 
 
2
  from transformers import T5Tokenizer, T5ForConditionalGeneration
3
  from transformers import pipeline
 
 
 
 
4
 
5
+ # Model and tokenizer loading
6
+ checkpoint = "t5-small" # Use the smaller "t5-small" model
 
 
7
  tokenizer = T5Tokenizer.from_pretrained(checkpoint)
8
+ base_model = T5ForConditionalGeneration.from_pretrained(checkpoint)
9
 
10
  # LLM pipeline
11
+ def llm_pipeline(text):
 
 
 
 
 
 
 
12
  # Use the pipeline to generate the summary
13
  pipe_sum = pipeline(
14
  'summarization',
 
18
  min_length=50
19
  )
20
 
21
+ result = pipe_sum(text)
22
  summary = result[0]['summary_text']
23
  return summary
24
 
 
26
  st.set_page_config(layout="wide")
27
 
28
  def main():
29
+ st.title("Document Summarization App using a Smaller Model")
30
 
31
+ # Text input area
32
+ uploaded_text = st.text_area("Paste your document text here:")
33
 
34
+ if uploaded_text:
35
  if st.button("Summarize"):
36
+ summary = llm_pipeline(uploaded_text)
37
 
38
  # Display the summary
39
  st.info("Summarization Complete")