aps19 committed on
Commit
a653348
1 Parent(s): 1a0ecc3

document support added

Browse files
Files changed (1) hide show
  1. app.py +22 -10
app.py CHANGED
@@ -1,4 +1,5 @@
1
  import streamlit as st
 
2
  from transformers import T5Tokenizer, T5ForConditionalGeneration
3
  from transformers import pipeline
4
 
@@ -8,7 +9,14 @@ tokenizer = T5Tokenizer.from_pretrained(checkpoint)
8
  base_model = T5ForConditionalGeneration.from_pretrained(checkpoint)
9
 
10
  # LLM pipeline
11
- def llm_pipeline(text):
 
 
 
 
 
 
 
12
  # Use the pipeline to generate the summary
13
  pipe_sum = pipeline(
14
  'summarization',
@@ -18,7 +26,7 @@ def llm_pipeline(text):
18
  min_length=50
19
  )
20
 
21
- result = pipe_sum(text)
22
  summary = result[0]['summary_text']
23
  return summary
24
 
@@ -28,16 +36,20 @@ st.set_page_config(layout="wide")
28
  def main():
29
  st.title("Document Summarization App using a Smaller Model")
30
 
31
- # Text input area
32
- uploaded_text = st.text_area("Paste your document text here:")
33
 
34
- if uploaded_text:
35
  if st.button("Summarize"):
36
- summary = llm_pipeline(uploaded_text)
37
-
38
- # Display the summary
39
- st.info("Summarization Complete")
40
- st.success(summary)
 
 
 
 
41
 
42
  if __name__ == "__main__":
43
  main()
 
1
  import streamlit as st
2
+ import fitz # PyMuPDF
3
  from transformers import T5Tokenizer, T5ForConditionalGeneration
4
  from transformers import pipeline
5
 
 
9
  base_model = T5ForConditionalGeneration.from_pretrained(checkpoint)
10
 
11
  # LLM pipeline
12
+ def llm_pipeline(pdf_contents):
13
+ # Extract text from the PDF contents
14
+ pdf_document = fitz.open(stream=pdf_contents, filetype="pdf")
15
+ pdf_text = ""
16
+ for page_num in range(pdf_document.page_count):
17
+ page = pdf_document.load_page(page_num)
18
+ pdf_text += page.get_text()
19
+
20
  # Use the pipeline to generate the summary
21
  pipe_sum = pipeline(
22
  'summarization',
 
26
  min_length=50
27
  )
28
 
29
+ result = pipe_sum(pdf_text)
30
  summary = result[0]['summary_text']
31
  return summary
32
 
 
36
  def main():
37
  st.title("Document Summarization App using a Smaller Model")
38
 
39
+ # Button to upload a PDF file
40
+ uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
41
 
42
+ if uploaded_file is not None:
43
  if st.button("Summarize"):
44
+ # Check if the uploaded file is a PDF
45
+ if uploaded_file.type == "application/pdf":
46
+ summary = llm_pipeline(uploaded_file.read())
47
+
48
+ # Display the summary
49
+ st.info("Summarization Complete")
50
+ st.success(summary)
51
+ else:
52
+ st.error("Please upload a valid PDF file.")
53
 
54
  if __name__ == "__main__":
55
  main()