aiyer committed on
Commit
5b4c8ee
1 Parent(s): 1aaab51

adding doc summarizer

Browse files
Files changed (1) hide show
  1. app.py +44 -2
app.py CHANGED
@@ -1,4 +1,46 @@
 
1
  import streamlit as st
 
 
2
 
3
- x = st.slider('Select a value')
4
- st.write(x, 'squared is', x * x)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import PyPDF2
2
  import streamlit as st
3
+ from dotenv import load_dotenv
4
+ from transformers import pipeline
5
 
6
+
7
def retrieve_pdf_text(pdf_file):
    """Extract and concatenate the text of every page in a PDF.

    Args:
        pdf_file: The PDF source handed to ``PyPDF2.PdfReader``; in this
            app it is the object returned by ``st.file_uploader``.

    Returns:
        The text of all pages joined in page order, or an empty string when
        no page yields any text.
    """
    pdf_reader = PyPDF2.PdfReader(pdf_file)
    # ``or ""`` keeps a page with no extractable text from breaking the
    # concatenation; ``join`` avoids rebuilding the string once per page.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
13
+
14
+
15
def main():
    """Run the Streamlit app that summarizes an uploaded PDF.

    Renders a file uploader and a maximum-length slider. Once a PDF is
    uploaded and "Run" is clicked, the PDF text is extracted, summarized
    with the Hugging Face model named by ``hf_name``, and the summary is
    written to the page.
    """
    load_dotenv()
    st.set_page_config(page_title='Document Summarizer', page_icon=':books:')
    st.header("Summarize a PDF")
    hf_name = "pszemraj/led-base-book-summary"
    min_summary_length = 8

    pdf_file = st.file_uploader("Upload a PDF file with Annual Report", type=["pdf"])
    # The slider's lower bound is the minimum summary length so the user
    # cannot pick a max_length smaller than min_length.
    length = st.slider('Max summary length', min_summary_length, 3000, 1000)

    # Nothing to do until a PDF is uploaded and the user asks for a summary.
    if not pdf_file:
        return
    if not st.button("Run"):
        return

    with st.spinner("Summarizing.."):
        # Extract only after "Run" is clicked: Streamlit re-executes the
        # script on every widget interaction, so doing this earlier would
        # re-parse the PDF each time the slider moves.
        raw_text = retrieve_pdf_text(pdf_file)
        if not raw_text.strip():
            st.warning("No extractable text was found in the uploaded PDF.")
            return

        summarizer = pipeline("summarization", hf_name)
        result = summarizer(
            raw_text,
            # Clip inputs longer than the model's maximum input size
            # instead of failing on long reports.
            truncation=True,
            min_length=min_summary_length,
            max_length=length,
            no_repeat_ngram_size=3,
            encoder_no_repeat_ngram_size=3,
            repetition_penalty=3.5,
            num_beams=4,
            do_sample=False,
            early_stopping=True,
        )
    st.write(result[0]["summary_text"])


# Launch the app only when this file is executed as a script.
if __name__ == '__main__':
    main()