Tuana committed on
Commit
3a4a956
1 Parent(s): d55aa29

switching to single file

Browse files
Files changed (1) hide show
  1. app.py +15 -21
app.py CHANGED
@@ -17,23 +17,23 @@ def start_haystack():
17
  split_length=200,
18
  split_respect_sentence_boundary=True,
19
  )
20
- summarizer = TransformersSummarizer(model_name_or_path="facebook/bart-large-cnn")
21
  return document_store, summarizer, preprocessor
22
 
23
 
24
- def pdf_to_document_store(pdf_files):
 
25
  converter = PDFToTextConverter(remove_numeric_tables=True, valid_languages=["en"])
26
- for pdf in pdf_files:
27
- with open("temp-path.pdf", 'wb') as temp_file:
28
- base64_pdf = base64.b64encode(pdf.read()).decode('utf-8')
29
- temp_file.write(base64.b64decode(base64_pdf))
30
- doc = converter.convert(file_path="temp-path.pdf", meta=None)[0]
31
- preprocessed_docs=preprocessor.process([doc])
32
- document_store.write_documents(preprocessed_docs)
33
  temp_file.close()
34
 
35
- def summarize(files):
36
- pdf_to_document_store(files)
37
  summaries = summarizer.predict(documents=document_store.get_all_documents(), generate_single_summary=True)
38
  st.write('Summary')
39
  for summary in summaries:
@@ -41,14 +41,8 @@ def summarize(files):
41
 
42
  document_store, summarizer, preprocessor = start_haystack()
43
 
44
- uploaded_files = st.file_uploader("Choose PDF files", accept_multiple_files=True)
45
 
46
- if uploaded_files is not None:
47
- if st.button('Summarize Documents'):
48
- summarize(uploaded_files)
49
-
50
- if st.button('Calculate num of docs'):
51
- st.write(document_store.get_document_count())
52
-
53
- if st.button('Clear DocumentStore'):
54
- document_store.delete_documents()
 
17
  split_length=200,
18
  split_respect_sentence_boundary=True,
19
  )
20
+ summarizer = TransformersSummarizer(model_name_or_path="facebook/bart-large-cnn", batch_size=5)
21
  return document_store, summarizer, preprocessor
22
 
23
 
24
+ def pdf_to_document_store(pdf_file):
25
+ document_store.delete_documents()
26
  converter = PDFToTextConverter(remove_numeric_tables=True, valid_languages=["en"])
27
+ with open("temp-path.pdf", 'wb') as temp_file:
28
+ base64_pdf = base64.b64encode(pdf_file.read()).decode('utf-8')
29
+ temp_file.write(base64.b64decode(base64_pdf))
30
+ doc = converter.convert(file_path="temp-path.pdf", meta=None)
31
+ preprocessed_docs=preprocessor.process(doc)
32
+ document_store.write_documents(preprocessed_docs)
 
33
  temp_file.close()
34
 
35
+ def summarize(file):
36
+ pdf_to_document_store(file)
37
  summaries = summarizer.predict(documents=document_store.get_all_documents(), generate_single_summary=True)
38
  st.write('Summary')
39
  for summary in summaries:
 
41
 
42
  document_store, summarizer, preprocessor = start_haystack()
43
 
44
+ uploaded_file = st.file_uploader("Choose a PDF file", accept_multiple_files=False)
45
 
46
+ if uploaded_file is not None:
47
+ if st.button('Summarize Document'):
48
+ summarize(uploaded_file)