Tuana committed on
Commit
9a54394
1 Parent(s): 725ac30

cache haystack setup

Browse files
Files changed (1) hide show
  1. app.py +15 -11
app.py CHANGED
@@ -4,17 +4,20 @@ from haystack.nodes import TransformersSummarizer, PreProcessor, PDFToTextConver
4
  from haystack.schema import Document
5
  import logging
6
 
7
- document_store = InMemoryDocumentStore()
8
- preprocessor = PreProcessor(
9
- clean_empty_lines=True,
10
- clean_whitespace=True,
11
- clean_header_footer=True,
12
- split_by="word",
13
- split_length=100,
14
- split_respect_sentence_boundary=True,
15
- split_overlap=3
16
- )
17
- summarizer = TransformersSummarizer(model_name_or_path="google/pegasus-xsum")
 
 
 
18
 
19
 
20
  def pdf_to_document_store(pdf_files):
@@ -33,6 +36,7 @@ def summarize(files):
33
  summary = summarizer.predict(documents=document_store.get_all_documents(), generate_single_summary=True)
34
  st.write(summary)
35
 
 
36
 
37
  uploaded_files = st.file_uploader("Choose PDF files", accept_multiple_files=True)
38
 
 
4
  from haystack.schema import Document
5
  import logging
6
 
7
+ @st.cache(hash_funcs={"builtins.SwigPyObject": lambda _: None},allow_output_mutation=True)
8
+ def start_haystack():
9
+ document_store = InMemoryDocumentStore()
10
+ preprocessor = PreProcessor(
11
+ clean_empty_lines=True,
12
+ clean_whitespace=True,
13
+ clean_header_footer=True,
14
+ split_by="word",
15
+ split_length=100,
16
+ split_respect_sentence_boundary=True,
17
+ split_overlap=3
18
+ )
19
+ summarizer = TransformersSummarizer(model_name_or_path="google/pegasus-xsum")
20
+ return document_store, summarizer, preprocessor
21
 
22
 
23
  def pdf_to_document_store(pdf_files):
 
36
  summary = summarizer.predict(documents=document_store.get_all_documents(), generate_single_summary=True)
37
  st.write(summary)
38
 
39
+ document_store, summarizer, preprocessor = start_haystack()
40
 
41
  uploaded_files = st.file_uploader("Choose PDF files", accept_multiple_files=True)
42