Spaces:

Tuana
/

PDF-Summarizer

Build error

Tuana committed on May 20, 2022

Commit

fe7b517

1 Parent(s): 67f4a7d

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -34,7 +34,6 @@ def pdf_to_document_store(pdf_file):
 def summarize(file):
     pdf_to_document_store(file)
-    st.write('Number of documents', document_store.get_document_count())
     summaries = summarizer.predict(documents=document_store.get_all_documents(), generate_single_summary=True)
     return summaries
@@ -45,6 +44,10 @@ def set_state_if_absent(key, value):
 set_state_if_absent("summaries", None)
 document_store, summarizer, preprocessor = start_haystack()
 uploaded_file = st.file_uploader("Choose a PDF file", accept_multiple_files=False)
 if uploaded_file is not None:

 def summarize(file):
     pdf_to_document_store(file)
     summaries = summarizer.predict(documents=document_store.get_all_documents(), generate_single_summary=True)
     return summaries
 set_state_if_absent("summaries", None)
 document_store, summarizer, preprocessor = start_haystack()
+st.markdown( """
+This Summarization demo uses a [Haystack TransformerSummarizer node](https://haystack.deepset.ai/pipeline_nodes/summarizer). You can upload a PDF file, which will be converted to text with the [Haystack PDFtoTextConverter](https://haystack.deepset.ai/reference/file-converters#pdftotextconverter). In this demo, we produce 1 summary for the whole file you upload. So, the TransformerSummarizer treats the whole thing as one string, which means along with the model limitations, PDFs that have a lot of unneeded text at the beginning produce poor results.
+""", unsafe_allow_html=True)
 uploaded_file = st.file_uploader("Choose a PDF file", accept_multiple_files=False)
 if uploaded_file is not None: