Tuana committed on
Commit
5721090
2 Parent(s): 28ec4f0 3986348

Merge branch 'main' of https://huggingface.co/spaces/Tuana/URL-Summarizer

Browse files
Files changed (3) hide show
  1. app.py +1 -3
  2. packages.txt +2 -0
  3. requirements.txt +1 -1
app.py CHANGED
@@ -18,9 +18,7 @@ preprocessor = PreProcessor(
18
  def pdf_to_document_store(pdf_files):
19
  document_store.delete_documents()
20
  converter = PDFToTextConverter(remove_numeric_tables=True, valid_languages=["en"])
21
- documents = []
22
- for pdf in pdf_files:
23
- documents.append(converter.convert(file_path=pdf.name, meta=None))
24
  preprocessed_docs = preprocessor.process(documents)
25
  document_store.write_documents(preprocessed_docs)
26
  return None
 
18
  def pdf_to_document_store(pdf_files):
19
  document_store.delete_documents()
20
  converter = PDFToTextConverter(remove_numeric_tables=True, valid_languages=["en"])
21
+ documents = [converter.convert(file_path=pdf_files.name, meta=None)[0]]
 
 
22
  preprocessed_docs = preprocessor.process(documents)
23
  document_store.write_documents(preprocessed_docs)
24
  return None
packages.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ poppler-utils
2
+ xpdf
requirements.txt CHANGED
@@ -1 +1 @@
1
- farm-haystack[ocr]==1.4.0
 
1
+ farm-haystack[ocr]==1.4.0