Tuana committed on
Commit
836e16d
1 Parent(s): 589a29a

Document from dict

Browse files
Files changed (1) hide show
  1. app.py +2 -1
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import streamlit as st
2
  from haystack.document_stores import InMemoryDocumentStore
3
  from haystack.nodes import FARMReader, PreProcessor, PDFToTextConverter, TfidfRetriever
 
4
  import logging
5
 
6
  document_store = InMemoryDocumentStore()
@@ -18,7 +19,7 @@ preprocessor = PreProcessor(
18
  def pdf_to_document_store(pdf_files):
19
  document_store.delete_documents()
20
  converter = PDFToTextConverter(remove_numeric_tables=True, valid_languages=["en"])
21
- documents = [converter.convert(file_path=pdf.name, meta=None) for pdf in pdf_files]
22
  preprocessed_docs = preprocessor.process(documents)
23
  document_store.write_documents(preprocessed_docs)
24
  return None
 
1
  import streamlit as st
2
  from haystack.document_stores import InMemoryDocumentStore
3
  from haystack.nodes import FARMReader, PreProcessor, PDFToTextConverter, TfidfRetriever
4
+ from haystack.schema import Document
5
  import logging
6
 
7
  document_store = InMemoryDocumentStore()
 
19
  def pdf_to_document_store(pdf_files):
20
  document_store.delete_documents()
21
  converter = PDFToTextConverter(remove_numeric_tables=True, valid_languages=["en"])
22
+ documents = [Document.from_dict(converter.convert(file_path=pdf.name, meta=None) for pdf in pdf_files)]
23
  preprocessed_docs = preprocessor.process(documents)
24
  document_store.write_documents(preprocessed_docs)
25
  return None