Tuana committed on
Commit
f6cc0cb
1 Parent(s): 5fdc2d5

converting to bytes-like object

Browse files
Files changed (1) hide show
  1. app.py +5 -1
app.py CHANGED
@@ -3,6 +3,8 @@ from haystack.document_stores import InMemoryDocumentStore
3
  from haystack.nodes import TransformersSummarizer, PreProcessor, PDFToTextConverter, TfidfRetriever
4
  from haystack.schema import Document
5
  import logging
 
 
6
 
7
  @st.cache(hash_funcs={"builtins.SwigPyObject": lambda _: None},allow_output_mutation=True)
8
  def start_haystack():
@@ -24,10 +26,12 @@ def pdf_to_document_store(pdf_files):
24
  documents = []
25
  for pdf in pdf_files:
26
  with open("temp-path.pdf", 'wb') as temp_file:
27
- temp_file.write(pdf)
 
28
  doc = converter.convert(file_path="temp-path.pdf", meta=None)[0]
29
  preprocessed_doc=preprocessor.process([doc])
30
  documents.append(preprocessed_doc)
 
31
  document_store.write_documents(documents)
32
  st.write('Document count: ', document_store.get_document_count())
33
 
 
3
  from haystack.nodes import TransformersSummarizer, PreProcessor, PDFToTextConverter, TfidfRetriever
4
  from haystack.schema import Document
5
  import logging
6
+ import base64
7
+
8
 
9
  @st.cache(hash_funcs={"builtins.SwigPyObject": lambda _: None},allow_output_mutation=True)
10
  def start_haystack():
 
26
  documents = []
27
  for pdf in pdf_files:
28
  with open("temp-path.pdf", 'wb') as temp_file:
29
+ base64_pdf = base64.b64encode(pdf.read()).decode('utf-8')
30
+ temp_file.write(base64.b64decode(base64_pdf))
31
  doc = converter.convert(file_path="temp-path.pdf", meta=None)[0]
32
  preprocessed_doc=preprocessor.process([doc])
33
  documents.append(preprocessed_doc)
34
+ temp_file.close()
35
  document_store.write_documents(documents)
36
  st.write('Document count: ', document_store.get_document_count())
37