rohan13 committed on
Commit
2d8d09e
1 Parent(s): 6922d1d

Ignoring book if missing.

Browse files
Files changed (1) hide show
  1. utils.py +7 -6
utils.py CHANGED
@@ -183,12 +183,13 @@ def get_links(index_url, paths):
183
 
184
  def get_document_data(book_file, book_url):
185
  document_list = []
186
- with open(book_file, 'rb') as f:
187
- pdf_reader = PdfReader(f)
188
- for i in range(len(pdf_reader.pages)):
189
- page_text = pdf_reader.pages[i].extract_text()
190
- metadata = {"source": book_url}
191
- document_list.append(Document(page_content=page_text, metadata=metadata))
 
192
 
193
  # print("document list" + str(len(document_list)))
194
  return document_list
 
183
 
184
  def get_document_data(book_file, book_url):
185
  document_list = []
186
+ if os.path.isfile(book_file):
187
+ with open(book_file, 'rb') as f:
188
+ pdf_reader = PdfReader(f)
189
+ for i in range(len(pdf_reader.pages)):
190
+ page_text = pdf_reader.pages[i].extract_text()
191
+ metadata = {"source": book_url}
192
+ document_list.append(Document(page_content=page_text, metadata=metadata))
193
 
194
  # print("document list" + str(len(document_list)))
195
  return document_list