pierreguillou committed on
Commit
6ec863b
1 Parent(s): f768a86

Update files/functions.py

Browse files
Files changed (1) hide show
  1. files/functions.py +14 -3
files/functions.py CHANGED
@@ -11,8 +11,7 @@ import pypdf
11
  from pypdf import PdfReader
12
  from pypdf.errors import PdfReadError
13
 
14
- import pdf2image
15
- from pdf2image import convert_from_path
16
  import langdetect
17
  from langdetect import detect_langs
18
 
@@ -363,7 +362,19 @@ def pdf_to_images(uploaded_pdf):
363
  images = [Image.open(image_blank)]
364
  else:
365
  try:
366
- images = convert_from_path(path_to_file, last_page=max_imgboxes)
 
 
 
 
 
 
 
 
 
 
 
 
367
  num_imgs = len(images)
368
  msg = f'The PDF "{filename}" was converted into {num_imgs} images.'
369
  except:
 
11
  from pypdf import PdfReader
12
  from pypdf.errors import PdfReadError
13
 
14
+ import pypdfium2 as pdfium
 
15
  import langdetect
16
  from langdetect import detect_langs
17
 
 
362
  images = [Image.open(image_blank)]
363
  else:
364
  try:
365
+ # images = convert_from_path(path_to_file, last_page=max_imgboxes)
366
+
367
+ pdf = pdfium.PdfDocument(str(filename))
368
+ version = pdf.get_version() # get the PDF standard version
369
+ n_pages = len(pdf) # get the number of pages in the document
370
+ last_page = max_imgboxes
371
+ page_indices = [i for i in range(last_page)] # pages until last_page
372
+ images = list(pdf.render(
373
+ pdfium.PdfBitmap.to_pil,
374
+ page_indices = page_indices,
375
+ scale = 300/72, # 300dpi resolution
376
+ ))
377
+
378
  num_imgs = len(images)
379
  msg = f'The PDF "{filename}" was converted into {num_imgs} images.'
380
  except: