GERNET Enody committed on
Commit
6348126
1 Parent(s): 83fe45d

Multiple PDF file correction

Browse files
Files changed (1) hide show
  1. utilities/convert.py +6 -3
utilities/convert.py CHANGED
@@ -51,6 +51,9 @@ def convert_pdf_to_text(file):
51
  images = convert_from_bytes(file)
52
  else:
53
  images = convert_from_path(file)
54
- for i,img in enumerate(images):
55
- extraction = (pytesseract.image_to_string(img)[:-1])
56
- return extraction
 
 
 
 
51
  images = convert_from_bytes(file)
52
  else:
53
  images = convert_from_path(file)
54
+ extraction = []
55
+ for img in images:
56
+ text = pytesseract.image_to_string(img)
57
+ extraction.append(text)
58
+
59
+ return " ".join(extraction)