GERNET Enody
committed on
Commit
•
6348126
1
Parent(s):
83fe45d
Multiple pdf file correction
Browse files- utilities/convert.py +6 -3
utilities/convert.py
CHANGED
@@ -51,6 +51,9 @@ def convert_pdf_to_text(file):
|
|
51 |
images = convert_from_bytes(file)
|
52 |
else:
|
53 |
images = convert_from_path(file)
|
54 |
-
|
55 |
-
|
56 |
-
|
|
|
|
|
|
|
|
51 |
images = convert_from_bytes(file)
|
52 |
else:
|
53 |
images = convert_from_path(file)
|
54 |
+
extraction = []
|
55 |
+
for img in images:
|
56 |
+
text = pytesseract.image_to_string(img)
|
57 |
+
extraction.append(text)
|
58 |
+
|
59 |
+
return " ".join(extraction)
|