Spaces:
Runtime error
Runtime error
neerajkalyank
committed on
Update toshiba.py
Browse files - toshiba.py +7 -1
toshiba.py
CHANGED
@@ -1,8 +1,14 @@
|
|
1 |
import pdfplumber
|
2 |
from PIL import Image
|
3 |
import pytesseract
|
|
|
4 |
|
5 |
def extract_text_with_ocr(pdf_file):
|
|
|
|
|
|
|
|
|
|
|
6 |
with pdfplumber.open(pdf_file) as pdf:
|
7 |
for page_num, page in enumerate(pdf.pages):
|
8 |
# Convert the page to an image
|
@@ -15,5 +21,5 @@ def extract_text_with_ocr(pdf_file):
|
|
15 |
print(f"Page {page_num + 1} has no extractable text even with OCR.\n{'-' * 40}\n")
|
16 |
|
17 |
# Usage example
|
18 |
-
file_path = 'Toshiba PO.pdf' # Make sure this path
|
19 |
extract_text_with_ocr(file_path)
|
|
|
1 |
import pdfplumber
|
2 |
from PIL import Image
|
3 |
import pytesseract
|
4 |
+
import os
|
5 |
|
6 |
def extract_text_with_ocr(pdf_file):
|
7 |
+
# Check if the file exists before opening
|
8 |
+
if not os.path.exists(pdf_file):
|
9 |
+
print(f"Error: The file '{pdf_file}' does not exist.")
|
10 |
+
return
|
11 |
+
|
12 |
with pdfplumber.open(pdf_file) as pdf:
|
13 |
for page_num, page in enumerate(pdf.pages):
|
14 |
# Convert the page to an image
|
|
|
21 |
print(f"Page {page_num + 1} has no extractable text even with OCR.\n{'-' * 40}\n")
|
22 |
|
23 |
# Usage example
|
24 |
+
file_path = '/mnt/data/Toshiba PO.pdf' # Make sure this is the correct path to your PDF file
|
25 |
extract_text_with_ocr(file_path)
|