Spaces:

neerajkalyank
/

pdf1excel

Runtime error

neerajkalyank committed on Nov 8, 2024

Commit

2196fad

verified ·

1 Parent(s): cc095af

Update toshiba.py

Files changed (1) hide show

toshiba.py CHANGED Viewed

@@ -1,8 +1,14 @@
 import pdfplumber
 from PIL import Image
 import pytesseract
 def extract_text_with_ocr(pdf_file):
     with pdfplumber.open(pdf_file) as pdf:
         for page_num, page in enumerate(pdf.pages):
             # Convert the page to an image
@@ -15,5 +21,5 @@ def extract_text_with_ocr(pdf_file):
                 print(f"Page {page_num + 1} has no extractable text even with OCR.\n{'-' * 40}\n")
 # Usage example
-file_path = 'Toshiba PO.pdf'  # Make sure this path points to your PDF file
 extract_text_with_ocr(file_path)

 import pdfplumber
 from PIL import Image
 import pytesseract
+import os
 def extract_text_with_ocr(pdf_file):
+    # Check if the file exists before opening
+    if not os.path.exists(pdf_file):
+        print(f"Error: The file '{pdf_file}' does not exist.")
+        return
     with pdfplumber.open(pdf_file) as pdf:
         for page_num, page in enumerate(pdf.pages):
             # Convert the page to an image
                 print(f"Page {page_num + 1} has no extractable text even with OCR.\n{'-' * 40}\n")
 # Usage example
+file_path = '/mnt/data/Toshiba PO.pdf'  # Make sure this is the correct path to your PDF file
 extract_text_with_ocr(file_path)