Spaces:

DeepDiveDev
/

TransformoDocs-Demo

Sleeping

App Files Community

DeepDiveDev committed on Feb 28

Commit

6ec889f

verified ·

1 Parent(s): 2c5810c

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -26

app.py CHANGED Viewed

@@ -1,19 +1,19 @@
 import gradio as gr
-import pytesseract
 from PIL import Image
 import pdf2image
 import tempfile
 import os
 import cv2
 import numpy as np
-# You may need to set the path to tesseract executable if it's not in PATH
-# pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'  # For Windows
-# For Linux/Mac, ensure Tesseract is installed
 def preprocess_image(img):
     """Preprocess image to improve OCR accuracy for handwritten text"""
-    # Convert to grayscale
     img_array = np.array(img)
     # Check if the image is already grayscale
@@ -22,27 +22,28 @@ def preprocess_image(img):
     else:
         gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)
-    # Apply thresholding
-    _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
     # Noise removal
     kernel = np.ones((1, 1), np.uint8)
     binary = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)
     binary = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)
-    # Invert back
-    binary = 255 - binary
-    return Image.fromarray(binary)
 def extract_text_from_image(img):
-    """Extract text from an image using OCR"""
     # Preprocess for better handwriting recognition
     processed_img = preprocess_image(img)
-    # Use pytesseract with configuration optimized for handwritten text
-    custom_config = r'--oem 3 --psm 6 -l eng -c tessedit_char_whitelist="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789.,!?@#$%^&*()-+=_:;\'\" "'
-    text = pytesseract.image_to_string(processed_img, config=custom_config)
     return text.strip()
@@ -65,17 +66,20 @@ def process_file(file):
     if file is None:
         return "No file uploaded. Please upload an image or PDF file."
-    file_extension = os.path.splitext(file.name)[1].lower()
-    if file_extension == ".pdf":
-        # Process PDF
-        return extract_text_from_pdf(file.name)
-    elif file_extension in [".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif"]:
-        # Process Image
-        img = Image.open(file.name)
-        return extract_text_from_image(img)
-    else:
-        return "Unsupported file format. Please upload a PDF or image file (JPG, PNG, BMP, TIFF)."
 # Create Gradio interface
 with gr.Blocks(title="Handwritten Text OCR Extractor") as app:
@@ -95,6 +99,7 @@ with gr.Blocks(title="Handwritten Text OCR Extractor") as app:
     gr.Markdown("### Notes:")
     gr.Markdown("- For best results, ensure the handwriting is clear and the image is well-lit")
     gr.Markdown("- The system works best with dark text on light background")
     gr.Markdown("- Multiple page PDFs will show page breaks in the output")
 # Launch the app

 import gradio as gr
+import easyocr
 from PIL import Image
 import pdf2image
 import tempfile
 import os
 import cv2
 import numpy as np
+import torch
+# Initialize the OCR reader (this will download models on first run)
+reader = easyocr.Reader(['en'], gpu=torch.cuda.is_available())
 def preprocess_image(img):
     """Preprocess image to improve OCR accuracy for handwritten text"""
+    # Convert PIL Image to numpy array
     img_array = np.array(img)
     # Check if the image is already grayscale
     else:
         gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)
+    # Apply adaptive thresholding for better handling of different lighting conditions
+    binary = cv2.adaptiveThreshold(
+        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
+    )
     # Noise removal
     kernel = np.ones((1, 1), np.uint8)
     binary = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)
     binary = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)
+    return binary
 def extract_text_from_image(img):
+    """Extract text from an image using EasyOCR"""
     # Preprocess for better handwriting recognition
     processed_img = preprocess_image(img)
+    # Use EasyOCR to extract text
+    results = reader.readtext(processed_img)
+    # Combine all detected text
+    text = '\n'.join([result[1] for result in results])
     return text.strip()
     if file is None:
         return "No file uploaded. Please upload an image or PDF file."
+    try:
+        file_extension = os.path.splitext(file.name)[1].lower()
+        if file_extension == ".pdf":
+            # Process PDF
+            return extract_text_from_pdf(file.name)
+        elif file_extension in [".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif"]:
+            # Process Image
+            img = Image.open(file.name)
+            return extract_text_from_image(img)
+        else:
+            return "Unsupported file format. Please upload a PDF or image file (JPG, PNG, BMP, TIFF)."
+    except Exception as e:
+        return f"Error processing file: {str(e)}"
 # Create Gradio interface
 with gr.Blocks(title="Handwritten Text OCR Extractor") as app:
     gr.Markdown("### Notes:")
     gr.Markdown("- For best results, ensure the handwriting is clear and the image is well-lit")
     gr.Markdown("- The system works best with dark text on light background")
+    gr.Markdown("- The first run may take longer as it downloads the OCR models")
     gr.Markdown("- Multiple page PDFs will show page breaks in the output")
 # Launch the app