DeepDiveDev committed on
Commit
6ec889f
·
verified ·
1 Parent(s): 2c5810c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -26
app.py CHANGED
@@ -1,19 +1,19 @@
1
  import gradio as gr
2
- import pytesseract
3
  from PIL import Image
4
  import pdf2image
5
  import tempfile
6
  import os
7
  import cv2
8
  import numpy as np
 
9
 
10
- # You may need to set the path to tesseract executable if it's not in PATH
11
- # pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe' # For Windows
12
- # For Linux/Mac, ensure Tesseract is installed
13
 
14
  def preprocess_image(img):
15
  """Preprocess image to improve OCR accuracy for handwritten text"""
16
- # Convert to grayscale
17
  img_array = np.array(img)
18
 
19
  # Check if the image is already grayscale
@@ -22,27 +22,28 @@ def preprocess_image(img):
22
  else:
23
  gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)
24
 
25
- # Apply thresholding
26
- _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
 
 
27
 
28
  # Noise removal
29
  kernel = np.ones((1, 1), np.uint8)
30
  binary = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)
31
  binary = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)
32
 
33
- # Invert back
34
- binary = 255 - binary
35
-
36
- return Image.fromarray(binary)
37
 
38
  def extract_text_from_image(img):
39
- """Extract text from an image using OCR"""
40
  # Preprocess for better handwriting recognition
41
  processed_img = preprocess_image(img)
42
 
43
- # Use pytesseract with configuration optimized for handwritten text
44
- custom_config = r'--oem 3 --psm 6 -l eng -c tessedit_char_whitelist="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789.,!?@#$%^&*()-+=_:;\'\" "'
45
- text = pytesseract.image_to_string(processed_img, config=custom_config)
 
 
46
 
47
  return text.strip()
48
 
@@ -65,17 +66,20 @@ def process_file(file):
65
  if file is None:
66
  return "No file uploaded. Please upload an image or PDF file."
67
 
68
- file_extension = os.path.splitext(file.name)[1].lower()
69
-
70
- if file_extension == ".pdf":
71
- # Process PDF
72
- return extract_text_from_pdf(file.name)
73
- elif file_extension in [".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif"]:
74
- # Process Image
75
- img = Image.open(file.name)
76
- return extract_text_from_image(img)
77
- else:
78
- return "Unsupported file format. Please upload a PDF or image file (JPG, PNG, BMP, TIFF)."
 
 
 
79
 
80
  # Create Gradio interface
81
  with gr.Blocks(title="Handwritten Text OCR Extractor") as app:
@@ -95,6 +99,7 @@ with gr.Blocks(title="Handwritten Text OCR Extractor") as app:
95
  gr.Markdown("### Notes:")
96
  gr.Markdown("- For best results, ensure the handwriting is clear and the image is well-lit")
97
  gr.Markdown("- The system works best with dark text on light background")
 
98
  gr.Markdown("- Multiple page PDFs will show page breaks in the output")
99
 
100
  # Launch the app
 
1
  import gradio as gr
2
+ import easyocr
3
  from PIL import Image
4
  import pdf2image
5
  import tempfile
6
  import os
7
  import cv2
8
  import numpy as np
9
+ import torch
10
 
11
+ # Initialize the OCR reader (this will download models on first run)
12
+ reader = easyocr.Reader(['en'], gpu=torch.cuda.is_available())
 
13
 
14
  def preprocess_image(img):
15
  """Preprocess image to improve OCR accuracy for handwritten text"""
16
+ # Convert PIL Image to numpy array
17
  img_array = np.array(img)
18
 
19
  # Check if the image is already grayscale
 
22
  else:
23
  gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)
24
 
25
+ # Apply adaptive thresholding for better handling of different lighting conditions
26
+ binary = cv2.adaptiveThreshold(
27
+ gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
28
+ )
29
 
30
  # Noise removal
31
  kernel = np.ones((1, 1), np.uint8)
32
  binary = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)
33
  binary = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)
34
 
35
+ return binary
 
 
 
36
 
37
  def extract_text_from_image(img):
38
+ """Extract text from an image using EasyOCR"""
39
  # Preprocess for better handwriting recognition
40
  processed_img = preprocess_image(img)
41
 
42
+ # Use EasyOCR to extract text
43
+ results = reader.readtext(processed_img)
44
+
45
+ # Combine all detected text
46
+ text = '\n'.join([result[1] for result in results])
47
 
48
  return text.strip()
49
 
 
66
  if file is None:
67
  return "No file uploaded. Please upload an image or PDF file."
68
 
69
+ try:
70
+ file_extension = os.path.splitext(file.name)[1].lower()
71
+
72
+ if file_extension == ".pdf":
73
+ # Process PDF
74
+ return extract_text_from_pdf(file.name)
75
+ elif file_extension in [".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif"]:
76
+ # Process Image
77
+ img = Image.open(file.name)
78
+ return extract_text_from_image(img)
79
+ else:
80
+ return "Unsupported file format. Please upload a PDF or image file (JPG, PNG, BMP, TIFF)."
81
+ except Exception as e:
82
+ return f"Error processing file: {str(e)}"
83
 
84
  # Create Gradio interface
85
  with gr.Blocks(title="Handwritten Text OCR Extractor") as app:
 
99
  gr.Markdown("### Notes:")
100
  gr.Markdown("- For best results, ensure the handwriting is clear and the image is well-lit")
101
  gr.Markdown("- The system works best with dark text on light background")
102
+ gr.Markdown("- The first run may take longer as it downloads the OCR models")
103
  gr.Markdown("- Multiple page PDFs will show page breaks in the output")
104
 
105
  # Launch the app