Spaces:
Sleeping
Sleeping
Update ocr_engine.py
Browse files- ocr_engine.py +43 -40
ocr_engine.py
CHANGED
|
@@ -10,39 +10,36 @@ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(
|
|
| 10 |
# Initialize EasyOCR
|
| 11 |
easyocr_reader = easyocr.Reader(['en'], gpu=False)
|
| 12 |
|
| 13 |
-
def
|
| 14 |
-
"""Estimate image
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
return cv2.Laplacian(gray, cv2.CV_64F).var()
|
| 18 |
-
except Exception as e:
|
| 19 |
-
logging.error(f"Blur estimation failed: {str(e)}")
|
| 20 |
-
return 100 # Default value for fallback
|
| 21 |
|
| 22 |
def detect_roi(img):
|
| 23 |
"""Detect and crop the region of interest (likely the digital display)"""
|
| 24 |
try:
|
| 25 |
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
|
| 26 |
-
#
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
|
|
|
| 32 |
# Find contours
|
| 33 |
contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
|
| 34 |
if contours:
|
| 35 |
# Get the largest contour with reasonable size
|
| 36 |
-
valid_contours = [c for c in contours if cv2.contourArea(c) >
|
| 37 |
if valid_contours:
|
| 38 |
largest_contour = max(valid_contours, key=cv2.contourArea)
|
| 39 |
x, y, w, h = cv2.boundingRect(largest_contour)
|
| 40 |
-
# Add padding and ensure bounds
|
| 41 |
-
x, y = max(0, x-
|
| 42 |
-
w, h = min(w+
|
| 43 |
-
if w > 50 and h > 30:
|
| 44 |
return img[y:y+h, x:x+w]
|
| 45 |
-
return img
|
| 46 |
except Exception as e:
|
| 47 |
logging.error(f"ROI detection failed: {str(e)}")
|
| 48 |
return img
|
|
@@ -52,32 +49,34 @@ def enhance_image(img, mode="standard"):
|
|
| 52 |
try:
|
| 53 |
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
|
| 54 |
|
| 55 |
-
if mode == "
|
| 56 |
-
#
|
|
|
|
|
|
|
|
|
|
| 57 |
denoised = cv2.bilateralFilter(gray, d=11, sigmaColor=100, sigmaSpace=100)
|
| 58 |
clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
|
|
|
|
| 59 |
elif mode == "low_noise":
|
| 60 |
-
# Gentle denoising for clear but noisy images
|
| 61 |
denoised = cv2.bilateralFilter(gray, d=7, sigmaColor=50, sigmaSpace=50)
|
| 62 |
clahe = cv2.createCLAHE(clipLimit=1.5, tileGridSize=(8, 8))
|
|
|
|
| 63 |
else:
|
| 64 |
-
# Standard preprocessing
|
| 65 |
denoised = cv2.bilateralFilter(gray, d=9, sigmaColor=75, sigmaSpace=75)
|
| 66 |
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
|
|
|
|
| 67 |
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
thresh = cv2.adaptiveThreshold(contrast, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
|
| 72 |
-
cv2.THRESH_BINARY, 11, 2)
|
| 73 |
|
| 74 |
# Morphological operations
|
| 75 |
kernel = np.ones((3, 3), np.uint8)
|
| 76 |
morphed = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=1)
|
| 77 |
|
| 78 |
-
#
|
| 79 |
-
|
| 80 |
-
sharpen_strength =
|
| 81 |
sharpen_kernel = np.array([[0, -1, 0], [-1, sharpen_strength, -1], [0, -1, 0]])
|
| 82 |
sharpened = cv2.filter2D(morphed, -1, sharpen_kernel)
|
| 83 |
|
|
@@ -99,19 +98,20 @@ def extract_weight_from_image(pil_img):
|
|
| 99 |
img = np.array(pil_img)
|
| 100 |
img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
|
| 101 |
|
| 102 |
-
# Estimate
|
| 103 |
-
|
| 104 |
-
conf_threshold = 0.
|
| 105 |
|
| 106 |
# Detect ROI
|
| 107 |
roi_img = detect_roi(img)
|
| 108 |
|
| 109 |
# Process multiple image versions
|
| 110 |
images_to_process = [
|
| 111 |
-
("
|
| 112 |
-
("
|
| 113 |
-
("
|
| 114 |
-
("
|
|
|
|
| 115 |
]
|
| 116 |
|
| 117 |
best_weight = None
|
|
@@ -135,6 +135,7 @@ def extract_weight_from_image(pil_img):
|
|
| 135 |
text = text.replace("b", "8").replace("B", "8")
|
| 136 |
text = text.replace("z", "2").replace("Z", "2")
|
| 137 |
text = text.replace("q", "9").replace("Q", "9")
|
|
|
|
| 138 |
text = text.replace("kgs", "").replace("kg", "").replace("k", "")
|
| 139 |
text = re.sub(r"[^\d\.]", "", text)
|
| 140 |
|
|
@@ -144,7 +145,9 @@ def extract_weight_from_image(pil_img):
|
|
| 144 |
weight = float(text)
|
| 145 |
# Score based on realistic weight range (0.1–500 kg)
|
| 146 |
range_score = 1.0 if 0.1 <= weight <= 500 else 0.3
|
| 147 |
-
|
|
|
|
|
|
|
| 148 |
if score > best_score and conf > conf_threshold:
|
| 149 |
best_weight = text
|
| 150 |
best_conf = conf
|
|
|
|
| 10 |
# Initialize EasyOCR
|
| 11 |
easyocr_reader = easyocr.Reader(['en'], gpu=False)
|
| 12 |
|
| 13 |
+
def estimate_brightness(img):
    """Return the mean grayscale intensity of *img* (a BGR array).

    Used to detect illuminated (backlit) displays: a high mean intensity
    suggests the display itself is bright relative to its surroundings.
    """
    # Single pass: convert BGR -> gray, then average all pixel values.
    return np.mean(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY))
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
def detect_roi(img):
    """Detect and crop the region of interest (likely the digital display).

    Thresholds bright areas, dilates to merge digit segments into one blob,
    then crops the largest sufficiently large contour with padding. Returns
    *img* unchanged when no plausible region is found or on any error.
    """
    try:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # Fix: reuse the grayscale image already computed above instead of
        # calling estimate_brightness(img), which re-runs the identical
        # BGR->gray conversion over the whole frame a second time.
        brightness = np.mean(gray)
        # Threshold to isolate bright areas (like illuminated displays);
        # brighter scenes get a stricter cutoff.
        thresh_value = 200 if brightness > 100 else 150
        _, thresh = cv2.threshold(gray, thresh_value, 255, cv2.THRESH_BINARY)
        # Dilate to connect separate digit strokes into one contiguous region
        kernel = np.ones((7, 7), np.uint8)
        dilated = cv2.dilate(thresh, kernel, iterations=2)
        # Find outer contours of the candidate display regions
        contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        if contours:
            # Drop tiny noise contours, then take the largest remaining one
            valid_contours = [c for c in contours if cv2.contourArea(c) > 500]
            if valid_contours:
                largest_contour = max(valid_contours, key=cv2.contourArea)
                x, y, w, h = cv2.boundingRect(largest_contour)
                # Add padding and clamp the crop to the image bounds
                x, y = max(0, x - 30), max(0, y - 30)
                w, h = min(w + 60, img.shape[1] - x), min(h + 60, img.shape[0] - y)
                # Reject implausibly small crops (likely noise, not a display)
                if w > 50 and h > 30:
                    return img[y:y + h, x:x + w]
        return img
    except Exception as e:
        logging.error(f"ROI detection failed: {str(e)}")
        return img
|
|
|
|
| 49 |
try:
|
| 50 |
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
|
| 51 |
|
| 52 |
+
if mode == "seven_segment":
|
| 53 |
+
# Gentle preprocessing for seven-segment displays
|
| 54 |
+
denoised = cv2.GaussianBlur(gray, (5, 5), 0)
|
| 55 |
+
_, thresh = cv2.threshold(denoised, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
|
| 56 |
+
elif mode == "high_contrast":
|
| 57 |
denoised = cv2.bilateralFilter(gray, d=11, sigmaColor=100, sigmaSpace=100)
|
| 58 |
clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
|
| 59 |
+
thresh = clahe.apply(denoised)
|
| 60 |
elif mode == "low_noise":
|
|
|
|
| 61 |
denoised = cv2.bilateralFilter(gray, d=7, sigmaColor=50, sigmaSpace=50)
|
| 62 |
clahe = cv2.createCLAHE(clipLimit=1.5, tileGridSize=(8, 8))
|
| 63 |
+
thresh = clahe.apply(denoised)
|
| 64 |
else:
|
|
|
|
| 65 |
denoised = cv2.bilateralFilter(gray, d=9, sigmaColor=75, sigmaSpace=75)
|
| 66 |
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
|
| 67 |
+
thresh = clahe.apply(denoised)
|
| 68 |
|
| 69 |
+
if mode != "seven_segment":
|
| 70 |
+
thresh = cv2.adaptiveThreshold(thresh, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
|
| 71 |
+
cv2.THRESH_BINARY, 11, 2)
|
|
|
|
|
|
|
| 72 |
|
| 73 |
# Morphological operations
|
| 74 |
kernel = np.ones((3, 3), np.uint8)
|
| 75 |
morphed = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=1)
|
| 76 |
|
| 77 |
+
# Reduced sharpening for seven-segment displays
|
| 78 |
+
brightness = estimate_brightness(img)
|
| 79 |
+
sharpen_strength = 3 if mode == "seven_segment" or brightness > 100 else 5
|
| 80 |
sharpen_kernel = np.array([[0, -1, 0], [-1, sharpen_strength, -1], [0, -1, 0]])
|
| 81 |
sharpened = cv2.filter2D(morphed, -1, sharpen_kernel)
|
| 82 |
|
|
|
|
| 98 |
img = np.array(pil_img)
|
| 99 |
img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
|
| 100 |
|
| 101 |
+
# Estimate brightness for adaptive thresholding
|
| 102 |
+
brightness = estimate_brightness(img)
|
| 103 |
+
conf_threshold = 0.5 if brightness > 100 else 0.4 # Stricter for bright displays
|
| 104 |
|
| 105 |
# Detect ROI
|
| 106 |
roi_img = detect_roi(img)
|
| 107 |
|
| 108 |
# Process multiple image versions
|
| 109 |
images_to_process = [
|
| 110 |
+
("seven_segment", enhance_image(roi_img, mode="seven_segment"), {'contrast_ths': 0.3, 'allowlist': '0123456789.'}),
|
| 111 |
+
("standard", enhance_image(roi_img, mode="standard"), {'contrast_ths': 0.1}),
|
| 112 |
+
("high_contrast", enhance_image(roi_img, mode="high_contrast"), {'contrast_ths': 0.1}),
|
| 113 |
+
("low_noise", enhance_image(roi_img, mode="low_noise"), {'contrast_ths': 0.1}),
|
| 114 |
+
("original", roi_img, {'contrast_ths': 0.3, 'allowlist': '0123456789.'})
|
| 115 |
]
|
| 116 |
|
| 117 |
best_weight = None
|
|
|
|
| 135 |
text = text.replace("b", "8").replace("B", "8")
|
| 136 |
text = text.replace("z", "2").replace("Z", "2")
|
| 137 |
text = text.replace("q", "9").replace("Q", "9")
|
| 138 |
+
text = text.replace("6", "2").replace("9", "2") # Specific correction for seven-segment
|
| 139 |
text = text.replace("kgs", "").replace("kg", "").replace("k", "")
|
| 140 |
text = re.sub(r"[^\d\.]", "", text)
|
| 141 |
|
|
|
|
| 145 |
weight = float(text)
|
| 146 |
# Score based on realistic weight range (0.1–500 kg)
|
| 147 |
range_score = 1.0 if 0.1 <= weight <= 500 else 0.3
|
| 148 |
+
# Prefer two-digit weights for scales
|
| 149 |
+
digit_score = 1.1 if 10 <= weight < 100 else 1.0
|
| 150 |
+
score = conf * range_score * digit_score
|
| 151 |
if score > best_score and conf > conf_threshold:
|
| 152 |
best_weight = text
|
| 153 |
best_conf = conf
|