"""OCR helpers for reading a numeric weight value from an image."""

import logging
import re
from typing import Tuple

import cv2
import numpy as np
import pytesseract
from PIL import Image

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

logger = logging.getLogger(__name__)

# Tesseract options: default engine mode, page segmentation mode 6, and a
# whitelist limiting recognised characters to digits, '.', 'k' and 'g'.
_TESSERACT_CONFIG = r'--oem 3 --psm 6 -c tessedit_char_whitelist=0123456789.kg'

# First run of digits, optionally followed by '.' and more digits.
_WEIGHT_PATTERN = re.compile(r"(\d+\.?\d*)")

# Readings must satisfy 0 < value <= _MAX_WEIGHT to be accepted.
_MAX_WEIGHT = 5000

# Fixed confidence reported for an accepted reading; it is a constant, not a
# score computed from the OCR engine.
_ACCEPTED_CONFIDENCE = 90.0

# Result returned whenever no plausible weight could be extracted.
_NOT_DETECTED = ("Not detected", 0.0)


def preprocess_image(img: np.ndarray) -> np.ndarray:
    """Prepare a BGR image for OCR.

    The image is converted to grayscale, upscaled 2x with linear
    interpolation, smoothed with a 3x3 Gaussian blur and finally binarised
    with an inverted Gaussian adaptive threshold (block size 11, constant 2).

    Args:
        img: Image array in BGR channel order, as expected by
            ``cv2.COLOR_BGR2GRAY``.

    Returns:
        The thresholded single-channel image.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    resized = cv2.resize(gray, None, fx=2, fy=2, interpolation=cv2.INTER_LINEAR)
    blurred = cv2.GaussianBlur(resized, (3, 3), 0)
    thresh = cv2.adaptiveThreshold(
        blurred,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV,
        11,
        2,
    )
    return thresh


def extract_weight_from_image(pil_img: Image.Image) -> Tuple[str, float]:
    """Run OCR on an image and extract the first plausible weight reading.

    Args:
        pil_img: The image to read. PIL images of any mode are normalised to
            RGB first; any other array-like input is passed to ``np.array``
            unchanged and is expected to already be in RGB channel order.

    Returns:
        A ``(value, confidence)`` tuple. On success ``value`` is the reading
        formatted with ``str(float)`` and ``confidence`` is ``90.0``. If no
        number in the range ``0 < value <= 5000`` is found, or if any step
        raises, the result is ``("Not detected", 0.0)``.
    """
    try:
        # Grayscale, palette and bilevel images do not produce the channel
        # layout that COLOR_RGB2BGR requires, so force RGB up front.
        if isinstance(pil_img, Image.Image):
            pil_img = pil_img.convert("RGB")

        img = np.array(pil_img)
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
        processed = preprocess_image(img)

        raw_text = pytesseract.image_to_string(processed, config=_TESSERACT_CONFIG)
        logger.info("OCR Raw Output: %s", raw_text)

        # Spaces and newlines are dropped before matching, so digit groups
        # separated only by those characters are read as a single number.
        cleaned = raw_text.replace(" ", "").replace("\n", "")
        match = _WEIGHT_PATTERN.search(cleaned)
        if match:
            value = float(match.group(1))
            if 0 < value <= _MAX_WEIGHT:
                return str(value), _ACCEPTED_CONFIDENCE
        return _NOT_DETECTED
    except Exception as e:
        # Best-effort boundary: callers always receive a result tuple. The
        # traceback is logged so failures remain diagnosable.
        logger.exception("OCR error: %s", e)
        return _NOT_DETECTED