"""Receipt OCR demo: OpenCV preprocessing, Tesseract OCR and a Gradio UI."""

import re

import cv2
import gradio as gr
import numpy as np
import pandas as pd
import pytesseract
from matplotlib import pyplot as plt

# Type codes the parser looks for on an item line.
_TYPE_CODES = r'(FB|FA|NB)'

# Cheap pre-filter: the line contains one of the type codes.
_ITEM_HINT = re.compile(r'.*\s*' + _TYPE_CODES)

# Full item line: "<name> <price> <code>".  OCR frequently misreads the
# decimal separator, so any of , . / : ' is accepted between the digits.
_ITEM_LINE = re.compile(r"(.*) (\d+[,./:']*\d+) " + _TYPE_CODES)

# One or more (possibly misread) decimal separators inside a price.
_PRICE_SEPARATORS = re.compile(r"[,./:']+")

_ITEM_COLUMNS = ['item', 'price', 'type']
_NOT_PARSED_COLUMNS = ['not parsed']


def get_grayscale(image):
    """Convert a BGR image to single-channel grayscale."""
    return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)


def remove_noise(image):
    """Apply a median blur with aperture size 5."""
    return cv2.medianBlur(image, 5)


def thresholding(image, thresh_hold=0, which='ostu'):
    """Binarize a grayscale image.

    Args:
        image: single-channel image.
        thresh_hold: threshold value passed to ``cv2.threshold``; not used by
            the adaptive mode.
        which: ``'ostu'`` (historical spelling, kept as the default) or
            ``'otsu'`` for Otsu thresholding, ``'simple'`` for a fixed binary
            threshold, ``'adaptive'`` for Gaussian adaptive thresholding.

    Returns:
        The thresholded image.

    Raises:
        ValueError: if ``which`` is not one of the supported modes.
    """
    if which in ('ostu', 'otsu'):
        return cv2.threshold(
            image, thresh_hold, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
        )[1]
    if which == 'simple':
        return cv2.threshold(image, thresh_hold, 255, cv2.THRESH_BINARY)[1]
    if which == 'adaptive':
        return cv2.adaptiveThreshold(
            image, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
        )
    # Previously an unknown mode silently returned None, which only failed
    # later (and obscurely) inside the OCR call.
    raise ValueError(
        f"unknown thresholding mode {which!r}; "
        "expected 'ostu'/'otsu', 'simple' or 'adaptive'"
    )


def dilate(image):
    """Dilate the image once with a 5x5 kernel of ones."""
    kernel = np.ones((5, 5), np.uint8)
    return cv2.dilate(image, kernel, iterations=1)


def erode(image):
    """Erode the image once with a 5x5 kernel of ones."""
    kernel = np.ones((5, 5), np.uint8)
    return cv2.erode(image, kernel, iterations=1)


def opening(image):
    """Morphological opening (erosion followed by dilation), 5x5 kernel."""
    kernel = np.ones((5, 5), np.uint8)
    return cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel)


def canny(image):
    """Run Canny edge detection with thresholds 100 and 200."""
    return cv2.Canny(image, 100, 200)


def deskew(image):
    """Rotate the image to undo the skew estimated from its non-zero pixels.

    The angle comes from the minimum-area rectangle around all pixels > 0.
    """
    # NOTE(review): the angle convention of cv2.minAreaRect and the point
    # dtypes it accepts appear to differ between OpenCV releases - confirm
    # against the installed version before relying on this helper.
    coords = np.column_stack(np.where(image > 0))
    angle = cv2.minAreaRect(coords)[-1]
    if angle < -45:
        angle = -(90 + angle)
    else:
        angle = -angle
    (h, w) = image.shape[:2]
    center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    rotated = cv2.warpAffine(
        image, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE
    )
    return rotated


def match_template(image, template):
    """Return the normalized cross-correlation map of template over image."""
    return cv2.matchTemplate(image, template, cv2.TM_CCOEFF_NORMED)


def show_cvimg(img, figsize=(15, 15)):
    """Display a BGR image with matplotlib (converted to RGB first)."""
    fig, ax = plt.subplots(dpi=80, figsize=figsize)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    ax.imshow(img)


def extract_purchased_items(txt):
    """Parse OCR text into purchased items.

    A line is considered only if it contains one of the type codes FB, FA or
    NB.  Such a line is parsed as ``<item> <price> <code>``; lines that carry
    a code but do not fit that shape are reported separately.

    Args:
        txt: OCR output, one receipt line per text line.

    Returns:
        A pair ``(items, not_parsed)`` of DataFrames.  ``items`` has columns
        ``item``, ``price`` (string, separators normalized to ``.``) and
        ``type``; ``not_parsed`` has the single column ``not parsed``.  Both
        frames keep their columns even when empty.
    """
    items = []
    not_parsed = []
    for line in txt.split('\n'):
        if not _ITEM_HINT.match(line):
            continue
        result = _ITEM_LINE.search(line)
        if result is None:
            not_parsed.append({'not parsed': line})
            continue
        items.append({
            'item': result.group(1),
            # Collapse a run of misread separators into a single decimal
            # point so "12,,50" becomes "12.50" rather than "12..50".
            'price': _PRICE_SEPARATORS.sub('.', result.group(2)),
            'type': result.group(3),
        })
    return (
        pd.DataFrame(items, columns=_ITEM_COLUMNS),
        pd.DataFrame(not_parsed, columns=_NOT_PARSED_COLUMNS),
    )


def parse_receipt(img, **kwargs):
    """Run preprocessing and OCR on a receipt image and extract its items.

    Args:
        img: colour image array, or ``None`` when no image was supplied.
        **kwargs: forwarded to ``thresholding`` (``thresh_hold``, ``which``).

    Returns:
        The ``(items, not_parsed)`` DataFrames from
        ``extract_purchased_items``.
    """
    if img is None:
        # Nothing to OCR; return empty tables instead of failing in OpenCV.
        return extract_purchased_items('')

    # preprocessing
    # NOTE(review): get_grayscale assumes BGR channel order, while the Gradio
    # image input presumably delivers RGB - confirm; this only changes the
    # channel weights used for the grayscale conversion.
    gray = get_grayscale(img)
    thresh = thresholding(gray, **kwargs)

    # ocr
    custom_config = r'--oem 3 --psm 6'
    txt = pytesseract.image_to_string(thresh, config=custom_config)
    return extract_purchased_items(txt)


iface = gr.Interface(fn=parse_receipt, inputs="image", outputs=["dataframe", "dataframe"])
iface.launch()