Spaces:
Sleeping
Sleeping
| import cv2 | |
| import numpy as np | |
| from PIL import Image, ImageEnhance, ImageFilter | |
class DocumentScanner:
    """Detect, rectify and enhance a photographed document using OpenCV and PIL."""

    def __init__(self):
        pass

    def _to_bgr(self, image):
        """Return *image* as a BGR array; PIL images are converted, arrays pass through.

        PIL images are first normalised to RGB so that palette, bilevel,
        grayscale and alpha modes all yield a 3-channel array.
        """
        if isinstance(image, Image.Image):
            return cv2.cvtColor(np.array(image.convert("RGB")), cv2.COLOR_RGB2BGR)
        return image

    def order_points(self, pts):
        """Order four corner points as top-left, top-right, bottom-right, bottom-left.

        Args:
            pts: Four (x, y) points as any array-like holding eight values,
                e.g. a list of pairs, a (4, 2) array or a (4, 1, 2) contour.

        Returns:
            A (4, 2) float32 array. The points are sorted by angle around
            their centroid (clockwise on screen, since image y grows
            downwards) and rotated so the point with the smallest x + y
            comes first.

        The angular sort guarantees that each input point is used exactly
        once; picking corners by min/max of x + y and y - x can select the
        same point twice when the quadrilateral is rotated by about 45 degrees.
        """
        pts = np.asarray(pts, dtype="float32").reshape(4, 2)
        center = pts.mean(axis=0)
        angles = np.arctan2(pts[:, 1] - center[1], pts[:, 0] - center[0])
        clockwise = pts[np.argsort(angles)]
        # Start the cycle at the corner closest to the image origin.
        start = int(np.argmin(clockwise.sum(axis=1)))
        return np.roll(clockwise, -start, axis=0)

    def four_point_transform(self, image, pts):
        """Warp the quadrilateral *pts* of *image* onto an upright rectangle.

        Args:
            image: Source image array passed to ``cv2.warpPerspective``.
            pts: Four corner points in any order (see ``order_points``).

        Returns:
            The warped image. Its width and height are the longer of the two
            opposing edge lengths, truncated to int and never less than 1.
        """
        rect = self.order_points(pts)
        tl, tr, br, bl = rect

        # Clamp to 1 so a degenerate quadrilateral cannot request an empty
        # output, which cv2.warpPerspective rejects.
        max_width = max(
            int(np.linalg.norm(br - bl)), int(np.linalg.norm(tr - tl)), 1
        )
        max_height = max(
            int(np.linalg.norm(tr - br)), int(np.linalg.norm(tl - bl)), 1
        )

        dst = np.array(
            [
                [0, 0],
                [max_width - 1, 0],
                [max_width - 1, max_height - 1],
                [0, max_height - 1],
            ],
            dtype="float32",
        )
        matrix = cv2.getPerspectiveTransform(rect, dst)
        return cv2.warpPerspective(image, matrix, (max_width, max_height))

    def detect_document(self, image):
        """Locate a four-cornered outline in a BGR image.

        The image is resized to a height of 500 pixels, edge-detected, and
        the ten largest contours are scanned for the first one whose polygon
        approximation has four vertices. When none is found, a rectangle
        inset by 2% from the resized image border is used instead.

        Args:
            image: BGR image array.

        Returns:
            A (4, 2) float array of corner coordinates scaled back to the
            coordinate system of *image*.
        """
        height, width = image.shape[:2]
        ratio = height / 500.0
        # Guard against a zero width for extremely tall, narrow inputs.
        new_width = max(1, int(width / ratio))
        resized = cv2.resize(image, (new_width, 500))

        gray = cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY)
        blurred = cv2.GaussianBlur(gray, (5, 5), 0)
        edged = cv2.Canny(blurred, 50, 200)
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
        edged = cv2.dilate(edged, kernel, iterations=1)

        # findContours returns 2 values on OpenCV 2.x/4.x and 3 on 3.x;
        # the contour list is the second-to-last element in every version.
        contours = cv2.findContours(edged, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2]
        contours = sorted(contours, key=cv2.contourArea, reverse=True)[:10]

        screen_cnt = None
        for contour in contours:
            perimeter = cv2.arcLength(contour, True)
            approx = cv2.approxPolyDP(contour, 0.02 * perimeter, True)
            if len(approx) == 4:
                screen_cnt = approx
                break

        if screen_cnt is None:
            edge_margin = 0.02
            h, w = resized.shape[:2]
            margin_x = int(w * edge_margin)
            margin_y = int(h * edge_margin)
            screen_cnt = np.array([
                [[margin_x, margin_y]],
                [[w - margin_x, margin_y]],
                [[w - margin_x, h - margin_y]],
                [[margin_x, h - margin_y]],
            ])

        # The resized width is truncated to an int, so the horizontal scale
        # differs slightly from the vertical one; scale each axis by its own
        # factor.
        scale = np.array([width / new_width, ratio])
        return screen_cnt.reshape(4, 2) * scale

    def auto_crop_and_align(self, image):
        """Detect the document outline in *image* and return the rectified crop.

        Args:
            image: A PIL image or a BGR array.

        Returns:
            The perspective-corrected BGR array.
        """
        image = self._to_bgr(image)
        doc_contour = self.detect_document(image)
        return self.four_point_transform(image, doc_contour)

    def enhance_sharpness(self, image, amount=1.5):
        """Sharpen *image* with an unsharp mask.

        Args:
            image: A PIL image, or an array that is converted with
                ``cv2.COLOR_BGR2RGB`` before processing.
            amount: Weight of the (original - blurred) detail layer.

        Returns:
            A PIL image whose values are clipped to 0..255.
        """
        if isinstance(image, np.ndarray):
            pil_image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        else:
            pil_image = image

        blurred = pil_image.filter(ImageFilter.GaussianBlur(radius=1))
        blurred_np = np.array(blurred).astype(np.float32)
        original_np = np.array(pil_image).astype(np.float32)
        sharpened = original_np + (original_np - blurred_np) * amount
        sharpened = np.clip(sharpened, 0, 255).astype(np.uint8)
        return Image.fromarray(sharpened)

    def adaptive_contrast(self, image):
        """Apply CLAHE to the lightness channel of *image* in LAB space.

        Args:
            image: A PIL image or a BGR array.

        Returns:
            The contrast-adjusted BGR array.
        """
        image = self._to_bgr(image)
        lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB)
        lightness, a, b = cv2.split(lab)
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        lightness = clahe.apply(lightness)
        lab = cv2.merge([lightness, a, b])
        return cv2.cvtColor(lab, cv2.COLOR_LAB2BGR)

    def denoise_preserve_details(self, image, strength=3):
        """Denoise *image* with a bilateral filter.

        Args:
            image: A PIL image or a BGR array.
            strength: Multiplied by 10 to give both sigma arguments of
                ``cv2.bilateralFilter`` (filter diameter is fixed at 9).

        Returns:
            The filtered BGR array.
        """
        image = self._to_bgr(image)
        return cv2.bilateralFilter(image, 9, strength * 10, strength * 10)

    def process_document(self, pil_image, enhance_hd=True, scale=2):
        """Run the full scan pipeline on one image.

        Steps: crop and rectify, denoise, adaptive contrast, sharpen,
        brighten by 5%, then optionally upscale.

        Args:
            pil_image: Input image. PIL images of any mode are normalised to
                RGB; other array-likes are treated as grayscale (2-D) or RGB.
            enhance_hd: When true, upscale the result by *scale*.
            scale: Upscaling factor, passed to the AI enhancer or used for
                the Lanczos fallback resize.

        Returns:
            A PIL image, or whatever ``ImageEnhancer.enhance`` returns when
            the optional ``enhancer`` module is used.
        """
        if isinstance(pil_image, Image.Image):
            img_array = self._to_bgr(pil_image)
        else:
            img_array = np.array(pil_image)
            if len(img_array.shape) == 2:
                img_array = cv2.cvtColor(img_array, cv2.COLOR_GRAY2BGR)
            else:
                img_array = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR)

        cropped = self.auto_crop_and_align(img_array)
        denoised = self.denoise_preserve_details(cropped, strength=2)
        contrasted = self.adaptive_contrast(denoised)

        result_pil = Image.fromarray(cv2.cvtColor(contrasted, cv2.COLOR_BGR2RGB))
        sharpened = self.enhance_sharpness(result_pil, amount=0.8)
        brightened = ImageEnhance.Brightness(sharpened).enhance(1.05)

        if not enhance_hd:
            return brightened

        try:
            from enhancer import ImageEnhancer

            ai_enhancer = ImageEnhancer()
            return ai_enhancer.enhance(brightened, scale=scale)
        except Exception as exc:
            # Deliberate best-effort: any failure of the optional enhancer
            # falls back to plain resampling, but the cause is reported so
            # real errors are not hidden.
            print("[DocScan] Using fallback upscaling (AI models load on Hugging Face deployment)")
            print(f"[DocScan] Fallback reason: {exc!r}")
            new_size = (brightened.width * scale, brightened.height * scale)
            hd_image = brightened.resize(new_size, Image.LANCZOS)
            return self.enhance_sharpness(hd_image, amount=0.5)
class FallbackDocumentScanner:
    """PIL-only document enhancer with no cropping or perspective correction."""

    def process_document(self, pil_image, enhance_hd=True, scale=2):
        """Enhance *pil_image* and optionally upscale it.

        The image is converted to RGB when needed, then contrast (1.15),
        sharpness (1.3) and brightness (1.05) are applied in that order.
        With *enhance_hd* set, the result is resized by *scale* using
        Lanczos resampling and sharpened once more (1.2).

        Returns:
            The processed PIL image.
        """
        image = pil_image if pil_image.mode == "RGB" else pil_image.convert("RGB")

        # Each stage wraps the output of the previous one.
        stages = (
            (ImageEnhance.Contrast, 1.15),
            (ImageEnhance.Sharpness, 1.3),
            (ImageEnhance.Brightness, 1.05),
        )
        for enhancer_cls, factor in stages:
            image = enhancer_cls(image).enhance(factor)

        if not enhance_hd:
            return image

        target_size = (image.width * scale, image.height * scale)
        upscaled = image.resize(target_size, Image.LANCZOS)
        return ImageEnhance.Sharpness(upscaled).enhance(1.2)
def get_document_scanner():
    """Return a ``DocumentScanner`` when OpenCV imports, else the PIL-only fallback.

    NOTE(review): this module also imports ``cv2`` at the top level, so when
    OpenCV is missing the module import itself raises before this function
    can be called. The fallback branch only becomes reachable if that
    top-level import is made optional.
    """
    try:
        import cv2  # noqa: F401 - availability probe only
    except ImportError:
        print("OpenCV not available, using fallback scanner")
        return FallbackDocumentScanner()
    return DocumentScanner()