import cv2
import numpy as np
from PIL import Image
import torch
import torch.nn.functional as F
from transformers import CLIPProcessor, CLIPModel
from ultralytics import YOLO
from skimage.feature import hog
from skimage.metrics import structural_similarity as ssim


class SignatureSimilarity:
    def __init__(self, yolo_model_path, clip_threshold=0.8):
        self.yolo_model = YOLO(yolo_model_path)
        clip = "openai/clip-vit-base-patch32"
        self.clip_model = CLIPModel.from_pretrained(clip)
        self.clip_processor = CLIPProcessor.from_pretrained(clip)
        self.clip_threshold = clip_threshold
        self.target_size = (224, 224)  # Default input size for CLIP

    def ensure_rgb(self, image):
        """Convert image to RGB if it's not already"""
        if isinstance(image, np.ndarray):
            if len(image.shape) == 2:  # Grayscale
                return cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
            elif image.shape[2] == 3:  # RGB
                if image.dtype == np.uint8:
                    return image
                return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            elif image.shape[2] == 4:  # RGBA
                return cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
        elif isinstance(image, Image.Image):
            return image.convert('RGB')
        return image

    def normalize_size(self, image1, image2):
        """Resize images to same dimensions while preserving aspect ratio"""
        # Ensure we're working with PIL images
        if isinstance(image1, np.ndarray):
            image1 = Image.fromarray(image1)
        if isinstance(image2, np.ndarray):
            image2 = Image.fromarray(image2)

        w1, h1 = image1.size
        w2, h2 = image2.size

        # Find the target size that preserves aspect ratio
        aspect_ratio1 = w1 / h1
        aspect_ratio2 = w2 / h2

        if aspect_ratio1 > aspect_ratio2:
            new_width = min(w1, w2)
            new_height1 = int(new_width / aspect_ratio1)
            new_height2 = int(new_width / aspect_ratio2)
            new_width1 = new_width
            new_width2 = new_width
        else:
            new_height = min(h1, h2)
            new_width1 = int(new_height * aspect_ratio1)
            new_width2 = int(new_height * aspect_ratio2)
            new_height1 = new_height
            new_height2 = new_height

        # Resize images
        image1 = image1.resize((new_width1, new_height1), Image.Resampling.LANCZOS)
        image2 = image2.resize((new_width2, new_height2), Image.Resampling.LANCZOS)

        return image1, image2

    def preprocess_image(self, image):
        """Enhance image quality for better matching"""
        # Ensure we're working with a PIL image
        if isinstance(image, np.ndarray):
            image = Image.fromarray(image)

        # Convert to numpy array for OpenCV operations
        img_array = np.array(image)

        # Convert to grayscale
        if len(img_array.shape) == 3:
            gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)
        else:
            gray = img_array

        # Denoise
        denoised = cv2.fastNlMeansDenoising(gray)

        # Adaptive thresholding
        binary = cv2.adaptiveThreshold(
            denoised, 255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY_INV, 11, 2
        )

        # Remove small noise
        kernel = np.ones((2, 2), np.uint8)
        cleaned = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel)

        # Convert back to RGB for CLIP (maintaining original orientation)
        cleaned_rgb = cv2.cvtColor(cleaned, cv2.COLOR_GRAY2RGB)
        return Image.fromarray(cleaned_rgb)

    def calculate_clip_similarity(self, image1, image2):
        """Calculate CLIP similarity for normalized images"""
        # Normalize sizes first
        norm_img1, norm_img2 = self.normalize_size(image1, image2)

        # Resize both images to CLIP's expected size
        img1_resized = norm_img1.resize(self.target_size, Image.Resampling.LANCZOS)
        img2_resized = norm_img2.resize(self.target_size, Image.Resampling.LANCZOS)

        inputs1 = self.clip_processor(images=img1_resized, return_tensors="pt", padding=True)
        inputs2 = self.clip_processor(images=img2_resized, return_tensors="pt", padding=True)

        with torch.no_grad():
            emb1 = self.clip_model.get_image_features(**inputs1)
            emb2 = self.clip_model.get_image_features(**inputs2)

        emb1 = F.normalize(emb1, p=2, dim=1)
        emb2 = F.normalize(emb2, p=2, dim=1)

        similarity = F.cosine_similarity(emb1, emb2).item()
        # Rescale so scores at or below the threshold map to 0
        return max(0, (similarity - self.clip_threshold) / (1 - self.clip_threshold))

    def calculate_shape_similarity(self, img1_array, img2_array):
        """Calculate shape-based similarities"""
        try:
            # Ensure we're working with numpy arrays
            if isinstance(img1_array, Image.Image):
                img1_array = np.array(img1_array)
            if isinstance(img2_array, Image.Image):
                img2_array = np.array(img2_array)

            # Convert to grayscale if needed
            if len(img1_array.shape) == 3:
                img1_gray = cv2.cvtColor(img1_array, cv2.COLOR_RGB2GRAY)
                img2_gray = cv2.cvtColor(img2_array, cv2.COLOR_RGB2GRAY)
            else:
                img1_gray = img1_array
                img2_gray = img2_array

            # Ensure same size for comparison
            h1, w1 = img1_gray.shape
            h2, w2 = img2_gray.shape
            target_size = (min(w1, w2), min(h1, h2))

            img1_resized = cv2.resize(img1_gray, target_size)
            img2_resized = cv2.resize(img2_gray, target_size)

            # Calculate HOG features
            hog_1 = hog(img1_resized, orientations=8, pixels_per_cell=(16, 16),
                        cells_per_block=(1, 1), visualize=False)
            hog_2 = hog(img2_resized, orientations=8, pixels_per_cell=(16, 16),
                        cells_per_block=(1, 1), visualize=False)

            # Calculate HOG similarity (cosine similarity of feature vectors)
            hog_similarity = np.dot(hog_1, hog_2) / (np.linalg.norm(hog_1) * np.linalg.norm(hog_2))

            # Calculate SSIM
            ssim_score = ssim(img1_resized, img2_resized)

            return (hog_similarity + ssim_score) / 2
        except Exception as e:
            print(f"Error in shape similarity calculation: {str(e)}")
            return 0.0

    def calculate_similarity_metrics(self, cropped1, cropped2):
        """Calculate combined similarity metrics"""
        try:
            # Ensure RGB images
            cropped1 = self.ensure_rgb(cropped1)
            cropped2 = self.ensure_rgb(cropped2)

            # Normalize sizes
            norm_img1, norm_img2 = self.normalize_size(cropped1, cropped2)

            # Preprocess images
            proc_img1 = self.preprocess_image(norm_img1)
            proc_img2 = self.preprocess_image(norm_img2)

            # Calculate CLIP similarity
            clip_sim = self.calculate_clip_similarity(proc_img1, proc_img2)

            # Calculate shape similarity
            shape_sim = self.calculate_shape_similarity(
                np.array(proc_img1), np.array(proc_img2)
            )

            # Combined metrics with weights
            metrics = {
                'clip': clip_sim,
                'shape': shape_sim,
                'combined': 0.7 * clip_sim + 0.3 * shape_sim
            }
            return metrics
        except Exception as e:
            print(f"Error in similarity calculation: {str(e)}")
            return {'clip': 0.0, 'shape': 0.0, 'combined': 0.0}

    def process_images(self, image1, image2, match_threshold=0.4):
        try:
            # Convert images to PIL format if needed
            if isinstance(image1, str):
                image1 = Image.open(image1)
            if isinstance(image2, str):
                image2 = Image.open(image2)
            if isinstance(image1, np.ndarray):
                image1 = Image.fromarray(image1)
            if isinstance(image2, np.ndarray):
                image2 = Image.fromarray(image2)

            # Ensure RGB
            image1 = self.ensure_rgb(image1)
            image2 = self.ensure_rgb(image2)

            # Get YOLO predictions with confidence
            results1 = self.yolo_model.predict(np.array(image1), conf=0.5)
            results2 = self.yolo_model.predict(np.array(image2), conf=0.5)

            # Crop detections with padding
            cropped1 = None
            cropped2 = None
            padding = 10

            if len(results1[0].boxes.data) > 0:
                # Get box with highest confidence
                boxes = results1[0].boxes.data
                conf_scores = boxes[:, 4]
                best_box_idx = torch.argmax(conf_scores)
                box1 = boxes[best_box_idx]
                x_min, y_min, x_max, y_max = map(int, box1[:4].tolist())

                # Add padding
                h, w = np.array(image1).shape[:2]
                x_min = max(0, x_min - padding)
                y_min = max(0, y_min - padding)
                x_max = min(w, x_max + padding)
                y_max = min(h, y_max + padding)

                cropped1 = image1.crop((x_min, y_min, x_max, y_max))

            if len(results2[0].boxes.data) > 0:
                # Get box with highest confidence
                boxes = results2[0].boxes.data
                conf_scores = boxes[:, 4]
                best_box_idx = torch.argmax(conf_scores)
                box2 = boxes[best_box_idx]
                x_min, y_min, x_max, y_max = map(int, box2[:4].tolist())

                # Add padding
                h, w = np.array(image2).shape[:2]
                x_min = max(0, x_min - padding)
                y_min = max(0, y_min - padding)
                x_max = min(w, x_max + padding)
                y_max = min(h, y_max + padding)

                cropped2 = image2.crop((x_min, y_min, x_max, y_max))

            # Fall back to the full images if no signature was detected
            if cropped1 is None:
                cropped1 = image1
            if cropped2 is None:
                cropped2 = image2

            # Calculate similarity metrics
            metrics = self.calculate_similarity_metrics(cropped1, cropped2)

            # Format results with more detailed information
            similarity_info = f"""{'Signatures Match!' if metrics['combined'] > match_threshold else 'Signatures Do Not Match!'}
- Combined Similarity: {metrics['combined']:.3f}
- CLIP Similarity: {metrics['clip']:.3f}
- Shape Similarity: {metrics['shape']:.3f}
"""
            return cropped1, cropped2, similarity_info
        except Exception as e:
            print(f"Error in process_images: {str(e)}")
            return None, None, f"Error processing images: {str(e)}"
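

# Example usage: a minimal sketch of how the class might be driven end to end.
# The weights file and image paths below are placeholders, not files shipped with
# this code — point them at your own trained YOLO signature-detector weights and
# at two scanned documents containing signatures.
if __name__ == "__main__":
    matcher = SignatureSimilarity("path/to/signature_yolo_weights.pt", clip_threshold=0.8)

    # process_images accepts file paths, PIL images, or numpy arrays
    crop1, crop2, report = matcher.process_images(
        "document_a.png", "document_b.png", match_threshold=0.4
    )
    print(report)

    # The detected signature crops come back as PIL images and can be saved for review
    if crop1 is not None and crop2 is not None:
        crop1.save("signature_a_crop.png")
        crop2.save("signature_b_crop.png")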