File size: 2,838 Bytes
6064a78
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import cv2
import numpy as np
from typing import Union
from PIL import Image
from io import BytesIO
import imghdr
from fastapi import HTTPException
from app.utils.file_utils import get_file_content

def preprocess_image(image: Union[str, np.ndarray, Image.Image], hash_size: int = 32) -> np.ndarray:
    """Reduce an image to a small, contrast-normalised grayscale array.

    Args:
        image: One of
            * a ``str`` handed to ``get_file_content`` to fetch the encoded
              image bytes,
            * a PIL image, or
            * an array; a 3-D array is passed through
              ``cv2.COLOR_RGB2GRAY``, a 2-D array is used as-is.
        hash_size: Edge length, in pixels, of the square output.

    Returns:
        A ``hash_size`` x ``hash_size`` single-channel array, min-max
        normalised to the 0-255 range by ``cv2.normalize``.
    """
    if isinstance(image, str):
        image = Image.open(BytesIO(get_file_content(image)))

    if isinstance(image, Image.Image):
        # np.array() on these modes does not produce grayscale/RGB samples:
        # "1" yields booleans, "P"/"PA" yield palette indices, "LA" yields two
        # channels, and CMYK/YCbCr/HSV channels are not RGB. Convert them
        # *before* stripping metadata so palette information is still
        # available to the conversion.
        if image.mode in ("1", "P", "PA", "LA", "CMYK", "YCbCr", "HSV"):
            image = image.convert("RGB")
        image = np.array(strip_metadata(image))

    if image.ndim == 3:
        image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

    # INTER_AREA is the interpolation used for the downscale to the hash grid.
    image = cv2.resize(image, (hash_size, hash_size), interpolation=cv2.INTER_AREA)
    image = cv2.normalize(image, None, 0, 255, cv2.NORM_MINMAX)

    return image

def strip_metadata(img: Image.Image) -> Image.Image:
    """Return a copy of ``img`` that carries pixel data only.

    The copy is rebuilt from the raw pixel buffer, so nothing stored in the
    source image's ``info`` dictionary (EXIF and similar) is carried over.

    Args:
        img: Source image; it is not modified.

    Returns:
        A new image with the same mode, size and pixels as ``img``.
    """
    # Copy the raw buffer in one step instead of materialising a Python
    # object per pixel, which is very slow and memory-hungry for large images.
    clean = Image.frombytes(img.mode, img.size, img.tobytes())

    # For palette modes the pixel values are indices into the palette, so the
    # palette is part of the picture itself and must be kept.
    if img.mode in ("P", "PA"):
        palette = img.getpalette()
        if palette is not None:
            clean.putpalette(palette)

    return clean

def perceptual_image_hash(image: Union[str, np.ndarray, Image.Image], hash_size: int = 32) -> str:
    """Compute a 64-bit DCT-based perceptual hash of an image.

    The image is reduced with ``preprocess_image``, transformed with
    ``cv2.dct``, and the top-left 8x8 block of coefficients is thresholded
    against a median to produce the bits.

    Args:
        image: Anything accepted by ``preprocess_image``.
        hash_size: Edge length of the preprocessed image fed to the DCT.
            Must be an even number >= 8.

    Returns:
        A 64-character string of ``'0'``/``'1'``, row-major over the 8x8
        low-frequency block.

    Raises:
        ValueError: If ``hash_size`` is smaller than 8 or odd.
    """
    low_freq_size = 8
    # Fail early with a clear message: a smaller grid cannot supply an 8x8
    # block, and OpenCV's dct only supports even-sized arrays.
    if hash_size < low_freq_size or hash_size % 2:
        raise ValueError(
            f"hash_size must be an even number >= {low_freq_size}, got {hash_size}"
        )

    processed_image = preprocess_image(image, hash_size)
    dct = cv2.dct(np.float32(processed_image))
    dct_low = dct[:low_freq_size, :low_freq_size]

    # NOTE(review): dct_low[1:] excludes the whole first ROW from the median,
    # not just the DC coefficient at [0, 0]. Left unchanged on purpose so that
    # new hashes stay comparable with ones computed earlier - confirm intent.
    median = np.median(dct_low[1:])

    # flatten() is row-major, matching a nested "for i / for j" traversal.
    return ''.join('1' if coeff > median else '0' for coeff in dct_low.flatten())

def hamming_distance(hash1: str, hash2: str) -> int:
    """Count the positions at which two equal-length hashes differ.

    Args:
        hash1: First hash string.
        hash2: Second hash string.

    Returns:
        The number of positions whose characters differ.

    Raises:
        ValueError: If the hashes have different lengths. Comparing only the
            common prefix would silently under-report the distance.
    """
    if len(hash1) != len(hash2):
        raise ValueError(
            f"Hashes must have equal length, got {len(hash1)} and {len(hash2)}"
        )
    return sum(c1 != c2 for c1, c2 in zip(hash1, hash2))

def are_images_similar(hash1: str, hash2: str, threshold: int = 5) -> bool:
    """Tell whether two hashes are within ``threshold`` differing positions.

    Args:
        hash1: First hash string.
        hash2: Second hash string.
        threshold: Largest Hamming distance still treated as "similar"
            (inclusive).

    Returns:
        ``True`` when the Hamming distance does not exceed ``threshold``.
    """
    return threshold >= hamming_distance(hash1, hash2)

def process_image(filename: str) -> str:
    """Load an image via ``get_file_content`` and return its perceptual hash.

    Args:
        filename: Identifier passed to ``get_file_content``.

    Returns:
        The hash string produced by ``perceptual_image_hash``.

    Raises:
        HTTPException: With status 500 for any unexpected failure. An
            ``HTTPException`` raised while loading or hashing is propagated
            unchanged so its original status code is not masked as a 500.
    """
    try:
        content = get_file_content(filename)
        # Hash inside the context manager so the image is released afterwards.
        with Image.open(BytesIO(content)) as img:
            return perceptual_image_hash(img)
    except HTTPException:
        # Already an HTTP-level error; re-wrapping would lose its status.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error processing image: {str(e)}") from e

def compare_images(filename1: str, filename2: str) -> dict:
    """Hash two images and report how similar they are.

    Args:
        filename1: Identifier of the first image, passed to
            ``get_file_content``.
        filename2: Identifier of the second image, passed to
            ``get_file_content``.

    Returns:
        A dict with keys ``image1_hash``, ``image2_hash``, ``are_similar``
        (result of ``are_images_similar`` with its default threshold) and
        ``hamming_distance``.

    Raises:
        HTTPException: With status 500 for any unexpected failure. An
            ``HTTPException`` raised while loading or hashing is propagated
            unchanged so its original status code is not masked as a 500.
    """
    try:
        # Fetch both payloads before decoding either one.
        content1 = get_file_content(filename1)
        content2 = get_file_content(filename2)

        # Hash inside the context managers so each image is released promptly.
        with Image.open(BytesIO(content1)) as img1:
            hash1 = perceptual_image_hash(img1)
        with Image.open(BytesIO(content2)) as img2:
            hash2 = perceptual_image_hash(img2)

        return {
            "image1_hash": hash1,
            "image2_hash": hash2,
            "are_similar": are_images_similar(hash1, hash2),
            "hamming_distance": hamming_distance(hash1, hash2),
        }
    except HTTPException:
        # Already an HTTP-level error; re-wrapping would lose its status.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error comparing images: {str(e)}") from e