matheus-erthal commited on
Commit
daa1246
1 Parent(s): 064bf02

Initial commit

Browse files
Files changed (3) hide show
  1. app.py +11 -0
  2. image_similarity.py +57 -0
  3. requirements.txt +1 -0
app.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr

from image_similarity import ImageSimilarity


def greet(name):
    """Return a greeting string for *name*."""
    return "Hello " + name + "!!"


def image_similarity(images):
    """Run duplicate / near-duplicate detection over *images*.

    Parameters
    ----------
    images : sequence of str
        Paths of the image files to compare.

    Returns
    -------
    Whatever ``ImageSimilarity.check`` produces for the given images.
    (Bug fix: the original assigned the result to a local variable and
    returned ``None``, so callers could never see the output.)
    """
    return ImageSimilarity(1).check(images)


# NOTE(review): the interface is wired to `greet`, not `image_similarity` —
# presumably a placeholder while the similarity UI is built; confirm intent.
iface = gr.Interface(fn=greet, inputs="text", outputs="text")
iface.launch()
image_similarity.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sentence_transformers import SentenceTransformer, util
2
+ from PIL import Image
3
+ # import glob
4
+
5
class ImageSimilarity:
    """Find duplicate and near-duplicate images via CLIP embeddings.

    Images are encoded with the OpenAI CLIP model (through
    sentence-transformers) and compared pairwise by cosine similarity.
    """

    def __init__(self, minimum_commutative_image_diff):
        # NOTE(review): this value is currently unused by check();
        # kept for backward compatibility — confirm its intended use.
        self.minimum_commutative_image_diff = minimum_commutative_image_diff

    def check(self, image_names):
        """Compare the given image files pairwise and report duplicates.

        Parameters
        ----------
        image_names : sequence of str
            Paths of the image files to compare.

        Returns
        -------
        dict
            ``{'duplicates': [...], 'near_duplicates': [...]}`` where each
            entry is a ``(score, image_id1, image_id2)`` triplet sorted by
            decreasing score; ids index into *image_names*.
            (Bug fix: the original printed results but returned ``None``,
            so the caller in app.py could not use them.)
        """
        # Load the OpenAI CLIP model (downloaded/cached by sentence-transformers).
        print('Loading CLIP Model...')
        model = SentenceTransformer('clip-ViT-B-32')

        # Encode all images in one batched call.
        print("Images:", len(image_names))
        encoded_image = model.encode(
            [Image.open(filepath) for filepath in image_names],
            batch_size=128,
            convert_to_tensor=True,
            show_progress_bar=True,
        )

        # Pairwise cosine similarity over all embeddings; returns triplets
        # (score, image_id1, image_id2) sorted in decreasing order of score.
        processed_images = util.paraphrase_mining_embeddings(encoded_image)
        NUM_SIMILAR_IMAGES = 10  # how many pairs to print per category

        # =================
        # DUPLICATES
        # =================
        print('Finding duplicate images...')
        # A true duplicate scores 1.00; >= 0.999 tolerates lossy (.jpg) compression.
        duplicates = [image for image in processed_images if image[0] >= 0.999]

        # Print the top X duplicate pairs.
        for score, image_id1, image_id2 in duplicates[0:NUM_SIMILAR_IMAGES]:
            print("\nScore: {:.3f}%".format(score * 100))
            print(image_names[image_id1])
            print(image_names[image_id2])

        # =================
        # NEAR DUPLICATES
        # =================
        print('Finding near duplicate images...')
        # Threshold in (0, 1); lower values admit less-similar pairs.
        # NOTE(review): pairs scoring in [0.99, 0.999) land in neither bucket —
        # confirm whether that gap is intentional.
        threshold = 0.99
        near_duplicates = [image for image in processed_images if image[0] < threshold]

        for score, image_id1, image_id2 in near_duplicates[0:NUM_SIMILAR_IMAGES]:
            print("\nScore: {:.3f}%".format(score * 100))
            print(image_names[image_id1])
            print(image_names[image_id2])

        # Bug fix: return the computed results instead of None so callers
        # (e.g. app.py's image_similarity) can consume them.
        return {
            'duplicates': duplicates,
            'near_duplicates': near_duplicates,
        }
requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ sentence_transformers==2.2.2
+ gradio
+ Pillow