Spaces:
Runtime error
Runtime error
matheus-erthal
committed on
Commit
•
daa1246
1
Parent(s):
064bf02
Commit inicial
Browse files- app.py +11 -0
- image_similarity.py +57 -0
- requirements.txt +1 -0
app.py
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from image_similarity import ImageSimilarity
|
3 |
+
|
4 |
+
def greet(name):
    """Return a greeting string for *name* (e.g. "Hello Ada!!")."""
    return "".join(["Hello ", name, "!!"])
|
6 |
+
|
7 |
+
def image_similarity(images):
    """Run the duplicate/near-duplicate image check on *images*.

    Parameters:
        images: list of image file paths, forwarded to ImageSimilarity.check.

    Returns:
        Whatever ImageSimilarity.check produces.  The original version
        assigned the result to a local (which also shadowed this function's
        name) and silently discarded it — the missing ``return`` is the fix.
    """
    return ImageSimilarity(1).check(images)
|
9 |
+
|
10 |
+
# Build a minimal Gradio demo: a single text input wired to greet().
# NOTE(review): the image_similarity() function defined in this file is never
# exposed through the interface — presumably still a work in progress; confirm.
iface = gr.Interface(fn=greet, inputs="text", outputs="text")
# launch() starts the local web server (blocking call).
iface.launch()
|
image_similarity.py
ADDED
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from sentence_transformers import SentenceTransformer, util
|
2 |
+
from PIL import Image
|
3 |
+
# import glob
|
4 |
+
|
5 |
+
class ImageSimilarity(object):
    """Find duplicate and near-duplicate images using OpenAI CLIP embeddings.

    Images are embedded with the ``clip-ViT-B-32`` SentenceTransformer model
    and compared pairwise by cosine similarity via paraphrase mining.
    """

    # Pairs scoring at or above this are treated as exact duplicates.
    # 0.999 rather than 1.0 to tolerate lossy image compression (.jpg).
    DUPLICATE_THRESHOLD = 0.999
    # Pairs strictly below this are reported as near-duplicates; lowering it
    # yields larger clusters of less-similar images (valid range 0 < X < 1.0).
    NEAR_DUPLICATE_THRESHOLD = 0.99
    # Only the top N pairs of each category are printed.
    NUM_SIMILAR_IMAGES = 10

    def __init__(self, minimum_commutative_image_diff):
        # NOTE(review): this parameter is stored but never read anywhere in
        # this class — confirm whether it is still needed by callers.
        self.minimum_commutative_image_diff = minimum_commutative_image_diff
        # The CLIP model is loaded lazily on first check() and cached so that
        # repeated calls do not re-initialise it (the original reloaded the
        # model on every check() call).
        self._model = None

    def _get_model(self):
        """Load (once) and return the cached CLIP SentenceTransformer."""
        if self._model is None:
            print('Loading CLIP Model...')
            self._model = SentenceTransformer('clip-ViT-B-32')
        return self._model

    def check(self, image_names):
        """Embed *image_names* and report duplicate / near-duplicate pairs.

        Parameters
        ----------
        image_names : list of image file paths openable by PIL.

        Returns
        -------
        dict with keys ``'duplicates'`` and ``'near_duplicates'``, each a list
        of ``(score, image_id1, image_id2)`` triplets sorted by decreasing
        cosine similarity.  (The original computed these but returned None,
        even though its caller assigned the result — returning them is the fix.)
        """
        model = self._get_model()

        print("Images:", len(image_names))
        encoded_image = model.encode(
            [Image.open(filepath) for filepath in image_names],
            batch_size=128,
            convert_to_tensor=True,
            show_progress_bar=True,
        )

        # Compare every image against all other images; returns
        # (score, id1, id2) triplets sorted in decreasing similarity order.
        processed_images = util.paraphrase_mining_embeddings(encoded_image)

        # =================
        # DUPLICATES
        # =================
        print('Finding duplicate images...')
        duplicates = [image for image in processed_images
                      if image[0] >= self.DUPLICATE_THRESHOLD]
        self._print_pairs(duplicates, image_names)

        # =================
        # NEAR DUPLICATES
        # =================
        print('Finding near duplicate images...')
        near_duplicates = [image for image in processed_images
                           if image[0] < self.NEAR_DUPLICATE_THRESHOLD]
        self._print_pairs(near_duplicates, image_names)

        return {'duplicates': duplicates, 'near_duplicates': near_duplicates}

    def _print_pairs(self, pairs, image_names):
        """Print the top NUM_SIMILAR_IMAGES (score, path, path) entries."""
        for score, image_id1, image_id2 in pairs[0:self.NUM_SIMILAR_IMAGES]:
            print("\nScore: {:.3f}%".format(score * 100))
            print(image_names[image_id1])
            print(image_names[image_id2])
|
requirements.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
sentence_transformers==2.2.2
|