Configuring image analysis with S3
- .gitignore +3 -0
- app.py +19 -5
- image_similarity.py +22 -20
- requirements.txt +5 -1
- services/aws_service.py +25 -0
.gitignore
ADDED
@@ -0,0 +1,3 @@
+__pycache__
+flagged
+.env
app.py
CHANGED
@@ -1,11 +1,25 @@
+import os
 import gradio as gr
 from image_similarity import ImageSimilarity
+from services.aws_service import AwsService
+from dotenv import load_dotenv
 
-
-    return "Hello " + name + "!!"
+load_dotenv()
 
-def
-
+def check_image_similarity(photo_shoot_id):
+    folder = "PhotoShoots/" + str(photo_shoot_id) + "/Inputs"
+    files = AwsService.get_files_from_s3(os.environ.get('AWS_S3_BUCKET'), folder)
+
+    images = []
+    for file in files:
+        images.append(AwsService.get_image_from_s3(os.environ.get('AWS_S3_BUCKET'), file['Key']))
+
+    return ImageSimilarity(1).check(images)
 
-iface = gr.Interface(fn=image_similarity, inputs=[gr.inputs.Image(type="pil"), gr.inputs.Image(type="pil")], outputs="text")
+iface = gr.Interface(
+    fn=check_image_similarity,
+    inputs=[gr.Textbox(lines=1, placeholder="Photo Shoot ID")],
+    outputs="text"
+)
+
 iface.launch()
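Note: the new entry point reads its configuration from environment variables via python-dotenv, and the .gitignore change above keeps the .env file out of the repo. A placeholder .env covering the four variables the code reads would look like this (values are illustrative, not from this commit):

AWS_ACCESS_KEY_ID=AKIAXXXXXXXXXXXXXXXX
AWS_SECRET_ACCESS_KEY=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
AWS_REGION=us-east-1
AWS_S3_BUCKET=my-photoshoot-bucket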
image_similarity.py
CHANGED
@@ -1,23 +1,18 @@
 from sentence_transformers import SentenceTransformer, util
-from PIL import Image
-# import glob
 
 class ImageSimilarity(object):
     def __init__(self, minimum_commutative_image_diff):
         self.minimum_commutative_image_diff = minimum_commutative_image_diff
 
-    def check(self,
+    def check(self, pil_images):
+        results = []
+
         # Load the OpenAI CLIP Model
         print('Loading CLIP Model...')
         model = SentenceTransformer('clip-ViT-B-32')
 
-
-
-        # from PIL import Image
-        # encoded_image = model.encode(Image.open(filepath))
-        # image_names = list(glob.glob('./*.jpg'))
-        print("Images:", len(image_names))
-        encoded_image = model.encode([Image.open(filepath) for filepath in image_names], batch_size=128, convert_to_tensor=True, show_progress_bar=True)
+        print("Images:", len(pil_images))
+        encoded_image = model.encode([image["pil"] for image in pil_images], batch_size=128, convert_to_tensor=True, show_progress_bar=True)
 
         # Now we run the clustering algorithm. This function compares images against
         # all other images and returns a list with the pairs that have the highest
@@ -28,17 +23,17 @@ class ImageSimilarity(object):
         # =================
         # DUPLICATES
         # =================
-        print('Finding duplicate images...')
+        # print('Finding duplicate images...')
         # Filter list for duplicates. Results are triplets (score, image_id1, image_id2) and are sorted in decreasing order
         # A duplicate image will have a score of 1.00
         # It may be 0.9999 due to lossy image compression (.jpg)
-        duplicates = [image for image in processed_images if image[0] >= 0.999]
+        # duplicates = [image for image in processed_images if image[0] >= 0.999]
 
         # Output the top X duplicate images
-        for score, image_id1, image_id2 in duplicates[0:NUM_SIMILAR_IMAGES]:
-
-
-
+        # for score, image_id1, image_id2 in duplicates[0:NUM_SIMILAR_IMAGES]:
+        #     print("\nScore: {:.3f}%".format(score * 100))
+        #     print(pil_images[image_id1])
+        #     print(pil_images[image_id2])
 
         # =================
         # NEAR DUPLICATES
@@ -48,10 +43,17 @@ class ImageSimilarity(object):
         # you will get larger clusters which have less similar images in it. Threshold 0 - 1.00
         # A threshold of 1.00 means the two images are exactly the same. Since we are finding near
         # duplicate images, we can set it at 0.99 or any number 0 < X < 1.00.
-        threshold = 0.
+        threshold = 0.90
         near_duplicates = [image for image in processed_images if image[0] < threshold]
 
         for score, image_id1, image_id2 in near_duplicates[0:NUM_SIMILAR_IMAGES]:
-
-
-
+            results.append({
+                'score': score,
+                'image1': pil_images[image_id1]["key"],
+                'image2': pil_images[image_id2]["key"]
+            })
+            # print("\nScore: {:.3f}%".format(score * 100))
+            # print(pil_images[image_id1]["key"])
+            # print(pil_images[image_id2]["key"])
+
+        return results
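Note: processed_images and NUM_SIMILAR_IMAGES are defined in unchanged context lines that the hunks above elide. Judging from the sentence-transformers CLIP duplicate-image example this file follows, the elided lines are presumably equivalent to:

        # Compare every embedding against every other and return
        # [cosine_score, image_id1, image_id2] triplets, sorted by
        # decreasing score (sentence-transformers paraphrase mining).
        processed_images = util.paraphrase_mining_embeddings(encoded_image)

        # Cap on how many pairs are reported (assumed value, not shown in the diff).
        NUM_SIMILAR_IMAGES = 10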
requirements.txt
CHANGED
@@ -1 +1,5 @@
-sentence_transformers==2.2.2
+sentence_transformers==2.2.2
+gradio==3.44.4
+markupsafe==2.0.1
+boto3==1.26.25
+botocore==1.29.25
services/aws_service.py
ADDED
@@ -0,0 +1,25 @@
+import os
+import boto3
+from PIL import Image
+from io import BytesIO
+
+class AwsService:
+    def session():
+        return boto3.Session(
+            aws_access_key_id=os.environ.get('AWS_ACCESS_KEY_ID'),
+            aws_secret_access_key=os.environ.get('AWS_SECRET_ACCESS_KEY'),
+            region_name=os.environ.get('AWS_REGION')
+        )
+
+    def s3_client():
+        return AwsService.session().client('s3')
+
+    def get_files_from_s3(bucket, prefix):
+        return AwsService.s3_client().list_objects(Bucket=bucket, Prefix=prefix)['Contents']
+
+    def get_image_from_s3(bucket, key):
+        file_byte_string = AwsService.s3_client().get_object(Bucket=bucket, Key=key)['Body'].read()
+        return {
+            'key': key,
+            'pil': Image.open(BytesIO(file_byte_string))
+        }
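Note: AwsService's methods take no self, so they only work when called on the class itself, as app.py does. Also, boto3's list_objects returns at most 1,000 keys per call, and the ['Contents'] lookup raises KeyError when the prefix matches nothing; list_objects_v2 with a paginator would be more robust for large photo shoots. A hypothetical local smoke test (the photo-shoot prefix is a placeholder, not a value from this commit):

import os
from dotenv import load_dotenv
from services.aws_service import AwsService

load_dotenv()  # loads the AWS_* variables from .env

# List the inputs of one photo shoot and download each image.
bucket = os.environ.get('AWS_S3_BUCKET')
files = AwsService.get_files_from_s3(bucket, 'PhotoShoots/1/Inputs')
for file in files:
    image = AwsService.get_image_from_s3(bucket, file['Key'])
    print(image['key'], image['pil'].size)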