dannop committed on
Commit cabbe93
1 Parent(s): 44e2df3

Setting up image analysis with S3

Files changed (5)
  1. .gitignore +3 -0
  2. app.py +19 -5
  3. image_similarity.py +22 -20
  4. requirements.txt +5 -1
  5. services/aws_service.py +25 -0
.gitignore ADDED
@@ -0,0 +1,3 @@
+ __pycache__
+ flagged
+ .env
app.py CHANGED
@@ -1,11 +1,25 @@
+ import os
  import gradio as gr
  from image_similarity import ImageSimilarity
+ from services.aws_service import AwsService
+ from dotenv import load_dotenv

- def greet(name):
-     return "Hello " + name + "!!"
-
- def image_similarity(image1, image2):
-     return ImageSimilarity(1).check([image1.filename, image2.filename])
-
- iface = gr.Interface(fn=image_similarity, inputs=[gr.inputs.Image(type="pil"), gr.inputs.Image(type="pil")], outputs="text")
+ load_dotenv()
+
+ def check_image_similarity(photo_shoot_id):
+     folder = "PhotoShoots/" + str(photo_shoot_id) + "/Inputs"
+     files = AwsService.get_files_from_s3(os.environ.get('AWS_S3_BUCKET'), folder)
+
+     images = []
+     for file in files:
+         images.append(AwsService.get_image_from_s3(os.environ.get('AWS_S3_BUCKET'), file['Key']))
+
+     return ImageSimilarity(1).check(images)
+
+ iface = gr.Interface(
+     fn=check_image_similarity,
+     inputs=[gr.Textbox(lines=1, placeholder="Photo Shoot ID")],
+     outputs="text"
+ )
+
  iface.launch()
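Note: the new app.py only works with load_dotenv() plus a populated environment, and .env is git-ignored above. A minimal .env sketch with the variables this commit reads (the values are placeholders, not part of the commit):

# .env (git-ignored) -- placeholder values
AWS_ACCESS_KEY_ID=AKIA...
AWS_SECRET_ACCESS_KEY=...
AWS_REGION=us-east-1
AWS_S3_BUCKET=my-photo-bucket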
image_similarity.py CHANGED
@@ -1,23 +1,18 @@
  from sentence_transformers import SentenceTransformer, util
- from PIL import Image
- # import glob

  class ImageSimilarity(object):
      def __init__(self, minimum_commutative_image_diff):
          self.minimum_commutative_image_diff = minimum_commutative_image_diff

-     def check(self, image_names):
+     def check(self, pil_images):
+         results = []
+
          # Load the OpenAI CLIP Model
          print('Loading CLIP Model...')
          model = SentenceTransformer('clip-ViT-B-32')

-         # Next we compute the embeddings
-         # To encode an image, you can use the following code:
-         # from PIL import Image
-         # encoded_image = model.encode(Image.open(filepath))
-         # image_names = list(glob.glob('./*.jpg'))
-         print("Images:", len(image_names))
-         encoded_image = model.encode([Image.open(filepath) for filepath in image_names], batch_size=128, convert_to_tensor=True, show_progress_bar=True)
+         print("Images:", len(pil_images))
+         encoded_image = model.encode([image["pil"] for image in pil_images], batch_size=128, convert_to_tensor=True, show_progress_bar=True)

          # Now we run the clustering algorithm. This function compares images against
          # all other images and returns a list with the pairs that have the highest
@@ -28,17 +23,17 @@ class ImageSimilarity(object):
          # =================
          # DUPLICATES
          # =================
-         print('Finding duplicate images...')
+         # print('Finding duplicate images...')
          # Filter list for duplicates. Results are triplets (score, image_id1, image_id2) and the list is sorted in decreasing order
          # A duplicate image will have a score of 1.00
          # It may be 0.9999 due to lossy image compression (.jpg)
-         duplicates = [image for image in processed_images if image[0] >= 0.999]
+         # duplicates = [image for image in processed_images if image[0] >= 0.999]

          # Output the top X duplicate images
-         for score, image_id1, image_id2 in duplicates[0:NUM_SIMILAR_IMAGES]:
-             print("\nScore: {:.3f}%".format(score * 100))
-             print(image_names[image_id1])
-             print(image_names[image_id2])
+         # for score, image_id1, image_id2 in duplicates[0:NUM_SIMILAR_IMAGES]:
+         #     print("\nScore: {:.3f}%".format(score * 100))
+         #     print(pil_images[image_id1])
+         #     print(pil_images[image_id2])

          # =================
          # NEAR DUPLICATES
@@ -48,10 +43,17 @@
          # you will get larger clusters which have less similar images in it. Threshold 0 - 1.00
          # A threshold of 1.00 means the two images are exactly the same. Since we are finding near
          # duplicate images, we can set it at 0.99 or any number 0 < X < 1.00.
-         threshold = 0.99
+         threshold = 0.90
          near_duplicates = [image for image in processed_images if image[0] < threshold]

          for score, image_id1, image_id2 in near_duplicates[0:NUM_SIMILAR_IMAGES]:
-             print("\nScore: {:.3f}%".format(score * 100))
-             print(image_names[image_id1])
-             print(image_names[image_id2])
+             results.append({
+                 'score': score,
+                 'image1': pil_images[image_id1]["key"],
+                 'image2': pil_images[image_id2]["key"]
+             })
+             # print("\nScore: {:.3f}%".format(score * 100))
+             # print(pil_images[image_id1]["key"])
+             # print(pil_images[image_id2]["key"])
+
+         return results
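With this change, check() no longer takes file paths: it expects the {'key', 'pil'} dicts produced by AwsService.get_image_from_s3 and returns a list of score/pair dicts instead of printing them. A rough local sketch of the new contract, assuming hypothetical test files in place of S3 objects:

from PIL import Image
from image_similarity import ImageSimilarity

# Same dict shape AwsService.get_image_from_s3 returns, built from local files.
pil_images = [
    {'key': path, 'pil': Image.open(path)}
    for path in ['sample1.jpg', 'sample2.jpg']  # hypothetical paths
]

results = ImageSimilarity(1).check(pil_images)
for pair in results:
    # Each entry: {'score': similarity, 'image1': key, 'image2': key}
    print("{:.3f}% {} {}".format(float(pair['score']) * 100, pair['image1'], pair['image2']))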
requirements.txt CHANGED
@@ -1 +1,5 @@
- sentence_transformers==2.2.2
+ sentence_transformers==2.2.2
+ gradio==3.44.4
+ markupsafe==2.0.1
+ boto3==1.26.25
+ botocore==1.29.25
services/aws_service.py ADDED
@@ -0,0 +1,25 @@
+ import os
+ import boto3
+ from PIL import Image
+ from io import BytesIO
+
+ class AwsService:
+     def session():
+         return boto3.Session(
+             aws_access_key_id = os.environ.get('AWS_ACCESS_KEY_ID'),
+             aws_secret_access_key = os.environ.get('AWS_SECRET_ACCESS_KEY'),
+             region_name=os.environ.get('AWS_REGION')
+         )
+
+     def s3_client():
+         return AwsService.session().client('s3')
+
+     def get_files_from_s3(bucket, prefix):
+         return AwsService.s3_client().list_objects(Bucket=bucket, Prefix=prefix)['Contents']
+
+     def get_image_from_s3(bucket, key):
+         file_byte_string = AwsService.s3_client().get_object(Bucket=bucket, Key=key)['Body'].read()
+         return {
+             'key': key,
+             'pil': Image.open(BytesIO(file_byte_string))
+         }
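One caveat on get_files_from_s3: list_objects returns at most 1,000 keys per call, and the response has no 'Contents' key when the prefix matches nothing, so an empty photo shoot folder raises KeyError. A hedged sketch of a paginated variant with the same return shape (a hypothetical helper, not part of this commit):

def get_files_from_s3_paginated(bucket, prefix):
    # Walks every result page and tolerates an empty prefix.
    paginator = AwsService.s3_client().get_paginator('list_objects_v2')
    files = []
    for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
        files.extend(page.get('Contents', []))
    return files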