Configuring image analysis with S3
- .gitignore +3 -0
- app.py +19 -5
- image_similarity.py +22 -20
- requirements.txt +5 -1
- services/aws_service.py +25 -0
.gitignore
ADDED
@@ -0,0 +1,3 @@
+__pycache__
+flagged
+.env
app.py
CHANGED
@@ -1,11 +1,25 @@
+import os
 import gradio as gr
 from image_similarity import ImageSimilarity
+from services.aws_service import AwsService
+from dotenv import load_dotenv
 
-
-    return "Hello " + name + "!!"
+load_dotenv()
 
-def
-
+def check_image_similarity(photo_shoot_id):
+    folder = "PhotoShoots/" + str(photo_shoot_id) + "/Inputs"
+    files = AwsService.get_files_from_s3(os.environ.get('AWS_S3_BUCKET'), folder)
+
+    images = []
+    for file in files:
+        images.append(AwsService.get_image_from_s3(os.environ.get('AWS_S3_BUCKET'), file['Key']))
+
+    return ImageSimilarity(1).check(images)
 
-iface = gr.Interface(fn=image_similarity, inputs=[gr.inputs.Image(type="pil"), gr.inputs.Image(type="pil")], outputs="text")
+iface = gr.Interface(
+    fn=check_image_similarity,
+    inputs=[gr.Textbox(lines=1, placeholder="Photo Shoot ID")],
+    outputs="text"
+)
+
 iface.launch()
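Note: the new entry point reads its configuration from environment variables via python-dotenv, and the .gitignore change above keeps the .env file out of the repo. A placeholder .env covering the four variables the code reads would look like this (values are illustrative, not from this commit):

AWS_ACCESS_KEY_ID=AKIAXXXXXXXXXXXXXXXX
AWS_SECRET_ACCESS_KEY=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
AWS_REGION=us-east-1
AWS_S3_BUCKET=my-photoshoot-bucket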
image_similarity.py
CHANGED
@@ -1,23 +1,18 @@
 from sentence_transformers import SentenceTransformer, util
-from PIL import Image
-# import glob
 
 class ImageSimilarity(object):
     def __init__(self, minimum_commutative_image_diff):
         self.minimum_commutative_image_diff = minimum_commutative_image_diff
 
-    def check(self,
+    def check(self, pil_images):
+        results = []
+
         # Load the OpenAI CLIP Model
         print('Loading CLIP Model...')
         model = SentenceTransformer('clip-ViT-B-32')
 
-
-
-        # from PIL import Image
-        # encoded_image = model.encode(Image.open(filepath))
-        # image_names = list(glob.glob('./*.jpg'))
-        print("Images:", len(image_names))
-        encoded_image = model.encode([Image.open(filepath) for filepath in image_names], batch_size=128, convert_to_tensor=True, show_progress_bar=True)
+        print("Images:", len(pil_images))
+        encoded_image = model.encode([image["pil"] for image in pil_images], batch_size=128, convert_to_tensor=True, show_progress_bar=True)
 
         # Now we run the clustering algorithm. This function compares images against
         # all other images and returns a list with the pairs that have the highest
@@ -28,17 +23,17 @@ class ImageSimilarity(object):
         # =================
         # DUPLICATES
         # =================
-        print('Finding duplicate images...')
+        # print('Finding duplicate images...')
         # Filter list for duplicates. Results are triplets (score, image_id1, image_id2) and are sorted in decreasing order
         # A duplicate image will have a score of 1.00
         # It may be 0.9999 due to lossy image compression (.jpg)
-        duplicates = [image for image in processed_images if image[0] >= 0.999]
+        # duplicates = [image for image in processed_images if image[0] >= 0.999]
 
         # Output the top X duplicate images
-        for score, image_id1, image_id2 in duplicates[0:NUM_SIMILAR_IMAGES]:
-
-
-
+        # for score, image_id1, image_id2 in duplicates[0:NUM_SIMILAR_IMAGES]:
+        #     print("\nScore: {:.3f}%".format(score * 100))
+        #     print(pil_images[image_id1])
+        #     print(pil_images[image_id2])
 
         # =================
         # NEAR DUPLICATES
@@ -48,10 +43,17 @@ class ImageSimilarity(object):
         # you will get larger clusters which have less similar images in it. Threshold 0 - 1.00
         # A threshold of 1.00 means the two images are exactly the same. Since we are finding near
         # duplicate images, we can set it at 0.99 or any number 0 < X < 1.00.
-        threshold = 0.
+        threshold = 0.90
         near_duplicates = [image for image in processed_images if image[0] < threshold]
 
         for score, image_id1, image_id2 in near_duplicates[0:NUM_SIMILAR_IMAGES]:
-
-
-
+            results.append({
+                'score': score,
+                'image1': pil_images[image_id1]["key"],
+                'image2': pil_images[image_id2]["key"]
+            })
+            # print("\nScore: {:.3f}%".format(score * 100))
+            # print(pil_images[image_id1]["key"])
+            # print(pil_images[image_id2]["key"])
+
+        return results
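Note: processed_images and NUM_SIMILAR_IMAGES are defined in unchanged context lines that the hunks above elide. Judging from the sentence-transformers CLIP duplicate-image example this file follows, the elided lines are presumably equivalent to:

        # Compare every embedding against every other and return
        # [cosine_score, image_id1, image_id2] triplets, sorted by
        # decreasing score (sentence-transformers paraphrase mining).
        processed_images = util.paraphrase_mining_embeddings(encoded_image)

        # Cap on how many pairs are reported (assumed value, not shown in the diff).
        NUM_SIMILAR_IMAGES = 10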
requirements.txt
CHANGED
@@ -1 +1,5 @@
-sentence_transformers==2.2.2
+sentence_transformers==2.2.2
+gradio==3.44.4
+markupsafe==2.0.1
+boto3==1.26.25
+botocore==1.29.25
services/aws_service.py
ADDED
@@ -0,0 +1,25 @@
+import os
+import boto3
+from PIL import Image
+from io import BytesIO
+
+class AwsService:
+    def session():
+        return boto3.Session(
+            aws_access_key_id=os.environ.get('AWS_ACCESS_KEY_ID'),
+            aws_secret_access_key=os.environ.get('AWS_SECRET_ACCESS_KEY'),
+            region_name=os.environ.get('AWS_REGION')
+        )
+
+    def s3_client():
+        return AwsService.session().client('s3')
+
+    def get_files_from_s3(bucket, prefix):
+        return AwsService.s3_client().list_objects(Bucket=bucket, Prefix=prefix)['Contents']
+
+    def get_image_from_s3(bucket, key):
+        file_byte_string = AwsService.s3_client().get_object(Bucket=bucket, Key=key)['Body'].read()
+        return {
+            'key': key,
+            'pil': Image.open(BytesIO(file_byte_string))
+        }
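Note: AwsService's methods take no self, so they only work when called on the class itself, as app.py does. Also, boto3's list_objects returns at most 1,000 keys per call, and the ['Contents'] lookup raises KeyError when the prefix matches nothing; list_objects_v2 with a paginator would be more robust for large photo shoots. A hypothetical local smoke test (the photo-shoot prefix is a placeholder, not a value from this commit):

import os
from dotenv import load_dotenv
from services.aws_service import AwsService

load_dotenv()  # loads the AWS_* variables from .env

# List the inputs of one photo shoot and download each image.
bucket = os.environ.get('AWS_S3_BUCKET')
files = AwsService.get_files_from_s3(bucket, 'PhotoShoots/1/Inputs')
for file in files:
    image = AwsService.get_image_from_s3(bucket, file['Key'])
    print(image['key'], image['pil'].size)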