Spaces:
Runtime error
Runtime error
Update off_topic.py
Browse files - off_topic.py +7 -1
off_topic.py
CHANGED
@@ -9,6 +9,7 @@ import matplotlib.pyplot as plt
|
|
9 |
import numpy as np
|
10 |
import torch
|
11 |
import PIL
|
|
|
12 |
from transformers import CLIPModel, CLIPProcessor
|
13 |
from PIL import Image
|
14 |
|
@@ -95,7 +96,12 @@ class OffTopicDetector:
|
|
95 |
domain = re.sub("_", " ", response["domain_id"].split("-")[-1]).lower()
|
96 |
img_urls = [x["url"] for x in response["pictures"]]
|
97 |
images = self.get_images(img_urls)
|
98 |
-
|
|
|
|
|
|
|
|
|
|
|
99 |
|
100 |
def get_images(self, urls: List[str]):
|
101 |
start = time.time()
|
|
|
9 |
import numpy as np
|
10 |
import torch
|
11 |
import PIL
|
12 |
+
import imagehash
|
13 |
from transformers import CLIPModel, CLIPProcessor
|
14 |
from PIL import Image
|
15 |
|
|
|
96 |
domain = re.sub("_", " ", response["domain_id"].split("-")[-1]).lower()
|
97 |
img_urls = [x["url"] for x in response["pictures"]]
|
98 |
images = self.get_images(img_urls)
|
99 |
+
hashes = {}
|
100 |
+
for img in images:
|
101 |
+
hashes.update({str(imagehash.average_hash(img)): img})
|
102 |
+
dedup_hashes = list(dict.fromkeys(hashes))
|
103 |
+
dedup_images = [img for hash, img in hashes.items() if hash in dedup_hashes]
|
104 |
+
return dedup_images, domain
|
105 |
|
106 |
def get_images(self, urls: List[str]):
|
107 |
start = time.time()
|