Spaces:
Running
Running
Fix: deduplicate images
Browse files
app.py
CHANGED
@@ -3,7 +3,7 @@ from qdrant_client import QdrantClient
|
|
3 |
|
4 |
from config import api_key, article, collection_name, description, examples, host_url
|
5 |
from encoder import encode_text
|
6 |
-
from utils import
|
7 |
|
8 |
client = QdrantClient(host=host_url,
|
9 |
port=443,
|
@@ -14,10 +14,9 @@ def search_images(query, modality):
|
|
14 |
query_vector = encode_text(query)
|
15 |
vector_name = "image" if modality == "images" else "text"
|
16 |
results = client.search(
|
17 |
-
collection_name, (vector_name, query_vector), limit=
|
18 |
|
19 |
-
images =
|
20 |
-
" - " + str(result.score)) for result in results]
|
21 |
|
22 |
return images
|
23 |
|
|
|
3 |
|
4 |
from config import api_key, article, collection_name, description, examples, host_url
|
5 |
from encoder import encode_text
|
6 |
+
from utils import get_images
|
7 |
|
8 |
client = QdrantClient(host=host_url,
|
9 |
port=443,
|
|
|
14 |
query_vector = encode_text(query)
|
15 |
vector_name = "image" if modality == "images" else "text"
|
16 |
results = client.search(
|
17 |
+
collection_name, (vector_name, query_vector), limit=20, with_payload=True)
|
18 |
|
19 |
+
images = get_images(results)
|
|
|
20 |
|
21 |
return images
|
22 |
|
utils.py
CHANGED
@@ -8,3 +8,17 @@ from PIL import Image
|
|
8 |
@lru_cache
def get_file(url):
    """Download the image at *url* and open it with PIL.

    The function is wrapped in ``lru_cache``, so repeated calls with the
    same URL return the cached result instead of downloading again.

    Args:
        url: Address of the image to fetch; must be hashable because it is
            used as the cache key.

    Returns:
        The object produced by ``Image.open`` for the downloaded bytes.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Without an explicit timeout, requests waits indefinitely on a stalled
    # server, which would block the whole search.
    response = requests.get(url, timeout=10)
    # Fail on HTTP errors here rather than handing an error page to PIL.
    response.raise_for_status()
    return Image.open(io.BytesIO(response.content))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
@lru_cache
def get_file(url):
    """Download the image at *url* and open it with PIL.

    The function is wrapped in ``lru_cache``, so repeated calls with the
    same URL return the cached result instead of downloading again.

    Args:
        url: Address of the image to fetch; must be hashable because it is
            used as the cache key.

    Returns:
        The object produced by ``Image.open`` for the downloaded bytes.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Without an explicit timeout, requests waits indefinitely on a stalled
    # server, which would block the whole search.
    response = requests.get(url, timeout=10)
    # Fail on HTTP errors here rather than handing an error page to PIL.
    response.raise_for_status()
    return Image.open(io.BytesIO(response.content))
|
11 |
+
|
12 |
+
|
13 |
+
def get_images(results):
    """Build ``(image, label)`` pairs from search hits, one per distinct URL.

    Hits are deduplicated on ``payload['url']``: the first hit seen for each
    URL is kept and the order of first appearance is preserved.

    Args:
        results: Iterable of hits, each exposing ``payload`` (indexed with
            the ``'url'`` and ``'caption'`` keys) and ``score``.

    Returns:
        A list of ``(image, label)`` tuples, where ``image`` is the value
        returned by ``get_file`` for the hit's URL and ``label`` is
        ``"<caption> - <score>"``. Empty input yields an empty list.

    Raises:
        KeyError: If a hit's payload has no ``'url'`` key, or a kept hit's
            payload has no ``'caption'`` key.
    """
    # dicts keep insertion order, so setdefault both drops later duplicates
    # and preserves the ranking order of the first occurrence of each URL.
    first_hit_by_url = {}
    for hit in results:
        first_hit_by_url.setdefault(hit.payload['url'], hit)

    return [
        (get_file(url), f"{hit.payload['caption']} - {hit.score}")
        for url, hit in first_hit_by_url.items()
    ]
|