mys committed on
Commit
a4294e4
1 Parent(s): fc02443

Fix: deduplicate images

Browse files
Files changed (2) hide show
  1. app.py +3 -4
  2. utils.py +14 -0
app.py CHANGED
@@ -3,7 +3,7 @@ from qdrant_client import QdrantClient
3
 
4
  from config import api_key, article, collection_name, description, examples, host_url
5
  from encoder import encode_text
6
- from utils import get_file
7
 
8
  client = QdrantClient(host=host_url,
9
  port=443,
@@ -14,10 +14,9 @@ def search_images(query, modality):
14
  query_vector = encode_text(query)
15
  vector_name = "image" if modality == "images" else "text"
16
  results = client.search(
17
- collection_name, (vector_name, query_vector), limit=6, with_payload=True)
18
 
19
- images = [(get_file(result.payload['url']), result.payload['caption'] +
20
- " - " + str(result.score)) for result in results]
21
 
22
  return images
23
 
 
3
 
4
  from config import api_key, article, collection_name, description, examples, host_url
5
  from encoder import encode_text
6
+ from utils import get_images
7
 
8
  client = QdrantClient(host=host_url,
9
  port=443,
 
14
  query_vector = encode_text(query)
15
  vector_name = "image" if modality == "images" else "text"
16
  results = client.search(
17
+ collection_name, (vector_name, query_vector), limit=20, with_payload=True)
18
 
19
+ images = get_images(results)
 
20
 
21
  return images
22
 
utils.py CHANGED
@@ -8,3 +8,17 @@ from PIL import Image
8
  @lru_cache
9
  def get_file(url):
10
  return Image.open(io.BytesIO(requests.get(url).content))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  @lru_cache
9
  def get_file(url):
10
  return Image.open(io.BytesIO(requests.get(url).content))
11
+
12
+
13
+ def get_images(results):
14
+ seen_urls = set()
15
+ unique_results = []
16
+ for result in results:
17
+ if not result.payload['url'] in seen_urls:
18
+ seen_urls.add(result.payload['url'])
19
+ unique_results.append(result)
20
+
21
+ images = [(get_file(result.payload['url']), result.payload['caption'] +
22
+ " - " + str(result.score)) for result in unique_results]
23
+
24
+ return images