Spaces:
Running
Running
Use conf values from `config.py`
Browse files
- README.md +1 -1
- create_index.py +4 -18
- requirements-indexing.txt +5 -0
README.md
CHANGED
@@ -11,4 +11,4 @@ license: apache-2.0
|
|
11 |
---
|
12 |
|
13 |
# hf-spaces-demo
|
14 |
-
A semantic search demo on HuggingFace Spaces backed by Qdrant Cloud
|
|
|
11 |
---
|
12 |
|
13 |
# hf-spaces-demo
|
14 |
+
A semantic image search demo on HuggingFace Spaces backed by Qdrant Cloud
|
create_index.py
CHANGED
@@ -13,16 +13,10 @@ from qdrant_client import QdrantClient
|
|
13 |
from qdrant_client.models import Distance, Record, VectorParams, OptimizersConfigDiff, Payload
|
14 |
|
15 |
|
16 |
-
|
17 |
|
|
|
18 |
|
19 |
-
def truncate(n, decimals=0):
|
20 |
-
try:
|
21 |
-
multiplier = 10 ** decimals
|
22 |
-
return int(n * multiplier) / multiplier
|
23 |
-
except:
|
24 |
-
LOGGER.warn(f"Cannot {n} as a number, returning 0.0")
|
25 |
-
return 0.0
|
26 |
|
27 |
|
28 |
def get_vector_size_and_number(img_emb_files):
|
@@ -43,9 +37,7 @@ def get_embeddings_and_records(img_emb_files, txt_emb_files, metadata_files):
|
|
43 |
payload_data.drop(columns=["image_path", "hash", "key", "status",
|
44 |
"error_message", "width", "height", "exif", "sha256", "original_width", "original_height"], errors="ignore", inplace=True)
|
45 |
payload_data = payload_data.to_dict(orient='records')
|
46 |
-
|
47 |
-
v, str) else v for k, v in p.items()} for p in payload_data]
|
48 |
-
|
49 |
img_embeddings = np.load(img_file)
|
50 |
txt_embeddings = np.load(txt_file)
|
51 |
|
@@ -57,12 +49,6 @@ def get_embeddings_and_records(img_emb_files, txt_emb_files, metadata_files):
|
|
57 |
|
58 |
def clip_index(
|
59 |
embeddings_folder,
|
60 |
-
collection_name,
|
61 |
-
host="localhost",
|
62 |
-
api_key=None,
|
63 |
-
port=6333,
|
64 |
-
grpc_port=6334,
|
65 |
-
prefer_grpc=True,
|
66 |
batch_size=64,
|
67 |
parallel=2,
|
68 |
max_retries=5,
|
@@ -71,7 +57,7 @@ def clip_index(
|
|
71 |
):
|
72 |
"""indexes clip embeddings using Qdrant"""
|
73 |
client = QdrantClient(
|
74 |
-
host=
|
75 |
|
76 |
image_folder = f"{embeddings_folder}/{image_subfolder}"
|
77 |
text_folder = f"{embeddings_folder}/{text_subfolder}"
|
|
|
13 |
from qdrant_client.models import Distance, Record, VectorParams, OptimizersConfigDiff, Payload
|
14 |
|
15 |
|
16 |
+
from config import api_key, collection_name, host_url
|
17 |
|
18 |
+
LOGGER = logging.getLogger(__name__)
|
19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
|
21 |
|
22 |
def get_vector_size_and_number(img_emb_files):
|
|
|
37 |
payload_data.drop(columns=["image_path", "hash", "key", "status",
|
38 |
"error_message", "width", "height", "exif", "sha256", "original_width", "original_height"], errors="ignore", inplace=True)
|
39 |
payload_data = payload_data.to_dict(orient='records')
|
40 |
+
|
|
|
|
|
41 |
img_embeddings = np.load(img_file)
|
42 |
txt_embeddings = np.load(txt_file)
|
43 |
|
|
|
49 |
|
50 |
def clip_index(
|
51 |
embeddings_folder,
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
batch_size=64,
|
53 |
parallel=2,
|
54 |
max_retries=5,
|
|
|
57 |
):
|
58 |
"""indexes clip embeddings using Qdrant"""
|
59 |
client = QdrantClient(
|
60 |
+
host=host_url, api_key=api_key, prefer_grpc=True)
|
61 |
|
62 |
image_folder = f"{embeddings_folder}/{image_subfolder}"
|
63 |
text_folder = f"{embeddings_folder}/{text_subfolder}"
|
requirements-indexing.txt
CHANGED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
torch
|
2 |
+
tqdm
|
3 |
+
qdrant-client
|
4 |
+
fire
|
5 |
+
pandas
|