# Spaces:
# Sleeping
# Sleeping
import os
import uuid

from qdrant_client import QdrantClient
from qdrant_client.http import models
class QdrantU:
    """Small helper around one Qdrant collection: batched upload, count, search.

    The instance owns a ``QdrantClient`` (``self.client``) and remembers the
    collection name every call is issued against.  It can be used as a
    context manager so the client is closed on exit.
    """

    # SECURITY(review): these literals were hard-coded in the original source
    # and are kept ONLY so existing callers that pass nothing keep working.
    # A key committed to source must be treated as leaked: rotate it, supply
    # the new one through ``QDRANT_API_KEY`` (or the ``api_key`` argument),
    # and then delete these fallbacks.
    _LEGACY_URL = "https://5c32ac64-b1f7-4665-91eb-e321a98c02f6.europe-west3-0.gcp.cloud.qdrant.io:6333"
    _LEGACY_API_KEY = "Wd_RTregmznFMCyDLagJHM_7a5TjJJuFLVTuMgfjQD44-BHLnhYbUg"

    def __init__(self, collection_name, *, url=None, api_key=None):
        """Create the client and bind it to *collection_name*.

        Args:
            collection_name: Name of the collection all methods operate on.
            url: Optional Qdrant endpoint.  When omitted, the ``QDRANT_URL``
                environment variable is used, then the legacy built-in value.
            api_key: Optional API key.  When omitted, ``QDRANT_API_KEY`` is
                used, then the legacy built-in value.
        """
        self.client = QdrantClient(
            url=self._resolve_setting(url, "QDRANT_URL", self._LEGACY_URL),
            api_key=self._resolve_setting(
                api_key, "QDRANT_API_KEY", self._LEGACY_API_KEY
            ),
        )
        self.collection_name = collection_name

    @staticmethod
    def _resolve_setting(explicit, env_var, fallback):
        """Return *explicit* if given, else a non-empty env value, else *fallback*."""
        if explicit is not None:
            return explicit
        # ``or`` so that an empty environment variable does not win.
        return os.environ.get(env_var) or fallback

    def __enter__(self):
        """Return ``self`` so the helper can be used in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the client on exit; exceptions are not suppressed."""
        self.close_connection()

    def _upload_documents_to_Qdrant(self, data, source):
        """Upsert one batch of documents into the collection.

        Args:
            data: Column-oriented container; ``data["title"]``,
                ``data["content"]``, ``data["publishdate"]`` and
                ``data["embedding"]`` are iterated in lockstep.  If the
                columns differ in length, ``zip`` stops at the shortest one.
            source: Value stored under the ``"source"`` payload key of every
                point in this batch.
        """
        points = [
            models.PointStruct(
                id=str(uuid.uuid4()),  # fresh random id for each document
                vector=embedding,
                payload={
                    "title": title,
                    "content": content,
                    "publishdate": publishdate,
                    "source": source,
                },
            )
            for title, content, publishdate, embedding in zip(
                data["title"],
                data["content"],
                data["publishdate"],
                data["embedding"],
            )
        ]
        # An empty upsert is a pointless round trip (and may be rejected by
        # the server), so only call out when there is something to send.
        if points:
            self.client.upsert(
                collection_name=self.collection_name,
                points=points,
            )
        # Report what was actually built, not the length of one column.
        print("Uploaded:", len(points), "documents to the Qdrant database")

    def upload_to_Qdrant(self, data, batch_size=35, source=''):
        """Upload *data* to the collection in consecutive batches.

        Args:
            data: Container supporting ``len()`` and slicing, whose slices
                expose the columns read by ``_upload_documents_to_Qdrant``.
                (Presumably a dataset/table object; confirm with callers.)
            batch_size: Maximum number of rows per upsert; must be positive.
            source: Stored in every uploaded point's payload.

        Raises:
            ValueError: If *batch_size* is not a positive number.
        """
        if batch_size <= 0:
            # Previously a negative size silently uploaded nothing.
            raise ValueError(
                f"batch_size must be a positive integer, got {batch_size!r}"
            )
        total = len(data)
        for start in range(0, total, batch_size):
            stop = min(start + batch_size, total)
            self._upload_documents_to_Qdrant(data[start:stop], source)
            # Use the offsets for the running count: ``len()`` of a slice is
            # not guaranteed to be a row count for every container type.
            print(f"Uploaded {stop} documents")

    def get_number_of_vectors(self):
        """Return ``points_count`` reported by Qdrant for the collection."""
        return self.client.get_collection(self.collection_name).points_count

    def close_connection(self):
        """Close the underlying Qdrant client."""
        self.client.close()

    def search(self, query, text_embedder, limit):
        """Embed *query* and return the nearest points with their payloads.

        Args:
            query: Query text, passed to ``text_embedder.embed_query`` as
                ``query_text``.
            text_embedder: Object whose ``embed_query`` result is indexable;
                element ``0`` must provide ``.tolist()``.
            limit: Maximum number of hits to return.

        Returns:
            Whatever ``QdrantClient.search`` returns for the request.
        """
        embedded = text_embedder.embed_query(query_text=query)
        return self.client.search(
            collection_name=self.collection_name,
            query_vector=embedded[0].tolist(),  # tensor/array -> plain list
            limit=limit,
            with_payload=True,
        )