Islam YAHIAOUI
Update space
1e4288a
raw
history blame
2.18 kB
import os
import uuid

from qdrant_client import QdrantClient
from qdrant_client.http import models
class QdrantU:
    """Small helper around a ``QdrantClient`` bound to a single collection.

    The instance keeps the client in ``self.client`` and the target collection
    name in ``self.collection_name``. It can upload documents in batches,
    report the collection's point count, run a vector search and close the
    client. It can also be used as a context manager, in which case the client
    is closed on exit.
    """

    # NOTE(security): these legacy defaults embed a cluster URL and an API key
    # in source control. They are kept ONLY so that existing ``QdrantU(name)``
    # callers keep working. The key should be rotated and supplied through the
    # ``QDRANT_API_KEY`` environment variable (or the ``api_key`` argument),
    # after which these fallbacks should be deleted.
    _DEFAULT_URL = "https://5c32ac64-b1f7-4665-91eb-e321a98c02f6.europe-west3-0.gcp.cloud.qdrant.io:6333"
    _DEFAULT_API_KEY = "Wd_RTregmznFMCyDLagJHM_7a5TjJJuFLVTuMgfjQD44-BHLnhYbUg"

    def __init__(self, collection_name, url=None, api_key=None):
        """Create the client and remember which collection to operate on.

        Args:
            collection_name: Name of the collection used by every method.
            url: Optional Qdrant endpoint. Falls back to the ``QDRANT_URL``
                environment variable, then to the legacy built-in default.
            api_key: Optional API key. Falls back to the ``QDRANT_API_KEY``
                environment variable, then to the legacy built-in default.
        """
        resolved_url, resolved_key = self._connection_settings(url, api_key)
        self.client = QdrantClient(url=resolved_url, api_key=resolved_key)
        self.collection_name = collection_name

    @classmethod
    def _connection_settings(cls, url=None, api_key=None):
        """Return ``(url, api_key)``: explicit argument, else env var, else legacy default."""
        resolved_url = url or os.environ.get("QDRANT_URL") or cls._DEFAULT_URL
        resolved_key = (
            api_key or os.environ.get("QDRANT_API_KEY") or cls._DEFAULT_API_KEY
        )
        return resolved_url, resolved_key

    def __enter__(self):
        """Return the instance itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the client on leaving a ``with`` block; exceptions propagate."""
        self.close_connection()
        return False

    def _upload_documents_to_Qdrant(self, data, source):
        """Upsert one batch of documents into the collection.

        Args:
            data: Object whose ``"title"``, ``"content"``, ``"publishdate"``
                and ``"embedding"`` entries are parallel iterables, one item
                per document.
            source: Value stored under the ``"source"`` payload key of every
                point in this batch.
        """
        columns = zip(
            data["title"], data["content"], data["publishdate"], data["embedding"]
        )
        points = [
            models.PointStruct(
                id=str(uuid.uuid4()),  # fresh random id for every document
                vector=embedding,
                payload={
                    "title": title,
                    "content": content,
                    "publishdate": publishdate,
                    "source": source,
                },
            )
            for title, content, publishdate, embedding in columns
        ]
        # An empty batch has nothing to store, so skip the request entirely
        # (the server may reject an empty update).
        if points:
            self.client.upsert(
                collection_name=self.collection_name,
                points=points,
            )
        # Report the points actually built: zip() stops at the shortest column.
        print("Uploaded:", len(points), "documents to the Qdrant database")

    def upload_to_Qdrant(self, data, batch_size=35, source=''):
        """Upload ``data`` in consecutive slices of at most ``batch_size`` rows.

        Args:
            data: Sliceable collection of documents; ``len(data)`` must be the
                number of rows and ``data[a:b]`` must return a batch accepted
                by ``_upload_documents_to_Qdrant``.
            batch_size: Maximum number of rows per request; must be positive.
            source: Forwarded unchanged to every batch upload.

        Raises:
            ValueError: If ``batch_size`` is not positive.
        """
        if batch_size <= 0:
            raise ValueError("batch_size must be a positive integer")
        for start in range(0, len(data), batch_size):
            batch = data[start:start + batch_size]
            self._upload_documents_to_Qdrant(batch, source)
            # Count rows through a column: len(batch) is the number of columns
            # when slicing yields a mapping of columns rather than a frame.
            print(f"Uploaded {start + len(batch['embedding'])} documents")

    def get_number_of_vectors(self):
        """Return the ``points_count`` reported for the collection."""
        return self.client.get_collection(self.collection_name).points_count

    def close_connection(self):
        """Close the underlying client."""
        self.client.close()

    def search(self, query, text_embedder, limit):
        """Embed ``query`` and return the client's search result for it.

        Args:
            query: Query text handed to ``text_embedder.embed_query``.
            text_embedder: Object exposing ``embed_query(query_text=...)``; the
                first element of its result must provide ``tolist()``.
            limit: Maximum number of hits requested from the client.

        Returns:
            Whatever ``self.client.search`` returns; payloads are requested.
        """
        embedded = text_embedder.embed_query(query_text=query)
        return self.client.search(
            collection_name=self.collection_name,
            query_vector=embedded[0].tolist(),  # client expects a plain list
            limit=limit,
            with_payload=True,
        )