Spaces:

islam23
/

llama3-8b-RAG_News_Finance

Sleeping

Islam YAHIAOUI

Update space

1e4288a 6 months ago

2.18 kB

	import uuid
	from qdrant_client.http import models
	from qdrant_client import QdrantClient


	class QdrantU:
	def __init__(self, collection_name):
	self.client = QdrantClient(
	url="https://5c32ac64-b1f7-4665-91eb-e321a98c02f6.europe-west3-0.gcp.cloud.qdrant.io:6333",
	api_key="Wd_RTregmznFMCyDLagJHM_7a5TjJJuFLVTuMgfjQD44-BHLnhYbUg",
	)
	self.collection_name = collection_name

	def _upload_documents_to_Qdrant(self, data, source):
	points = []
	for title, content, publishdate, embedding in zip(data["title"], data["content"], data["publishdate"], data["embedding"]):
	new_id = str(uuid.uuid4()) # Generate a new UUID for each document
	point = models.PointStruct(
	id=new_id,
	vector=embedding,
	payload={
	"title": title,
	"content": content,
	"publishdate": publishdate,
	"source" : source
	}
	)
	points.append(point)

	self.client.upsert(
	collection_name=self.collection_name,
	points=points
	)

	print("Uploaded:", len(data["embedding"]), "documents to the Qdrant database")


	def upload_to_Qdrant(self, data, batch_size=35, source=''):
	for i in range(0, len(data), batch_size):
	batch = data[i:i + batch_size]
	self._upload_documents_to_Qdrant(batch , source)
	print(f"Uploaded {i + len(batch)} documents")


	def get_number_of_vectors(self):
	collection_info = self.client.get_collection(self.collection_name)
	num_vectors = collection_info.points_count
	return num_vectors

	def close_connection(self):
	self.client.close()

	def search(self, query, text_embedder, limit):
	query_vector = text_embedder.embed_query(query_text=query)
	query_result = self.client.search(
	collection_name=self.collection_name,
	query_vector=query_vector[0].tolist(), # Convert tensor to list
	limit=limit,
	with_payload=True
	)
	return query_result