Spaces:

hoshingakag
/

Chat-Exp-2025

Sleeping

App Files Files Community

Chat-Exp-2025 / src /llamaindex_backend.py

hoshingakag

Update src/llamaindex_backend.py

0a3d21d verified 5 months ago

raw

history blame contribute delete

5.33 kB

	from src.embeddings_model import GEmbeddings
	from src.text_generation_model import GLLM
	from src.pinecone_index import PineconeIndex

	from typing import Dict, List, Any, Union
	import datetime
	import asyncio

	from llama_index.core.evaluation import SemanticSimilarityEvaluator
	from llama_index.core.base.embeddings.base import SimilarityMode

	prompt_template = """
	<system instruction>
	You are Gerard Lee, a data enthusiast with 6 years of experience in the field and humble about his success. Imagine you are in a conversation with someone who interested in your portfolio.
	Reply as faifhfully as possible and in no more than 5 complete sentences unless the <user query> requests to elaborate in details. Use contents from <context> only without prior knowledge except referring to <chat history> for seamless conversatation.
	</system instruction>

	<chat history>
	{context_history}
	</chat history>

	<context>
	{context_from_index}
	</context>

	<user query>
	{user_query}
	</user query>
	"""

	class GLlamaIndex():
	def __init__(
	self,
	logger,
	emb_model: GEmbeddings,
	text_model: GLLM,
	index: PineconeIndex,
	similarity_threshold: float
	) -> None:
	self.logger = logger
	self.emb_model = emb_model
	self.llm = text_model
	self.index = index
	self.evaluator = self._set_evaluator(similarity_threshold)
	self.prompt_template = prompt_template

	def _set_evaluator(self, similarity_threshold: float) -> SemanticSimilarityEvaluator:
	sem_evaluator = SemanticSimilarityEvaluator(
	similarity_mode=SimilarityMode.DEFAULT,
	similarity_threshold=similarity_threshold,
	)
	return sem_evaluator

	def format_history(self, history: List[str]) -> str:
	return "\n".join(list(filter(None, history)))

	async def aget_context_with_history(
	self,
	query: str,
	history: List[str]
	) -> str:
	if not history:
	result = await self.index.retrieve_context(query)
	return result["result"]

	extended_query = f"{self.format_history(history[-2:])}\n{query}"

	results = await self.index.aretrieve_context_multi(
	[query, extended_query]
	)
	self.logger.info(f"retrieval results: {results}")
	eval_results = await self.aevaluate_context_multi(
	[query, extended_query],
	[r["result"] for r in results]
	)
	self.logger.info(f"eval results: {eval_results}")
	return results[0]["result"] if eval_results[0].score > eval_results[1].score \
	else results[1]["result"]

	async def aevaluate_context(
	self,
	query: str,
	returned_context: str
	) -> Dict[str, Any]:
	result = await self.evaluator.aevaluate(
	response=returned_context,
	reference=query,
	)
	return result

	async def aevaluate_context_multi(
	self,
	query_list: List[str],
	returned_context_list: List[str]
	) -> List[Dict]:
	result = await asyncio.gather(*(self.aevaluate_context(query, returned_context) for query, returned_context in zip(query_list, returned_context_list)))
	return result

	def generate_text(
	self,
	query: str,
	history: List[str],
	) -> str:
	# get chat history
	context_history = self.format_history(history=history)

	# get retrieval context(s) from llama-index vectorstore index
	try:
	# without history, single context retrieval without evaluation
	if not history:
	# w&b trace retrieval context
	result_query_only = self.index.retrieve_context(query)
	context_from_index_selected = result_query_only["result"]

	# with history, multiple context retrieval with async, then evaluation to determine which context to choose
	else:
	context_from_index_selected = asyncio.run(self.aget_context_with_history(query=query, history=history))

	except Exception as e:
	self.logger.error(f"Exception {e} occured when retriving context\n")

	llm_end_time_ms = round(datetime.datetime.now().timestamp() * 1000)
	result = "Something went wrong. Please try again later."
	return result

	self.logger.info(f"Context from Llama-Index:\n{context_from_index_selected}\n")

	# generate text with prompt template to roleplay myself
	prompt_with_context = self.prompt_template.format(context_history=context_history, context_from_index=context_from_index_selected, user_query=query)
	try:
	result = self.llm.gai_generate_content(
	prompt=prompt_with_context,
	temperature=0.5,
	)
	success_flag = "success"
	if result is None:
	result = "Seems something went wrong. Please try again later."
	self.logger.error(f"Result with 'None' received\n")
	success_flag = "fail"

	except Exception as e:
	result = "Seems something went wrong. Please try again later."
	self.logger.error(f"Exception {e} occured\n")
	success_flag = "fail"

	return result