Spaces:
Running
Running
from src.embeddings_model import GEmbeddings | |
from src.text_generation_model import GLLM | |
from src.pinecone_index import PineconeIndex | |
from typing import Dict, List, Any, Union | |
import datetime | |
import asyncio | |
from llama_index.core.evaluation import SemanticSimilarityEvaluator | |
from llama_index.core.base.embeddings.base import SimilarityMode | |
prompt_template = """ | |
<system instruction> | |
You are Gerard Lee, a data enthusiast with 6 years of experience in the field and humble about his success. Imagine you are in a conversation with someone who interested in your portfolio. | |
Reply as faifhfully as possible and in no more than 5 complete sentences unless the <user query> requests to elaborate in details. Use contents from <context> only without prior knowledge except referring to <chat history> for seamless conversatation. | |
</system instruction> | |
<chat history> | |
{context_history} | |
</chat history> | |
<context> | |
{context_from_index} | |
</context> | |
<user query> | |
{user_query} | |
</user query> | |
""" | |
class GLlamaIndex(): | |
def __init__( | |
self, | |
logger, | |
emb_model: GEmbeddings, | |
text_model: GLLM, | |
index: PineconeIndex, | |
similarity_threshold: float | |
) -> None: | |
self.logger = logger | |
self.emb_model = emb_model | |
self.llm = text_model | |
self.index = index | |
self.evaluator = self._set_evaluator(similarity_threshold) | |
self.prompt_template = prompt_template | |
def _set_evaluator(self, similarity_threshold: float) -> SemanticSimilarityEvaluator: | |
sem_evaluator = SemanticSimilarityEvaluator( | |
similarity_mode=SimilarityMode.DEFAULT, | |
similarity_threshold=similarity_threshold, | |
) | |
return sem_evaluator | |
def format_history(self, history: List[str]) -> str: | |
return "\n".join(list(filter(None, history))) | |
async def aget_context_with_history( | |
self, | |
query: str, | |
history: List[str] | |
) -> str: | |
if not history: | |
result = await self.index.retrieve_context(query) | |
return result["result"] | |
extended_query = f"{self.format_history(history[-2:])}\n{query}" | |
results = await self.index.aretrieve_context_multi( | |
[query, extended_query] | |
) | |
self.logger.info(f"retrieval results: {results}") | |
eval_results = await self.aevaluate_context_multi( | |
[query, extended_query], | |
[r["result"] for r in results] | |
) | |
self.logger.info(f"eval results: {eval_results}") | |
return results[0]["result"] if eval_results[0].score > eval_results[1].score \ | |
else results[1]["result"] | |
async def aevaluate_context( | |
self, | |
query: str, | |
returned_context: str | |
) -> Dict[str, Any]: | |
result = await self.evaluator.aevaluate( | |
response=returned_context, | |
reference=query, | |
) | |
return result | |
async def aevaluate_context_multi( | |
self, | |
query_list: List[str], | |
returned_context_list: List[str] | |
) -> List[Dict]: | |
result = await asyncio.gather(*(self.aevaluate_context(query, returned_context) for query, returned_context in zip(query_list, returned_context_list))) | |
return result | |
def generate_text( | |
self, | |
query: str, | |
history: List[str], | |
) -> str: | |
# get chat history | |
context_history = self.format_history(history=history) | |
# get retrieval context(s) from llama-index vectorstore index | |
try: | |
# without history, single context retrieval without evaluation | |
if not history: | |
# w&b trace retrieval context | |
result_query_only = self.index.retrieve_context(query) | |
context_from_index_selected = result_query_only["result"] | |
# with history, multiple context retrieval with async, then evaluation to determine which context to choose | |
else: | |
context_from_index_selected = asyncio.run(self.aget_context_with_history(query=query, history=history)) | |
except Exception as e: | |
self.logger.error(f"Exception {e} occured when retriving context\n") | |
llm_end_time_ms = round(datetime.datetime.now().timestamp() * 1000) | |
result = "Something went wrong. Please try again later." | |
return result | |
self.logger.info(f"Context from Llama-Index:\n{context_from_index_selected}\n") | |
# generate text with prompt template to roleplay myself | |
prompt_with_context = self.prompt_template.format(context_history=context_history, context_from_index=context_from_index_selected, user_query=query) | |
try: | |
result = self.llm.gai_generate_content( | |
prompt=prompt_with_context, | |
temperature=0.5, | |
) | |
success_flag = "success" | |
if result is None: | |
result = "Seems something went wrong. Please try again later." | |
self.logger.error(f"Result with 'None' received\n") | |
success_flag = "fail" | |
except Exception as e: | |
result = "Seems something went wrong. Please try again later." | |
self.logger.error(f"Exception {e} occured\n") | |
success_flag = "fail" | |
return result |