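"""RAG layer for the portfolio chatbot: retrieves context from a Pinecone-backed
llama-index store, scores candidate contexts by semantic similarity against the
query, and prompts the LLM to answer in Gerard Lee's voice."""
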
from src.embeddings_model import GEmbeddings
from src.text_generation_model import GLLM
from src.pinecone_index import PineconeIndex

from typing import List
import datetime
import asyncio

from llama_index.core.evaluation import SemanticSimilarityEvaluator, EvaluationResult
from llama_index.core.base.embeddings.base import SimilarityMode
prompt_template = """
<system instruction>
You are Gerard Lee, a data enthusiast with 6 years of experience in the field who is humble about his success. Imagine you are in a conversation with someone who is interested in your portfolio.
Reply as faithfully as possible and in no more than 5 complete sentences, unless the <user query> asks you to elaborate in detail. Use contents from <context> only, without prior knowledge, except for referring to <chat history> to keep the conversation seamless.
</system instruction>
<chat history>
{context_history}
</chat history>
<context>
{context_from_index}
</context>
<user query>
{user_query}
</user query>
"""


class GLlamaIndex:
    """Thin RAG orchestrator: retrieves context from the Pinecone-backed index,
    picks the better context when chat history is available, and prompts the LLM."""

    def __init__(
        self,
        logger,
        emb_model: GEmbeddings,
        text_model: GLLM,
        index: PineconeIndex,
        similarity_threshold: float,
    ) -> None:
        self.logger = logger
        self.emb_model = emb_model
        self.llm = text_model
        self.index = index
        self.evaluator = self._set_evaluator(similarity_threshold)
        self.prompt_template = prompt_template

    def _set_evaluator(self, similarity_threshold: float) -> SemanticSimilarityEvaluator:
        """Build the semantic-similarity evaluator used to score retrieved contexts."""
        sem_evaluator = SemanticSimilarityEvaluator(
            similarity_mode=SimilarityMode.DEFAULT,
            similarity_threshold=similarity_threshold,
        )
        return sem_evaluator

    def format_history(self, history: List[str]) -> str:
        """Join non-empty history turns into a single newline-separated string."""
        return "\n".join(filter(None, history))
    async def aget_context_with_history(
        self,
        query: str,
        history: List[str],
    ) -> str:
        """Retrieve context for the query, optionally augmented with recent history.

        With history, both the bare query and a history-extended query are retrieved
        and evaluated; the context with the higher similarity score is returned.
        """
        if not history:
            result = await self.index.retrieve_context(query)
            return result["result"]
        extended_query = f"{self.format_history(history[-2:])}\n{query}"
        results = await self.index.aretrieve_context_multi(
            [query, extended_query]
        )
        self.logger.info(f"retrieval results: {results}")
        eval_results = await self.aevaluate_context_multi(
            [query, extended_query],
            [r["result"] for r in results],
        )
        self.logger.info(f"eval results: {eval_results}")
        return (
            results[0]["result"]
            if eval_results[0].score > eval_results[1].score
            else results[1]["result"]
        )
    async def aevaluate_context(
        self,
        query: str,
        returned_context: str,
    ) -> EvaluationResult:
        """Score how semantically similar a retrieved context is to the query."""
        result = await self.evaluator.aevaluate(
            response=returned_context,
            reference=query,
        )
        return result

    async def aevaluate_context_multi(
        self,
        query_list: List[str],
        returned_context_list: List[str],
    ) -> List[EvaluationResult]:
        """Evaluate multiple (query, context) pairs concurrently."""
        results = await asyncio.gather(
            *(
                self.aevaluate_context(query, returned_context)
                for query, returned_context in zip(query_list, returned_context_list)
            )
        )
        return results
    def generate_text(
        self,
        query: str,
        history: List[str],
    ) -> str:
        """Generate a reply to the user query, grounded in retrieved context."""
        # get chat history
        context_history = self.format_history(history=history)
        # get retrieval context(s) from the llama-index vector store index
        try:
            # without history: single context retrieval, no evaluation needed
            if not history:
                # w&b trace retrieval context
                result_query_only = self.index.retrieve_context(query)
                context_from_index_selected = result_query_only["result"]
            # with history: retrieve multiple contexts asynchronously, then evaluate
            # to decide which context to use
            else:
                context_from_index_selected = asyncio.run(
                    self.aget_context_with_history(query=query, history=history)
                )
        except Exception as e:
            self.logger.error(f"Exception {e} occurred when retrieving context\n")
            llm_end_time_ms = round(datetime.datetime.now().timestamp() * 1000)
            result = "Something went wrong. Please try again later."
            return result
        self.logger.info(f"Context from Llama-Index:\n{context_from_index_selected}\n")
        # generate text with a prompt template that roleplays as Gerard
        prompt_with_context = self.prompt_template.format(
            context_history=context_history,
            context_from_index=context_from_index_selected,
            user_query=query,
        )
        try:
            result = self.llm.gai_generate_content(
                prompt=prompt_with_context,
                temperature=0.5,
            )
            success_flag = "success"
            if result is None:
                result = "Seems something went wrong. Please try again later."
                self.logger.error("Result with 'None' received\n")
                success_flag = "fail"
        except Exception as e:
            result = "Seems something went wrong. Please try again later."
            self.logger.error(f"Exception {e} occurred\n")
            success_flag = "fail"
        return result
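

# Illustrative usage sketch, not part of the original module: the no-argument
# constructors for GEmbeddings, GLLM and PineconeIndex and the 0.7 threshold are
# assumptions for demonstration only; adjust to the actual src implementations.
if __name__ == "__main__":
    import logging

    logging.basicConfig(level=logging.INFO)
    demo_logger = logging.getLogger(__name__)

    rag = GLlamaIndex(
        logger=demo_logger,
        emb_model=GEmbeddings(),      # assumed constructor signature
        text_model=GLLM(),            # assumed constructor signature
        index=PineconeIndex(),        # assumed constructor signature
        similarity_threshold=0.7,     # assumed threshold value
    )
    # First turn: no history, so the single-retrieval path is used
    print(rag.generate_text(query="What projects have you worked on?", history=[]))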