Update handler.py
handler.py  +56 -39  CHANGED
@@ -21,6 +21,9 @@ from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
 from langchain_core.messages import HumanMessage
 from langchain_core.output_parsers import StrOutputParser
 from langchain_core.runnables import RunnableBranch
+from operator import itemgetter
+
+from langchain.memory import ConversationBufferMemory
 
 class EndpointHandler():
     def __init__(self, path=""):
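
Note: this hunk only adds itemgetter and ConversationBufferMemory, while the chain added in the next hunk also references PromptTemplate, RunnablePassthrough, RunnableLambda, get_buffer_string, ChatOpenAI, and a _combine_documents helper. Those names are presumably imported or defined elsewhere in handler.py, outside the lines shown in this diff; a minimal sketch of the imports they would need, assuming current langchain_core / langchain_openai releases, looks like this:

    # Assumed imports for names the new chain uses that this commit does not add;
    # presumably they already exist near the top of handler.py.
    from langchain_core.messages import get_buffer_string
    from langchain_core.prompts import PromptTemplate
    from langchain_core.runnables import RunnableLambda, RunnablePassthrough
    from langchain_openai import ChatOpenAI
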
@@ -58,57 +61,71 @@
 
         compressor = LLMChainExtractor.from_llm(self.llm)
         retriever = ContextualCompressionRetriever(base_compressor=compressor, base_retriever=retriever)
+
+        _template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.
+
+        Chat History:
+        {chat_history}
+        Follow Up Input: {question}
+        Standalone question:"""
+        CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)
 
-        SYSTEM_TEMPLATE = """
-        Answer the user's questions based on the below context.
-        If the context doesn't contain any relevant information to the question, don't make something up and just say "I don't know":
-
-        <context>
+        template = """Answer the question based only on the following context:
         {context}
-        </context>
-        """
 
-        question_answering_prompt = ChatPromptTemplate.from_messages(
-            [
-                (
-                    "system",
-                    SYSTEM_TEMPLATE,
-                ),
-                MessagesPlaceholder(variable_name="messages"),
-            ]
-        )
+        Question: {question}
+        """
+        ANSWER_PROMPT = ChatPromptTemplate.from_template(template)
 
-
-
-
-                lambda x: len(x.get("messages", [])) == 1,
-                # If only one message, then we just pass that message's content to retriever
-                (lambda x: x["messages"][-1].content) | retriever,
-            ),
-            # If messages, then we pass inputs to LLM chain to transform the query, then pass to retriever
-            question_answering_prompt | chat | StrOutputParser() | retriever,
-        ).with_config(run_name="chat_retriever_chain")
-
-        document_chain = create_stuff_documents_chain(chat, question_answering_prompt)
+        self.memory = ConversationBufferMemory(
+            return_messages=True, output_key="answer", input_key="question"
+        )
 
-
-
-
-
+        # First we add a step to load memory
+        # This adds a "memory" key to the input object
+        loaded_memory = RunnablePassthrough.assign(
+            chat_history=RunnableLambda(self.memory.load_memory_variables) | itemgetter("history"),
         )
+        # Now we calculate the standalone question
+        standalone_question = {
+            "standalone_question": {
+                "question": lambda x: x["question"],
+                "chat_history": lambda x: get_buffer_string(x["chat_history"]),
+            }
+            | CONDENSE_QUESTION_PROMPT
+            | ChatOpenAI(temperature=0)
+            | StrOutputParser(),
+        }
+        # Now we retrieve the documents
+        retrieved_documents = {
+            "docs": itemgetter("standalone_question") | retriever,
+            "question": lambda x: x["standalone_question"],
+        }
+        # Now we construct the inputs for the final prompt
+        final_inputs = {
+            "context": lambda x: _combine_documents(x["docs"]),
+            "question": itemgetter("question"),
+        }
+        # And finally, we do the part that returns the answers
+        answer = {
+            "answer": final_inputs | ANSWER_PROMPT | ChatOpenAI(),
+            "docs": itemgetter("docs"),
+        }
+        # And now we put it all together!
+        self.final_chain = loaded_memory | standalone_question | retrieved_documents | answer
 
     def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
         # pseudo
         # self.model(input)
         inputs = data.pop("inputs", data)
-        output = self.
-            {
-                "messages": [
-                    HumanMessage(content=inputs)
-                ],
-            }
-        )
+        output = self.final_chain.invoke(inputs)
         print(output['answer'])
+
+        # Note that the memory does not save automatically
+        # This will be improved in the future
+        # For now you need to save it yourself
+        self.memory.save_context(inputs, {"answer": output["answer"].content})
+        self.memory.load_memory_variables({})
 
         return output
 
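
The final_inputs step above calls a _combine_documents helper that the diff never defines, so it is presumably declared elsewhere in handler.py. A minimal sketch of such a helper, modeled on the LangChain memory cookbook (the document prompt and separator here are assumptions, not taken from this commit):

    from langchain_core.prompts import PromptTemplate, format_document

    # Hypothetical helper assumed by final_inputs: formats each retrieved Document
    # and joins them into a single context string for ANSWER_PROMPT.
    DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template="{page_content}")

    def _combine_documents(docs, document_prompt=DEFAULT_DOCUMENT_PROMPT, document_separator="\n\n"):
        doc_strings = [format_document(doc, document_prompt) for doc in docs]
        return document_separator.join(doc_strings)
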
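
For reference, the rewritten __call__ forwards the request payload straight into final_chain, and both the condense step and the memory (input_key="question") read a "question" key, so a request body presumably looks like the sketch below. The payload shape and the example questions are assumptions, not part of the commit.

    # Local smoke test of the handler; an Inference Endpoint would receive the
    # same JSON body. The question text is only an example.
    handler = EndpointHandler(path="")

    first = handler({"inputs": {"question": "What topics does the indexed documentation cover?"}})
    print(first["answer"].content)   # AIMessage produced by ANSWER_PROMPT | ChatOpenAI()
    print(len(first["docs"]))        # documents returned by the compression retriever

    # Because save_context() stored the first turn, the follow-up is condensed
    # into a standalone question before retrieval.
    second = handler({"inputs": {"question": "Can you expand on the second one?"}})
    print(second["answer"].content)

On the second call the saved history is injected as {chat_history} by loaded_memory, which is what lets CONDENSE_QUESTION_PROMPT rewrite the follow-up into a standalone query before it reaches the retriever.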