Update handler.py
handler.py  +56 -39  CHANGED
@@ -21,6 +21,9 @@ from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
 from langchain_core.messages import HumanMessage
 from langchain_core.output_parsers import StrOutputParser
 from langchain_core.runnables import RunnableBranch
+from operator import itemgetter
+
+from langchain.memory import ConversationBufferMemory
 
 class EndpointHandler():
     def __init__(self, path=""):
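
Note: this hunk only adds itemgetter and ConversationBufferMemory, while the chain added in the next hunk also references PromptTemplate, RunnablePassthrough, RunnableLambda, get_buffer_string, ChatOpenAI, and a _combine_documents helper. Those names are presumably imported or defined elsewhere in handler.py, outside the lines shown in this diff; a minimal sketch of the imports they would need, assuming current langchain_core / langchain_openai releases, looks like this:

    # Assumed imports for names the new chain uses that this commit does not add;
    # presumably they already exist near the top of handler.py.
    from langchain_core.messages import get_buffer_string
    from langchain_core.prompts import PromptTemplate
    from langchain_core.runnables import RunnableLambda, RunnablePassthrough
    from langchain_openai import ChatOpenAI
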
@@ -58,57 +61,71 @@
 
         compressor = LLMChainExtractor.from_llm(self.llm)
         retriever = ContextualCompressionRetriever(base_compressor=compressor, base_retriever=retriever)
+
+        _template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.
+
+        Chat History:
+        {chat_history}
+        Follow Up Input: {question}
+        Standalone question:"""
+        CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)
 
-        SYSTEM_TEMPLATE = """
-        Answer the user's questions based on the below context.
-        If the context doesn't contain any relevant information to the question, don't make something up and just say "I don't know":
-
-        <context>
+        template = """Answer the question based only on the following context:
         {context}
-        </context>
-        """
 
-        question_answering_prompt = ChatPromptTemplate.from_messages(
-            [
-                (
-                    "system",
-                    SYSTEM_TEMPLATE,
-                ),
-                MessagesPlaceholder(variable_name="messages"),
-            ]
-        )
+        Question: {question}
+        """
+        ANSWER_PROMPT = ChatPromptTemplate.from_template(template)
 
-
-
-
-                lambda x: len(x.get("messages", [])) == 1,
-                # If only one message, then we just pass that message's content to retriever
-                (lambda x: x["messages"][-1].content) | retriever,
-            ),
-            # If messages, then we pass inputs to LLM chain to transform the query, then pass to retriever
-            question_answering_prompt | chat | StrOutputParser() | retriever,
-        ).with_config(run_name="chat_retriever_chain")
-
-        document_chain = create_stuff_documents_chain(chat, question_answering_prompt)
+        self.memory = ConversationBufferMemory(
+            return_messages=True, output_key="answer", input_key="question"
+        )
 
-
-
-
-
+        # First we add a step to load memory
+        # This adds a "memory" key to the input object
+        loaded_memory = RunnablePassthrough.assign(
+            chat_history=RunnableLambda(self.memory.load_memory_variables) | itemgetter("history"),
         )
+        # Now we calculate the standalone question
+        standalone_question = {
+            "standalone_question": {
+                "question": lambda x: x["question"],
+                "chat_history": lambda x: get_buffer_string(x["chat_history"]),
+            }
+            | CONDENSE_QUESTION_PROMPT
+            | ChatOpenAI(temperature=0)
+            | StrOutputParser(),
+        }
+        # Now we retrieve the documents
+        retrieved_documents = {
+            "docs": itemgetter("standalone_question") | retriever,
+            "question": lambda x: x["standalone_question"],
+        }
+        # Now we construct the inputs for the final prompt
+        final_inputs = {
+            "context": lambda x: _combine_documents(x["docs"]),
+            "question": itemgetter("question"),
+        }
+        # And finally, we do the part that returns the answers
+        answer = {
+            "answer": final_inputs | ANSWER_PROMPT | ChatOpenAI(),
+            "docs": itemgetter("docs"),
+        }
+        # And now we put it all together!
+        self.final_chain = loaded_memory | standalone_question | retrieved_documents | answer
 
     def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
         # pseudo
         # self.model(input)
         inputs = data.pop("inputs", data)
-        output = self.
-            {
-                "messages": [
-                    HumanMessage(content=inputs)
-                ],
-            }
-        )
+        output = self.final_chain.invoke(inputs)
         print(output['answer'])
+
+        # Note that the memory does not save automatically
+        # This will be improved in the future
+        # For now you need to save it yourself
+        self.memory.save_context(inputs, {"answer": output["answer"].content})
+        self.memory.load_memory_variables({})
 
         return output
 
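
The final_inputs step above calls a _combine_documents helper that the diff never defines, so it is presumably declared elsewhere in handler.py. A minimal sketch of such a helper, modeled on the LangChain memory cookbook (the document prompt and separator here are assumptions, not taken from this commit):

    from langchain_core.prompts import PromptTemplate, format_document

    # Hypothetical helper assumed by final_inputs: formats each retrieved Document
    # and joins them into a single context string for ANSWER_PROMPT.
    DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template="{page_content}")

    def _combine_documents(docs, document_prompt=DEFAULT_DOCUMENT_PROMPT, document_separator="\n\n"):
        doc_strings = [format_document(doc, document_prompt) for doc in docs]
        return document_separator.join(doc_strings)
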
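
For reference, the rewritten __call__ forwards the request payload straight into final_chain, and both the condense step and the memory (input_key="question") read a "question" key, so a request body presumably looks like the sketch below. The payload shape and the example questions are assumptions, not part of the commit.

    # Local smoke test of the handler; an Inference Endpoint would receive the
    # same JSON body. The question text is only an example.
    handler = EndpointHandler(path="")

    first = handler({"inputs": {"question": "What topics does the indexed documentation cover?"}})
    print(first["answer"].content)   # AIMessage produced by ANSWER_PROMPT | ChatOpenAI()
    print(len(first["docs"]))        # documents returned by the compression retriever

    # Because save_context() stored the first turn, the follow-up is condensed
    # into a standalone question before retrieval.
    second = handler({"inputs": {"question": "Can you expand on the second one?"}})
    print(second["answer"].content)

On the second call the saved history is injected as {chat_history} by loaded_memory, which is what lets CONDENSE_QUESTION_PROMPT rewrite the follow-up into a standalone query before it reaches the retriever.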