use ConversationChain + ConversationSummaryBufferMemory
- Makefile +6 -0
- app_modules/llm_chat_chain.py +14 -11
- app_modules/llm_inference.py +12 -4
- app_modules/llm_loader.py +1 -0
- server.py +6 -5
- unit_test.py +1 -1
Makefile
CHANGED
@@ -12,9 +12,15 @@ endif
 test:
 	python test.py
 
+test2:
+	python server.py
+
 chat:
 	python test.py chat
 
+chat2:
+	python unit_test.py chat
+
 unittest:
 	python unit_test.py $(TEST)
 
app_modules/llm_chat_chain.py
CHANGED
@@ -1,9 +1,9 @@
 import os
+from typing import List, Optional
 
-from langchain import
-from langchain.chains import ConversationalRetrievalChain
+from langchain import ConversationChain, PromptTemplate
 from langchain.chains.base import Chain
-from langchain.memory import
+from langchain.memory import ConversationSummaryBufferMemory
 
 from app_modules.llm_inference import LLMInference
 
@@ -12,7 +12,7 @@ def get_llama_2_prompt_template():
     B_INST, E_INST = "[INST]", "[/INST]"
     B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"
 
-    instruction = "Chat History:\n\n{
+    instruction = "Chat History:\n\n{history} \n\nUser: {input}"
     system_prompt = "You are a helpful assistant, you always only answer for the assistant then you stop. Read the chat history to get context"
     # system_prompt = """\
     # You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\n\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information. \n\nDo not output any emotional expression. Read the chat history to get context.\
@@ -32,20 +32,20 @@ class ChatChain(LLMInference):
             get_llama_2_prompt_template()
             if os.environ.get("USE_LLAMA_2_PROMPT_TEMPLATE") == "true"
             else """You are a chatbot having a conversation with a human.
-{
-Human: {
+{history}
+Human: {input}
 Chatbot:"""
         )
 
         print(f"template: {template}")
 
-        prompt = PromptTemplate(
-            input_variables=["chat_history", "question"], template=template
-        )
+        prompt = PromptTemplate(input_variables=["history", "input"], template=template)
 
-        memory =
+        memory = ConversationSummaryBufferMemory(
+            llm=self.llm_loader.llm, max_token_limit=1024, return_messages=True
+        )
 
-        llm_chain =
+        llm_chain = ConversationChain(
             llm=self.llm_loader.llm,
             prompt=prompt,
             verbose=True,
@@ -53,3 +53,6 @@ Chatbot:"""
         )
 
         return llm_chain
+
+    def run_chain(self, chain, inputs, callbacks: Optional[List] = []):
+        return chain({"input": inputs["question"]}, callbacks)
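For reference, the chain this file now builds can be exercised on its own. Below is a minimal sketch assuming a legacy (pre-0.1) LangChain install: FakeListLLM is a stand-in for self.llm_loader.llm so the example is self-contained (the commit also passes return_messages=True, omitted here to keep the prompt purely string-based), and running it end to end additionally needs the transformers package, which this LangChain version uses for token counting in the summary buffer.

# Hedged sketch: FakeListLLM replaces the real model; the prompt, memory and
# chain wiring mirror the diff above.
from langchain import ConversationChain, PromptTemplate
from langchain.llms.fake import FakeListLLM
from langchain.memory import ConversationSummaryBufferMemory

llm = FakeListLLM(responses=["Deep learning is a family of neural-network methods."])

template = """You are a chatbot having a conversation with a human.
{history}
Human: {input}
Chatbot:"""
prompt = PromptTemplate(input_variables=["history", "input"], template=template)

# Keeps recent turns verbatim and asks the LLM to summarize older turns once the
# buffer exceeds max_token_limit tokens; its default memory_key is "history".
memory = ConversationSummaryBufferMemory(llm=llm, max_token_limit=1024)

chain = ConversationChain(llm=llm, prompt=prompt, memory=memory, verbose=True)
result = chain({"input": "what's deep learning?"})
print(result["response"])  # ConversationChain's default output key is "response"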
app_modules/llm_inference.py
CHANGED
@@ -4,6 +4,7 @@ import time
 import urllib
 from queue import Queue
 from threading import Thread
+from typing import List, Optional
 
 from langchain.chains.base import Chain
 
@@ -29,6 +30,9 @@ class LLMInference(metaclass=abc.ABCMeta):
 
         return self.chain
 
+    def run_chain(self, chain, inputs, callbacks: Optional[List] = []):
+        return chain(inputs, callbacks)
+
     def call_chain(
         self,
         inputs,
@@ -45,9 +49,11 @@ class LLMInference(metaclass=abc.ABCMeta):
 
             chain = self.get_chain()
             result = (
-                self.
+                self._run_chain_with_streaming_handler(
+                    chain, inputs, streaming_handler, testing
+                )
                 if streaming_handler is not None
-                else chain
+                else self.run_chain(chain, inputs)
             )
 
             if "answer" in result:
@@ -67,9 +73,11 @@ class LLMInference(metaclass=abc.ABCMeta):
            self.llm_loader.lock.release()
 
    def _execute_chain(self, chain, inputs, q, sh):
-        q.put(chain
+        q.put(self.run_chain(chain, inputs, callbacks=[sh]))
 
-    def
+    def _run_chain_with_streaming_handler(
+        self, chain, inputs, streaming_handler, testing
+    ):
        que = Queue()
 
        t = Thread(
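The new run_chain hook is now the single place a chain gets invoked: the base implementation forwards the caller's inputs unchanged, while ChatChain overrides it (see llm_chat_chain.py above) to remap the UI-facing "question" key to the "input" key that ConversationChain expects. Both the non-streaming branch of call_chain and the background _execute_chain worker route through the hook, so subclasses only need to override one method. A stripped-down sketch of the dispatch, with no threading or locking and a made-up EchoChain standing in for a real LangChain chain:

# Hedged sketch of the override pattern only; EchoChain is hypothetical.
from typing import List, Optional


class EchoChain:
    def __call__(self, inputs, callbacks=None):
        return {"response": f"echo: {inputs['input']}"}


class LLMInference:
    def run_chain(self, chain, inputs, callbacks: Optional[List] = []):
        # default: pass the caller's inputs straight through to the chain
        return chain(inputs, callbacks)


class ChatChain(LLMInference):
    def run_chain(self, chain, inputs, callbacks: Optional[List] = []):
        # ConversationChain expects "input"; callers still pass "question"
        return chain({"input": inputs["question"]}, callbacks)


print(ChatChain().run_chain(EchoChain(), {"question": "what's deep learning?"}))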
app_modules/llm_loader.py
CHANGED
@@ -188,6 +188,7 @@ class LLMLoader:
             )
         elif self.llm_model_type == "hftgi":
             HFTGI_SERVER_URL = os.environ.get("HFTGI_SERVER_URL")
+            self.max_tokens_limit = 4096
             self.llm = HuggingFaceTextGenInference(
                 inference_server_url=HFTGI_SERVER_URL,
                 max_new_tokens=self.max_tokens_limit / 2,
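Hard-coding max_tokens_limit to 4096 for the hftgi backend means the text-generation-inference server is asked for at most 4096 / 2 = 2048 new tokens per call. A hedged sketch of the resulting construction in isolation (assumes HFTGI_SERVER_URL points at a running TGI server and the text_generation client package is installed):

# Sketch only: mirrors the two touched lines above, outside the LLMLoader class.
import os

from langchain.llms import HuggingFaceTextGenInference

max_tokens_limit = 4096
llm = HuggingFaceTextGenInference(
    inference_server_url=os.environ.get("HFTGI_SERVER_URL"),
    max_new_tokens=max_tokens_limit / 2,  # 2048.0, coerced to int by pydantic
)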
server.py
CHANGED
@@ -78,17 +78,18 @@ def chat_sync(
 ) -> str:
     print("question@chat_sync:", question)
     result = do_chat(question, history, chat_id, None)
-    return result["
+    return result["response"]
 
 
 if __name__ == "__main__":
     # print_llm_response(json.loads(chat("What's deep learning?", [])))
     chat_start = timer()
-    chat_sync("
+    chat_sync("what's deep learning?", chat_id="test_user")
     chat_sync("more on finance", chat_id="test_user")
-
-
-
+    chat_sync("more on Sentiment analysis", chat_id="test_user")
+    chat_sync("Write the game 'snake' in python", chat_id="test_user")
+    chat_sync("给我讲一个年轻人奋斗创业最终取得成功的故事。", chat_id="test_user")
+    chat_sync("给这个故事起一个标题", chat_id="test_user")
     chat_end = timer()
     total_time = chat_end - chat_start
     print(f"Total time used: {total_time:.3f} s")
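The switch to reading result["response"] here (and in unit_test.py below) follows from the chain swap: ConversationChain returns its answer under the "response" output key by default. A quick, self-contained check, again using FakeListLLM as a stand-in for the real model:

# Hedged sketch: only inspects ConversationChain's default output key.
from langchain import ConversationChain
from langchain.llms.fake import FakeListLLM

chain = ConversationChain(llm=FakeListLLM(responses=["hi"]))
print(chain.output_key)   # 'response'
print(chain.output_keys)  # ['response']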
unit_test.py
CHANGED
@@ -170,7 +170,7 @@ def chat():
         end = timer()
         print(f"Completed in {end - start:.3f}s")
 
-        chat_history.append((query, result["
+        chat_history.append((query, result["response"]))
 
     chat_end = timer()
     print(f"Total time used: {chat_end - chat_start:.3f}s")