langchain-llama2-7b-chat-uncensored-ggml

Runtime error

App Files Community

ffreemt committed on Jul 28, 2023

Commit

6f9f106

1 Parent(s): ad353f0

Update from collections import deque

Browse files

Files changed (1) hide show

app.py +46 -30

app.py CHANGED Viewed

@@ -6,8 +6,8 @@ import os
 import platform
 import random
 import time
 from pathlib import Path
-from queue import deque
 from threading import Thread
 from typing import Any, Dict, List, Union
@@ -134,7 +134,7 @@ You are a helpful assistant. Think step by step.
 {input}
 ### RESPONSE:"""
-prompt_template = """You are a helpful assistant. Think step by step.
 {history}
 ### HUMAN:
 {input}
@@ -186,7 +186,7 @@ class DequeCallbackHandler(BaseCallbackHandler):
         self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
     ) -> None:
         """Run when LLM errors."""
-        self.q.put(sig_end)
 _ = psutil.cpu_count(logical=False) - 1
@@ -203,6 +203,7 @@ except Exception as exc_:
     raise SystemExit(1) from exc_
 config = Config()
 config.stream = True
 config.stop = stop
 config.threads=cpu_count
@@ -241,7 +242,31 @@ conversation = ConversationChain(
     memory=memory,
     verbose=True,
 )
-logger.debug(f"{conversation.prompt.template=}")
 # conversation.predict(input="Hello, my name is Andrea")
@@ -286,6 +311,7 @@ def bot(history):
     response = []
     flag = 1
     then = time.time()
     with about_time() as atime:  # type: ignore
         while True:
             if deq:
@@ -311,33 +337,18 @@ def bot(history):
 def predict_api(user_prompt):
-    logger.debug(f"{user_prompt=}")
     try:
-        # user_prompt = prompt
-        Config(
-            temperature=0.2,
-            top_k=10,
-            top_p=0.9,
-            repetition_penalty=1.0,
-            max_new_tokens=512,  # adjust as needed
-            seed=42,
-            reset=True,  # reset history (cache)
-            stream=False,
-            # threads=cpu_count,
-            # stop=prompt_prefix[1:2],
-        )
         _ = """
         response = generate(
             prompt,
             config=config,
         )
         # """
-        conversation1 = ConversationChain(
-            llm=LLM,
-            prompt=prompt,
-            verbose=True,
-        )
-        response = conversation1.predict(input=user_prompt)
         logger.debug(f"api: {response=}")
     except Exception as exc:
         logger.error(exc)
@@ -368,6 +379,8 @@ examples_list = [
     [
         "What NFL team won the Super Bowl in the year Justin Bieber was born? Think step by step."
     ],
     ["How to pick a lock? Provide detailed steps."],
     ["If it takes 10 hours to dry 10 clothes,  assuming all the clothes are hanged together at the same time for drying , then how long will it take to dry a cloth?"],
     ["is infinity + 1 bigger than infinity?"],
@@ -506,6 +519,8 @@ with gr.Blocks(
         cancels=[msg_submit_event, submit_click_event],
         queue=False,
     )
     clear.click(lambda: None, None, chatbot, queue=False)
     with gr.Accordion("For Chat/Translation API", open=False, visible=False):
@@ -513,12 +528,13 @@ with gr.Blocks(
         api_btn = gr.Button("Go", variant="primary")
         out_text = gr.Text()
-    api_btn.click(
-        predict_api,
-        input_text,
-        out_text,
-        api_name="api",
-    )
     # block.load(update_buff, [], buff, every=1)
     # block.load(update_buff, [buff_var], [buff_var, buff], every=1)

 import platform
 import random
 import time
+from collections import deque
 from pathlib import Path
 from threading import Thread
 from typing import Any, Dict, List, Union
 {input}
 ### RESPONSE:"""
+prompt_template = """You are a helpful assistant. Let's think step by step.
 {history}
 ### HUMAN:
 {input}
         self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any
     ) -> None:
         """Run when LLM errors."""
+        self.q.append(sig_end)
 _ = psutil.cpu_count(logical=False) - 1
     raise SystemExit(1) from exc_
 config = Config()
+# Config(top_k=40, top_p=0.95, temperature=0.8, repetition_penalty=1.1, last_n_tokens=64, seed=-1, batch_size=8, threads=-1, max_new_tokens=256, stop=None, stream=False, reset=True, context_length=-1, gpu_layers=0)
 config.stream = True
 config.stop = stop
 config.threads=cpu_count
     memory=memory,
     verbose=True,
 )
+logger.debug(f"{conversation.prompt.template=}")  # type: ignore
+# for api access ===
+config = Config()
+# Config(top_k=40, top_p=0.95, temperature=0.8, repetition_penalty=1.1, last_n_tokens=64, seed=-1, batch_size=8, threads=-1, max_new_tokens=256, stop=None, stream=False, reset=True, context_length=-1, gpu_layers=0)
+config.stop = stop
+config.threads=cpu_count
+try:
+    LLM_api = CTransformers(
+        model=model_loc,
+        model_type="llama",
+        # callbacks=[StreamingStdOutCallbackHandler(), deqcb],
+        callbacks=[StreamingStdOutCallbackHandler()],
+        **vars(config),
+    )
+    conversation_api = ConversationChain(
+        llm=LLM_api,  #  need a separate LLM, or else deq may be messed up
+        prompt=prompt,
+        verbose=True,
+    )
+except Exception as exc_:
+    logger.error(exc_)
+    conversation_api = None
+    logger.warning("Not able to instantiate conversation_api, api will not work")
 # conversation.predict(input="Hello, my name is Andrea")
     response = []
     flag = 1
     then = time.time()
+    prefix = ""  # to please pyright
     with about_time() as atime:  # type: ignore
         while True:
             if deq:
 def predict_api(user_prompt):
+    if conversation_api is None:
+        return "conversation_api is None, probably due to insufficient memory, api not usable"
+    logger.debug(f"api: {user_prompt=}")
     try:
         _ = """
         response = generate(
             prompt,
             config=config,
         )
         # """
+        response = conversation_api.predict(input=user_prompt)
         logger.debug(f"api: {response=}")
     except Exception as exc:
         logger.error(exc)
     [
         "What NFL team won the Super Bowl in the year Justin Bieber was born? Think step by step."
     ],
+    ["When was Justin Bieber born？"],
+    ["What NFL team won the Super Bowl in 1994?"],
     ["How to pick a lock? Provide detailed steps."],
     ["If it takes 10 hours to dry 10 clothes,  assuming all the clothes are hanged together at the same time for drying , then how long will it take to dry a cloth?"],
     ["is infinity + 1 bigger than infinity?"],
         cancels=[msg_submit_event, submit_click_event],
         queue=False,
     )
+    # TODO: clear conversation memory as well
     clear.click(lambda: None, None, chatbot, queue=False)
     with gr.Accordion("For Chat/Translation API", open=False, visible=False):
         api_btn = gr.Button("Go", variant="primary")
         out_text = gr.Text()
+    if conversation_api is not None:
+        api_btn.click(
+            predict_api,
+            input_text,
+            out_text,
+            api_name="api",
+        )
     # block.load(update_buff, [], buff, every=1)
     # block.load(update_buff, [buff_var], [buff_var, buff], every=1)