Yingxu He committed: Update app.py
app.py
CHANGED
@@ -5,7 +5,6 @@ import time
 from langchain.chains import LLMChain
 from langchain.memory import ConversationBufferMemory
 from langchain_community.llms import LlamaCpp
-from langchain.llms import DeepSparse
 from langchain_experimental.chat_models import Llama2Chat

 from langchain.prompts.chat import (
@@ -19,8 +18,8 @@ from langchain.schema import SystemMessage
 import urllib

 urllib.request.urlretrieve(
-    "https://huggingface.co/hfl/chinese-alpaca-2-
-    "ggml-model-
+    "https://huggingface.co/hfl/chinese-alpaca-2-1.3b-gguf/resolve/main/ggml-model-q8_0.gguf?download=true",
+    "ggml-model-q8_0.gguf"
 )

 template_messages = [
@@ -32,17 +31,10 @@ template_messages = [
 prompt_template = ChatPromptTemplate.from_messages(template_messages)

 llm = LlamaCpp(
-    model_path="ggml-model-
+    model_path="ggml-model-q8_0.gguf",
     temperature=0.75,
     max_tokens=64
 )
-
-llm = DeepSparse(
-    model="zoo:llama2-7b-llama2_chat_llama2_pretrain-base_quantized",
-    model_config={"sequence_length": 2048},
-    stop=["<|im_end|>", "<|endoftext|>"]
-)
-
 model = Llama2Chat(llm=llm)

 memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
@@ -57,13 +49,9 @@ def add_text(history, text):

 def bot(history):
     print(history)
     response = infer(history[-1][0])
-    history[-1][1] = ""
-
-    for character in response:
-        history[-1][1] += character
-        time.sleep(0.05)
-        print(history)
-        yield history
+    history[-1][1] = response
+
+    return response


 def infer(question):
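In short, this commit drops the DeepSparse backend and its import, fills in the chinese-alpaca-2-1.3b q8_0 GGUF download, and replaces the character-streaming generator in bot() with a single assignment and return. Below is a minimal sketch of how the remaining pieces plug together; the body of infer() is not shown in this diff, so the LLMChain wiring is an assumption based on the LLMChain and ConversationBufferMemory imports the file keeps:

# Hypothetical sketch, not the actual infer() from this Space.
from langchain.chains import LLMChain

chain = LLMChain(llm=model, prompt=prompt_template, memory=memory)

def infer(question):
    # LLMChain.run returns the complete reply as one string, which is
    # why the new bot() can assign history[-1][1] = response and return
    # it instead of yielding character by character.
    return chain.run(question)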
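Note the behavioral trade-off in bot(): the old version built the reply up one character at a time with a 0.05 s delay and yielded the updated history on each step, so a Gradio Chatbot bound to it would render the answer incrementally; the new version writes the full reply into history[-1][1] and returns once, so the UI updates in a single step and the time.sleep throttle goes away. Any event wiring that treated bot as a generator would presumably need to consume it as a plain function after this change.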