Yingxu He committed: Update app.py
app.py
CHANGED
@@ -5,7 +5,6 @@ import time
 from langchain.chains import LLMChain
 from langchain.memory import ConversationBufferMemory
 from langchain_community.llms import LlamaCpp
-from langchain.llms import DeepSparse
 from langchain_experimental.chat_models import Llama2Chat

 from langchain.prompts.chat import (
@@ -19,8 +18,8 @@ from langchain.schema import SystemMessage
 import urllib

 urllib.request.urlretrieve(
-    "https://huggingface.co/hfl/chinese-alpaca-2-
-    "ggml-model-
+    "https://huggingface.co/hfl/chinese-alpaca-2-1.3b-gguf/resolve/main/ggml-model-q8_0.gguf?download=true",
+    "ggml-model-q8_0.gguf"
 )

 template_messages = [
@@ -32,17 +31,10 @@ template_messages = [
 prompt_template = ChatPromptTemplate.from_messages(template_messages)

 llm = LlamaCpp(
-    model_path="ggml-model-
+    model_path="ggml-model-q8_0.gguf",
     temperature=0.75,
     max_tokens=64
 )
-
-llm = DeepSparse(
-    model="zoo:llama2-7b-llama2_chat_llama2_pretrain-base_quantized",
-    model_config={"sequence_length": 2048},
-    stop=["<|im_end|>", "<|endoftext|>"]
-)
-
 model = Llama2Chat(llm=llm)

 memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
@@ -57,13 +49,9 @@ def add_text(history, text):

 def bot(history):
     print(history)
     response = infer(history[-1][0])
-    history[-1][1] = ""
-
-    for character in response:
-        history[-1][1] += character
-        time.sleep(0.05)
-        print(history)
-        yield history
+    history[-1][1] = response
+
+    return response


 def infer(question):
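In short, this commit drops the DeepSparse backend and its import, fills in the chinese-alpaca-2-1.3b q8_0 GGUF download, and replaces the character-streaming generator in bot() with a single assignment and return. Below is a minimal sketch of how the remaining pieces plug together; the body of infer() is not shown in this diff, so the LLMChain wiring is an assumption based on the LLMChain and ConversationBufferMemory imports the file keeps:

# Hypothetical sketch, not the actual infer() from this Space.
from langchain.chains import LLMChain

chain = LLMChain(llm=model, prompt=prompt_template, memory=memory)

def infer(question):
    # LLMChain.run returns the complete reply as one string, which is
    # why the new bot() can assign history[-1][1] = response and return
    # it instead of yielding character by character.
    return chain.run(question)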
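Note the behavioral trade-off in bot(): the old version built the reply up one character at a time with a 0.05 s delay and yielded the updated history on each step, so a Gradio Chatbot bound to it would render the answer incrementally; the new version writes the full reply into history[-1][1] and returns once, so the UI updates in a single step and the time.sleep throttle goes away. Any event wiring that treated bot as a generator would presumably need to consume it as a plain function after this change.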