Yingxu He committed on
Commit
97830bc
·
verified ·
1 Parent(s): 1ccdf9c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -18
app.py CHANGED
@@ -5,7 +5,6 @@ import time
5
  from langchain.chains import LLMChain
6
  from langchain.memory import ConversationBufferMemory
7
  from langchain_community.llms import LlamaCpp
8
- from langchain.llms import DeepSparse
9
  from langchain_experimental.chat_models import Llama2Chat
10
 
11
  from langchain.prompts.chat import (
@@ -19,8 +18,8 @@ from langchain.schema import SystemMessage
19
  import urllib
20
 
21
  urllib.request.urlretrieve(
22
- "https://huggingface.co/hfl/chinese-alpaca-2-7b-rlhf-gguf/resolve/main/ggml-model-q6_k.gguf?download=true",
23
- "ggml-model-q6_k.gguf"
24
  )
25
 
26
  template_messages = [
@@ -32,17 +31,10 @@ template_messages = [
32
  prompt_template = ChatPromptTemplate.from_messages(template_messages)
33
 
34
  llm = LlamaCpp(
35
- model_path="ggml-model-q6_k.gguf",
36
  temperature=0.75,
37
  max_tokens=64
38
  )
39
-
40
- llm = DeepSparse(
41
- model="zoo:llama2-7b-llama2_chat_llama2_pretrain-base_quantized",
42
- model_config={"sequence_length": 2048},
43
- stop=["<|im_end|>", "<|endoftext|>"]
44
- )
45
-
46
  model = Llama2Chat(llm=llm)
47
 
48
  memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
@@ -57,13 +49,9 @@ def add_text(history, text):
57
  def bot(history):
58
  print(history)
59
  response = infer(history[-1][0])
60
- history[-1][1] = ""
61
-
62
- for character in response:
63
- history[-1][1] += character
64
- time.sleep(0.05)
65
- print(history)
66
- yield history
67
 
68
 
69
  def infer(question):
 
5
  from langchain.chains import LLMChain
6
  from langchain.memory import ConversationBufferMemory
7
  from langchain_community.llms import LlamaCpp
 
8
  from langchain_experimental.chat_models import Llama2Chat
9
 
10
  from langchain.prompts.chat import (
 
18
  import urllib
19
 
20
  urllib.request.urlretrieve(
21
+ "https://huggingface.co/hfl/chinese-alpaca-2-1.3b-gguf/resolve/main/ggml-model-q8_0.gguf?download=true",
22
+ "ggml-model-q8_0.gguf"
23
  )
24
 
25
  template_messages = [
 
31
  prompt_template = ChatPromptTemplate.from_messages(template_messages)
32
 
33
  llm = LlamaCpp(
34
+ model_path="ggml-model-q8_0.gguf",
35
  temperature=0.75,
36
  max_tokens=64
37
  )
 
 
 
 
 
 
 
38
  model = Llama2Chat(llm=llm)
39
 
40
  memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
 
49
  def bot(history):
50
  print(history)
51
  response = infer(history[-1][0])
52
+ history[-1][1] = response
53
+
54
+ return response
 
 
 
 
55
 
56
 
57
  def infer(question):