xu song committed
Commit: f29252d
Parent(s): f60f1b9
Files changed (1)
  1. models/cpp_qwen2.py +5 -5
models/cpp_qwen2.py CHANGED
@@ -6,6 +6,7 @@ https://github.com/awinml/llama-cpp-python-bindings
 from simulator import Simulator
 from llama_cpp import Llama
 import llama_cpp.llama_tokenizer
+from transformers import AutoTokenizer
 
 
 class Qwen2Simulator(Simulator):
@@ -20,14 +21,13 @@ class Qwen2Simulator(Simulator):
         # verbose=False,
         # )
 
+        self.hf_tokenizer = AutoTokenizer.from_pretrained("/workspace/czy/model_weights/Qwen1.5-0.5B-Chat/")
         self.llm = Llama(
             model_path="/workspace/xusong/huggingface/models/Qwen1.5-0.5B-Chat-GGUF/qwen1_5-0_5b-chat-q8_0.gguf",
             # n_gpu_layers=-1, # Uncomment to use GPU acceleration
             # seed=1337, # Uncomment to set a specific seed
             # n_ctx=2048, # Uncomment to increase the context window
-            tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained(
-                "/workspace/czy/model_weights/Qwen1.5-0.5B-Chat/"
-            ),
+            tokenizer=self.hf_tokenizer,
             verbose=False,
         )
 
@@ -38,7 +38,7 @@ class Qwen2Simulator(Simulator):
         :return:
         """
         assert messages[-1]["role"] != "user"
-        inputs = self.tokenizer.apply_chat_template(
+        inputs = self.hf_tokenizer.apply_chat_template(
             messages,
             tokenize=False,
             add_generation_prompt=False,
@@ -50,7 +50,7 @@ class Qwen2Simulator(Simulator):
 
     def generate_response(self, messages):
         assert messages[-1]["role"] == "user"
-        inputs = self.tokenizer.apply_chat_template(
+        inputs = self.hf_tokenizer.apply_chat_template(
             messages,
             tokenize=False,
             add_generation_prompt=True
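
Note: the net effect of this commit is to load the Hugging Face tokenizer once, keep it on self.hf_tokenizer, and reuse the same instance both for Llama(tokenizer=...) and for the apply_chat_template calls; the previous code referenced self.tokenizer, which was never assigned. Below is a minimal standalone sketch of that pattern, reusing the repo's local model paths. The LlamaHFTokenizer(...) wrapper is an assumption, not what the commit does: llama-cpp-python types its tokenizer= parameter as its own BaseLlamaTokenizer interface, and LlamaHFTokenizer adapts a Hugging Face tokenizer to it, whereas the commit passes the AutoTokenizer directly.

    # Minimal sketch (not the committed code): one shared HF tokenizer for
    # chat templating and for llama-cpp-python's tokenization.
    from llama_cpp import Llama
    from llama_cpp.llama_tokenizer import LlamaHFTokenizer
    from transformers import AutoTokenizer

    hf_tokenizer = AutoTokenizer.from_pretrained(
        "/workspace/czy/model_weights/Qwen1.5-0.5B-Chat/"
    )
    llm = Llama(
        model_path="/workspace/xusong/huggingface/models/Qwen1.5-0.5B-Chat-GGUF/qwen1_5-0_5b-chat-q8_0.gguf",
        tokenizer=LlamaHFTokenizer(hf_tokenizer),  # assumption: wrap rather than pass raw
        verbose=False,
    )

    messages = [{"role": "user", "content": "Hello"}]
    # add_generation_prompt=True appends the assistant header of Qwen's
    # ChatML template, so the model completes the assistant turn.
    prompt = hf_tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    result = llm(prompt, max_tokens=128, stop=["<|im_end|>"])
    print(result["choices"][0]["text"])

The add_generation_prompt flag is the only difference between the two apply_chat_template call sites in the diff: generate_response sets it to True so generation starts inside an assistant turn, while the query-generation path sets it to False.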