xu song committed
Commit c619300
1 Parent(s): 6729913
Files changed (2)
  1. models/cpp_qwen2.py +11 -6
  2. requirements.txt +1 -0
models/cpp_qwen2.py CHANGED
@@ -66,10 +66,11 @@ llama_print_timings: total time = 22517.08 ms / 96 tokens
 import json
 import copy
 import os
-
-from models.base_model import Simulator
+import psutil
 import llama_cpp
 from transformers import AutoTokenizer
+
+from models.base_model import Simulator
 from utils.logging_util import logger
 import config
 
@@ -206,11 +207,11 @@ class Qwen2Simulator(Simulator):
             max_tokens=1,
             top_k=1
         )
-        logger.info(f"cache size {self.llm.cache.cache_size}")
+        logger.info(f"cache size {self.llm.cache.cache_size}, process_mem: "
+                    f"{psutil.Process(os.getpid()).memory_info().rss / 1024 / 1024 / 1024:.2f} GB")
+
+        self._disable_cache()
 
-        # disable cache after
-        llama_cpp.LlamaRAMCache.__setitem__ = lambda *args: None
-        llama_cpp.Llama.save_state = lambda *args: None
 
     def post_cache(self, suffix_tokens):
         """ warmup for next turn generation
@@ -223,6 +224,10 @@ class Qwen2Simulator(Simulator):
         logger.info(f"after warmup: n_tokens = {self.llm.n_tokens}")
 
 
+    def _disable_cache(self):
+        llama_cpp.LlamaRAMCache.__setitem__ = lambda *args: None
+        llama_cpp.Llama.save_state = lambda *args: None
+
 bot = Qwen2Simulator()
 
 if __name__ == "__main__":
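
Note on the change above: a minimal standalone sketch of the two techniques this commit combines follows, namely logging the process's resident set size (RSS) with psutil and disabling llama-cpp-python's RAM cache by monkey-patching. The helper names are hypothetical; only the patched attributes and the RSS expression come from the diff itself.

import os

import psutil
import llama_cpp


def log_process_mem():
    # Hypothetical helper: resident set size (RSS) of the current process
    # in GiB, the same expression the commit logs after warming the cache.
    rss_gb = psutil.Process(os.getpid()).memory_info().rss / 1024 / 1024 / 1024
    print(f"process_mem: {rss_gb:.2f} GB")


def disable_cache():
    # The same monkey-patch as Qwen2Simulator._disable_cache: cache writes and
    # state snapshots become no-ops, so the cache pins no additional memory;
    # entries stored before the patch stay readable through __getitem__.
    llama_cpp.LlamaRAMCache.__setitem__ = lambda *args: None
    llama_cpp.Llama.save_state = lambda *args: None

Because the patch is applied at class level it affects every Llama instance in the process; that appears intentional here, since the module exposes a single bot = Qwen2Simulator() singleton.
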
requirements.txt CHANGED
@@ -2,4 +2,5 @@ huggingface_hub==0.22.2
 transformers
 torch
 accelerate
+psutil
 git+https://github.com/xu-song/llama-cpp-python.git -C cmake.args="-DGGML_BLAS=ON;-DGGML_BLAS_VENDOR=OpenBLAS"
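
Installation note: the git+ line builds a fork of llama-cpp-python from source, passing cmake.args as a config setting so ggml compiles against OpenBLAS; -C is pip shorthand for --config-settings (pip >= 23.1). A quick post-install sanity check, sketched under the assumption that the fork keeps upstream's low-level llama_print_system_info binding:

import psutil
import llama_cpp

# psutil is the dependency this commit adds; it backs the new RSS logging
# in models/cpp_qwen2.py.
print(f"psutil {psutil.__version__}")

# llama_print_system_info() reports compile-time features; with
# -DGGML_BLAS=ON the output should indicate BLAS support.
print(llama_cpp.llama_print_system_info().decode())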