xu song committed
Commit c619300
Parent(s): 6729913
update

Browse files:
- models/cpp_qwen2.py +11 -6
- requirements.txt +1 -0
models/cpp_qwen2.py
CHANGED
@@ -66,10 +66,11 @@ llama_print_timings: total time = 22517.08 ms / 96 tokens
 import json
 import copy
 import os
-
-from models.base_model import Simulator
+import psutil
 import llama_cpp
 from transformers import AutoTokenizer
+
+from models.base_model import Simulator
 from utils.logging_util import logger
 import config
 
@@ -206,11 +207,11 @@ class Qwen2Simulator(Simulator):
             max_tokens=1,
             top_k=1
         )
-        logger.info(f"cache size {self.llm.cache.cache_size}")
+        logger.info(f"cache size {self.llm.cache.cache_size}, process_mem: "
+                    f"{psutil.Process(os.getpid()).memory_info().rss / 1024 / 1024 / 1024:.2f} GB")
+
+        self._disable_cache()
 
-        # disable cache after
-        llama_cpp.LlamaRAMCache.__setitem__ = lambda *args: None
-        llama_cpp.Llama.save_state = lambda *args: None
 
     def post_cache(self, suffix_tokens):
         """ warmup for next turn generation
@@ -223,6 +224,10 @@ class Qwen2Simulator(Simulator):
         logger.info(f"after warmup: n_tokens = {self.llm.n_tokens}")
 
 
+    def _disable_cache(self):
+        llama_cpp.LlamaRAMCache.__setitem__ = lambda *args: None
+        llama_cpp.Llama.save_state = lambda *args: None
+
 bot = Qwen2Simulator()
 
 if __name__ == "__main__":
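For context, the two techniques this commit combines can be sketched standalone. This is a minimal illustration rather than the repo's code: the function names log_process_memory and disable_cache are illustrative, the monkey-patch targets the same upstream llama-cpp-python attributes the diff touches, and the memory expression matches the new logger.info line.

import os

import psutil
import llama_cpp


def log_process_memory(tag=""):
    # Resident set size (RSS) of the current process in GB, the same
    # expression the commit adds to the cache-size log line.
    rss_gb = psutil.Process(os.getpid()).memory_info().rss / 1024 / 1024 / 1024
    print(f"{tag} process_mem: {rss_gb:.2f} GB")


def disable_cache():
    # Once the prompt cache has been warmed, turn the cache setter and
    # state saving into no-ops so later turns stop copying KV state into RAM.
    llama_cpp.LlamaRAMCache.__setitem__ = lambda *args: None
    llama_cpp.Llama.save_state = lambda *args: None

Because the patch is applied at class level it affects every Llama instance in the process, which matches the single global bot instance created at module scope here.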
requirements.txt
CHANGED
@@ -2,4 +2,5 @@ huggingface_hub==0.22.2
 transformers
 torch
 accelerate
+psutil
 git+https://github.com/xu-song/llama-cpp-python.git -C cmake.args="-DGGML_BLAS=ON;-DGGML_BLAS_VENDOR=OpenBLAS"
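As a quick post-install check (a sketch, assuming the low-level llama_print_system_info binding that llama-cpp-python exposes), "BLAS = 1" should appear in the system info when the OpenBLAS CMake flags from the requirements line took effect:

import psutil
import llama_cpp

# "BLAS = 1" in the system info indicates the BLAS-enabled build.
print(llama_cpp.llama_print_system_info().decode())

# psutil smoke test: current resident memory in GB.
print(f"RSS: {psutil.Process().memory_info().rss / 1024 / 1024 / 1024:.2f} GB")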