xu song committed
Commit c619300
Parent(s): 6729913
update

Browse files:
- models/cpp_qwen2.py +11 -6
- requirements.txt +1 -0
models/cpp_qwen2.py
CHANGED
@@ -66,10 +66,11 @@ llama_print_timings: total time = 22517.08 ms / 96 tokens
 import json
 import copy
 import os
-
-from models.base_model import Simulator
+import psutil
 import llama_cpp
 from transformers import AutoTokenizer
+
+from models.base_model import Simulator
 from utils.logging_util import logger
 import config
 
@@ -206,11 +207,11 @@ class Qwen2Simulator(Simulator):
             max_tokens=1,
             top_k=1
         )
-        logger.info(f"cache size {self.llm.cache.cache_size}")
+        logger.info(f"cache size {self.llm.cache.cache_size}, process_mem: "
+                    f"{psutil.Process(os.getpid()).memory_info().rss / 1024 / 1024 / 1024:.2f} GB")
+
+        self._disable_cache()
 
-        # disable cache after
-        llama_cpp.LlamaRAMCache.__setitem__ = lambda *args: None
-        llama_cpp.Llama.save_state = lambda *args: None
 
     def post_cache(self, suffix_tokens):
         """ warmup for next turn generation
@@ -223,6 +224,10 @@ class Qwen2Simulator(Simulator):
         logger.info(f"after warmup: n_tokens = {self.llm.n_tokens}")
 
 
+    def _disable_cache(self):
+        llama_cpp.LlamaRAMCache.__setitem__ = lambda *args: None
+        llama_cpp.Llama.save_state = lambda *args: None
+
 bot = Qwen2Simulator()
 
 if __name__ == "__main__":
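For context, the two techniques this commit combines can be sketched standalone. This is a minimal illustration rather than the repo's code: the function names log_process_memory and disable_cache are illustrative, the monkey-patch targets the same upstream llama-cpp-python attributes the diff touches, and the memory expression matches the new logger.info line.

import os

import psutil
import llama_cpp


def log_process_memory(tag=""):
    # Resident set size (RSS) of the current process in GB, the same
    # expression the commit adds to the cache-size log line.
    rss_gb = psutil.Process(os.getpid()).memory_info().rss / 1024 / 1024 / 1024
    print(f"{tag} process_mem: {rss_gb:.2f} GB")


def disable_cache():
    # Once the prompt cache has been warmed, turn the cache setter and
    # state saving into no-ops so later turns stop copying KV state into RAM.
    llama_cpp.LlamaRAMCache.__setitem__ = lambda *args: None
    llama_cpp.Llama.save_state = lambda *args: None

Because the patch is applied at class level it affects every Llama instance in the process, which matches the single global bot instance created at module scope here.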
requirements.txt
CHANGED
@@ -2,4 +2,5 @@ huggingface_hub==0.22.2
 transformers
 torch
 accelerate
+psutil
 git+https://github.com/xu-song/llama-cpp-python.git -C cmake.args="-DGGML_BLAS=ON;-DGGML_BLAS_VENDOR=OpenBLAS"
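As a quick post-install check (a sketch, assuming the low-level llama_print_system_info binding that llama-cpp-python exposes), "BLAS = 1" should appear in the system info when the OpenBLAS CMake flags from the requirements line took effect:

import psutil
import llama_cpp

# "BLAS = 1" in the system info indicates the BLAS-enabled build.
print(llama_cpp.llama_print_system_info().decode())

# psutil smoke test: current resident memory in GB.
print(f"RSS: {psutil.Process().memory_info().rss / 1024 / 1024 / 1024:.2f} GB")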