Daniel Marques committed
Commit 9ed25b8
1 parent: ba8f696

fix: memory error

Files changed (2)
  1. constants.py +4 -4
  2. load_models.py +2 -1
constants.py CHANGED
@@ -101,11 +101,11 @@ EMBEDDING_MODEL_NAME = "hkunlp/instructor-large" # Uses 1.5 GB of VRAM (High Ac
 # MODEL_ID = "TheBloke/Llama-2-13b-Chat-GGUF"
 # MODEL_BASENAME = "llama-2-13b-chat.Q4_K_M.gguf"
 
-# MODEL_ID = "TheBloke/Llama-2-7b-Chat-GGUF"
-# MODEL_BASENAME = "llama-2-7b-chat.Q4_K_M.gguf"
+MODEL_ID = "TheBloke/Llama-2-7b-Chat-GGUF"
+MODEL_BASENAME = "llama-2-7b-chat.Q4_K_M.gguf"
 
-MODEL_ID = "TheBloke/Mistral-7B-Instruct-v0.1-GGUF"
-MODEL_BASENAME = "mistral-7b-instruct-v0.1.Q8_0.gguf"
+# MODEL_ID = "TheBloke/Mistral-7B-Instruct-v0.1-GGUF"
+# MODEL_BASENAME = "mistral-7b-instruct-v0.1.Q8_0.gguf"
 
 # MODEL_ID = "TheBloke/Llama-2-70b-Chat-GGUF"
 # MODEL_BASENAME = "llama-2-70b-chat.Q4_K_M.gguf"
load_models.py CHANGED
@@ -58,13 +58,14 @@ def load_quantized_model_gguf_ggml(model_id, model_basename, device_type, loggin
         "model_path": model_path,
         "n_ctx": CONTEXT_WINDOW_SIZE,
         "max_tokens": MAX_NEW_TOKENS,
+        "n_batch": N_BATCH,
         # set this based on your GPU & CPU RAM
     }
     if device_type.lower() == "mps":
         kwargs["n_gpu_layers"] = 1
     if device_type.lower() == "cuda":
         kwargs["n_gpu_layers"] = N_GPU_LAYERS
-        kwargs["n_batch"] = N_BATCH  # set this based on your GPU
+        # set this based on your GPU
 
     # kwargs["stream"] = stream
 
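The load_models.py half of the fix moves "n_batch": N_BATCH out of the CUDA-only branch and into the base kwargs dict, so the batch size is now capped on every device type (CPU and MPS included) rather than only on CUDA. n_batch is the number of prompt tokens llama.cpp evaluates per step, and its evaluation buffers grow with it, so bounding it everywhere also trims peak memory on non-CUDA runs. Below is a minimal sketch of how the assembled kwargs are typically consumed, assuming LangChain's LlamaCpp wrapper and placeholder constant values (neither is shown in this diff):

# Hypothetical wiring sketch; constants are illustrative placeholders.
from langchain.llms import LlamaCpp

CONTEXT_WINDOW_SIZE = 4096
MAX_NEW_TOKENS = 2048
N_BATCH = 512
N_GPU_LAYERS = 100

kwargs = {
    "model_path": "models/llama-2-7b-chat.Q4_K_M.gguf",
    "n_ctx": CONTEXT_WINDOW_SIZE,
    "max_tokens": MAX_NEW_TOKENS,
    "n_batch": N_BATCH,  # after this commit: applied on every device type
}

device_type = "cuda"
if device_type.lower() == "mps":
    kwargs["n_gpu_layers"] = 1  # Metal: offload a single layer
if device_type.lower() == "cuda":
    kwargs["n_gpu_layers"] = N_GPU_LAYERS  # layers offloaded to the GPU

llm = LlamaCpp(**kwargs)

If memory errors persist, lowering N_BATCH or N_GPU_LAYERS is the usual first adjustment.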