Spaces:
Paused
Paused
Daniel Marques
committed on
Commit
•
9ed25b8
1
Parent(s):
ba8f696
fix: memory error
Browse files
- constants.py +4 -4
- load_models.py +2 -1
constants.py
CHANGED
@@ -101,11 +101,11 @@ EMBEDDING_MODEL_NAME = "hkunlp/instructor-large" # Uses 1.5 GB of VRAM (High Ac
|
|
101 |
# MODEL_ID = "TheBloke/Llama-2-13b-Chat-GGUF"
|
102 |
# MODEL_BASENAME = "llama-2-13b-chat.Q4_K_M.gguf"
|
103 |
|
104 |
-
|
105 |
-
|
106 |
|
107 |
-
MODEL_ID = "TheBloke/Mistral-7B-Instruct-v0.1-GGUF"
|
108 |
-
MODEL_BASENAME = "mistral-7b-instruct-v0.1.Q8_0.gguf"
|
109 |
|
110 |
# MODEL_ID = "TheBloke/Llama-2-70b-Chat-GGUF"
|
111 |
# MODEL_BASENAME = "llama-2-70b-chat.Q4_K_M.gguf"
|
|
|
101 |
# MODEL_ID = "TheBloke/Llama-2-13b-Chat-GGUF"
|
102 |
# MODEL_BASENAME = "llama-2-13b-chat.Q4_K_M.gguf"
|
103 |
|
104 |
+
MODEL_ID = "TheBloke/Llama-2-7b-Chat-GGUF"
|
105 |
+
MODEL_BASENAME = "llama-2-7b-chat.Q4_K_M.gguf"
|
106 |
|
107 |
+
# MODEL_ID = "TheBloke/Mistral-7B-Instruct-v0.1-GGUF"
|
108 |
+
# MODEL_BASENAME = "mistral-7b-instruct-v0.1.Q8_0.gguf"
|
109 |
|
110 |
# MODEL_ID = "TheBloke/Llama-2-70b-Chat-GGUF"
|
111 |
# MODEL_BASENAME = "llama-2-70b-chat.Q4_K_M.gguf"
|
load_models.py
CHANGED
@@ -58,13 +58,14 @@ def load_quantized_model_gguf_ggml(model_id, model_basename, device_type, loggin
|
|
58 |
"model_path": model_path,
|
59 |
"n_ctx": CONTEXT_WINDOW_SIZE,
|
60 |
"max_tokens": MAX_NEW_TOKENS,
|
|
|
61 |
# set this based on your GPU & CPU RAM
|
62 |
}
|
63 |
if device_type.lower() == "mps":
|
64 |
kwargs["n_gpu_layers"] = 1
|
65 |
if device_type.lower() == "cuda":
|
66 |
kwargs["n_gpu_layers"] = N_GPU_LAYERS
|
67 |
-
|
68 |
|
69 |
# kwargs["stream"] = stream
|
70 |
|
|
|
58 |
"model_path": model_path,
|
59 |
"n_ctx": CONTEXT_WINDOW_SIZE,
|
60 |
"max_tokens": MAX_NEW_TOKENS,
|
61 |
+
"n_batch": N_BATCH,
|
62 |
# set this based on your GPU & CPU RAM
|
63 |
}
|
64 |
if device_type.lower() == "mps":
|
65 |
kwargs["n_gpu_layers"] = 1
|
66 |
if device_type.lower() == "cuda":
|
67 |
kwargs["n_gpu_layers"] = N_GPU_LAYERS
|
68 |
+
# set this based on your GPU
|
69 |
|
70 |
# kwargs["stream"] = stream
|
71 |
|