Spaces:
Paused
Paused
Update constants.py
Browse files — constants.py (+4 −4)
constants.py
CHANGED
@@ -33,8 +33,8 @@ MAX_NEW_TOKENS = CONTEXT_WINDOW_SIZE # int(CONTEXT_WINDOW_SIZE/4)
|
|
33 |
|
34 |
#### If you get a "not enough space in the buffer" error, reduce the values below: start with half of the original values and keep halving them until the error stops appearing
|
35 |
|
36 |
-
N_GPU_LAYERS =
|
37 |
-
N_BATCH =
|
38 |
|
39 |
### From experimenting with the Llama-2-7B-Chat-GGML model on 8GB VRAM, these values work:
|
40 |
# N_GPU_LAYERS = 20
|
@@ -56,7 +56,7 @@ DOCUMENT_MAP = {
|
|
56 |
}
|
57 |
|
58 |
# Default Instructor Model
|
59 |
-
|
60 |
|
61 |
####
|
62 |
#### OTHER EMBEDDING MODEL OPTIONS
|
@@ -71,7 +71,7 @@ DOCUMENT_MAP = {
|
|
71 |
#### MULTILINGUAL EMBEDDING MODELS
|
72 |
####
|
73 |
|
74 |
-
EMBEDDING_MODEL_NAME = "intfloat/multilingual-e5-large" # Uses 2.5 GB of VRAM
|
75 |
# EMBEDDING_MODEL_NAME = "intfloat/multilingual-e5-base" # Uses 1.2 GB of VRAM
|
76 |
|
77 |
|
|
|
33 |
|
34 |
#### If you get a "not enough space in the buffer" error, reduce the values below: start with half of the original values and keep halving them until the error stops appearing
|
35 |
|
36 |
+
N_GPU_LAYERS = 20 # Llama-2-70B has 83 layers
|
37 |
+
N_BATCH = 512
|
38 |
|
39 |
### From experimenting with the Llama-2-7B-Chat-GGML model on 8GB VRAM, these values work:
|
40 |
# N_GPU_LAYERS = 20
|
|
|
56 |
}
|
57 |
|
58 |
# Default Instructor Model
|
59 |
+
EMBEDDING_MODEL_NAME = "hkunlp/instructor-large" # Uses 1.5 GB of VRAM (High Accuracy with lower VRAM usage)
|
60 |
|
61 |
####
|
62 |
#### OTHER EMBEDDING MODEL OPTIONS
|
|
|
71 |
#### MULTILINGUAL EMBEDDING MODELS
|
72 |
####
|
73 |
|
74 |
+
# EMBEDDING_MODEL_NAME = "intfloat/multilingual-e5-large" # Uses 2.5 GB of VRAM
|
75 |
# EMBEDDING_MODEL_NAME = "intfloat/multilingual-e5-base" # Uses 1.2 GB of VRAM
|
76 |
|
77 |
|