dkdaniz committed on
Commit
ecf5eb4
1 Parent(s): d13603d

Update constants.py

Browse files
Files changed (1) hide show
  1. constants.py +4 -4
constants.py CHANGED
@@ -33,8 +33,8 @@ MAX_NEW_TOKENS = CONTEXT_WINDOW_SIZE # int(CONTEXT_WINDOW_SIZE/4)
33
 
34
  #### If you get a "not enough space in the buffer" error, you should reduce the values below, start with half of the original values and keep halving the value until the error stops appearing
35
 
36
- N_GPU_LAYERS = 8 # Llama-2-70B has 83 layers
37
- N_BATCH = 16
38
 
39
  ### From experimenting with the Llama-2-7B-Chat-GGML model on 8GB VRAM, these values work:
40
  # N_GPU_LAYERS = 20
@@ -56,7 +56,7 @@ DOCUMENT_MAP = {
56
  }
57
 
58
  # Default Instructor Model
59
- #EMBEDDING_MODEL_NAME = "hkunlp/instructor-large" # Uses 1.5 GB of VRAM (High Accuracy with lower VRAM usage)
60
 
61
  ####
62
  #### OTHER EMBEDDING MODEL OPTIONS
@@ -71,7 +71,7 @@ DOCUMENT_MAP = {
71
  #### MULTILINGUAL EMBEDDING MODELS
72
  ####
73
 
74
- EMBEDDING_MODEL_NAME = "intfloat/multilingual-e5-large" # Uses 2.5 GB of VRAM
75
  # EMBEDDING_MODEL_NAME = "intfloat/multilingual-e5-base" # Uses 1.2 GB of VRAM
76
 
77
 
 
33
 
34
  #### If you get a "not enough space in the buffer" error, you should reduce the values below, start with half of the original values and keep halving the value until the error stops appearing
35
 
36
+ N_GPU_LAYERS = 20 # Llama-2-70B has 83 layers
37
+ N_BATCH = 512
38
 
39
  ### From experimenting with the Llama-2-7B-Chat-GGML model on 8GB VRAM, these values work:
40
  # N_GPU_LAYERS = 20
 
56
  }
57
 
58
  # Default Instructor Model
59
+ EMBEDDING_MODEL_NAME = "hkunlp/instructor-large" # Uses 1.5 GB of VRAM (High Accuracy with lower VRAM usage)
60
 
61
  ####
62
  #### OTHER EMBEDDING MODEL OPTIONS
 
71
  #### MULTILINGUAL EMBEDDING MODELS
72
  ####
73
 
74
+ # EMBEDDING_MODEL_NAME = "intfloat/multilingual-e5-large" # Uses 2.5 GB of VRAM
75
  # EMBEDDING_MODEL_NAME = "intfloat/multilingual-e5-base" # Uses 1.2 GB of VRAM
76
 
77