lukestanley committed
Commit e327a9e
Parent: e01e28e

Update default GPU layer, temperature values

Files changed (1): utils.py (+2 −2)
utils.py CHANGED
@@ -19,12 +19,12 @@ from llama_cpp import Llama, LlamaGrammar, json_schema_to_gbnf
 URL = "http://localhost:5834/v1/chat/completions"
 in_memory_llm = None
 
-N_GPU_LAYERS = env.get("N_GPU_LAYERS", 10)
+N_GPU_LAYERS = env.get("N_GPU_LAYERS", -1)  # Default to -1, which means use all layers if available
 CONTEXT_SIZE = int(env.get("CONTEXT_SIZE", 4096))
 LLM_MODEL_PATH = env.get("LLM_MODEL_PATH", None)
 USE_HTTP_SERVER = env.get("USE_HTTP_SERVER", "false").lower() == "true"
 MAX_TOKENS = int(env.get("MAX_TOKENS", 1000))
-TEMPERATURE = float(env.get("TEMPERATURE", 0.7))
+TEMPERATURE = float(env.get("TEMPERATURE", 0.3))
 
 if LLM_MODEL_PATH and len(LLM_MODEL_PATH) > 0:
     print(f"Using local model from {LLM_MODEL_PATH}")