muryshev committed on
Commit
ad9c449
1 Parent(s): bd38085

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -3
app.py CHANGED
@@ -21,8 +21,6 @@ HF_CACHE_DIR = os.environ.get('HF_CACHE_DIR') or '/home/user/app/.cache'
21
  USE_SYSTEM_PROMPT = os.environ.get('USE_SYSTEM_PROMPT', '').lower() == "true" or False
22
  ENABLE_GPU = os.environ.get('ENABLE_GPU', '').lower() == "true" or False
23
  GPU_LAYERS = int(os.environ.get('GPU_LAYERS', '0'))
24
- n_gqa_str = os.environ.get('N_GQA')
25
- N_GQA = 8#int(n_gqa_str) if n_gqa_str is not None else None #must be set to 8 for 70b models
26
  CHAT_FORMAT = os.environ.get('CHAT_FORMAT') or 'llama-2'
27
  REPO_NAME = os.environ.get('REPO_NAME') or 'IlyaGusev/saiga2_7b_gguf'
28
  MODEL_NAME = os.environ.get('MODEL_NAME') or 'model-q4_K.gguf'
@@ -155,7 +153,7 @@ def generate_response():
155
  return Response(generate_and_log_tokens(user_request='1', generator=generator), content_type='text/plain', status=200, direct_passthrough=True)
156
 
157
  def init_model():
158
- llm.load_model(model_path=MODEL_PATH, context_size=CONTEXT_SIZE, enable_gpu=ENABLE_GPU, gpu_layer_number=GPU_LAYERS, n_gqa=N_GQA)
159
 
160
  # Function to check if no requests were made in the last 5 minutes
161
  def check_last_request_time():
 
21
  USE_SYSTEM_PROMPT = os.environ.get('USE_SYSTEM_PROMPT', '').lower() == "true" or False
22
  ENABLE_GPU = os.environ.get('ENABLE_GPU', '').lower() == "true" or False
23
  GPU_LAYERS = int(os.environ.get('GPU_LAYERS', '0'))
 
 
24
  CHAT_FORMAT = os.environ.get('CHAT_FORMAT') or 'llama-2'
25
  REPO_NAME = os.environ.get('REPO_NAME') or 'IlyaGusev/saiga2_7b_gguf'
26
  MODEL_NAME = os.environ.get('MODEL_NAME') or 'model-q4_K.gguf'
 
153
  return Response(generate_and_log_tokens(user_request='1', generator=generator), content_type='text/plain', status=200, direct_passthrough=True)
154
 
155
  def init_model():
156
+ llm.load_model(model_path=MODEL_PATH, context_size=CONTEXT_SIZE, enable_gpu=ENABLE_GPU, gpu_layer_number=GPU_LAYERS)
157
 
158
  # Function to check if no requests were made in the last 5 minutes
159
  def check_last_request_time():