KevinHuSh committed
Commit defd4c5 · 1 Parent(s): dc07f22

set ollama keep_alive (#985)


### What problem does this PR solve?

#980

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

Files changed (2)
  1. api/db/init_data.py +2 -2
  2. rag/llm/chat_model.py +4 -2
api/db/init_data.py CHANGED
@@ -532,8 +532,8 @@ def init_llm_factory():
         {
             "fid": factory_infos[12]["name"],
             "llm_name": "BAAI/bge-reranker-v2-m3",
-            "tags": "LLM,CHAT,",
-            "max_tokens": 16385,
+            "tags": "RE-RANK,2k",
+            "max_tokens": 2048,
             "model_type": LLMType.RERANK.value
         },
     ]
rag/llm/chat_model.py CHANGED
@@ -303,7 +303,8 @@ class OllamaChat(Base):
             response = self.client.chat(
                 model=self.model_name,
                 messages=history,
-                options=options
+                options=options,
+                keep_alive=-1
             )
             ans = response["message"]["content"].strip()
             return ans, response["eval_count"] + response.get("prompt_eval_count", 0)
@@ -325,7 +326,8 @@ class OllamaChat(Base):
                 model=self.model_name,
                 messages=history,
                 stream=True,
-                options=options
+                options=options,
+                keep_alive=-1
            )
            for resp in response:
                if resp["done"]:
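For context, `keep_alive=-1` asks the Ollama server to keep the model loaded in memory indefinitely rather than unloading it after the default idle timeout, which avoids reload latency between chat calls. A minimal sketch of the same call pattern with the `ollama` Python client is below; the host URL, model name, and message content are placeholders, not values taken from this repository:

```python
# Minimal sketch: keep the model resident between requests via keep_alive.
# Host and model name are placeholders; adjust them for your own setup.
from ollama import Client

client = Client(host="http://localhost:11434")

response = client.chat(
    model="llama3",
    messages=[{"role": "user", "content": "Hello"}],
    options={"temperature": 0.7},
    keep_alive=-1,  # -1 = never unload; a duration string like "5m" unloads after 5 idle minutes
)
print(response["message"]["content"])
```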