eggacheb's picture
Upload 105 files
1ea2ba0 verified
raw
history blame contribute delete
No virus
1.88 kB
import json
import logging
import textwrap
import uuid
from ollama import Client
from modules.presets import i18n
from ..index_func import construct_index
from ..utils import count_token
from .base_model import BaseLLMModel
class OllamaClient(BaseLLMModel):
    """Chat model backed by an Ollama server.

    ``model_name`` is the UI-facing model name handed to ``BaseLLMModel``;
    ``backend_model`` is the actual Ollama model tag used for chat requests,
    and ``ollama_host`` is the server URL passed to ``ollama.Client``.
    """

    # Context-window sizes (tokens) keyed by a substring of the backend model
    # name. Order matters: first match wins, so the more specific
    # "llama2-chinese" must precede "llama2".
    _TOKEN_LIMITS = (
        ("mistral", 8 * 1024),
        ("gemma", 8 * 1024),
        ("codellama", 4 * 1024),
        ("llama2-chinese", 4 * 1024),
        ("llama2", 4 * 1024),
        ("mixtral", 32 * 1024),
        ("llava", 4 * 1024),
    )

    def __init__(self, model_name, user_name="", ollama_host="", backend_model="") -> None:
        super().__init__(model_name=model_name, user=user_name)
        self.backend_model = backend_model
        self.ollama_host = ollama_host
        self.update_token_limit()

    def get_model_list(self):
        """Return the model catalogue reported by the Ollama server."""
        client = Client(host=self.ollama_host)
        return client.list()

    def update_token_limit(self):
        """Set ``self.token_upper_limit`` based on the backend model name.

        Leaves the inherited default untouched when no known model family
        matches (same behavior as the original if/elif chain, which had no
        ``else`` branch).
        """
        lower_model_name = self.backend_model.lower()
        for family, limit in self._TOKEN_LIMITS:
            if family in lower_model_name:
                self.token_upper_limit = limit
                break

    def get_answer_stream_iter(self):
        """Yield the cumulative assistant reply as it streams from Ollama.

        Yields the partial text after every received chunk; on completion,
        records the reply's token count in ``self.all_token_counts[-1]`` and
        yields the final text once more.
        """
        if self.backend_model == "":
            # BUG FIX: this function is a generator (it contains ``yield``),
            # so the original ``return i18n(...)`` only set
            # ``StopIteration.value`` — the caller iterating the generator
            # never saw the message. It must be yielded instead.
            yield i18n("请先选择Ollama后端模型\n\n")
            return
        client = Client(host=self.ollama_host)
        stream = client.chat(
            model=self.backend_model, messages=self.history, stream=True
        )
        partial_text = ""
        for chunk in stream:
            partial_text += chunk["message"]["content"]
            yield partial_text
        # Book-keeping expected by BaseLLMModel: token count of the reply.
        self.all_token_counts[-1] = count_token(partial_text)
        yield partial_text