from __future__ import annotations

from typing import Sequence, Type, Union

from pydantic import BaseModel

from camel.configs.base_config import BaseConfig
from camel.types import NOT_GIVEN, NotGiven


class OllamaConfig(BaseConfig):
    r"""Defines the parameters for generating chat completions using OpenAI
    compatibility.

    Reference: https://github.com/ollama/ollama/blob/main/docs/openai.md

    Args:
        temperature (float, optional): Sampling temperature to use, between
            :obj:`0` and :obj:`2`. Higher values make the output more random,
            while lower values make it more focused and deterministic.
            (default: :obj:`0.2`)
        top_p (float, optional): An alternative to sampling with temperature,
            called nucleus sampling, where the model considers the results of
            the tokens with top_p probability mass. So :obj:`0.1` means only
            the tokens comprising the top 10% probability mass are considered.
            (default: :obj:`1.0`)
        response_format (object, optional): An object specifying the format
            that the model must output. Compatible with GPT-4 Turbo and all
            GPT-3.5 Turbo models newer than gpt-3.5-turbo-1106. Setting to
            {"type": "json_object"} enables JSON mode, which guarantees the
            message the model generates is valid JSON. Important: when using
            JSON mode, you must also instruct the model to produce JSON
            yourself via a system or user message. Without this, the model
            may generate an unending stream of whitespace until the
            generation reaches the token limit, resulting in a long-running
            and seemingly "stuck" request. Also note that the message
            content may be partially cut off if finish_reason="length",
            which indicates the generation exceeded max_tokens or the
            conversation exceeded the max context length.
        stream (bool, optional): If True, partial message deltas will be sent
            as data-only server-sent events as they become available.
            (default: :obj:`False`)
        stop (str or list, optional): Up to :obj:`4` sequences where the API
            will stop generating further tokens. (default: :obj:`None`)
        max_tokens (int, optional): The maximum number of tokens to generate
            in the chat completion. The total length of input tokens and
            generated tokens is limited by the model's context length.
            (default: :obj:`None`)
        presence_penalty (float, optional): Number between :obj:`-2.0` and
            :obj:`2.0`. Positive values penalize new tokens based on whether
            they appear in the text so far, increasing the model's likelihood
            to talk about new topics. See more information about frequency
            and presence penalties. (default: :obj:`0.0`)
        frequency_penalty (float, optional): Number between :obj:`-2.0` and
            :obj:`2.0`. Positive values penalize new tokens based on their
            existing frequency in the text so far, decreasing the model's
            likelihood to repeat the same line verbatim. See more information
            about frequency and presence penalties. (default: :obj:`0.0`)
    """

    temperature: float = 0.2
    top_p: float = 1.0
    stream: bool = False
    stop: Union[str, Sequence[str], NotGiven] = NOT_GIVEN
    max_tokens: Union[int, NotGiven] = NOT_GIVEN
    presence_penalty: float = 0.0
    response_format: Union[Type[BaseModel], dict, NotGiven] = NOT_GIVEN
    frequency_penalty: float = 0.0


OLLAMA_API_PARAMS = {param for param in OllamaConfig.model_fields.keys()}
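

# A minimal usage sketch (not part of the library API): it shows how the
# config fields above map onto keyword arguments for Ollama's
# OpenAI-compatible endpoint. Filtering out NOT_GIVEN values this way is an
# illustrative assumption; the camel model backends may handle unset fields
# differently.
if __name__ == "__main__":
    config = OllamaConfig(temperature=0.4, max_tokens=256, stop=["###"])

    # Keep only the parameters that actually have a value, so the NOT_GIVEN
    # sentinel is not forwarded to the API.
    kwargs = {
        name: getattr(config, name)
        for name in OLLAMA_API_PARAMS
        if getattr(config, name) is not NOT_GIVEN
    }
    print(kwargs)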