Spaces:
Sleeping
Sleeping
import requests | |
from typing import List, Dict, Any, Optional, Union | |
from .exceptions import UnauthorizedError | |
class ChatClient:
    """Minimal HTTP client for the ``/chat/completions`` endpoint of a proxy server."""

    def __init__(self, base_url: str, api_key: Optional[str] = None):
        """
        Initialize the ChatClient.

        Args:
            base_url (str): The base URL of the LiteLLM proxy server (e.g., "http://localhost:8000")
            api_key (Optional[str]): API key for authentication. If provided, it will be sent as a Bearer token.
        """
        # Strip trailing slashes so endpoint paths can be appended with a single "/".
        self._base_url = base_url.rstrip("/")
        self._api_key = api_key

    def _get_headers(self) -> Dict[str, str]:
        """
        Get the headers for API requests, including authorization if api_key is set.

        Returns:
            Dict[str, str]: Headers to use for API requests
        """
        headers = {"Content-Type": "application/json"}
        # An empty or missing key means "no Authorization header at all".
        if self._api_key:
            headers["Authorization"] = f"Bearer {self._api_key}"
        return headers

    def completions(
        self,
        model: str,
        messages: List[Dict[str, str]],
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
        n: Optional[int] = None,
        max_tokens: Optional[int] = None,
        presence_penalty: Optional[float] = None,
        frequency_penalty: Optional[float] = None,
        user: Optional[str] = None,
        return_request: bool = False,
    ) -> Union[Dict[str, Any], requests.Request]:
        """
        Create a chat completion.

        Args:
            model (str): The model to use for completion
            messages (List[Dict[str, str]]): The messages to generate a completion for
            temperature (Optional[float]): Sampling temperature between 0 and 2
            top_p (Optional[float]): Nucleus sampling parameter between 0 and 1
            n (Optional[int]): Number of completions to generate
            max_tokens (Optional[int]): Maximum number of tokens to generate
            presence_penalty (Optional[float]): Presence penalty between -2.0 and 2.0
            frequency_penalty (Optional[float]): Frequency penalty between -2.0 and 2.0
            user (Optional[str]): Unique identifier for the end user
            return_request (bool): If True, returns the unsent ``requests.Request``
                object (not yet prepared) instead of executing it

        Returns:
            Union[Dict[str, Any], requests.Request]: Either the decoded JSON response from
            the server, or the unsent request object if return_request is True

        Raises:
            UnauthorizedError: If the request fails with a 401 status code
            requests.exceptions.RequestException: If the request fails with any other error
        """
        url = f"{self._base_url}/chat/completions"

        # Required fields are always sent.
        data: Dict[str, Any] = {"model": model, "messages": messages}

        # Optional fields are only sent when explicitly provided, so the
        # server applies its own defaults for anything left as None.
        optional_fields = {
            "temperature": temperature,
            "top_p": top_p,
            "n": n,
            "max_tokens": max_tokens,
            "presence_penalty": presence_penalty,
            "frequency_penalty": frequency_penalty,
            "user": user,
        }
        data.update(
            {key: value for key, value in optional_fields.items() if value is not None}
        )

        request = requests.Request("POST", url, headers=self._get_headers(), json=data)
        if return_request:
            return request

        # The context manager closes the session (and its connection pool)
        # on both the success and the error path.
        with requests.Session() as session:
            response = session.send(request.prepare())
            try:
                response.raise_for_status()
            except requests.exceptions.HTTPError as e:
                if e.response.status_code == 401:
                    # Chain the cause explicitly so the original HTTP error
                    # is preserved on the raised exception.
                    raise UnauthorizedError(e) from e
                raise
            return response.json()