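"""Minimal client for the chat completions endpoint of a LiteLLM proxy server."""
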
from typing import Any, Dict, List, Optional, Union

import requests

from .exceptions import UnauthorizedError


class ChatClient:
    """Thin HTTP client for the chat completions endpoint of a LiteLLM proxy server."""

    def __init__(self, base_url: str, api_key: Optional[str] = None):
        """
        Initialize the ChatClient.

        Args:
            base_url (str): The base URL of the LiteLLM proxy server
                (e.g., "http://localhost:8000").
            api_key (Optional[str]): API key for authentication. If provided,
                it is sent as a Bearer token on every request.
        """
        self._base_url = base_url.rstrip("/")  # Normalize: drop any trailing slash
        self._api_key = api_key

    def _get_headers(self) -> Dict[str, str]:
        """
        Build the headers for API requests, including authorization if api_key is set.

        Returns:
            Dict[str, str]: Headers to use for API requests.
        """
        headers = {"Content-Type": "application/json"}
        if self._api_key:
            headers["Authorization"] = f"Bearer {self._api_key}"
        return headers
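
    # For reference, a client constructed with an API key yields headers like the
    # following (the key value here is illustrative only):
    #   {"Content-Type": "application/json", "Authorization": "Bearer sk-..."}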

    def completions(
        self,
        model: str,
        messages: List[Dict[str, str]],
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
        n: Optional[int] = None,
        max_tokens: Optional[int] = None,
        presence_penalty: Optional[float] = None,
        frequency_penalty: Optional[float] = None,
        user: Optional[str] = None,
        return_request: bool = False,
    ) -> Union[Dict[str, Any], requests.Request]:
        """
        Create a chat completion.

        Args:
            model (str): The model to use for the completion.
            messages (List[Dict[str, str]]): The messages to generate a completion for.
            temperature (Optional[float]): Sampling temperature, between 0 and 2.
            top_p (Optional[float]): Nucleus sampling parameter, between 0 and 1.
            n (Optional[int]): Number of completions to generate.
            max_tokens (Optional[int]): Maximum number of tokens to generate.
            presence_penalty (Optional[float]): Presence penalty, between -2.0 and 2.0.
            frequency_penalty (Optional[float]): Frequency penalty, between -2.0 and 2.0.
            user (Optional[str]): Unique identifier for the end user.
            return_request (bool): If True, return the unsent requests.Request
                object instead of executing the call.

        Returns:
            Union[Dict[str, Any], requests.Request]: The completion response from
            the server, or the unsent requests.Request if return_request is True.

        Raises:
            UnauthorizedError: If the request fails with a 401 status code.
            requests.exceptions.RequestException: If the request fails with any other error.
        """
url = f"{self._base_url}/chat/completions"
# Build request data with required fields
data: Dict[str, Any] = {"model": model, "messages": messages}
# Add optional parameters if provided
if temperature is not None:
data["temperature"] = temperature
if top_p is not None:
data["top_p"] = top_p
if n is not None:
data["n"] = n
if max_tokens is not None:
data["max_tokens"] = max_tokens
if presence_penalty is not None:
data["presence_penalty"] = presence_penalty
if frequency_penalty is not None:
data["frequency_penalty"] = frequency_penalty
if user is not None:
data["user"] = user
request = requests.Request("POST", url, headers=self._get_headers(), json=data)
if return_request:
return request
# Prepare and send the request
session = requests.Session()
try:
response = session.send(request.prepare())
response.raise_for_status()
return response.json()
except requests.exceptions.HTTPError as e:
if e.response.status_code == 401:
raise UnauthorizedError(e)
raise
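

if __name__ == "__main__":
    # Minimal usage sketch. The URL, API key, and model name below are
    # placeholders; point them at a running LiteLLM proxy before executing.
    client = ChatClient("http://localhost:8000", api_key="sk-example")

    # Inspect the request without sending it (return_request=True).
    req = client.completions(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Hello!"}],
        return_request=True,
    )
    print("Would POST to:", req.url)

    # Send the request for real and handle an invalid key explicitly.
    try:
        result = client.completions(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "Hello!"}],
            max_tokens=64,
        )
        print(result["choices"][0]["message"]["content"])
    except UnauthorizedError:
        print("The proxy rejected the API key (HTTP 401).")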