ka1kuk committed (verified)
Commit 7db0ae4 · Parent: 20a7d21

Upload 235 files

This view is limited to 50 files because the commit contains too many changes. See the raw diff for the remaining files.
Files changed (50):
  1. litellm/__init__.py +557 -0
  2. litellm/_logging.py +30 -0
  3. litellm/_redis.py +93 -0
  4. litellm/_version.py +6 -0
  5. litellm/budget_manager.py +206 -0
  6. litellm/caching.py +678 -0
  7. litellm/cost.json +5 -0
  8. litellm/deprecated_litellm_server/.env.template +43 -0
  9. litellm/deprecated_litellm_server/Dockerfile +10 -0
  10. litellm/deprecated_litellm_server/README.md +3 -0
  11. litellm/deprecated_litellm_server/__init__.py +2 -0
  12. litellm/deprecated_litellm_server/main.py +193 -0
  13. litellm/deprecated_litellm_server/requirements.txt +7 -0
  14. litellm/deprecated_litellm_server/server_utils.py +85 -0
  15. litellm/exceptions.py +200 -0
  16. litellm/integrations/__init__.py +1 -0
  17. litellm/integrations/aispend.py +177 -0
  18. litellm/integrations/berrispend.py +184 -0
  19. litellm/integrations/custom_logger.py +130 -0
  20. litellm/integrations/dynamodb.py +92 -0
  21. litellm/integrations/helicone.py +114 -0
  22. litellm/integrations/langfuse.py +191 -0
  23. litellm/integrations/langsmith.py +75 -0
  24. litellm/integrations/litedebugger.py +262 -0
  25. litellm/integrations/llmonitor.py +127 -0
  26. litellm/integrations/prompt_layer.py +72 -0
  27. litellm/integrations/s3.py +150 -0
  28. litellm/integrations/supabase.py +117 -0
  29. litellm/integrations/traceloop.py +114 -0
  30. litellm/integrations/weights_biases.py +223 -0
  31. litellm/llms/__init__.py +1 -0
  32. litellm/llms/ai21.py +212 -0
  33. litellm/llms/aleph_alpha.py +304 -0
  34. litellm/llms/anthropic.py +215 -0
  35. litellm/llms/azure.py +799 -0
  36. litellm/llms/base.py +45 -0
  37. litellm/llms/baseten.py +164 -0
  38. litellm/llms/bedrock.py +799 -0
  39. litellm/llms/cloudflare.py +176 -0
  40. litellm/llms/cohere.py +293 -0
  41. litellm/llms/custom_httpx/azure_dall_e_2.py +136 -0
  42. litellm/llms/custom_httpx/bedrock_async.py +0 -0
  43. litellm/llms/gemini.py +222 -0
  44. litellm/llms/huggingface_llms_metadata/hf_conversational_models.txt +2523 -0
  45. litellm/llms/huggingface_llms_metadata/hf_text_generation_models.txt +0 -0
  46. litellm/llms/huggingface_restapi.py +750 -0
  47. litellm/llms/maritalk.py +189 -0
  48. litellm/llms/nlp_cloud.py +243 -0
  49. litellm/llms/ollama.py +400 -0
  50. litellm/llms/ollama_chat.py +333 -0
litellm/__init__.py ADDED
@@ -0,0 +1,557 @@
1
+ ### INIT VARIABLES ###
2
+ import threading, requests
3
+ from typing import Callable, List, Optional, Dict, Union, Any
4
+ from litellm.caching import Cache
5
+ from litellm._logging import set_verbose
6
+ from litellm.proxy._types import KeyManagementSystem
7
+ import httpx
8
+
9
+ input_callback: List[Union[str, Callable]] = []
10
+ success_callback: List[Union[str, Callable]] = []
11
+ failure_callback: List[Union[str, Callable]] = []
12
+ callbacks: List[Callable] = []
13
+ _async_input_callback: List[
14
+ Callable
15
+ ] = [] # internal variable - async custom callbacks are routed here.
16
+ _async_success_callback: List[
17
+ Union[str, Callable]
18
+ ] = [] # internal variable - async custom callbacks are routed here.
19
+ _async_failure_callback: List[
20
+ Callable
21
+ ] = [] # internal variable - async custom callbacks are routed here.
22
+ pre_call_rules: List[Callable] = []
23
+ post_call_rules: List[Callable] = []
24
+ email: Optional[
25
+ str
26
+ ] = None # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
27
+ token: Optional[
28
+ str
29
+ ] = None # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
30
+ telemetry = True
31
+ max_tokens = 256 # OpenAI Defaults
32
+ drop_params = False
33
+ retry = True
34
+ api_key: Optional[str] = None
35
+ openai_key: Optional[str] = None
36
+ azure_key: Optional[str] = None
37
+ anthropic_key: Optional[str] = None
38
+ replicate_key: Optional[str] = None
39
+ cohere_key: Optional[str] = None
40
+ maritalk_key: Optional[str] = None
41
+ ai21_key: Optional[str] = None
42
+ openrouter_key: Optional[str] = None
43
+ huggingface_key: Optional[str] = None
44
+ vertex_project: Optional[str] = None
45
+ vertex_location: Optional[str] = None
46
+ togetherai_api_key: Optional[str] = None
47
+ cloudflare_api_key: Optional[str] = None
48
+ baseten_key: Optional[str] = None
49
+ aleph_alpha_key: Optional[str] = None
50
+ nlp_cloud_key: Optional[str] = None
51
+ use_client: bool = False
52
+ logging: bool = True
53
+ caching: bool = False # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
54
+ caching_with_models: bool = False # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
55
+ cache: Optional[
56
+ Cache
57
+ ] = None # cache object <- use this - https://docs.litellm.ai/docs/caching
58
+ model_alias_map: Dict[str, str] = {}
59
+ model_group_alias_map: Dict[str, str] = {}
60
+ max_budget: float = 0.0 # set the max budget across all providers
61
+ _openai_completion_params = [
62
+ "functions",
63
+ "function_call",
64
+ "temperature",
66
+ "top_p",
67
+ "n",
68
+ "stream",
69
+ "stop",
70
+ "max_tokens",
71
+ "presence_penalty",
72
+ "frequency_penalty",
73
+ "logit_bias",
74
+ "user",
75
+ "request_timeout",
76
+ "api_base",
77
+ "api_version",
78
+ "api_key",
79
+ "deployment_id",
80
+ "organization",
81
+ "base_url",
82
+ "default_headers",
83
+ "timeout",
84
+ "response_format",
85
+ "seed",
86
+ "tools",
87
+ "tool_choice",
88
+ "max_retries",
89
+ ]
90
+ _litellm_completion_params = [
91
+ "metadata",
92
+ "acompletion",
93
+ "caching",
94
+ "mock_response",
95
+ "api_key",
96
+ "api_version",
97
+ "api_base",
98
+ "force_timeout",
99
+ "logger_fn",
100
+ "verbose",
101
+ "custom_llm_provider",
102
+ "litellm_logging_obj",
103
+ "litellm_call_id",
104
+ "use_client",
105
+ "id",
106
+ "fallbacks",
107
+ "azure",
108
+ "headers",
109
+ "model_list",
110
+ "num_retries",
111
+ "context_window_fallback_dict",
112
+ "roles",
113
+ "final_prompt_value",
114
+ "bos_token",
115
+ "eos_token",
116
+ "request_timeout",
117
+ "complete_response",
118
+ "self",
119
+ "client",
120
+ "rpm",
121
+ "tpm",
122
+ "input_cost_per_token",
123
+ "output_cost_per_token",
124
+ "hf_model_name",
125
+ "model_info",
126
+ "proxy_server_request",
127
+ "preset_cache_key",
128
+ ]
129
+ _current_cost = 0 # private variable, used if max budget is set
130
+ error_logs: Dict = {}
131
+ add_function_to_prompt: bool = False # if function calling not supported by api, append function call details to system prompt
132
+ client_session: Optional[httpx.Client] = None
133
+ aclient_session: Optional[httpx.AsyncClient] = None
134
+ model_fallbacks: Optional[List] = None # Deprecated for 'litellm.fallbacks'
135
+ model_cost_map_url: str = "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"
136
+ suppress_debug_info = False
137
+ dynamodb_table_name: Optional[str] = None
138
+ s3_callback_params: Optional[Dict] = None
139
+ #### RELIABILITY ####
140
+ request_timeout: Optional[float] = 6000
141
+ num_retries: Optional[int] = None # per model endpoint
142
+ fallbacks: Optional[List] = None
143
+ context_window_fallbacks: Optional[List] = None
144
+ allowed_fails: int = 0
145
+ num_retries_per_request: Optional[
146
+ int
147
+ ] = None # for the request overall (incl. fallbacks + model retries)
148
+ ####### SECRET MANAGERS #####################
149
+ secret_manager_client: Optional[
150
+ Any
151
+ ] = None # list of instantiated key management clients - e.g. azure kv, infisical, etc.
152
+ _google_kms_resource_name: Optional[str] = None
153
+ _key_management_system: Optional[KeyManagementSystem] = None
154
+ #############################################
155
+
156
+
157
+ def get_model_cost_map(url: str):
158
+ try:
159
+ with requests.get(
160
+ url, timeout=5
161
+ ) as response: # set a 5 second timeout for the get request
162
+ response.raise_for_status() # Raise an exception if the request is unsuccessful
163
+ content = response.json()
164
+ return content
165
+ except Exception as e:
166
+ import importlib.resources
167
+ import json
168
+
169
+ with importlib.resources.open_text(
170
+ "litellm", "model_prices_and_context_window_backup.json"
171
+ ) as f:
172
+ content = json.load(f)
173
+ return content
174
+
175
+
176
+ model_cost = get_model_cost_map(url=model_cost_map_url)
177
+ custom_prompt_dict: Dict[str, dict] = {}
178
+
179
+
180
+ ####### THREAD-SPECIFIC DATA ###################
181
+ class MyLocal(threading.local):
182
+ def __init__(self):
183
+ self.user = "Hello World"
184
+
185
+
186
+ _thread_context = MyLocal()
187
+
188
+
189
+ def identify(event_details):
190
+ # Store user in thread local data
191
+ if "user" in event_details:
192
+ _thread_context.user = event_details["user"]
193
+
194
+
195
+ ####### ADDITIONAL PARAMS ################### configurable params if you use proxy models like Helicone, map spend to org id, etc.
196
+ api_base = None
197
+ headers = None
198
+ api_version = None
199
+ organization = None
200
+ config_path = None
201
+ ####### COMPLETION MODELS ###################
202
+ open_ai_chat_completion_models: List = []
203
+ open_ai_text_completion_models: List = []
204
+ cohere_models: List = []
205
+ anthropic_models: List = []
206
+ openrouter_models: List = []
207
+ vertex_language_models: List = []
208
+ vertex_vision_models: List = []
209
+ vertex_chat_models: List = []
210
+ vertex_code_chat_models: List = []
211
+ vertex_text_models: List = []
212
+ vertex_code_text_models: List = []
213
+ ai21_models: List = []
214
+ nlp_cloud_models: List = []
215
+ aleph_alpha_models: List = []
216
+ bedrock_models: List = []
217
+ deepinfra_models: List = []
218
+ perplexity_models: List = []
219
+ for key, value in model_cost.items():
220
+ if value.get("litellm_provider") == "openai":
221
+ open_ai_chat_completion_models.append(key)
222
+ elif value.get("litellm_provider") == "text-completion-openai":
223
+ open_ai_text_completion_models.append(key)
224
+ elif value.get("litellm_provider") == "cohere":
225
+ cohere_models.append(key)
226
+ elif value.get("litellm_provider") == "anthropic":
227
+ anthropic_models.append(key)
228
+ elif value.get("litellm_provider") == "openrouter":
229
+ openrouter_models.append(key)
230
+ elif value.get("litellm_provider") == "vertex_ai-text-models":
231
+ vertex_text_models.append(key)
232
+ elif value.get("litellm_provider") == "vertex_ai-code-text-models":
233
+ vertex_code_text_models.append(key)
234
+ elif value.get("litellm_provider") == "vertex_ai-language-models":
235
+ vertex_language_models.append(key)
236
+ elif value.get("litellm_provider") == "vertex_ai-vision-models":
237
+ vertex_vision_models.append(key)
238
+ elif value.get("litellm_provider") == "vertex_ai-chat-models":
239
+ vertex_chat_models.append(key)
240
+ elif value.get("litellm_provider") == "vertex_ai-code-chat-models":
241
+ vertex_code_chat_models.append(key)
242
+ elif value.get("litellm_provider") == "ai21":
243
+ ai21_models.append(key)
244
+ elif value.get("litellm_provider") == "nlp_cloud":
245
+ nlp_cloud_models.append(key)
246
+ elif value.get("litellm_provider") == "aleph_alpha":
247
+ aleph_alpha_models.append(key)
248
+ elif value.get("litellm_provider") == "bedrock":
249
+ bedrock_models.append(key)
250
+ elif value.get("litellm_provider") == "deepinfra":
251
+ deepinfra_models.append(key)
252
+ elif value.get("litellm_provider") == "perplexity":
253
+ perplexity_models.append(key)
254
+
255
+ # known openai compatible endpoints - we'll eventually move this list to the model_prices_and_context_window.json dictionary
256
+ openai_compatible_endpoints: List = [
257
+ "api.perplexity.ai",
258
+ "api.endpoints.anyscale.com/v1",
259
+ "api.deepinfra.com/v1/openai",
260
+ "api.mistral.ai/v1",
261
+ ]
262
+
263
+ # this is maintained for Exception Mapping
264
+ openai_compatible_providers: List = [
265
+ "anyscale",
266
+ "mistral",
267
+ "deepinfra",
268
+ "perplexity",
269
+ "xinference",
270
+ ]
271
+
272
+
273
+ # well supported replicate llms
274
+ replicate_models: List = [
275
+ # llama replicate supported LLMs
276
+ "replicate/llama-2-70b-chat:2796ee9483c3fd7aa2e171d38f4ca12251a30609463dcfd4cd76703f22e96cdf",
277
+ "a16z-infra/llama-2-13b-chat:2a7f981751ec7fdf87b5b91ad4db53683a98082e9ff7bfd12c8cd5ea85980a52",
278
+ "meta/codellama-13b:1c914d844307b0588599b8393480a3ba917b660c7e9dfae681542b5325f228db",
279
+ # Vicuna
280
+ "replicate/vicuna-13b:6282abe6a492de4145d7bb601023762212f9ddbbe78278bd6771c8b3b2f2a13b",
281
+ "joehoover/instructblip-vicuna13b:c4c54e3c8c97cd50c2d2fec9be3b6065563ccf7d43787fb99f84151b867178fe",
282
+ # Flan T-5
283
+ "daanelson/flan-t5-large:ce962b3f6792a57074a601d3979db5839697add2e4e02696b3ced4c022d4767f",
284
+ # Others
285
+ "replicate/dolly-v2-12b:ef0e1aefc61f8e096ebe4db6b2bacc297daf2ef6899f0f7e001ec445893500e5",
286
+ "replit/replit-code-v1-3b:b84f4c074b807211cd75e3e8b1589b6399052125b4c27106e43d47189e8415ad",
287
+ ]
288
+
289
+ huggingface_models: List = [
290
+ "meta-llama/Llama-2-7b-hf",
291
+ "meta-llama/Llama-2-7b-chat-hf",
292
+ "meta-llama/Llama-2-13b-hf",
293
+ "meta-llama/Llama-2-13b-chat-hf",
294
+ "meta-llama/Llama-2-70b-hf",
295
+ "meta-llama/Llama-2-70b-chat-hf",
296
+ "meta-llama/Llama-2-7b",
297
+ "meta-llama/Llama-2-7b-chat",
298
+ "meta-llama/Llama-2-13b",
299
+ "meta-llama/Llama-2-13b-chat",
300
+ "meta-llama/Llama-2-70b",
301
+ "meta-llama/Llama-2-70b-chat",
302
+ ] # these have been tested on extensively. But by default all text2text-generation and text-generation models are supported by liteLLM. - https://docs.litellm.ai/docs/providers
303
+
304
+ together_ai_models: List = [
305
+ # llama llms - chat
306
+ "togethercomputer/llama-2-70b-chat",
307
+ # llama llms - language / instruct
308
+ "togethercomputer/llama-2-70b",
309
+ "togethercomputer/LLaMA-2-7B-32K",
310
+ "togethercomputer/Llama-2-7B-32K-Instruct",
311
+ "togethercomputer/llama-2-7b",
312
+ # falcon llms
313
+ "togethercomputer/falcon-40b-instruct",
314
+ "togethercomputer/falcon-7b-instruct",
315
+ # alpaca
316
+ "togethercomputer/alpaca-7b",
317
+ # chat llms
318
+ "HuggingFaceH4/starchat-alpha",
319
+ # code llms
320
+ "togethercomputer/CodeLlama-34b",
321
+ "togethercomputer/CodeLlama-34b-Instruct",
322
+ "togethercomputer/CodeLlama-34b-Python",
323
+ "defog/sqlcoder",
324
+ "NumbersStation/nsql-llama-2-7B",
325
+ "WizardLM/WizardCoder-15B-V1.0",
326
+ "WizardLM/WizardCoder-Python-34B-V1.0",
327
+ # language llms
328
+ "NousResearch/Nous-Hermes-Llama2-13b",
329
+ "Austism/chronos-hermes-13b",
330
+ "upstage/SOLAR-0-70b-16bit",
331
+ "WizardLM/WizardLM-70B-V1.0",
332
+ ] # supports all together ai models, just pass in the model id e.g. completion(model="together_computer/replit_code_3b",...)
333
+
334
+
335
+ baseten_models: List = [
336
+ "qvv0xeq",
337
+ "q841o8w",
338
+ "31dxrj3",
339
+ ] # FALCON 7B # WizardLM # Mosaic ML
340
+
341
+
342
+ # used for Cost Tracking & Token counting
343
+ # https://azure.microsoft.com/en-in/pricing/details/cognitive-services/openai-service/
344
+ # Azure returns gpt-35-turbo in their responses, we need to map this to azure/gpt-3.5-turbo for token counting
345
+ azure_llms = {
346
+ "gpt-35-turbo": "azure/gpt-35-turbo",
347
+ "gpt-35-turbo-16k": "azure/gpt-35-turbo-16k",
348
+ "gpt-35-turbo-instruct": "azure/gpt-35-turbo-instruct",
349
+ }
350
+
351
+ azure_embedding_models = {
352
+ "ada": "azure/ada",
353
+ }
354
+
355
+ petals_models = [
356
+ "petals-team/StableBeluga2",
357
+ ]
358
+
359
+ ollama_models = ["llama2"]
360
+
361
+ maritalk_models = ["maritalk"]
362
+
363
+ model_list = (
364
+ open_ai_chat_completion_models
365
+ + open_ai_text_completion_models
366
+ + cohere_models
367
+ + anthropic_models
368
+ + replicate_models
369
+ + openrouter_models
370
+ + huggingface_models
371
+ + vertex_chat_models
372
+ + vertex_text_models
373
+ + ai21_models
374
+ + together_ai_models
375
+ + baseten_models
376
+ + aleph_alpha_models
377
+ + nlp_cloud_models
378
+ + ollama_models
379
+ + bedrock_models
380
+ + deepinfra_models
381
+ + perplexity_models
382
+ + maritalk_models
383
+ )
384
+
385
+ provider_list: List = [
386
+ "openai",
387
+ "custom_openai",
388
+ "text-completion-openai",
389
+ "cohere",
390
+ "anthropic",
391
+ "replicate",
392
+ "huggingface",
393
+ "together_ai",
394
+ "openrouter",
395
+ "vertex_ai",
396
+ "palm",
397
+ "gemini",
398
+ "ai21",
399
+ "baseten",
400
+ "azure",
401
+ "sagemaker",
402
+ "bedrock",
403
+ "vllm",
404
+ "nlp_cloud",
405
+ "petals",
406
+ "oobabooga",
407
+ "ollama",
408
+ "ollama_chat",
409
+ "deepinfra",
410
+ "perplexity",
411
+ "anyscale",
412
+ "mistral",
413
+ "maritalk",
414
+ "voyage",
415
+ "cloudflare",
416
+ "xinference",
417
+ "custom", # custom apis
418
+ ]
419
+
420
+ models_by_provider: dict = {
421
+ "openai": open_ai_chat_completion_models + open_ai_text_completion_models,
422
+ "cohere": cohere_models,
423
+ "anthropic": anthropic_models,
424
+ "replicate": replicate_models,
425
+ "huggingface": huggingface_models,
426
+ "together_ai": together_ai_models,
427
+ "baseten": baseten_models,
428
+ "openrouter": openrouter_models,
429
+ "vertex_ai": vertex_chat_models + vertex_text_models,
430
+ "ai21": ai21_models,
431
+ "bedrock": bedrock_models,
432
+ "petals": petals_models,
433
+ "ollama": ollama_models,
434
+ "deepinfra": deepinfra_models,
435
+ "perplexity": perplexity_models,
436
+ "maritalk": maritalk_models,
437
+ }
438
+
439
+ # mapping for those models which have larger equivalents
440
+ longer_context_model_fallback_dict: dict = {
441
+ # openai chat completion models
442
+ "gpt-3.5-turbo": "gpt-3.5-turbo-16k",
443
+ "gpt-3.5-turbo-0301": "gpt-3.5-turbo-16k-0301",
444
+ "gpt-3.5-turbo-0613": "gpt-3.5-turbo-16k-0613",
445
+ "gpt-4": "gpt-4-32k",
446
+ "gpt-4-0314": "gpt-4-32k-0314",
447
+ "gpt-4-0613": "gpt-4-32k-0613",
448
+ # anthropic
449
+ "claude-instant-1": "claude-2",
450
+ "claude-instant-1.2": "claude-2",
451
+ # vertexai
452
+ "chat-bison": "chat-bison-32k",
453
+ "chat-bison@001": "chat-bison-32k",
454
+ "codechat-bison": "codechat-bison-32k",
455
+ "codechat-bison@001": "codechat-bison-32k",
456
+ # openrouter
457
+ "openrouter/openai/gpt-3.5-turbo": "openrouter/openai/gpt-3.5-turbo-16k",
458
+ "openrouter/anthropic/claude-instant-v1": "openrouter/anthropic/claude-2",
459
+ }
460
+
461
+ ####### EMBEDDING MODELS ###################
462
+ open_ai_embedding_models: List = ["text-embedding-ada-002"]
463
+ cohere_embedding_models: List = [
464
+ "embed-english-v3.0",
465
+ "embed-english-light-v3.0",
466
+ "embed-multilingual-v3.0",
467
+ "embed-english-v2.0",
468
+ "embed-english-light-v2.0",
469
+ "embed-multilingual-v2.0",
470
+ ]
471
+ bedrock_embedding_models: List = [
472
+ "amazon.titan-embed-text-v1",
473
+ "cohere.embed-english-v3",
474
+ "cohere.embed-multilingual-v3",
475
+ ]
476
+
477
+ all_embedding_models = (
478
+ open_ai_embedding_models + cohere_embedding_models + bedrock_embedding_models
479
+ )
480
+
481
+ ####### IMAGE GENERATION MODELS ###################
482
+ openai_image_generation_models = ["dall-e-2", "dall-e-3"]
483
+
484
+
485
+ from .timeout import timeout
486
+ from .utils import (
487
+ client,
488
+ exception_type,
489
+ get_optional_params,
490
+ modify_integration,
491
+ token_counter,
492
+ cost_per_token,
493
+ completion_cost,
494
+ get_litellm_params,
495
+ Logging,
496
+ acreate,
497
+ get_model_list,
498
+ get_max_tokens,
499
+ get_model_info,
500
+ register_prompt_template,
501
+ validate_environment,
502
+ check_valid_key,
503
+ get_llm_provider,
504
+ register_model,
505
+ encode,
506
+ decode,
507
+ _calculate_retry_after,
508
+ _should_retry,
509
+ get_secret,
510
+ )
511
+ from .llms.huggingface_restapi import HuggingfaceConfig
512
+ from .llms.anthropic import AnthropicConfig
513
+ from .llms.replicate import ReplicateConfig
514
+ from .llms.cohere import CohereConfig
515
+ from .llms.ai21 import AI21Config
516
+ from .llms.together_ai import TogetherAIConfig
517
+ from .llms.cloudflare import CloudflareConfig
518
+ from .llms.palm import PalmConfig
519
+ from .llms.gemini import GeminiConfig
520
+ from .llms.nlp_cloud import NLPCloudConfig
521
+ from .llms.aleph_alpha import AlephAlphaConfig
522
+ from .llms.petals import PetalsConfig
523
+ from .llms.vertex_ai import VertexAIConfig
524
+ from .llms.sagemaker import SagemakerConfig
525
+ from .llms.ollama import OllamaConfig
526
+ from .llms.maritalk import MaritTalkConfig
527
+ from .llms.bedrock import (
528
+ AmazonTitanConfig,
529
+ AmazonAI21Config,
530
+ AmazonAnthropicConfig,
531
+ AmazonCohereConfig,
532
+ AmazonLlamaConfig,
533
+ )
534
+ from .llms.openai import OpenAIConfig, OpenAITextCompletionConfig
535
+ from .llms.azure import AzureOpenAIConfig, AzureOpenAIError
536
+ from .main import * # type: ignore
537
+ from .integrations import *
538
+ from .exceptions import (
539
+ AuthenticationError,
540
+ InvalidRequestError,
541
+ BadRequestError,
542
+ NotFoundError,
543
+ RateLimitError,
544
+ ServiceUnavailableError,
545
+ OpenAIError,
546
+ ContextWindowExceededError,
547
+ ContentPolicyViolationError,
548
+ BudgetExceededError,
549
+ APIError,
550
+ Timeout,
551
+ APIConnectionError,
552
+ APIResponseValidationError,
553
+ UnprocessableEntityError,
554
+ )
555
+ from .budget_manager import BudgetManager
556
+ from .proxy.proxy_cli import run_server
557
+ from .router import Router
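
Everything above is module-level configuration that downstream code reads at call time. As a point of reference, here is a minimal sketch — not part of the commit — of how an application might set a few of these globals after importing the package; only attributes defined in this file are used, and the values are illustrative.

# Sketch only: adjust litellm's module-level settings defined in __init__.py above.
import litellm

litellm.set_verbose = False                      # logging flag re-exported from litellm._logging
litellm.drop_params = True                       # silently drop provider-unsupported params
litellm.request_timeout = 600                    # reliability setting (seconds)
litellm.model_alias_map = {"prod-model": "gpt-3.5-turbo"}

# model_list / models_by_provider are built above from the model-cost map:
print(len(litellm.model_list), "models known to litellm")
print(litellm.models_by_provider.get("cohere", [])[:3])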
litellm/_logging.py ADDED
@@ -0,0 +1,30 @@
+ import logging
+
+ set_verbose = False
+
+ # Create a handler for the logger (you may need to adapt this based on your needs)
+ handler = logging.StreamHandler()
+ handler.setLevel(logging.DEBUG)
+
+ # Create a formatter and set it for the handler
+
+ formatter = logging.Formatter("\033[92m%(name)s - %(levelname)s\033[0m: %(message)s")
+
+ handler.setFormatter(formatter)
+
+
+ def print_verbose(print_statement):
+     try:
+         if set_verbose:
+             print(print_statement)  # noqa
+     except:
+         pass
+
+
+ verbose_proxy_logger = logging.getLogger("LiteLLM Proxy")
+ verbose_router_logger = logging.getLogger("LiteLLM Router")
+ verbose_logger = logging.getLogger("LiteLLM")
+
+ # Add the handler to the logger
+ verbose_router_logger.addHandler(handler)
+ verbose_proxy_logger.addHandler(handler)
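
A short usage sketch for the loggers above (not part of the commit). Note that only the router and proxy loggers get the coloured StreamHandler in this file; verbose_logger is created without a handler, so the sketch attaches one explicitly.

# Sketch only: wire LiteLLM's named loggers into an application's logging setup.
import logging
from litellm import _logging

_logging.set_verbose = True                          # enables print_verbose() output
_logging.verbose_router_logger.setLevel(logging.INFO)
_logging.verbose_router_logger.info("router event")  # emitted via the handler added above

_logging.verbose_logger.addHandler(logging.StreamHandler())  # no handler attached by default
_logging.verbose_logger.setLevel(logging.DEBUG)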
litellm/_redis.py ADDED
@@ -0,0 +1,93 @@
+ # +-----------------------------------------------+
+ # |                                               |
+ # |           Give Feedback / Get Help            |
+ # | https://github.com/BerriAI/litellm/issues/new |
+ # |                                               |
+ # +-----------------------------------------------+
+ #
+ #  Thank you users! We ❤️ you! - Krrish & Ishaan
+
+ # s/o [@Frank Colson](https://www.linkedin.com/in/frank-colson-422b9b183/) for this redis implementation
+ import os
+ import inspect
+ import redis, litellm
+ from typing import List, Optional
+
+
+ def _get_redis_kwargs():
+     arg_spec = inspect.getfullargspec(redis.Redis)
+
+     # Only allow primitive arguments
+     exclude_args = {
+         "self",
+         "connection_pool",
+         "retry",
+     }
+
+     include_args = ["url"]
+
+     available_args = [x for x in arg_spec.args if x not in exclude_args] + include_args
+
+     return available_args
+
+
+ def _get_redis_env_kwarg_mapping():
+     PREFIX = "REDIS_"
+
+     return {f"{PREFIX}{x.upper()}": x for x in _get_redis_kwargs()}
+
+
+ def _redis_kwargs_from_environment():
+     mapping = _get_redis_env_kwarg_mapping()
+
+     return_dict = {}
+     for k, v in mapping.items():
+         value = litellm.get_secret(k, default_value=None)  # check os.environ/key vault
+         if value is not None:
+             return_dict[v] = value
+     return return_dict
+
+
+ def get_redis_url_from_environment():
+     if "REDIS_URL" in os.environ:
+         return os.environ["REDIS_URL"]
+
+     if "REDIS_HOST" not in os.environ or "REDIS_PORT" not in os.environ:
+         raise ValueError(
+             "Either 'REDIS_URL' or both 'REDIS_HOST' and 'REDIS_PORT' must be specified for Redis."
+         )
+
+     if "REDIS_PASSWORD" in os.environ:
+         redis_password = f":{os.environ['REDIS_PASSWORD']}@"
+     else:
+         redis_password = ""
+
+     return (
+         f"redis://{redis_password}{os.environ['REDIS_HOST']}:{os.environ['REDIS_PORT']}"
+     )
+
+
+ def get_redis_client(**env_overrides):
+     ### check if "os.environ/<key-name>" passed in
+     for k, v in env_overrides.items():
+         if isinstance(v, str) and v.startswith("os.environ/"):
+             v = v.replace("os.environ/", "")
+             value = litellm.get_secret(v)
+             env_overrides[k] = value
+
+     redis_kwargs = {
+         **_redis_kwargs_from_environment(),
+         **env_overrides,
+     }
+
+     if "url" in redis_kwargs and redis_kwargs["url"] is not None:
+         redis_kwargs.pop("host", None)
+         redis_kwargs.pop("port", None)
+         redis_kwargs.pop("db", None)
+         redis_kwargs.pop("password", None)
+
+         return redis.Redis.from_url(**redis_kwargs)
+     elif "host" not in redis_kwargs or redis_kwargs["host"] is None:
+         raise ValueError("Either 'host' or 'url' must be specified for redis.")
+     litellm.print_verbose(f"redis_kwargs: {redis_kwargs}")
+     return redis.Redis(**redis_kwargs)
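
For orientation, a hedged usage sketch (not in the commit): _get_redis_env_kwarg_mapping turns every primitive redis.Redis() keyword into a REDIS_* environment variable, so a client can be built from the environment alone. The host and port below are placeholders for a locally running Redis.

# Sketch only: assumes a reachable Redis server at the placeholder host/port.
import os
from litellm._redis import get_redis_client, get_redis_url_from_environment

os.environ["REDIS_HOST"] = "localhost"   # resolved to redis.Redis(host=...)
os.environ["REDIS_PORT"] = "6379"        # resolved to redis.Redis(port=...)

client = get_redis_client()              # kwargs picked up from the REDIS_* variables
client.set("litellm-smoke-test", "ok")

print(get_redis_url_from_environment())  # -> "redis://localhost:6379"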
litellm/_version.py ADDED
@@ -0,0 +1,6 @@
+ import importlib_metadata
+
+ try:
+     version = importlib_metadata.version("litellm")
+ except:
+     pass
litellm/budget_manager.py ADDED
@@ -0,0 +1,206 @@
+ import os, json, time
+ import litellm
+ from litellm.utils import ModelResponse
+ import requests, threading
+ from typing import Optional, Union, Literal
+
+
+ class BudgetManager:
+     def __init__(
+         self,
+         project_name: str,
+         client_type: str = "local",
+         api_base: Optional[str] = None,
+     ):
+         self.client_type = client_type
+         self.project_name = project_name
+         self.api_base = api_base or "https://api.litellm.ai"
+         ## load the data or init the initial dictionaries
+         self.load_data()
+
+     def print_verbose(self, print_statement):
+         try:
+             if litellm.set_verbose:
+                 import logging
+
+                 logging.info(print_statement)
+         except:
+             pass
+
+     def load_data(self):
+         if self.client_type == "local":
+             # Check if user dict file exists
+             if os.path.isfile("user_cost.json"):
+                 # Load the user dict
+                 with open("user_cost.json", "r") as json_file:
+                     self.user_dict = json.load(json_file)
+             else:
+                 self.print_verbose("User Dictionary not found!")
+                 self.user_dict = {}
+             self.print_verbose(f"user dict from local: {self.user_dict}")
+         elif self.client_type == "hosted":
+             # Load the user_dict from hosted db
+             url = self.api_base + "/get_budget"
+             headers = {"Content-Type": "application/json"}
+             data = {"project_name": self.project_name}
+             response = requests.post(url, headers=headers, json=data)
+             response = response.json()
+             if response["status"] == "error":
+                 self.user_dict = (
+                     {}
+                 )  # assume this means the user dict hasn't been stored yet
+             else:
+                 self.user_dict = response["data"]
+
+     def create_budget(
+         self,
+         total_budget: float,
+         user: str,
+         duration: Optional[Literal["daily", "weekly", "monthly", "yearly"]] = None,
+         created_at: float = time.time(),
+     ):
+         self.user_dict[user] = {"total_budget": total_budget}
+         if duration is None:
+             return self.user_dict[user]
+
+         if duration == "daily":
+             duration_in_days = 1
+         elif duration == "weekly":
+             duration_in_days = 7
+         elif duration == "monthly":
+             duration_in_days = 28
+         elif duration == "yearly":
+             duration_in_days = 365
+         else:
+             raise ValueError(
+                 """duration needs to be one of ["daily", "weekly", "monthly", "yearly"]"""
+             )
+         self.user_dict[user] = {
+             "total_budget": total_budget,
+             "duration": duration_in_days,
+             "created_at": created_at,
+             "last_updated_at": created_at,
+         }
+         self._save_data_thread()  # [Non-Blocking] Update persistent storage without blocking execution
+         return self.user_dict[user]
+
+     def projected_cost(self, model: str, messages: list, user: str):
+         text = "".join(message["content"] for message in messages)
+         prompt_tokens = litellm.token_counter(model=model, text=text)
+         prompt_cost, _ = litellm.cost_per_token(
+             model=model, prompt_tokens=prompt_tokens, completion_tokens=0
+         )
+         current_cost = self.user_dict[user].get("current_cost", 0)
+         projected_cost = prompt_cost + current_cost
+         return projected_cost
+
+     def get_total_budget(self, user: str):
+         return self.user_dict[user]["total_budget"]
+
+     def update_cost(
+         self,
+         user: str,
+         completion_obj: Optional[ModelResponse] = None,
+         model: Optional[str] = None,
+         input_text: Optional[str] = None,
+         output_text: Optional[str] = None,
+     ):
+         if model and input_text and output_text:
+             prompt_tokens = litellm.token_counter(
+                 model=model, messages=[{"role": "user", "content": input_text}]
+             )
+             completion_tokens = litellm.token_counter(
+                 model=model, messages=[{"role": "user", "content": output_text}]
+             )
+             (
+                 prompt_tokens_cost_usd_dollar,
+                 completion_tokens_cost_usd_dollar,
+             ) = litellm.cost_per_token(
+                 model=model,
+                 prompt_tokens=prompt_tokens,
+                 completion_tokens=completion_tokens,
+             )
+             cost = prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar
+         elif completion_obj:
+             cost = litellm.completion_cost(completion_response=completion_obj)
+             model = completion_obj[
+                 "model"
+             ]  # if this throws an error try, model = completion_obj['model']
+         else:
+             raise ValueError(
+                 "Either a chat completion object or the text response needs to be passed in. Learn more - https://docs.litellm.ai/docs/budget_manager"
+             )
+
+         self.user_dict[user]["current_cost"] = cost + self.user_dict[user].get(
+             "current_cost", 0
+         )
+         if "model_cost" in self.user_dict[user]:
+             self.user_dict[user]["model_cost"][model] = cost + self.user_dict[user][
+                 "model_cost"
+             ].get(model, 0)
+         else:
+             self.user_dict[user]["model_cost"] = {model: cost}
+
+         self._save_data_thread()  # [Non-Blocking] Update persistent storage without blocking execution
+         return {"user": self.user_dict[user]}
+
+     def get_current_cost(self, user):
+         return self.user_dict[user].get("current_cost", 0)
+
+     def get_model_cost(self, user):
+         return self.user_dict[user].get("model_cost", 0)
+
+     def is_valid_user(self, user: str) -> bool:
+         return user in self.user_dict
+
+     def get_users(self):
+         return list(self.user_dict.keys())
+
+     def reset_cost(self, user):
+         self.user_dict[user]["current_cost"] = 0
+         self.user_dict[user]["model_cost"] = {}
+         return {"user": self.user_dict[user]}
+
+     def reset_on_duration(self, user: str):
+         # Get current and creation time
+         last_updated_at = self.user_dict[user]["last_updated_at"]
+         current_time = time.time()
+
+         # Convert duration from days to seconds
+         duration_in_seconds = self.user_dict[user]["duration"] * 24 * 60 * 60
+
+         # Check if duration has elapsed
+         if current_time - last_updated_at >= duration_in_seconds:
+             # Reset cost if duration has elapsed and update the creation time
+             self.reset_cost(user)
+             self.user_dict[user]["last_updated_at"] = current_time
+             self._save_data_thread()  # Save the data
+
+     def update_budget_all_users(self):
+         for user in self.get_users():
+             if "duration" in self.user_dict[user]:
+                 self.reset_on_duration(user)
+
+     def _save_data_thread(self):
+         thread = threading.Thread(
+             target=self.save_data
+         )  # [Non-Blocking]: saves data without blocking execution
+         thread.start()
+
+     def save_data(self):
+         if self.client_type == "local":
+             import json
+
+             # save the user dict
+             with open("user_cost.json", "w") as json_file:
+                 json.dump(
+                     self.user_dict, json_file, indent=4
+                 )  # Indent for pretty formatting
+             return {"status": "success"}
+         elif self.client_type == "hosted":
+             url = self.api_base + "/set_budget"
+             headers = {"Content-Type": "application/json"}
+             data = {"project_name": self.project_name, "user_dict": self.user_dict}
+             response = requests.post(url, headers=headers, json=data)
+             response = response.json()
+             return response
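
A minimal end-to-end sketch of the class above (not part of the commit). The model name and budget values are placeholders, and litellm.completion() is assumed from the package's public API; with client_type="local" the manager persists spend to user_cost.json in the working directory.

# Sketch only: track per-user spend locally and gate calls on the remaining budget.
import litellm
from litellm import BudgetManager

budget_manager = BudgetManager(project_name="demo_project")
budget_manager.create_budget(total_budget=10.0, user="user-123", duration="monthly")

if budget_manager.get_current_cost(user="user-123") < budget_manager.get_total_budget("user-123"):
    response = litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hello"}],
    )
    budget_manager.update_cost(completion_obj=response, user="user-123")

print(budget_manager.get_model_cost(user="user-123"))   # per-model spend for this user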
litellm/caching.py ADDED
@@ -0,0 +1,678 @@
1
+ # +-----------------------------------------------+
2
+ # | |
3
+ # | Give Feedback / Get Help |
4
+ # | https://github.com/BerriAI/litellm/issues/new |
5
+ # | |
6
+ # +-----------------------------------------------+
7
+ #
8
+ # Thank you users! We ❤️ you! - Krrish & Ishaan
9
+
10
+ import litellm
11
+ import time, logging
12
+ import json, traceback, ast, hashlib
13
+ from typing import Optional, Literal, List, Union, Any
14
+ from openai._models import BaseModel as OpenAIObject
15
+
16
+
17
+ def print_verbose(print_statement):
18
+ try:
19
+ if litellm.set_verbose:
20
+ print(print_statement) # noqa
21
+ except:
22
+ pass
23
+
24
+
25
+ class BaseCache:
26
+ def set_cache(self, key, value, **kwargs):
27
+ raise NotImplementedError
28
+
29
+ def get_cache(self, key, **kwargs):
30
+ raise NotImplementedError
31
+
32
+
33
+ class InMemoryCache(BaseCache):
34
+ def __init__(self):
35
+ # if users don't provider one, use the default litellm cache
36
+ self.cache_dict = {}
37
+ self.ttl_dict = {}
38
+
39
+ def set_cache(self, key, value, **kwargs):
40
+ self.cache_dict[key] = value
41
+ if "ttl" in kwargs:
42
+ self.ttl_dict[key] = time.time() + kwargs["ttl"]
43
+
44
+ def get_cache(self, key, **kwargs):
45
+ if key in self.cache_dict:
46
+ if key in self.ttl_dict:
47
+ if time.time() > self.ttl_dict[key]:
48
+ self.cache_dict.pop(key, None)
49
+ return None
50
+ original_cached_response = self.cache_dict[key]
51
+ try:
52
+ cached_response = json.loads(original_cached_response)
53
+ except:
54
+ cached_response = original_cached_response
55
+ return cached_response
56
+ return None
57
+
58
+ def flush_cache(self):
59
+ self.cache_dict.clear()
60
+ self.ttl_dict.clear()
61
+
62
+
63
+ class RedisCache(BaseCache):
64
+ def __init__(self, host=None, port=None, password=None, **kwargs):
65
+ import redis
66
+
67
+ # if users don't provider one, use the default litellm cache
68
+ from ._redis import get_redis_client
69
+
70
+ redis_kwargs = {}
71
+ if host is not None:
72
+ redis_kwargs["host"] = host
73
+ if port is not None:
74
+ redis_kwargs["port"] = port
75
+ if password is not None:
76
+ redis_kwargs["password"] = password
77
+
78
+ redis_kwargs.update(kwargs)
79
+
80
+ self.redis_client = get_redis_client(**redis_kwargs)
81
+
82
+ def set_cache(self, key, value, **kwargs):
83
+ ttl = kwargs.get("ttl", None)
84
+ print_verbose(f"Set Redis Cache: key: {key}\nValue {value}")
85
+ try:
86
+ self.redis_client.set(name=key, value=str(value), ex=ttl)
87
+ except Exception as e:
88
+ # NON blocking - notify users Redis is throwing an exception
89
+ logging.debug("LiteLLM Caching: set() - Got exception from REDIS : ", e)
90
+
91
+ def get_cache(self, key, **kwargs):
92
+ try:
93
+ print_verbose(f"Get Redis Cache: key: {key}")
94
+ cached_response = self.redis_client.get(key)
95
+ print_verbose(
96
+ f"Got Redis Cache: key: {key}, cached_response {cached_response}"
97
+ )
98
+ if cached_response != None:
99
+ # cached_response comes back from Redis as bytes - decode and parse it into a dict/ModelResponse
100
+ cached_response = cached_response.decode(
101
+ "utf-8"
102
+ ) # Convert bytes to string
103
+ try:
104
+ cached_response = json.loads(
105
+ cached_response
106
+ ) # Convert string to dictionary
107
+ except:
108
+ cached_response = ast.literal_eval(cached_response)
109
+ return cached_response
110
+ except Exception as e:
111
+ # NON blocking - notify users Redis is throwing an exception
112
+ traceback.print_exc()
113
+ logging.debug("LiteLLM Caching: get() - Got exception from REDIS: ", e)
114
+
115
+ def flush_cache(self):
116
+ self.redis_client.flushall()
117
+
118
+
119
+ class S3Cache(BaseCache):
120
+ def __init__(
121
+ self,
122
+ s3_bucket_name,
123
+ s3_region_name=None,
124
+ s3_api_version=None,
125
+ s3_use_ssl=True,
126
+ s3_verify=None,
127
+ s3_endpoint_url=None,
128
+ s3_aws_access_key_id=None,
129
+ s3_aws_secret_access_key=None,
130
+ s3_aws_session_token=None,
131
+ s3_config=None,
132
+ **kwargs,
133
+ ):
134
+ import boto3
135
+
136
+ self.bucket_name = s3_bucket_name
137
+ # Create an S3 client with custom endpoint URL
138
+ self.s3_client = boto3.client(
139
+ "s3",
140
+ region_name=s3_region_name,
141
+ endpoint_url=s3_endpoint_url,
142
+ api_version=s3_api_version,
143
+ use_ssl=s3_use_ssl,
144
+ verify=s3_verify,
145
+ aws_access_key_id=s3_aws_access_key_id,
146
+ aws_secret_access_key=s3_aws_secret_access_key,
147
+ aws_session_token=s3_aws_session_token,
148
+ config=s3_config,
149
+ **kwargs,
150
+ )
151
+
152
+ def set_cache(self, key, value, **kwargs):
153
+ try:
154
+ print_verbose(f"LiteLLM SET Cache - S3. Key={key}. Value={value}")
155
+ ttl = kwargs.get("ttl", None)
156
+ # Convert value to JSON before storing in S3
157
+ serialized_value = json.dumps(value)
158
+ if ttl is not None:
159
+ cache_control = f"immutable, max-age={ttl}, s-maxage={ttl}"
160
+ import datetime
161
+
162
+ # Calculate expiration time
163
+ expiration_time = datetime.datetime.now() + ttl
164
+
165
+ # Upload the data to S3 with the calculated expiration time
166
+ self.s3_client.put_object(
167
+ Bucket=self.bucket_name,
168
+ Key=key,
169
+ Body=serialized_value,
170
+ Expires=expiration_time,
171
+ CacheControl=cache_control,
172
+ ContentType="application/json",
173
+ ContentLanguage="en",
174
+ ContentDisposition=f"inline; filename=\"{key}.json\""
175
+ )
176
+ else:
177
+ cache_control = "immutable, max-age=31536000, s-maxage=31536000"
178
+ # Upload the data to S3 without specifying Expires
179
+ self.s3_client.put_object(
180
+ Bucket=self.bucket_name,
181
+ Key=key,
182
+ Body=serialized_value,
183
+ CacheControl=cache_control,
184
+ ContentType="application/json",
185
+ ContentLanguage="en",
186
+ ContentDisposition=f"inline; filename=\"{key}.json\""
187
+ )
188
+ except Exception as e:
189
+ # NON blocking - notify users S3 is throwing an exception
190
+ print_verbose(f"S3 Caching: set_cache() - Got exception from S3: {e}")
191
+
192
+ def get_cache(self, key, **kwargs):
193
+ import boto3, botocore
194
+
195
+ try:
196
+ print_verbose(f"Get S3 Cache: key: {key}")
197
+ # Download the data from S3
198
+ cached_response = self.s3_client.get_object(
199
+ Bucket=self.bucket_name, Key=key
200
+ )
201
+
202
+ if cached_response != None:
203
+ # the S3 object body is bytes - decode and parse it into a dict/ModelResponse
204
+ cached_response = (
205
+ cached_response["Body"].read().decode("utf-8")
206
+ ) # Convert bytes to string
207
+ try:
208
+ cached_response = json.loads(
209
+ cached_response
210
+ ) # Convert string to dictionary
211
+ except Exception as e:
212
+ cached_response = ast.literal_eval(cached_response)
213
+ if type(cached_response) is not dict:
214
+ cached_response = dict(cached_response)
215
+ print_verbose(
216
+ f"Got S3 Cache: key: {key}, cached_response {cached_response}. Type Response {type(cached_response)}"
217
+ )
218
+
219
+ return cached_response
220
+ except botocore.exceptions.ClientError as e:
221
+ if e.response["Error"]["Code"] == "NoSuchKey":
222
+ print_verbose(
223
+ f"S3 Cache: The specified key '{key}' does not exist in the S3 bucket."
224
+ )
225
+ return None
226
+
227
+ except Exception as e:
228
+ # NON blocking - notify users S3 is throwing an exception
229
+ traceback.print_exc()
230
+ print_verbose(f"S3 Caching: get_cache() - Got exception from S3: {e}")
231
+
232
+ def flush_cache(self):
233
+ pass
234
+
235
+
236
+ class DualCache(BaseCache):
237
+ """
238
+ This updates both Redis and an in-memory cache simultaneously.
239
+ When data is updated or inserted, it is written to both the in-memory cache + Redis.
240
+ This ensures that even if Redis hasn't been updated yet, the in-memory cache reflects the most recent data.
241
+ """
242
+
243
+ def __init__(
244
+ self,
245
+ in_memory_cache: Optional[InMemoryCache] = None,
246
+ redis_cache: Optional[RedisCache] = None,
247
+ ) -> None:
248
+ super().__init__()
249
+ # If in_memory_cache is not provided, use the default InMemoryCache
250
+ self.in_memory_cache = in_memory_cache or InMemoryCache()
251
+ # If redis_cache is not provided, use the default RedisCache
252
+ self.redis_cache = redis_cache
253
+
254
+ def set_cache(self, key, value, local_only: bool = False, **kwargs):
255
+ # Update both Redis and in-memory cache
256
+ try:
257
+ print_verbose(f"set cache: key: {key}; value: {value}")
258
+ if self.in_memory_cache is not None:
259
+ self.in_memory_cache.set_cache(key, value, **kwargs)
260
+
261
+ if self.redis_cache is not None and local_only == False:
262
+ self.redis_cache.set_cache(key, value, **kwargs)
263
+ except Exception as e:
264
+ print_verbose(e)
265
+
266
+ def get_cache(self, key, local_only: bool = False, **kwargs):
267
+ # Try to fetch from in-memory cache first
268
+ try:
269
+ print_verbose(f"get cache: cache key: {key}; local_only: {local_only}")
270
+ result = None
271
+ if self.in_memory_cache is not None:
272
+ in_memory_result = self.in_memory_cache.get_cache(key, **kwargs)
273
+
274
+ print_verbose(f"in_memory_result: {in_memory_result}")
275
+ if in_memory_result is not None:
276
+ result = in_memory_result
277
+
278
+ if result is None and self.redis_cache is not None and local_only == False:
279
+ # If not found in in-memory cache, try fetching from Redis
280
+ redis_result = self.redis_cache.get_cache(key, **kwargs)
281
+
282
+ if redis_result is not None:
283
+ # Update in-memory cache with the value from Redis
284
+ self.in_memory_cache.set_cache(key, redis_result, **kwargs)
285
+
286
+ result = redis_result
287
+
288
+ print_verbose(f"get cache: cache result: {result}")
289
+ return result
290
+ except Exception as e:
291
+ traceback.print_exc()
292
+
293
+ def flush_cache(self):
294
+ if self.in_memory_cache is not None:
295
+ self.in_memory_cache.flush_cache()
296
+ if self.redis_cache is not None:
297
+ self.redis_cache.flush_cache()
298
+
299
+
300
+ #### LiteLLM.Completion / Embedding Cache ####
301
+ class Cache:
302
+ def __init__(
303
+ self,
304
+ type: Optional[Literal["local", "redis", "s3"]] = "local",
305
+ host: Optional[str] = None,
306
+ port: Optional[str] = None,
307
+ password: Optional[str] = None,
308
+ supported_call_types: Optional[
309
+ List[Literal["completion", "acompletion", "embedding", "aembedding"]]
310
+ ] = ["completion", "acompletion", "embedding", "aembedding"],
311
+ # s3 Bucket, boto3 configuration
312
+ s3_bucket_name: Optional[str] = None,
313
+ s3_region_name: Optional[str] = None,
314
+ s3_api_version: Optional[str] = None,
315
+ s3_use_ssl: Optional[bool] = True,
316
+ s3_verify: Optional[Union[bool, str]] = None,
317
+ s3_endpoint_url: Optional[str] = None,
318
+ s3_aws_access_key_id: Optional[str] = None,
319
+ s3_aws_secret_access_key: Optional[str] = None,
320
+ s3_aws_session_token: Optional[str] = None,
321
+ s3_config: Optional[Any] = None,
322
+ **kwargs,
323
+ ):
324
+ """
325
+ Initializes the cache based on the given type.
326
+
327
+ Args:
328
+ type (str, optional): The type of cache to initialize. Can be "local" or "redis". Defaults to "local".
329
+ host (str, optional): The host address for the Redis cache. Required if type is "redis".
330
+ port (int, optional): The port number for the Redis cache. Required if type is "redis".
331
+ password (str, optional): The password for the Redis cache. Required if type is "redis".
332
+ supported_call_types (list, optional): List of call types to cache for. Defaults to cache == on for all call types.
333
+ **kwargs: Additional keyword arguments for redis.Redis() cache
334
+
335
+ Raises:
336
+ ValueError: If an invalid cache type is provided.
337
+
338
+ Returns:
339
+ None. Cache is set as a litellm param
340
+ """
341
+ if type == "redis":
342
+ self.cache: BaseCache = RedisCache(host, port, password, **kwargs)
343
+ if type == "local":
344
+ self.cache = InMemoryCache()
345
+ if type == "s3":
346
+ self.cache = S3Cache(
347
+ s3_bucket_name=s3_bucket_name,
348
+ s3_region_name=s3_region_name,
349
+ s3_api_version=s3_api_version,
350
+ s3_use_ssl=s3_use_ssl,
351
+ s3_verify=s3_verify,
352
+ s3_endpoint_url=s3_endpoint_url,
353
+ s3_aws_access_key_id=s3_aws_access_key_id,
354
+ s3_aws_secret_access_key=s3_aws_secret_access_key,
355
+ s3_aws_session_token=s3_aws_session_token,
356
+ s3_config=s3_config,
357
+ **kwargs,
358
+ )
359
+ if "cache" not in litellm.input_callback:
360
+ litellm.input_callback.append("cache")
361
+ if "cache" not in litellm.success_callback:
362
+ litellm.success_callback.append("cache")
363
+ if "cache" not in litellm._async_success_callback:
364
+ litellm._async_success_callback.append("cache")
365
+ self.supported_call_types = supported_call_types # default to ["completion", "acompletion", "embedding", "aembedding"]
366
+ self.type = type
367
+
368
+ def get_cache_key(self, *args, **kwargs):
369
+ """
370
+ Get the cache key for the given arguments.
371
+
372
+ Args:
373
+ *args: args to litellm.completion() or embedding()
374
+ **kwargs: kwargs to litellm.completion() or embedding()
375
+
376
+ Returns:
377
+ str: The cache key generated from the arguments, or None if no cache key could be generated.
378
+ """
379
+ cache_key = ""
380
+ print_verbose(f"\nGetting Cache key. Kwargs: {kwargs}")
381
+
382
+ # for streaming, we use preset_cache_key. It's created in wrapper(), we do this because optional params like max_tokens, get transformed for bedrock -> max_new_tokens
383
+ if kwargs.get("litellm_params", {}).get("preset_cache_key", None) is not None:
384
+ print_verbose(f"\nReturning preset cache key: {cache_key}")
385
+ return kwargs.get("litellm_params", {}).get("preset_cache_key", None)
386
+
387
+ # sort kwargs by keys, since model: [gpt-4, temperature: 0.2, max_tokens: 200] == [temperature: 0.2, max_tokens: 200, model: gpt-4]
388
+ completion_kwargs = [
389
+ "model",
390
+ "messages",
391
+ "temperature",
392
+ "top_p",
393
+ "n",
394
+ "stop",
395
+ "max_tokens",
396
+ "presence_penalty",
397
+ "frequency_penalty",
398
+ "logit_bias",
399
+ "user",
400
+ "response_format",
401
+ "seed",
402
+ "tools",
403
+ "tool_choice",
404
+ ]
405
+ embedding_only_kwargs = [
406
+ "input",
407
+ "encoding_format",
408
+ ] # embedding kwargs = model, input, user, encoding_format. Model, user are checked in completion_kwargs
409
+
410
+ # combined_kwargs - NEEDS to be ordered across get_cache_key(). Do not use a set()
411
+ combined_kwargs = completion_kwargs + embedding_only_kwargs
412
+ for param in combined_kwargs:
413
+ # ignore litellm params here
414
+ if param in kwargs:
415
+ # check if param == model and model_group is passed in, then override model with model_group
416
+ if param == "model":
417
+ model_group = None
418
+ caching_group = None
419
+ metadata = kwargs.get("metadata", None)
420
+ litellm_params = kwargs.get("litellm_params", {})
421
+ if metadata is not None:
423
+ model_group = metadata.get("model_group", None)
424
+ caching_groups = metadata.get("caching_groups", None)
425
+ if caching_groups:
426
+ for group in caching_groups:
427
+ if model_group in group:
428
+ caching_group = group
429
+ break
430
+ if litellm_params is not None:
431
+ metadata = litellm_params.get("metadata", None)
432
+ if metadata is not None:
433
+ model_group = metadata.get("model_group", None)
434
+ caching_groups = metadata.get("caching_groups", None)
435
+ if caching_groups:
436
+ for group in caching_groups:
437
+ if model_group in group:
438
+ caching_group = group
439
+ break
440
+ param_value = (
441
+ caching_group or model_group or kwargs[param]
442
+ ) # use caching_group, if set then model_group if it exists, else use kwargs["model"]
443
+ else:
444
+ if kwargs[param] is None:
445
+ continue # ignore None params
446
+ param_value = kwargs[param]
447
+ cache_key += f"{str(param)}: {str(param_value)}"
448
+ print_verbose(f"\nCreated cache key: {cache_key}")
449
+ # Use hashlib to create a sha256 hash of the cache key
450
+ hash_object = hashlib.sha256(cache_key.encode())
451
+ # Hexadecimal representation of the hash
452
+ hash_hex = hash_object.hexdigest()
453
+ print_verbose(f"Hashed cache key (SHA-256): {hash_hex}")
454
+ return hash_hex
455
+
456
+ def generate_streaming_content(self, content):
457
+ chunk_size = 5 # Adjust the chunk size as needed
458
+ for i in range(0, len(content), chunk_size):
459
+ yield {
460
+ "choices": [
461
+ {
462
+ "delta": {
463
+ "role": "assistant",
464
+ "content": content[i : i + chunk_size],
465
+ }
466
+ }
467
+ ]
468
+ }
469
+ time.sleep(0.02)
470
+
471
+ def get_cache(self, *args, **kwargs):
472
+ """
473
+ Retrieves the cached result for the given arguments.
474
+
475
+ Args:
476
+ *args: args to litellm.completion() or embedding()
477
+ **kwargs: kwargs to litellm.completion() or embedding()
478
+
479
+ Returns:
480
+ The cached result if it exists, otherwise None.
481
+ """
482
+ try: # never block execution
483
+ if "cache_key" in kwargs:
484
+ cache_key = kwargs["cache_key"]
485
+ else:
486
+ cache_key = self.get_cache_key(*args, **kwargs)
487
+ if cache_key is not None:
488
+ cache_control_args = kwargs.get("cache", {})
489
+ max_age = cache_control_args.get(
490
+ "s-max-age", cache_control_args.get("s-maxage", float("inf"))
491
+ )
492
+ cached_result = self.cache.get_cache(cache_key)
493
+ # Check if a timestamp was stored with the cached response
494
+ if (
495
+ cached_result is not None
496
+ and isinstance(cached_result, dict)
497
+ and "timestamp" in cached_result
498
+ and max_age is not None
499
+ ):
500
+ timestamp = cached_result["timestamp"]
501
+ current_time = time.time()
502
+
503
+ # Calculate age of the cached response
504
+ response_age = current_time - timestamp
505
+
506
+ # Check if the cached response is older than the max-age
507
+ if response_age > max_age:
508
+ print_verbose(
509
+ f"Cached response for key {cache_key} is too old. Max-age: {max_age}s, Age: {response_age}s"
510
+ )
511
+ return None # Cached response is too old
512
+
513
+ # If the response is fresh, or there's no max-age requirement, return the cached response
514
+ # pull the stored response out of the cached entry and parse it back into a dict if it was cached as a JSON string
515
+ cached_response = cached_result.get("response")
516
+ try:
517
+ if isinstance(cached_response, dict):
518
+ pass
519
+ else:
520
+ cached_response = json.loads(
521
+ cached_response
522
+ ) # Convert string to dictionary
523
+ except:
524
+ cached_response = ast.literal_eval(cached_response)
525
+ return cached_response
526
+ return cached_result
527
+ except Exception as e:
528
+ print_verbose(f"An exception occurred: {traceback.format_exc()}")
529
+ return None
530
+
531
+ def add_cache(self, result, *args, **kwargs):
532
+ """
533
+ Adds a result to the cache.
534
+
535
+ Args:
536
+ *args: args to litellm.completion() or embedding()
537
+ **kwargs: kwargs to litellm.completion() or embedding()
538
+
539
+ Returns:
540
+ None
541
+ """
542
+ try:
543
+ if "cache_key" in kwargs:
544
+ cache_key = kwargs["cache_key"]
545
+ else:
546
+ cache_key = self.get_cache_key(*args, **kwargs)
547
+ if cache_key is not None:
548
+ if isinstance(result, OpenAIObject):
549
+ result = result.model_dump_json()
550
+
551
+ ## Get Cache-Controls ##
552
+ if kwargs.get("cache", None) is not None and isinstance(
553
+ kwargs.get("cache"), dict
554
+ ):
555
+ for k, v in kwargs.get("cache").items():
556
+ if k == "ttl":
557
+ kwargs["ttl"] = v
558
+ cached_data = {"timestamp": time.time(), "response": result}
559
+ self.cache.set_cache(cache_key, cached_data, **kwargs)
560
+ except Exception as e:
561
+ print_verbose(f"LiteLLM Cache: Exception add_cache: {str(e)}")
562
+ traceback.print_exc()
563
+ pass
564
+
565
+ async def _async_add_cache(self, result, *args, **kwargs):
566
+ self.add_cache(result, *args, **kwargs)
567
+
568
+
569
+ def enable_cache(
570
+ type: Optional[Literal["local", "redis", "s3"]] = "local",
571
+ host: Optional[str] = None,
572
+ port: Optional[str] = None,
573
+ password: Optional[str] = None,
574
+ supported_call_types: Optional[
575
+ List[Literal["completion", "acompletion", "embedding", "aembedding"]]
576
+ ] = ["completion", "acompletion", "embedding", "aembedding"],
577
+ **kwargs,
578
+ ):
579
+ """
580
+ Enable cache with the specified configuration.
581
+
582
+ Args:
583
+ type (Optional[Literal["local", "redis"]]): The type of cache to enable. Defaults to "local".
584
+ host (Optional[str]): The host address of the cache server. Defaults to None.
585
+ port (Optional[str]): The port number of the cache server. Defaults to None.
586
+ password (Optional[str]): The password for the cache server. Defaults to None.
587
+ supported_call_types (Optional[List[Literal["completion", "acompletion", "embedding", "aembedding"]]]):
588
+ The supported call types for the cache. Defaults to ["completion", "acompletion", "embedding", "aembedding"].
589
+ **kwargs: Additional keyword arguments.
590
+
591
+ Returns:
592
+ None
593
+
594
+ Raises:
595
+ None
596
+ """
597
+ print_verbose("LiteLLM: Enabling Cache")
598
+ if "cache" not in litellm.input_callback:
599
+ litellm.input_callback.append("cache")
600
+ if "cache" not in litellm.success_callback:
601
+ litellm.success_callback.append("cache")
602
+ if "cache" not in litellm._async_success_callback:
603
+ litellm._async_success_callback.append("cache")
604
+
605
+ if litellm.cache == None:
606
+ litellm.cache = Cache(
607
+ type=type,
608
+ host=host,
609
+ port=port,
610
+ password=password,
611
+ supported_call_types=supported_call_types,
612
+ **kwargs,
613
+ )
614
+ print_verbose(f"LiteLLM: Cache enabled, litellm.cache={litellm.cache}")
615
+ print_verbose(f"LiteLLM Cache: {vars(litellm.cache)}")
616
+
617
+
618
+ def update_cache(
619
+ type: Optional[Literal["local", "redis"]] = "local",
620
+ host: Optional[str] = None,
621
+ port: Optional[str] = None,
622
+ password: Optional[str] = None,
623
+ supported_call_types: Optional[
624
+ List[Literal["completion", "acompletion", "embedding", "aembedding"]]
625
+ ] = ["completion", "acompletion", "embedding", "aembedding"],
626
+ **kwargs,
627
+ ):
628
+ """
629
+ Update the cache for LiteLLM.
630
+
631
+ Args:
632
+ type (Optional[Literal["local", "redis"]]): The type of cache. Defaults to "local".
633
+ host (Optional[str]): The host of the cache. Defaults to None.
634
+ port (Optional[str]): The port of the cache. Defaults to None.
635
+ password (Optional[str]): The password for the cache. Defaults to None.
636
+ supported_call_types (Optional[List[Literal["completion", "acompletion", "embedding", "aembedding"]]]):
637
+ The supported call types for the cache. Defaults to ["completion", "acompletion", "embedding", "aembedding"].
638
+ **kwargs: Additional keyword arguments for the cache.
639
+
640
+ Returns:
641
+ None
642
+
643
+ """
644
+ print_verbose("LiteLLM: Updating Cache")
645
+ litellm.cache = Cache(
646
+ type=type,
647
+ host=host,
648
+ port=port,
649
+ password=password,
650
+ supported_call_types=supported_call_types,
651
+ **kwargs,
652
+ )
653
+ print_verbose(f"LiteLLM: Cache Updated, litellm.cache={litellm.cache}")
654
+ print_verbose(f"LiteLLM Cache: {vars(litellm.cache)}")
655
+
656
+
657
+ def disable_cache():
658
+ """
659
+ Disable the cache used by LiteLLM.
660
+
661
+ This function disables the cache used by the LiteLLM module. It removes the cache-related callbacks from the input_callback, success_callback, and _async_success_callback lists. It also sets the litellm.cache attribute to None.
662
+
663
+ Parameters:
664
+ None
665
+
666
+ Returns:
667
+ None
668
+ """
669
+ from contextlib import suppress
670
+
671
+ print_verbose("LiteLLM: Disabling Cache")
672
+ with suppress(ValueError):
673
+ litellm.input_callback.remove("cache")
674
+ litellm.success_callback.remove("cache")
675
+ litellm._async_success_callback.remove("cache")
676
+
677
+ litellm.cache = None
678
+ print_verbose(f"LiteLLM: Cache disabled, litellm.cache={litellm.cache}")
litellm/cost.json ADDED
@@ -0,0 +1,5 @@
+ {
+     "gpt-3.5-turbo-0613": 0.00015000000000000001,
+     "claude-2": 0.00016454,
+     "gpt-4-0613": 0.015408
+ }
litellm/deprecated_litellm_server/.env.template ADDED
@@ -0,0 +1,43 @@
+ # # set AUTH STRATEGY FOR LLM APIs - Defaults to using Environment Variables
+ # AUTH_STRATEGY = "ENV" # ENV or DYNAMIC, ENV always reads from environment variables, DYNAMIC reads request headers to set LLM api keys
+
+ # OPENAI_API_KEY = ""
+
+ # HUGGINGFACE_API_KEY=""
+
+ # TOGETHERAI_API_KEY=""
+
+ # REPLICATE_API_KEY=""
+
+ # ## bedrock / sagemaker
+ # AWS_ACCESS_KEY_ID = ""
+ # AWS_SECRET_ACCESS_KEY = ""
+
+ # AZURE_API_KEY = ""
+ # AZURE_API_BASE = ""
+ # AZURE_API_VERSION = ""
+
+ # ANTHROPIC_API_KEY = ""
+
+ # COHERE_API_KEY = ""
+
+ # ## CONFIG FILE ##
+ # # CONFIG_FILE_PATH = "" # uncomment to point to config file
+
+ # ## LOGGING ##
+
+ # SET_VERBOSE = "False" # set to 'True' to see detailed input/output logs
+
+ # ### LANGFUSE
+ # LANGFUSE_PUBLIC_KEY = ""
+ # LANGFUSE_SECRET_KEY = ""
+ # # Optional, defaults to https://cloud.langfuse.com
+ # LANGFUSE_HOST = "" # optional
+
+
+ # ## CACHING ##
+
+ # ### REDIS
+ # REDIS_HOST = ""
+ # REDIS_PORT = ""
+ # REDIS_PASSWORD = ""
litellm/deprecated_litellm_server/Dockerfile ADDED
@@ -0,0 +1,10 @@
+ # FROM python:3.10
+
+ # ENV LITELLM_CONFIG_PATH="/litellm.secrets.toml"
+ # COPY . /app
+ # WORKDIR /app
+ # RUN pip install -r requirements.txt
+
+ # EXPOSE $PORT
+
+ # CMD exec uvicorn main:app --host 0.0.0.0 --port $PORT --workers 10
litellm/deprecated_litellm_server/README.md ADDED
@@ -0,0 +1,3 @@
+ # litellm-server [experimental]
+
+ Deprecated. See litellm/proxy
litellm/deprecated_litellm_server/__init__.py ADDED
@@ -0,0 +1,2 @@
+ # from .main import *
+ # from .server_utils import *
litellm/deprecated_litellm_server/main.py ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # import os, traceback
2
+ # from fastapi import FastAPI, Request, HTTPException
3
+ # from fastapi.routing import APIRouter
4
+ # from fastapi.responses import StreamingResponse, FileResponse
5
+ # from fastapi.middleware.cors import CORSMiddleware
6
+ # import json, sys
7
+ # from typing import Optional
8
+ # sys.path.insert(
9
+ # 0, os.path.abspath("../")
10
+ # ) # Adds the parent directory to the system path - for litellm local dev
11
+ # import litellm
12
+
13
+ # try:
14
+ # from litellm.deprecated_litellm_server.server_utils import set_callbacks, load_router_config, print_verbose
15
+ # except ImportError:
16
+ # from litellm.deprecated_litellm_server.server_utils import set_callbacks, load_router_config, print_verbose
17
+ # import dotenv
18
+ # dotenv.load_dotenv() # load env variables
19
+
20
+ # app = FastAPI(docs_url="/", title="LiteLLM API")
21
+ # router = APIRouter()
22
+ # origins = ["*"]
23
+
24
+ # app.add_middleware(
25
+ # CORSMiddleware,
26
+ # allow_origins=origins,
27
+ # allow_credentials=True,
28
+ # allow_methods=["*"],
29
+ # allow_headers=["*"],
30
+ # )
31
+ # #### GLOBAL VARIABLES ####
32
+ # llm_router: Optional[litellm.Router] = None
33
+ # llm_model_list: Optional[list] = None
34
+ # server_settings: Optional[dict] = None
35
+
36
+ # set_callbacks() # sets litellm callbacks for logging if they exist in the environment
37
+
38
+ # if "CONFIG_FILE_PATH" in os.environ:
39
+ # llm_router, llm_model_list, server_settings = load_router_config(router=llm_router, config_file_path=os.getenv("CONFIG_FILE_PATH"))
40
+ # else:
41
+ # llm_router, llm_model_list, server_settings = load_router_config(router=llm_router)
42
+ # #### API ENDPOINTS ####
43
+ # @router.get("/v1/models")
44
+ # @router.get("/models") # if project requires model list
45
+ # def model_list():
46
+ # all_models = litellm.utils.get_valid_models()
47
+ # if llm_model_list:
48
+ # all_models += llm_model_list
49
+ # return dict(
50
+ # data=[
51
+ # {
52
+ # "id": model,
53
+ # "object": "model",
54
+ # "created": 1677610602,
55
+ # "owned_by": "openai",
56
+ # }
57
+ # for model in all_models
58
+ # ],
59
+ # object="list",
60
+ # )
61
+ # # for streaming
62
+ # def data_generator(response):
63
+
64
+ # for chunk in response:
65
+
66
+ # yield f"data: {json.dumps(chunk)}\n\n"
67
+
68
+ # @router.post("/v1/completions")
69
+ # @router.post("/completions")
70
+ # async def completion(request: Request):
71
+ # data = await request.json()
72
+ # response = litellm.completion(
73
+ # **data
74
+ # )
75
+ # if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
76
+ # return StreamingResponse(data_generator(response), media_type='text/event-stream')
77
+ # return response
78
+
79
+ # @router.post("/v1/embeddings")
80
+ # @router.post("/embeddings")
81
+ # async def embedding(request: Request):
82
+ # try:
83
+ # data = await request.json()
84
+ # # default to always using the "ENV" variables, only if AUTH_STRATEGY==DYNAMIC then reads headers
85
+ # if os.getenv("AUTH_STRATEGY", None) == "DYNAMIC" and "authorization" in request.headers: # if users pass LLM api keys as part of header
86
+ # api_key = request.headers.get("authorization")
87
+ # api_key = api_key.replace("Bearer", "").strip() # type: ignore
88
+ # if len(api_key.strip()) > 0:
89
+ # api_key = api_key
90
+ # data["api_key"] = api_key
91
+ # response = litellm.embedding(
92
+ # **data
93
+ # )
94
+ # return response
95
+ # except Exception as e:
96
+ # error_traceback = traceback.format_exc()
97
+ # error_msg = f"{str(e)}\n\n{error_traceback}"
98
+ # return {"error": error_msg}
99
+
100
+ # @router.post("/v1/chat/completions")
101
+ # @router.post("/chat/completions")
102
+ # @router.post("/openai/deployments/{model:path}/chat/completions") # azure compatible endpoint
103
+ # async def chat_completion(request: Request, model: Optional[str] = None):
104
+ # global llm_model_list, server_settings
105
+ # try:
106
+ # data = await request.json()
107
+ # server_model = server_settings.get("completion_model", None) if server_settings else None
108
+ # data["model"] = server_model or model or data["model"]
109
+ # ## CHECK KEYS ##
110
+ # # default to always using the "ENV" variables, only if AUTH_STRATEGY==DYNAMIC then reads headers
111
+ # # env_validation = litellm.validate_environment(model=data["model"])
112
+ # # if (env_validation['keys_in_environment'] is False or os.getenv("AUTH_STRATEGY", None) == "DYNAMIC") and ("authorization" in request.headers or "api-key" in request.headers): # if users pass LLM api keys as part of header
113
+ # # if "authorization" in request.headers:
114
+ # # api_key = request.headers.get("authorization")
115
+ # # elif "api-key" in request.headers:
116
+ # # api_key = request.headers.get("api-key")
117
+ # # print(f"api_key in headers: {api_key}")
118
+ # # if " " in api_key:
119
+ # # api_key = api_key.split(" ")[1]
120
+ # # print(f"api_key split: {api_key}")
121
+ # # if len(api_key) > 0:
122
+ # # api_key = api_key
123
+ # # data["api_key"] = api_key
124
+ # # print(f"api_key in data: {api_key}")
125
+ # ## CHECK CONFIG ##
126
+ # if llm_model_list and data["model"] in [m["model_name"] for m in llm_model_list]:
127
+ # for m in llm_model_list:
128
+ # if data["model"] == m["model_name"]:
129
+ # for key, value in m["litellm_params"].items():
130
+ # data[key] = value
131
+ # break
132
+ # response = litellm.completion(
133
+ # **data
134
+ # )
135
+ # if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
136
+ # return StreamingResponse(data_generator(response), media_type='text/event-stream')
137
+ # return response
138
+ # except Exception as e:
139
+ # error_traceback = traceback.format_exc()
140
+
141
+ # error_msg = f"{str(e)}\n\n{error_traceback}"
142
+ # # return {"error": error_msg}
143
+ # raise HTTPException(status_code=500, detail=error_msg)
144
+
145
+ # @router.post("/router/completions")
146
+ # async def router_completion(request: Request):
147
+ # global llm_router
148
+ # try:
149
+ # data = await request.json()
150
+ # if "model_list" in data:
151
+ # llm_router = litellm.Router(model_list=data.pop("model_list"))
152
+ # if llm_router is None:
153
+ # raise Exception("Save model list via config.yaml. Eg.: ` docker build -t myapp --build-arg CONFIG_FILE=myconfig.yaml .` or pass it in as model_list=[..] as part of the request body")
154
+
155
+ # # openai.ChatCompletion.create replacement
156
+ # response = await llm_router.acompletion(model="gpt-3.5-turbo",
157
+ # messages=[{"role": "user", "content": "Hey, how's it going?"}])
158
+
159
+ # if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
160
+ # return StreamingResponse(data_generator(response), media_type='text/event-stream')
161
+ # return response
162
+ # except Exception as e:
163
+ # error_traceback = traceback.format_exc()
164
+ # error_msg = f"{str(e)}\n\n{error_traceback}"
165
+ # return {"error": error_msg}
166
+
167
+ # @router.post("/router/embedding")
168
+ # async def router_embedding(request: Request):
169
+ # global llm_router
170
+ # try:
171
+ # data = await request.json()
172
+ # if "model_list" in data:
173
+ # llm_router = litellm.Router(model_list=data.pop("model_list"))
174
+ # if llm_router is None:
175
+ # raise Exception("Save model list via config.yaml. Eg.: ` docker build -t myapp --build-arg CONFIG_FILE=myconfig.yaml .` or pass it in as model_list=[..] as part of the request body")
176
+
177
+ # response = await llm_router.aembedding(model="gpt-3.5-turbo", # type: ignore
178
+ # messages=[{"role": "user", "content": "Hey, how's it going?"}])
179
+
180
+ # if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
181
+ # return StreamingResponse(data_generator(response), media_type='text/event-stream')
182
+ # return response
183
+ # except Exception as e:
184
+ # error_traceback = traceback.format_exc()
185
+ # error_msg = f"{str(e)}\n\n{error_traceback}"
186
+ # return {"error": error_msg}
187
+
188
+ # @router.get("/")
189
+ # async def home(request: Request):
190
+ # return "LiteLLM: RUNNING"
191
+
192
+
193
+ # app.include_router(router)
litellm/deprecated_litellm_server/requirements.txt ADDED
@@ -0,0 +1,7 @@
+ # openai
+ # fastapi
+ # uvicorn
+ # boto3
+ # litellm
+ # python-dotenv
+ # redis
litellm/deprecated_litellm_server/server_utils.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # import os, litellm
2
+ # import pkg_resources
3
+ # import dotenv
4
+ # dotenv.load_dotenv() # load env variables
5
+
6
+ # def print_verbose(print_statement):
7
+ # pass
8
+
9
+ # def get_package_version(package_name):
10
+ # try:
11
+ # package = pkg_resources.get_distribution(package_name)
12
+ # return package.version
13
+ # except pkg_resources.DistributionNotFound:
14
+ # return None
15
+
16
+ # # Usage example
17
+ # package_name = "litellm"
18
+ # version = get_package_version(package_name)
19
+ # if version:
20
+ # print_verbose(f"The version of {package_name} is {version}")
21
+ # else:
22
+ # print_verbose(f"{package_name} is not installed")
23
+ # import yaml
24
+ # import dotenv
25
+ # from typing import Optional
26
+ # dotenv.load_dotenv() # load env variables
27
+
28
+ # def set_callbacks():
29
+ # ## LOGGING
30
+ # if len(os.getenv("SET_VERBOSE", "")) > 0:
31
+ # if os.getenv("SET_VERBOSE") == "True":
32
+ # litellm.set_verbose = True
33
+ # print_verbose("\033[92mLiteLLM: Switched on verbose logging\033[0m")
34
+ # else:
35
+ # litellm.set_verbose = False
36
+
37
+ # ### LANGFUSE
38
+ # if (len(os.getenv("LANGFUSE_PUBLIC_KEY", "")) > 0 and len(os.getenv("LANGFUSE_SECRET_KEY", ""))) > 0 or len(os.getenv("LANGFUSE_HOST", "")) > 0:
39
+ # litellm.success_callback = ["langfuse"]
40
+ # print_verbose("\033[92mLiteLLM: Switched on Langfuse feature\033[0m")
41
+
42
+ # ## CACHING
43
+ # ### REDIS
44
+ # # if len(os.getenv("REDIS_HOST", "")) > 0 and len(os.getenv("REDIS_PORT", "")) > 0 and len(os.getenv("REDIS_PASSWORD", "")) > 0:
45
+ # # print(f"redis host: {os.getenv('REDIS_HOST')}; redis port: {os.getenv('REDIS_PORT')}; password: {os.getenv('REDIS_PASSWORD')}")
46
+ # # from litellm.caching import Cache
47
+ # # litellm.cache = Cache(type="redis", host=os.getenv("REDIS_HOST"), port=os.getenv("REDIS_PORT"), password=os.getenv("REDIS_PASSWORD"))
48
+ # # print("\033[92mLiteLLM: Switched on Redis caching\033[0m")
49
+
50
+
51
+ # def load_router_config(router: Optional[litellm.Router], config_file_path: Optional[str]='/app/config.yaml'):
52
+ # config = {}
53
+ # server_settings = {}
54
+ # try:
55
+ # if os.path.exists(config_file_path): # type: ignore
56
+ # with open(config_file_path, 'r') as file: # type: ignore
57
+ # config = yaml.safe_load(file)
58
+ # else:
59
+ # pass
60
+ # except:
61
+ # pass
62
+
63
+ # ## SERVER SETTINGS (e.g. default completion model = 'ollama/mistral')
64
+ # server_settings = config.get("server_settings", None)
65
+ # if server_settings:
66
+ # server_settings = server_settings
67
+
68
+ # ## LITELLM MODULE SETTINGS (e.g. litellm.drop_params=True,..)
69
+ # litellm_settings = config.get('litellm_settings', None)
70
+ # if litellm_settings:
71
+ # for key, value in litellm_settings.items():
72
+ # setattr(litellm, key, value)
73
+
74
+ # ## MODEL LIST
75
+ # model_list = config.get('model_list', None)
76
+ # if model_list:
77
+ # router = litellm.Router(model_list=model_list)
78
+
79
+ # ## ENVIRONMENT VARIABLES
80
+ # environment_variables = config.get('environment_variables', None)
81
+ # if environment_variables:
82
+ # for key, value in environment_variables.items():
83
+ # os.environ[key] = value
84
+
85
+ # return router, model_list, server_settings
litellm/exceptions.py ADDED
@@ -0,0 +1,200 @@
+ # +-----------------------------------------------+
+ # |                                               |
+ # |           Give Feedback / Get Help            |
+ # | https://github.com/BerriAI/litellm/issues/new |
+ # |                                               |
+ # +-----------------------------------------------+
+ #
+ #  Thank you users! We ❤️ you! - Krrish & Ishaan
+
+ ## LiteLLM versions of the OpenAI Exception Types
+
+ from openai import (
+     AuthenticationError,
+     BadRequestError,
+     NotFoundError,
+     RateLimitError,
+     APIStatusError,
+     OpenAIError,
+     APIError,
+     APITimeoutError,
+     APIConnectionError,
+     APIResponseValidationError,
+     UnprocessableEntityError,
+ )
+ import httpx
+
+
+ class AuthenticationError(AuthenticationError):  # type: ignore
+     def __init__(self, message, llm_provider, model, response: httpx.Response):
+         self.status_code = 401
+         self.message = message
+         self.llm_provider = llm_provider
+         self.model = model
+         super().__init__(
+             self.message, response=response, body=None
+         )  # Call the base class constructor with the parameters it needs
+
+
+ # raise when invalid models passed, example gpt-8
+ class NotFoundError(NotFoundError):  # type: ignore
+     def __init__(self, message, model, llm_provider, response: httpx.Response):
+         self.status_code = 404
+         self.message = message
+         self.model = model
+         self.llm_provider = llm_provider
+         super().__init__(
+             self.message, response=response, body=None
+         )  # Call the base class constructor with the parameters it needs
+
+
+ class BadRequestError(BadRequestError):  # type: ignore
+     def __init__(self, message, model, llm_provider, response: httpx.Response):
+         self.status_code = 400
+         self.message = message
+         self.model = model
+         self.llm_provider = llm_provider
+         super().__init__(
+             self.message, response=response, body=None
+         )  # Call the base class constructor with the parameters it needs
+
+
+ class UnprocessableEntityError(UnprocessableEntityError):  # type: ignore
+     def __init__(self, message, model, llm_provider, response: httpx.Response):
+         self.status_code = 422
+         self.message = message
+         self.model = model
+         self.llm_provider = llm_provider
+         super().__init__(
+             self.message, response=response, body=None
+         )  # Call the base class constructor with the parameters it needs
+
+
+ class Timeout(APITimeoutError):  # type: ignore
+     def __init__(self, message, model, llm_provider):
+         self.status_code = 408
+         self.message = message
+         self.model = model
+         self.llm_provider = llm_provider
+         request = httpx.Request(method="POST", url="https://api.openai.com/v1")
+         super().__init__(
+             request=request
+         )  # Call the base class constructor with the parameters it needs
+
+
+ class RateLimitError(RateLimitError):  # type: ignore
+     def __init__(self, message, llm_provider, model, response: httpx.Response):
+         self.status_code = 429
+         self.message = message
+         self.llm_provider = llm_provider
+         self.model = model
+         super().__init__(
+             self.message, response=response, body=None
+         )  # Call the base class constructor with the parameters it needs
+
+
+ # sub class of bad request error - meant to give more granularity for error handling context window exceeded errors
+ class ContextWindowExceededError(BadRequestError):  # type: ignore
+     def __init__(self, message, model, llm_provider, response: httpx.Response):
+         self.status_code = 400
+         self.message = message
+         self.model = model
+         self.llm_provider = llm_provider
+         super().__init__(
+             message=self.message,
+             model=self.model,  # type: ignore
+             llm_provider=self.llm_provider,  # type: ignore
+             response=response,
+         )  # Call the base class constructor with the parameters it needs
+
+
+ class ContentPolicyViolationError(BadRequestError):  # type: ignore
+     # Error code: 400 - {'error': {'code': 'content_policy_violation', 'message': 'Your request was rejected as a result of our safety system. Image descriptions generated from your prompt may contain text that is not allowed by our safety system. If you believe this was done in error, your request may succeed if retried, or by adjusting your prompt.', 'param': None, 'type': 'invalid_request_error'}}
+     def __init__(self, message, model, llm_provider, response: httpx.Response):
+         self.status_code = 400
+         self.message = message
+         self.model = model
+         self.llm_provider = llm_provider
+         super().__init__(
+             message=self.message,
+             model=self.model,  # type: ignore
+             llm_provider=self.llm_provider,  # type: ignore
+             response=response,
+         )  # Call the base class constructor with the parameters it needs
+
+
+ class ServiceUnavailableError(APIStatusError):  # type: ignore
+     def __init__(self, message, llm_provider, model, response: httpx.Response):
+         self.status_code = 503
+         self.message = message
+         self.llm_provider = llm_provider
+         self.model = model
+         super().__init__(
+             self.message, response=response, body=None
+         )  # Call the base class constructor with the parameters it needs
+
+
+ # raise this when the API returns an invalid response object - https://github.com/openai/openai-python/blob/1be14ee34a0f8e42d3f9aa5451aa4cb161f1781f/openai/api_requestor.py#L401
+ class APIError(APIError):  # type: ignore
+     def __init__(
+         self, status_code, message, llm_provider, model, request: httpx.Request
+     ):
+         self.status_code = status_code
+         self.message = message
+         self.llm_provider = llm_provider
+         self.model = model
+         super().__init__(self.message, request=request, body=None)  # type: ignore
+
+
+ # raised if an invalid request (not get, delete, put, post) is made
+ class APIConnectionError(APIConnectionError):  # type: ignore
+     def __init__(self, message, llm_provider, model, request: httpx.Request):
+         self.message = message
+         self.llm_provider = llm_provider
+         self.model = model
+         self.status_code = 500
+         super().__init__(message=self.message, request=request)
+
+
+ # raised if the API response cannot be validated / parsed into the expected response object
+ class APIResponseValidationError(APIResponseValidationError):  # type: ignore
+     def __init__(self, message, llm_provider, model):
+         self.message = message
+         self.llm_provider = llm_provider
+         self.model = model
+         request = httpx.Request(method="POST", url="https://api.openai.com/v1")
+         response = httpx.Response(status_code=500, request=request)
+         super().__init__(response=response, body=None, message=message)
+
+
+ class OpenAIError(OpenAIError):  # type: ignore
+     def __init__(self, original_exception):
+         self.status_code = original_exception.http_status
+         super().__init__(
+             http_body=original_exception.http_body,
+             http_status=original_exception.http_status,
+             json_body=original_exception.json_body,
+             headers=original_exception.headers,
+             code=original_exception.code,
+         )
+         self.llm_provider = "openai"
+
+
+ class BudgetExceededError(Exception):
+     def __init__(self, current_cost, max_budget):
+         self.current_cost = current_cost
+         self.max_budget = max_budget
+         message = f"Budget has been exceeded! Current cost: {current_cost}, Max budget: {max_budget}"
+         super().__init__(message)
+
+
+ ## DEPRECATED ##
+ class InvalidRequestError(BadRequestError):  # type: ignore
+     def __init__(self, message, model, llm_provider):
+         self.status_code = 400
+         self.message = message
+         self.model = model
+         self.llm_provider = llm_provider
+         super().__init__(
+             self.message, f"{self.model}"
+         )  # Call the base class constructor with the parameters it needs
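Because these classes subclass the OpenAI exception types, callers can handle provider-agnostic failures while still reading the extra llm_provider, model and status_code fields. A hedged sketch (model name and handling logic are illustrative):

    import litellm
    from litellm.exceptions import (
        ContextWindowExceededError,
        RateLimitError,
        APIConnectionError,
    )

    try:
        litellm.completion(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "hello"}],
        )
    except ContextWindowExceededError as e:
        print(f"{e.llm_provider}/{e.model}: prompt too long ({e.status_code}) - truncate and retry")
    except RateLimitError as e:
        print(f"rate limited by {e.llm_provider} ({e.status_code}) - back off and retry")
    except APIConnectionError as e:
        print(f"connection problem: {e.message}")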
litellm/integrations/__init__.py ADDED
@@ -0,0 +1 @@
+ from . import *
litellm/integrations/aispend.py ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #### What this does ####
2
+ # On success + failure, log events to aispend.io
3
+ import dotenv, os
4
+ import requests
5
+
6
+ dotenv.load_dotenv() # Loading env variables using dotenv
7
+ import traceback
8
+ import datetime
9
+
10
+ model_cost = {
11
+ "gpt-3.5-turbo": {
12
+ "max_tokens": 4000,
13
+ "input_cost_per_token": 0.0000015,
14
+ "output_cost_per_token": 0.000002,
15
+ },
16
+ "gpt-35-turbo": {
17
+ "max_tokens": 4000,
18
+ "input_cost_per_token": 0.0000015,
19
+ "output_cost_per_token": 0.000002,
20
+ }, # azure model name
21
+ "gpt-3.5-turbo-0613": {
22
+ "max_tokens": 4000,
23
+ "input_cost_per_token": 0.0000015,
24
+ "output_cost_per_token": 0.000002,
25
+ },
26
+ "gpt-3.5-turbo-0301": {
27
+ "max_tokens": 4000,
28
+ "input_cost_per_token": 0.0000015,
29
+ "output_cost_per_token": 0.000002,
30
+ },
31
+ "gpt-3.5-turbo-16k": {
32
+ "max_tokens": 16000,
33
+ "input_cost_per_token": 0.000003,
34
+ "output_cost_per_token": 0.000004,
35
+ },
36
+ "gpt-35-turbo-16k": {
37
+ "max_tokens": 16000,
38
+ "input_cost_per_token": 0.000003,
39
+ "output_cost_per_token": 0.000004,
40
+ }, # azure model name
41
+ "gpt-3.5-turbo-16k-0613": {
42
+ "max_tokens": 16000,
43
+ "input_cost_per_token": 0.000003,
44
+ "output_cost_per_token": 0.000004,
45
+ },
46
+ "gpt-4": {
47
+ "max_tokens": 8000,
48
+ "input_cost_per_token": 0.000003,
49
+ "output_cost_per_token": 0.00006,
50
+ },
51
+ "gpt-4-0613": {
52
+ "max_tokens": 8000,
53
+ "input_cost_per_token": 0.000003,
54
+ "output_cost_per_token": 0.00006,
55
+ },
56
+ "gpt-4-32k": {
57
+ "max_tokens": 8000,
58
+ "input_cost_per_token": 0.00006,
59
+ "output_cost_per_token": 0.00012,
60
+ },
61
+ "claude-instant-1": {
62
+ "max_tokens": 100000,
63
+ "input_cost_per_token": 0.00000163,
64
+ "output_cost_per_token": 0.00000551,
65
+ },
66
+ "claude-2": {
67
+ "max_tokens": 100000,
68
+ "input_cost_per_token": 0.00001102,
69
+ "output_cost_per_token": 0.00003268,
70
+ },
71
+ "text-bison-001": {
72
+ "max_tokens": 8192,
73
+ "input_cost_per_token": 0.000004,
74
+ "output_cost_per_token": 0.000004,
75
+ },
76
+ "chat-bison-001": {
77
+ "max_tokens": 4096,
78
+ "input_cost_per_token": 0.000002,
79
+ "output_cost_per_token": 0.000002,
80
+ },
81
+ "command-nightly": {
82
+ "max_tokens": 4096,
83
+ "input_cost_per_token": 0.000015,
84
+ "output_cost_per_token": 0.000015,
85
+ },
86
+ }
87
+
88
+
89
+ class AISpendLogger:
90
+ # Class variables or attributes
91
+ def __init__(self):
92
+ # Instance variables
93
+ self.account_id = os.getenv("AISPEND_ACCOUNT_ID")
94
+ self.api_key = os.getenv("AISPEND_API_KEY")
95
+
96
+ def price_calculator(self, model, response_obj, start_time, end_time):
97
+ # try and find if the model is in the model_cost map
98
+ # else default to the average of the costs
99
+ prompt_tokens_cost_usd_dollar = 0
100
+ completion_tokens_cost_usd_dollar = 0
101
+ if model in model_cost:
102
+ prompt_tokens_cost_usd_dollar = (
103
+ model_cost[model]["input_cost_per_token"]
104
+ * response_obj["usage"]["prompt_tokens"]
105
+ )
106
+ completion_tokens_cost_usd_dollar = (
107
+ model_cost[model]["output_cost_per_token"]
108
+ * response_obj["usage"]["completion_tokens"]
109
+ )
110
+ elif "replicate" in model:
111
+ # replicate models are charged based on time
112
+ # llama 2 runs on an nvidia a100 which costs $0.0032 per second - https://replicate.com/replicate/llama-2-70b-chat
113
+ model_run_time = end_time - start_time # assuming time in seconds
114
+ cost_usd_dollar = model_run_time * 0.0032
115
+ prompt_tokens_cost_usd_dollar = cost_usd_dollar / 2
116
+ completion_tokens_cost_usd_dollar = cost_usd_dollar / 2
117
+ else:
118
+ # calculate average input cost
119
+ input_cost_sum = 0
120
+ output_cost_sum = 0
121
+ for model in model_cost:
122
+ input_cost_sum += model_cost[model]["input_cost_per_token"]
123
+ output_cost_sum += model_cost[model]["output_cost_per_token"]
124
+ avg_input_cost = input_cost_sum / len(model_cost.keys())
125
+ avg_output_cost = output_cost_sum / len(model_cost.keys())
126
+ prompt_tokens_cost_usd_dollar = (
127
+ model_cost[model]["input_cost_per_token"]
128
+ * response_obj["usage"]["prompt_tokens"]
129
+ )
130
+ completion_tokens_cost_usd_dollar = (
131
+ model_cost[model]["output_cost_per_token"]
132
+ * response_obj["usage"]["completion_tokens"]
133
+ )
134
+ return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
135
+
136
+ def log_event(self, model, response_obj, start_time, end_time, print_verbose):
137
+ # Method definition
138
+ try:
139
+ print_verbose(
140
+ f"AISpend Logging - Enters logging function for model {model}"
141
+ )
142
+
143
+ url = f"https://aispend.io/api/v1/accounts/{self.account_id}/data"
144
+ headers = {
145
+ "Authorization": f"Bearer {self.api_key}",
146
+ "Content-Type": "application/json",
147
+ }
148
+
149
+ response_timestamp = datetime.datetime.fromtimestamp(
150
+ int(response_obj["created"])
151
+ ).strftime("%Y-%m-%d")
152
+
153
+ (
154
+ prompt_tokens_cost_usd_dollar,
155
+ completion_tokens_cost_usd_dollar,
156
+ ) = self.price_calculator(model, response_obj, start_time, end_time)
157
+ prompt_tokens_cost_usd_cent = prompt_tokens_cost_usd_dollar * 100
158
+ completion_tokens_cost_usd_cent = completion_tokens_cost_usd_dollar * 100
159
+ data = [
160
+ {
161
+ "requests": 1,
162
+ "requests_context": 1,
163
+ "context_tokens": response_obj["usage"]["prompt_tokens"],
164
+ "requests_generated": 1,
165
+ "generated_tokens": response_obj["usage"]["completion_tokens"],
166
+ "recorded_date": response_timestamp,
167
+ "model_id": response_obj["model"],
168
+ "generated_tokens_cost_usd_cent": prompt_tokens_cost_usd_cent,
169
+ "context_tokens_cost_usd_cent": completion_tokens_cost_usd_cent,
170
+ }
171
+ ]
172
+
173
+ print_verbose(f"AISpend Logging - final data object: {data}")
174
+ except:
175
+ # traceback.print_exc()
176
+ print_verbose(f"AISpend Logging Error - {traceback.format_exc()}")
177
+ pass
litellm/integrations/berrispend.py ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #### What this does ####
2
+ # On success + failure, log events to BerriSpend (berrispend.berri.ai)
3
+ import dotenv, os
4
+ import requests
5
+
6
+ dotenv.load_dotenv() # Loading env variables using dotenv
7
+ import traceback
8
+ import datetime
9
+
10
+ model_cost = {
11
+ "gpt-3.5-turbo": {
12
+ "max_tokens": 4000,
13
+ "input_cost_per_token": 0.0000015,
14
+ "output_cost_per_token": 0.000002,
15
+ },
16
+ "gpt-35-turbo": {
17
+ "max_tokens": 4000,
18
+ "input_cost_per_token": 0.0000015,
19
+ "output_cost_per_token": 0.000002,
20
+ }, # azure model name
21
+ "gpt-3.5-turbo-0613": {
22
+ "max_tokens": 4000,
23
+ "input_cost_per_token": 0.0000015,
24
+ "output_cost_per_token": 0.000002,
25
+ },
26
+ "gpt-3.5-turbo-0301": {
27
+ "max_tokens": 4000,
28
+ "input_cost_per_token": 0.0000015,
29
+ "output_cost_per_token": 0.000002,
30
+ },
31
+ "gpt-3.5-turbo-16k": {
32
+ "max_tokens": 16000,
33
+ "input_cost_per_token": 0.000003,
34
+ "output_cost_per_token": 0.000004,
35
+ },
36
+ "gpt-35-turbo-16k": {
37
+ "max_tokens": 16000,
38
+ "input_cost_per_token": 0.000003,
39
+ "output_cost_per_token": 0.000004,
40
+ }, # azure model name
41
+ "gpt-3.5-turbo-16k-0613": {
42
+ "max_tokens": 16000,
43
+ "input_cost_per_token": 0.000003,
44
+ "output_cost_per_token": 0.000004,
45
+ },
46
+ "gpt-4": {
47
+ "max_tokens": 8000,
48
+ "input_cost_per_token": 0.000003,
49
+ "output_cost_per_token": 0.00006,
50
+ },
51
+ "gpt-4-0613": {
52
+ "max_tokens": 8000,
53
+ "input_cost_per_token": 0.000003,
54
+ "output_cost_per_token": 0.00006,
55
+ },
56
+ "gpt-4-32k": {
57
+ "max_tokens": 8000,
58
+ "input_cost_per_token": 0.00006,
59
+ "output_cost_per_token": 0.00012,
60
+ },
61
+ "claude-instant-1": {
62
+ "max_tokens": 100000,
63
+ "input_cost_per_token": 0.00000163,
64
+ "output_cost_per_token": 0.00000551,
65
+ },
66
+ "claude-2": {
67
+ "max_tokens": 100000,
68
+ "input_cost_per_token": 0.00001102,
69
+ "output_cost_per_token": 0.00003268,
70
+ },
71
+ "text-bison-001": {
72
+ "max_tokens": 8192,
73
+ "input_cost_per_token": 0.000004,
74
+ "output_cost_per_token": 0.000004,
75
+ },
76
+ "chat-bison-001": {
77
+ "max_tokens": 4096,
78
+ "input_cost_per_token": 0.000002,
79
+ "output_cost_per_token": 0.000002,
80
+ },
81
+ "command-nightly": {
82
+ "max_tokens": 4096,
83
+ "input_cost_per_token": 0.000015,
84
+ "output_cost_per_token": 0.000015,
85
+ },
86
+ }
87
+
88
+
89
+ class BerriSpendLogger:
90
+ # Class variables or attributes
91
+ def __init__(self):
92
+ # Instance variables
93
+ self.account_id = os.getenv("BERRISPEND_ACCOUNT_ID")
94
+
95
+ def price_calculator(self, model, response_obj, start_time, end_time):
96
+ # try and find if the model is in the model_cost map
97
+ # else default to the average of the costs
98
+ prompt_tokens_cost_usd_dollar = 0
99
+ completion_tokens_cost_usd_dollar = 0
100
+ if model in model_cost:
101
+ prompt_tokens_cost_usd_dollar = (
102
+ model_cost[model]["input_cost_per_token"]
103
+ * response_obj["usage"]["prompt_tokens"]
104
+ )
105
+ completion_tokens_cost_usd_dollar = (
106
+ model_cost[model]["output_cost_per_token"]
107
+ * response_obj["usage"]["completion_tokens"]
108
+ )
109
+ elif "replicate" in model:
110
+ # replicate models are charged based on time
111
+ # llama 2 runs on an nvidia a100 which costs $0.0032 per second - https://replicate.com/replicate/llama-2-70b-chat
112
+ model_run_time = end_time - start_time # assuming time in seconds
113
+ cost_usd_dollar = model_run_time * 0.0032
114
+ prompt_tokens_cost_usd_dollar = cost_usd_dollar / 2
115
+ completion_tokens_cost_usd_dollar = cost_usd_dollar / 2
116
+ else:
117
+ # calculate average input cost
118
+ input_cost_sum = 0
119
+ output_cost_sum = 0
120
+ for model in model_cost:
121
+ input_cost_sum += model_cost[model]["input_cost_per_token"]
122
+ output_cost_sum += model_cost[model]["output_cost_per_token"]
123
+ avg_input_cost = input_cost_sum / len(model_cost.keys())
124
+ avg_output_cost = output_cost_sum / len(model_cost.keys())
125
+ prompt_tokens_cost_usd_dollar = (
126
+ model_cost[model]["input_cost_per_token"]
127
+ * response_obj["usage"]["prompt_tokens"]
128
+ )
129
+ completion_tokens_cost_usd_dollar = (
130
+ model_cost[model]["output_cost_per_token"]
131
+ * response_obj["usage"]["completion_tokens"]
132
+ )
133
+ return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
134
+
135
+ def log_event(
136
+ self, model, messages, response_obj, start_time, end_time, print_verbose
137
+ ):
138
+ # Method definition
139
+ try:
140
+ print_verbose(
141
+ f"BerriSpend Logging - Enters logging function for model {model}"
142
+ )
143
+
144
+ url = f"https://berrispend.berri.ai/spend"
145
+ headers = {"Content-Type": "application/json"}
146
+
147
+ (
148
+ prompt_tokens_cost_usd_dollar,
149
+ completion_tokens_cost_usd_dollar,
150
+ ) = self.price_calculator(model, response_obj, start_time, end_time)
151
+ total_cost = (
152
+ prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar
153
+ )
154
+
155
+ response_time = (end_time - start_time).total_seconds()
156
+ if "response" in response_obj:
157
+ data = [
158
+ {
159
+ "response_time": response_time,
160
+ "model_id": response_obj["model"],
161
+ "total_cost": total_cost,
162
+ "messages": messages,
163
+ "response": response_obj["choices"][0]["message"]["content"],
164
+ "account_id": self.account_id,
165
+ }
166
+ ]
167
+ elif "error" in response_obj:
168
+ data = [
169
+ {
170
+ "response_time": response_time,
171
+ "model_id": response_obj["model"],
172
+ "total_cost": total_cost,
173
+ "messages": messages,
174
+ "error": response_obj["error"],
175
+ "account_id": self.account_id,
176
+ }
177
+ ]
178
+
179
+ print_verbose(f"BerriSpend Logging - final data object: {data}")
180
+ response = requests.post(url, headers=headers, json=data)
181
+ except:
182
+ # traceback.print_exc()
183
+ print_verbose(f"BerriSpend Logging Error - {traceback.format_exc()}")
184
+ pass
litellm/integrations/custom_logger.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #### What this does ####
2
+ # On success, logs events to a user-defined custom callback (function or CustomLogger subclass)
3
+ import dotenv, os
4
+ import requests
5
+ from litellm.proxy._types import UserAPIKeyAuth
6
+ from litellm.caching import DualCache
7
+ from typing import Literal
8
+
9
+ dotenv.load_dotenv() # Loading env variables using dotenv
10
+ import traceback
11
+
12
+
13
+ class CustomLogger: # https://docs.litellm.ai/docs/observability/custom_callback#callback-class
14
+ # Class variables or attributes
15
+ def __init__(self):
16
+ pass
17
+
18
+ def log_pre_api_call(self, model, messages, kwargs):
19
+ pass
20
+
21
+ def log_post_api_call(self, kwargs, response_obj, start_time, end_time):
22
+ pass
23
+
24
+ def log_stream_event(self, kwargs, response_obj, start_time, end_time):
25
+ pass
26
+
27
+ def log_success_event(self, kwargs, response_obj, start_time, end_time):
28
+ pass
29
+
30
+ def log_failure_event(self, kwargs, response_obj, start_time, end_time):
31
+ pass
32
+
33
+ #### ASYNC ####
34
+
35
+ async def async_log_stream_event(self, kwargs, response_obj, start_time, end_time):
36
+ pass
37
+
38
+ async def async_log_pre_api_call(self, model, messages, kwargs):
39
+ pass
40
+
41
+ async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
42
+ pass
43
+
44
+ async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
45
+ pass
46
+
47
+ #### CALL HOOKS - proxy only ####
48
+ """
49
+ Control / modify the incoming and outgoing data before calling the model
50
+ """
51
+
52
+ async def async_pre_call_hook(
53
+ self,
54
+ user_api_key_dict: UserAPIKeyAuth,
55
+ cache: DualCache,
56
+ data: dict,
57
+ call_type: Literal["completion", "embeddings"],
58
+ ):
59
+ pass
60
+
61
+ async def async_post_call_failure_hook(
62
+ self, original_exception: Exception, user_api_key_dict: UserAPIKeyAuth
63
+ ):
64
+ pass
65
+
66
+ #### SINGLE-USE #### - https://docs.litellm.ai/docs/observability/custom_callback#using-your-custom-callback-function
67
+
68
+ def log_input_event(self, model, messages, kwargs, print_verbose, callback_func):
69
+ try:
70
+ kwargs["model"] = model
71
+ kwargs["messages"] = messages
72
+ kwargs["log_event_type"] = "pre_api_call"
73
+ callback_func(
74
+ kwargs,
75
+ )
76
+ print_verbose(f"Custom Logger - model call details: {kwargs}")
77
+ except:
78
+ traceback.print_exc()
79
+ print_verbose(f"Custom Logger Error - {traceback.format_exc()}")
80
+
81
+ async def async_log_input_event(
82
+ self, model, messages, kwargs, print_verbose, callback_func
83
+ ):
84
+ try:
85
+ kwargs["model"] = model
86
+ kwargs["messages"] = messages
87
+ kwargs["log_event_type"] = "pre_api_call"
88
+ await callback_func(
89
+ kwargs,
90
+ )
91
+ print_verbose(f"Custom Logger - model call details: {kwargs}")
92
+ except:
93
+ traceback.print_exc()
94
+ print_verbose(f"Custom Logger Error - {traceback.format_exc()}")
95
+
96
+ def log_event(
97
+ self, kwargs, response_obj, start_time, end_time, print_verbose, callback_func
98
+ ):
99
+ # Method definition
100
+ try:
101
+ kwargs["log_event_type"] = "post_api_call"
102
+ callback_func(
103
+ kwargs, # kwargs to func
104
+ response_obj,
105
+ start_time,
106
+ end_time,
107
+ )
108
+ print_verbose(f"Custom Logger - final response object: {response_obj}")
109
+ except:
110
+ # traceback.print_exc()
111
+ print_verbose(f"Custom Logger Error - {traceback.format_exc()}")
112
+ pass
113
+
114
+ async def async_log_event(
115
+ self, kwargs, response_obj, start_time, end_time, print_verbose, callback_func
116
+ ):
117
+ # Method definition
118
+ try:
119
+ kwargs["log_event_type"] = "post_api_call"
120
+ await callback_func(
121
+ kwargs, # kwargs to func
122
+ response_obj,
123
+ start_time,
124
+ end_time,
125
+ )
126
+ print_verbose(f"Custom Logger - final response object: {response_obj}")
127
+ except:
128
+ # traceback.print_exc()
129
+ print_verbose(f"Custom Logger Error - {traceback.format_exc()}")
130
+ pass
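CustomLogger is meant to be subclassed; only the hooks you need have to be overridden. A minimal sketch (class name and print statements are illustrative; registering the instance on litellm.callbacks follows the custom-callback docs linked in the class definition):

    import litellm
    from litellm.integrations.custom_logger import CustomLogger

    class MyLogger(CustomLogger):
        def log_success_event(self, kwargs, response_obj, start_time, end_time):
            print(f"success: {kwargs.get('model')} in {end_time - start_time}")

        def log_failure_event(self, kwargs, response_obj, start_time, end_time):
            print(f"failure: {kwargs.get('model')}")

    litellm.callbacks = [MyLogger()]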
litellm/integrations/dynamodb.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #### What this does ####
2
+ # On success + failure, log events to DynamoDB
3
+
4
+ import dotenv, os
5
+ import requests
6
+
7
+ dotenv.load_dotenv() # Loading env variables using dotenv
8
+ import traceback
9
+ import datetime, subprocess, sys
10
+ import litellm, uuid
11
+ from litellm._logging import print_verbose
12
+
13
+
14
+ class DyanmoDBLogger:
15
+ # Class variables or attributes
16
+
17
+ def __init__(self):
18
+ # Instance variables
19
+ import boto3
20
+
21
+ self.dynamodb = boto3.resource(
22
+ "dynamodb", region_name=os.environ["AWS_REGION_NAME"]
23
+ )
24
+ if litellm.dynamodb_table_name is None:
25
+ raise ValueError(
26
+ "LiteLLM Error, trying to use DynamoDB but not table name passed. Create a table and set `litellm.dynamodb_table_name=<your-table>`"
27
+ )
28
+ self.table_name = litellm.dynamodb_table_name
29
+
30
+ async def _async_log_event(
31
+ self, kwargs, response_obj, start_time, end_time, print_verbose
32
+ ):
33
+ self.log_event(kwargs, response_obj, start_time, end_time, print_verbose)
34
+
35
+ def log_event(self, kwargs, response_obj, start_time, end_time, print_verbose):
36
+ try:
37
+ print_verbose(
38
+ f"DynamoDB Logging - Enters logging function for model {kwargs}"
39
+ )
40
+
41
+ # construct payload to send to DynamoDB
42
+ # follows the same params as langfuse.py
43
+ litellm_params = kwargs.get("litellm_params", {})
44
+ metadata = (
45
+ litellm_params.get("metadata", {}) or {}
46
+ ) # if litellm_params['metadata'] == None
47
+ messages = kwargs.get("messages")
48
+ optional_params = kwargs.get("optional_params", {})
49
+ call_type = kwargs.get("call_type", "litellm.completion")
50
+ usage = response_obj["usage"]
51
+ id = response_obj.get("id", str(uuid.uuid4()))
52
+
53
+ # Build the initial payload
54
+ payload = {
55
+ "id": id,
56
+ "call_type": call_type,
57
+ "startTime": start_time,
58
+ "endTime": end_time,
59
+ "model": kwargs.get("model", ""),
60
+ "user": kwargs.get("user", ""),
61
+ "modelParameters": optional_params,
62
+ "messages": messages,
63
+ "response": response_obj,
64
+ "usage": usage,
65
+ "metadata": metadata,
66
+ }
67
+
68
+ # Ensure everything in the payload is converted to str
69
+ for key, value in payload.items():
70
+ try:
71
+ payload[key] = str(value)
72
+ except:
73
+ # non blocking if it can't cast to a str
74
+ pass
75
+
76
+ print_verbose(f"\nDynamoDB Logger - Logging payload = {payload}")
77
+
78
+ # put data in DynamoDB
79
+ table = self.dynamodb.Table(self.table_name)
80
+ # Assuming log_data is a dictionary with log information
81
+ response = table.put_item(Item=payload)
82
+
83
+ print_verbose(f"Response from DynamoDB:{str(response)}")
84
+
85
+ print_verbose(
86
+ f"DynamoDB Layer Logging - final response object: {response_obj}"
87
+ )
88
+ return response
89
+ except:
90
+ traceback.print_exc()
91
+ print_verbose(f"DynamoDB Layer Error - {traceback.format_exc()}")
92
+ pass
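A hedged setup sketch for the DynamoDB logger (table name and region are placeholders; it assumes boto3 is installed, AWS credentials are available to boto3, and that the logger is registered under litellm's "dynamodb" success-callback string):

    import os
    import litellm

    os.environ["AWS_REGION_NAME"] = "us-west-2"     # read in DyanmoDBLogger.__init__
    litellm.dynamodb_table_name = "litellm-logs"    # required, otherwise __init__ raises ValueError
    litellm.success_callback = ["dynamodb"]

    litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hello"}],
    )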
litellm/integrations/helicone.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #### What this does ####
2
+ # On success, logs events to Helicone
3
+ import dotenv, os
4
+ import requests
5
+
6
+ dotenv.load_dotenv() # Loading env variables using dotenv
7
+ import traceback
8
+
9
+
10
+ class HeliconeLogger:
11
+ # Class variables or attributes
12
+ helicone_model_list = ["gpt", "claude"]
13
+
14
+ def __init__(self):
15
+ # Instance variables
16
+ self.provider_url = "https://api.openai.com/v1"
17
+ self.key = os.getenv("HELICONE_API_KEY")
18
+
19
+ def claude_mapping(self, model, messages, response_obj):
20
+ from anthropic import HUMAN_PROMPT, AI_PROMPT
21
+
22
+ prompt = f"{HUMAN_PROMPT}"
23
+ for message in messages:
24
+ if "role" in message:
25
+ if message["role"] == "user":
26
+ prompt += f"{HUMAN_PROMPT}{message['content']}"
27
+ else:
28
+ prompt += f"{AI_PROMPT}{message['content']}"
29
+ else:
30
+ prompt += f"{HUMAN_PROMPT}{message['content']}"
31
+ prompt += f"{AI_PROMPT}"
32
+ claude_provider_request = {"model": model, "prompt": prompt}
33
+
34
+ claude_response_obj = {
35
+ "completion": response_obj["choices"][0]["message"]["content"],
36
+ "model": model,
37
+ "stop_reason": "stop_sequence",
38
+ }
39
+
40
+ return claude_provider_request, claude_response_obj
41
+
42
+ def log_success(
43
+ self, model, messages, response_obj, start_time, end_time, print_verbose
44
+ ):
45
+ # Method definition
46
+ try:
47
+ print_verbose(
48
+ f"Helicone Logging - Enters logging function for model {model}"
49
+ )
50
+ model = (
51
+ model
52
+ if any(
53
+ accepted_model in model
54
+ for accepted_model in self.helicone_model_list
55
+ )
56
+ else "gpt-3.5-turbo"
57
+ )
58
+ provider_request = {"model": model, "messages": messages}
59
+
60
+ if "claude" in model:
61
+ provider_request, response_obj = self.claude_mapping(
62
+ model=model, messages=messages, response_obj=response_obj
63
+ )
64
+
65
+ providerResponse = {
66
+ "json": response_obj,
67
+ "headers": {"openai-version": "2020-10-01"},
68
+ "status": 200,
69
+ }
70
+
71
+ # Code to be executed
72
+ url = "https://api.hconeai.com/oai/v1/log"
73
+ headers = {
74
+ "Authorization": f"Bearer {self.key}",
75
+ "Content-Type": "application/json",
76
+ }
77
+ start_time_seconds = int(start_time.timestamp())
78
+ start_time_milliseconds = int(
79
+ (start_time.timestamp() - start_time_seconds) * 1000
80
+ )
81
+ end_time_seconds = int(end_time.timestamp())
82
+ end_time_milliseconds = int(
83
+ (end_time.timestamp() - end_time_seconds) * 1000
84
+ )
85
+ data = {
86
+ "providerRequest": {
87
+ "url": self.provider_url,
88
+ "json": provider_request,
89
+ "meta": {"Helicone-Auth": f"Bearer {self.key}"},
90
+ },
91
+ "providerResponse": providerResponse,
92
+ "timing": {
93
+ "startTime": {
94
+ "seconds": start_time_seconds,
95
+ "milliseconds": start_time_milliseconds,
96
+ },
97
+ "endTime": {
98
+ "seconds": end_time_seconds,
99
+ "milliseconds": end_time_milliseconds,
100
+ },
101
+ }, # {"seconds": .., "milliseconds": ..}
102
+ }
103
+ response = requests.post(url, headers=headers, json=data)
104
+ if response.status_code == 200:
105
+ print_verbose("Helicone Logging - Success!")
106
+ else:
107
+ print_verbose(
108
+ f"Helicone Logging - Error Request was not successful. Status Code: {response.status_code}"
109
+ )
110
+ print_verbose(f"Helicone Logging - Error {response.text}")
111
+ except:
112
+ # traceback.print_exc()
113
+ print_verbose(f"Helicone Logging Error - {traceback.format_exc()}")
114
+ pass
litellm/integrations/langfuse.py ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #### What this does ####
2
+ # On success, logs events to Langfuse
3
+ import dotenv, os
4
+ import requests
5
+ import requests
6
+ from datetime import datetime
7
+
8
+ dotenv.load_dotenv() # Loading env variables using dotenv
9
+ import traceback
10
+ from packaging.version import Version
11
+
12
+
13
+ class LangFuseLogger:
14
+ # Class variables or attributes
15
+ def __init__(self):
16
+ try:
17
+ from langfuse import Langfuse
18
+ except Exception as e:
19
+ raise Exception(
20
+ f"\033[91mLangfuse not installed, try running 'pip install langfuse' to fix this error: {e}\033[0m"
21
+ )
22
+ # Instance variables
23
+ self.secret_key = os.getenv("LANGFUSE_SECRET_KEY")
24
+ self.public_key = os.getenv("LANGFUSE_PUBLIC_KEY")
25
+ self.langfuse_host = os.getenv("LANGFUSE_HOST", "https://cloud.langfuse.com")
26
+ self.langfuse_release = os.getenv("LANGFUSE_RELEASE")
27
+ self.langfuse_debug = os.getenv("LANGFUSE_DEBUG")
28
+ self.Langfuse = Langfuse(
29
+ public_key=self.public_key,
30
+ secret_key=self.secret_key,
31
+ host=self.langfuse_host,
32
+ release=self.langfuse_release,
33
+ debug=self.langfuse_debug,
34
+ )
35
+
36
+ def log_event(
37
+ self, kwargs, response_obj, start_time, end_time, user_id, print_verbose
38
+ ):
39
+ # Method definition
40
+
41
+ try:
42
+ print_verbose(
43
+ f"Langfuse Logging - Enters logging function for model {kwargs}"
44
+ )
45
+ litellm_params = kwargs.get("litellm_params", {})
46
+ metadata = (
47
+ litellm_params.get("metadata", {}) or {}
48
+ ) # if litellm_params['metadata'] == None
49
+ prompt = [kwargs.get("messages")]
50
+ optional_params = kwargs.get("optional_params", {})
51
+
52
+ optional_params.pop("functions", None)
53
+ optional_params.pop("tools", None)
54
+
55
+ # langfuse only accepts str, int, bool, float for logging
56
+ for param, value in optional_params.items():
57
+ if not isinstance(value, (str, int, bool, float)):
58
+ try:
59
+ optional_params[param] = str(value)
60
+ except:
61
+ # if casting value to str fails don't block logging
62
+ pass
63
+
64
+ # end of processing langfuse ########################
65
+ input = prompt
66
+ output = response_obj["choices"][0]["message"].json()
67
+ print_verbose(
68
+ f"OUTPUT IN LANGFUSE: {output}; original: {response_obj['choices'][0]['message']}"
69
+ )
70
+ self._log_langfuse_v2(
71
+ user_id,
72
+ metadata,
73
+ output,
74
+ start_time,
75
+ end_time,
76
+ kwargs,
77
+ optional_params,
78
+ input,
79
+ response_obj,
80
+ ) if self._is_langfuse_v2() else self._log_langfuse_v1(
81
+ user_id,
82
+ metadata,
83
+ output,
84
+ start_time,
85
+ end_time,
86
+ kwargs,
87
+ optional_params,
88
+ input,
89
+ response_obj,
90
+ )
91
+
92
+ self.Langfuse.flush()
93
+ print_verbose(
94
+ f"Langfuse Layer Logging - final response object: {response_obj}"
95
+ )
96
+ except:
97
+ traceback.print_exc()
98
+ print_verbose(f"Langfuse Layer Error - {traceback.format_exc()}")
99
+ pass
100
+
101
+ async def _async_log_event(
102
+ self, kwargs, response_obj, start_time, end_time, user_id, print_verbose
103
+ ):
104
+ self.log_event(
105
+ kwargs, response_obj, start_time, end_time, user_id, print_verbose
106
+ )
107
+
108
+ def _is_langfuse_v2(self):
109
+ import langfuse
110
+
111
+ return Version(langfuse.version.__version__) >= Version("2.0.0")
112
+
113
+ def _log_langfuse_v1(
114
+ self,
115
+ user_id,
116
+ metadata,
117
+ output,
118
+ start_time,
119
+ end_time,
120
+ kwargs,
121
+ optional_params,
122
+ input,
123
+ response_obj,
124
+ ):
125
+ from langfuse.model import CreateTrace, CreateGeneration
126
+
127
+ print(
128
+ "Please upgrade langfuse to v2.0.0 or higher: https://github.com/langfuse/langfuse-python/releases/tag/v2.0.1"
129
+ )
130
+
131
+ trace = self.Langfuse.trace(
132
+ CreateTrace(
133
+ name=metadata.get("generation_name", "litellm-completion"),
134
+ input=input,
135
+ output=output,
136
+ userId=user_id,
137
+ )
138
+ )
139
+
140
+ trace.generation(
141
+ CreateGeneration(
142
+ name=metadata.get("generation_name", "litellm-completion"),
143
+ startTime=start_time,
144
+ endTime=end_time,
145
+ model=kwargs["model"],
146
+ modelParameters=optional_params,
147
+ input=input,
148
+ output=output,
149
+ usage={
150
+ "prompt_tokens": response_obj["usage"]["prompt_tokens"],
151
+ "completion_tokens": response_obj["usage"]["completion_tokens"],
152
+ },
153
+ metadata=metadata,
154
+ )
155
+ )
156
+
157
+ def _log_langfuse_v2(
158
+ self,
159
+ user_id,
160
+ metadata,
161
+ output,
162
+ start_time,
163
+ end_time,
164
+ kwargs,
165
+ optional_params,
166
+ input,
167
+ response_obj,
168
+ ):
169
+ trace = self.Langfuse.trace(
170
+ name=metadata.get("generation_name", "litellm-completion"),
171
+ input=input,
172
+ output=output,
173
+ user_id=metadata.get("trace_user_id", user_id),
174
+ id=metadata.get("trace_id", None),
175
+ )
176
+
177
+ trace.generation(
178
+ name=metadata.get("generation_name", "litellm-completion"),
179
+ id=metadata.get("generation_id", None),
180
+ startTime=start_time,
181
+ endTime=end_time,
182
+ model=kwargs["model"],
183
+ modelParameters=optional_params,
184
+ input=input,
185
+ output=output,
186
+ usage={
187
+ "prompt_tokens": response_obj["usage"]["prompt_tokens"],
188
+ "completion_tokens": response_obj["usage"]["completion_tokens"],
189
+ },
190
+ metadata=metadata,
191
+ )
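Wiring this logger up only requires the Langfuse keys and the "langfuse" success callback (the same wiring the deprecated server's set_callbacks() uses earlier in this commit); trace and generation names come from the request metadata keys read in _log_langfuse_v2. Key values below are placeholders:

    import os
    import litellm

    os.environ["LANGFUSE_PUBLIC_KEY"] = "pk-..."
    os.environ["LANGFUSE_SECRET_KEY"] = "sk-..."
    litellm.success_callback = ["langfuse"]

    litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hello"}],
        metadata={"generation_name": "demo-generation", "trace_user_id": "user-123"},
    )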
litellm/integrations/langsmith.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #### What this does ####
2
+ # On success, logs events to Langsmith
3
+ import dotenv, os
4
+ import requests
5
+ import requests
6
+ from datetime import datetime
7
+
8
+ dotenv.load_dotenv() # Loading env variables using dotenv
9
+ import traceback
10
+
11
+
12
+ class LangsmithLogger:
13
+ # Class variables or attributes
14
+ def __init__(self):
15
+ self.langsmith_api_key = os.getenv("LANGSMITH_API_KEY")
16
+
17
+ def log_event(self, kwargs, response_obj, start_time, end_time, print_verbose):
18
+ # Method definition
19
+ # inspired by Langsmith http api here: https://github.com/langchain-ai/langsmith-cookbook/blob/main/tracing-examples/rest/rest.ipynb
20
+ metadata = {}
21
+ if "litellm_params" in kwargs:
22
+ metadata = kwargs["litellm_params"].get("metadata", {})
23
+ # set project name and run_name for langsmith logging
24
+ # users can pass project_name and run name to litellm.completion()
25
+ # Example: litellm.completion(model, messages, metadata={"project_name": "my-litellm-project", "run_name": "my-langsmith-run"})
26
+ # if not set litellm will use default project_name = litellm-completion, run_name = LLMRun
27
+ project_name = metadata.get("project_name", "litellm-completion")
28
+ run_name = metadata.get("run_name", "LLMRun")
29
+ print_verbose(
30
+ f"Langsmith Logging - project_name: {project_name}, run_name {run_name}"
31
+ )
32
+ try:
33
+ print_verbose(
34
+ f"Langsmith Logging - Enters logging function for model {kwargs}"
35
+ )
36
+ import requests
37
+ import datetime
38
+ from datetime import timezone
39
+
40
+ try:
41
+ start_time = kwargs["start_time"].astimezone(timezone.utc).isoformat()
42
+ end_time = kwargs["end_time"].astimezone(timezone.utc).isoformat()
43
+ except:
44
+ start_time = datetime.datetime.utcnow().isoformat()
45
+ end_time = datetime.datetime.utcnow().isoformat()
46
+
47
+ # filter out kwargs to not include any dicts, langsmith throws an error when trying to log kwargs
48
+ new_kwargs = {}
49
+ for key in kwargs:
50
+ value = kwargs[key]
51
+ if key == "start_time" or key == "end_time":
52
+ pass
53
+ elif type(value) != dict:
54
+ new_kwargs[key] = value
55
+
56
+ requests.post(
57
+ "https://api.smith.langchain.com/runs",
58
+ json={
59
+ "name": run_name,
60
+ "run_type": "llm", # this should always be llm, since litellm always logs llm calls. Langsmith allow us to log "chain"
61
+ "inputs": {**new_kwargs},
62
+ "outputs": response_obj.json(),
63
+ "session_name": project_name,
64
+ "start_time": start_time,
65
+ "end_time": end_time,
66
+ },
67
+ headers={"x-api-key": self.langsmith_api_key},
68
+ )
69
+ print_verbose(
70
+ f"Langsmith Layer Logging - final response object: {response_obj}"
71
+ )
72
+ except:
73
+ # traceback.print_exc()
74
+ print_verbose(f"Langsmith Layer Error - {traceback.format_exc()}")
75
+ pass
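A hedged usage sketch for the logger above: project_name and run_name travel through the metadata dict on litellm.completion(), exactly as the comments in log_event() describe. The model name and API key are placeholders, and registering the callback via litellm.success_callback follows litellm's usual callback pattern:

    import os
    import litellm

    os.environ["LANGSMITH_API_KEY"] = "<your-langsmith-key>"   # placeholder
    litellm.success_callback = ["langsmith"]                   # route successful calls to LangsmithLogger

    litellm.completion(
        model="gpt-3.5-turbo",                                 # placeholder model
        messages=[{"role": "user", "content": "hello"}],
        # picked up above: project_name -> session_name, run_name -> name
        metadata={"project_name": "my-litellm-project", "run_name": "my-langsmith-run"},
    )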
litellm/integrations/litedebugger.py ADDED
@@ -0,0 +1,262 @@
1
+ import requests, traceback, json, os
2
+ import types
3
+
4
+
5
+ class LiteDebugger:
6
+ user_email = None
7
+ dashboard_url = None
8
+
9
+ def __init__(self, email=None):
10
+ self.api_url = "https://api.litellm.ai/debugger"
11
+ self.validate_environment(email)
12
+ pass
13
+
14
+ def validate_environment(self, email):
15
+ try:
16
+ self.user_email = (
17
+ email or os.getenv("LITELLM_TOKEN") or os.getenv("LITELLM_EMAIL")
18
+ )
19
+ if (
20
+ self.user_email == None
21
+ ): # if users are trying to use_client=True but token not set
22
+ raise ValueError(
23
+ "litellm.use_client = True but no token or email passed. Please set it in litellm.token"
24
+ )
25
+ self.dashboard_url = "https://admin.litellm.ai/" + self.user_email
26
+ try:
27
+ print(
28
+ f"\033[92mHere's your LiteLLM Dashboard 👉 \033[94m\033[4m{self.dashboard_url}\033[0m"
29
+ )
30
+ except:
31
+ print(f"Here's your LiteLLM Dashboard 👉 {self.dashboard_url}")
32
+ if self.user_email == None:
33
+ raise ValueError(
34
+ "[Non-Blocking Error] LiteLLMDebugger: Missing LITELLM_TOKEN. Set it in your environment. Eg.: os.environ['LITELLM_TOKEN']= <your_email>"
35
+ )
36
+ except Exception as e:
37
+ raise ValueError(
38
+ "[Non-Blocking Error] LiteLLMDebugger: Missing LITELLM_TOKEN. Set it in your environment. Eg.: os.environ['LITELLM_TOKEN']= <your_email>"
39
+ )
40
+
41
+ def input_log_event(
42
+ self,
43
+ model,
44
+ messages,
45
+ end_user,
46
+ litellm_call_id,
47
+ call_type,
48
+ print_verbose,
49
+ litellm_params,
50
+ optional_params,
51
+ ):
52
+ print_verbose(
53
+ f"LiteDebugger: Pre-API Call Logging for call id {litellm_call_id}"
54
+ )
55
+ try:
56
+ print_verbose(
57
+ f"LiteLLMDebugger: Logging - Enters input logging function for model {model}"
58
+ )
59
+
60
+ def remove_key_value(dictionary, key):
61
+ new_dict = dictionary.copy() # Create a copy of the original dictionary
62
+ new_dict.pop(key) # Remove the specified key-value pair from the copy
63
+ return new_dict
64
+
65
+ updated_litellm_params = remove_key_value(litellm_params, "logger_fn")
66
+
67
+ if call_type == "embedding":
68
+ for (
69
+ message
70
+ ) in (
71
+ messages
72
+ ): # assuming the input is a list as required by the embedding function
73
+ litellm_data_obj = {
74
+ "model": model,
75
+ "messages": [{"role": "user", "content": message}],
76
+ "end_user": end_user,
77
+ "status": "initiated",
78
+ "litellm_call_id": litellm_call_id,
79
+ "user_email": self.user_email,
80
+ "litellm_params": updated_litellm_params,
81
+ "optional_params": optional_params,
82
+ }
83
+ print_verbose(
84
+ f"LiteLLMDebugger: Logging - logged data obj {litellm_data_obj}"
85
+ )
86
+ response = requests.post(
87
+ url=self.api_url,
88
+ headers={"content-type": "application/json"},
89
+ data=json.dumps(litellm_data_obj),
90
+ )
91
+ print_verbose(f"LiteDebugger: embedding api response - {response.text}")
92
+ elif call_type == "completion":
93
+ litellm_data_obj = {
94
+ "model": model,
95
+ "messages": messages
96
+ if isinstance(messages, list)
97
+ else [{"role": "user", "content": messages}],
98
+ "end_user": end_user,
99
+ "status": "initiated",
100
+ "litellm_call_id": litellm_call_id,
101
+ "user_email": self.user_email,
102
+ "litellm_params": updated_litellm_params,
103
+ "optional_params": optional_params,
104
+ }
105
+ print_verbose(
106
+ f"LiteLLMDebugger: Logging - logged data obj {litellm_data_obj}"
107
+ )
108
+ response = requests.post(
109
+ url=self.api_url,
110
+ headers={"content-type": "application/json"},
111
+ data=json.dumps(litellm_data_obj),
112
+ )
113
+ print_verbose(
114
+ f"LiteDebugger: completion api response - {response.text}"
115
+ )
116
+ except:
117
+ print_verbose(
118
+ f"[Non-Blocking Error] LiteDebugger: Logging Error - {traceback.format_exc()}"
119
+ )
120
+ pass
121
+
122
+ def post_call_log_event(
123
+ self, original_response, litellm_call_id, print_verbose, call_type, stream
124
+ ):
125
+ print_verbose(
126
+ f"LiteDebugger: Post-API Call Logging for call id {litellm_call_id}"
127
+ )
128
+ try:
129
+ if call_type == "embedding":
130
+ litellm_data_obj = {
131
+ "status": "received",
132
+ "additional_details": {
133
+ "original_response": str(
134
+ original_response["data"][0]["embedding"][:5]
135
+ )
136
+ }, # don't store the entire vector
137
+ "litellm_call_id": litellm_call_id,
138
+ "user_email": self.user_email,
139
+ }
140
+ elif call_type == "completion" and not stream:
141
+ litellm_data_obj = {
142
+ "status": "received",
143
+ "additional_details": {"original_response": original_response},
144
+ "litellm_call_id": litellm_call_id,
145
+ "user_email": self.user_email,
146
+ }
147
+ elif call_type == "completion" and stream:
148
+ litellm_data_obj = {
149
+ "status": "received",
150
+ "additional_details": {
151
+ "original_response": "Streamed response"
152
+ if isinstance(original_response, types.GeneratorType)
153
+ else original_response
154
+ },
155
+ "litellm_call_id": litellm_call_id,
156
+ "user_email": self.user_email,
157
+ }
158
+ print_verbose(f"litedebugger post-call data object - {litellm_data_obj}")
159
+ response = requests.post(
160
+ url=self.api_url,
161
+ headers={"content-type": "application/json"},
162
+ data=json.dumps(litellm_data_obj),
163
+ )
164
+ print_verbose(f"LiteDebugger: api response - {response.text}")
165
+ except:
166
+ print_verbose(
167
+ f"[Non-Blocking Error] LiteDebugger: Logging Error - {traceback.format_exc()}"
168
+ )
169
+
170
+ def log_event(
171
+ self,
172
+ end_user,
173
+ response_obj,
174
+ start_time,
175
+ end_time,
176
+ litellm_call_id,
177
+ print_verbose,
178
+ call_type,
179
+ stream=False,
180
+ ):
181
+ print_verbose(
182
+ f"LiteDebugger: Success/Failure Call Logging for call id {litellm_call_id}"
183
+ )
184
+ try:
185
+ print_verbose(
186
+ f"LiteLLMDebugger: Success/Failure Logging - Enters handler logging function for function {call_type} and stream set to {stream} with response object {response_obj}"
187
+ )
188
+ total_cost = 0 # [TODO] implement cost tracking
189
+ response_time = (end_time - start_time).total_seconds()
190
+ if call_type == "completion" and stream == False:
191
+ litellm_data_obj = {
192
+ "response_time": response_time,
193
+ "total_cost": total_cost,
194
+ "response": response_obj["choices"][0]["message"]["content"],
195
+ "litellm_call_id": litellm_call_id,
196
+ "status": "success",
197
+ }
198
+ print_verbose(
199
+ f"LiteDebugger: Logging - final data object: {litellm_data_obj}"
200
+ )
201
+ response = requests.post(
202
+ url=self.api_url,
203
+ headers={"content-type": "application/json"},
204
+ data=json.dumps(litellm_data_obj),
205
+ )
206
+ elif call_type == "embedding":
207
+ litellm_data_obj = {
208
+ "response_time": response_time,
209
+ "total_cost": total_cost,
210
+ "response": str(response_obj["data"][0]["embedding"][:5]),
211
+ "litellm_call_id": litellm_call_id,
212
+ "status": "success",
213
+ }
214
+ response = requests.post(
215
+ url=self.api_url,
216
+ headers={"content-type": "application/json"},
217
+ data=json.dumps(litellm_data_obj),
218
+ )
219
+ elif call_type == "completion" and stream == True:
220
+ if len(response_obj["content"]) > 0: # don't log the empty strings
221
+ litellm_data_obj = {
222
+ "response_time": response_time,
223
+ "total_cost": total_cost,
224
+ "response": response_obj["content"],
225
+ "litellm_call_id": litellm_call_id,
226
+ "status": "success",
227
+ }
228
+ print_verbose(
229
+ f"LiteDebugger: Logging - final data object: {litellm_data_obj}"
230
+ )
231
+ response = requests.post(
232
+ url=self.api_url,
233
+ headers={"content-type": "application/json"},
234
+ data=json.dumps(litellm_data_obj),
235
+ )
236
+ elif "error" in response_obj:
237
+ if "Unable to map your input to a model." in response_obj["error"]:
238
+ total_cost = 0
239
+ litellm_data_obj = {
240
+ "response_time": response_time,
241
+ "model": response_obj["model"],
242
+ "total_cost": total_cost,
243
+ "error": response_obj["error"],
244
+ "end_user": end_user,
245
+ "litellm_call_id": litellm_call_id,
246
+ "status": "failure",
247
+ "user_email": self.user_email,
248
+ }
249
+ print_verbose(
250
+ f"LiteDebugger: Logging - final data object: {litellm_data_obj}"
251
+ )
252
+ response = requests.post(
253
+ url=self.api_url,
254
+ headers={"content-type": "application/json"},
255
+ data=json.dumps(litellm_data_obj),
256
+ )
257
+ print_verbose(f"LiteDebugger: api response - {response.text}")
258
+ except:
259
+ print_verbose(
260
+ f"[Non-Blocking Error] LiteDebugger: Logging Error - {traceback.format_exc()}"
261
+ )
262
+ pass
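A minimal setup sketch implied by the error messages in validate_environment() above (the email value is a placeholder):

    import os
    import litellm

    os.environ["LITELLM_TOKEN"] = "you@example.com"  # placeholder, per the os.environ['LITELLM_TOKEN'] hint above
    litellm.use_client = True                        # enables the LiteDebugger callbacks shown above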
litellm/integrations/llmonitor.py ADDED
@@ -0,0 +1,127 @@
1
+ #### What this does ####
2
+ # On success + failure, log events to LLMonitor
3
+ import datetime
4
+ import traceback
5
+ import dotenv
6
+ import os
7
+ import requests
8
+
9
+ dotenv.load_dotenv() # Loading env variables using dotenv
10
+
11
+
12
+ # convert to {completion: xx, tokens: xx}
13
+ def parse_usage(usage):
14
+ return {
15
+ "completion": usage["completion_tokens"] if "completion_tokens" in usage else 0,
16
+ "prompt": usage["prompt_tokens"] if "prompt_tokens" in usage else 0,
17
+ }
18
+
19
+
20
+ def parse_messages(input):
21
+ if input is None:
22
+ return None
23
+
24
+ def clean_message(message):
25
+ # if it is a string, return it as is
26
+ if isinstance(message, str):
27
+ return message
28
+
29
+ if "message" in message:
30
+ return clean_message(message["message"])
31
+ text = message["content"]
32
+ if text == None:
33
+ text = message.get("function_call", None)
34
+
35
+ return {
36
+ "role": message["role"],
37
+ "text": text,
38
+ }
39
+
40
+ if isinstance(input, list):
41
+ if len(input) == 1:
42
+ return clean_message(input[0])
43
+ else:
44
+ return [clean_message(msg) for msg in input]
45
+ else:
46
+ return clean_message(input)
47
+
48
+
49
+ class LLMonitorLogger:
50
+ # Class variables or attributes
51
+ def __init__(self):
52
+ # Instance variables
53
+ self.api_url = os.getenv("LLMONITOR_API_URL") or "https://app.llmonitor.com"
54
+ self.app_id = os.getenv("LLMONITOR_APP_ID")
55
+
56
+ def log_event(
57
+ self,
58
+ type,
59
+ event,
60
+ run_id,
61
+ model,
62
+ print_verbose,
63
+ input=None,
64
+ user_id=None,
65
+ response_obj=None,
66
+ start_time=datetime.datetime.now(),
67
+ end_time=datetime.datetime.now(),
68
+ error=None,
69
+ ):
70
+ # Method definition
71
+ try:
72
+ print_verbose(f"LLMonitor Logging - Logging request for model {model}")
73
+
74
+ if response_obj:
75
+ usage = (
76
+ parse_usage(response_obj["usage"])
77
+ if "usage" in response_obj
78
+ else None
79
+ )
80
+ output = response_obj["choices"] if "choices" in response_obj else None
81
+ else:
82
+ usage = None
83
+ output = None
84
+
85
+ if error:
86
+ error_obj = {"stack": error}
87
+
88
+ else:
89
+ error_obj = None
90
+
91
+ data = [
92
+ {
93
+ "type": type,
94
+ "name": model,
95
+ "runId": run_id,
96
+ "app": self.app_id,
97
+ "event": "start",
98
+ "timestamp": start_time.isoformat(),
99
+ "userId": user_id,
100
+ "input": parse_messages(input),
101
+ },
102
+ {
103
+ "type": type,
104
+ "runId": run_id,
105
+ "app": self.app_id,
106
+ "event": event,
107
+ "error": error_obj,
108
+ "timestamp": end_time.isoformat(),
109
+ "userId": user_id,
110
+ "output": parse_messages(output),
111
+ "tokensUsage": usage,
112
+ },
113
+ ]
114
+
115
+ print_verbose(f"LLMonitor Logging - final data object: {data}")
116
+
117
+ response = requests.post(
118
+ self.api_url + "/api/report",
119
+ headers={"Content-Type": "application/json"},
120
+ json={"events": data},
121
+ )
122
+
123
+ print_verbose(f"LLMonitor Logging - response: {response}")
124
+ except:
125
+ # traceback.print_exc()
126
+ print_verbose(f"LLMonitor Logging Error - {traceback.format_exc()}")
127
+ pass
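parse_usage() and parse_messages() above are pure helpers, so their output shapes are easy to illustrate; the values below are made up:

    usage = {"prompt_tokens": 12, "completion_tokens": 34}
    parse_usage(usage)
    # -> {"completion": 34, "prompt": 12}

    parse_messages([{"role": "user", "content": "hi"}])
    # -> {"role": "user", "text": "hi"}   (single-item lists are unwrapped)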
litellm/integrations/prompt_layer.py ADDED
@@ -0,0 +1,72 @@
1
+ #### What this does ####
2
+ # On success, logs events to Promptlayer
3
+ import dotenv, os
4
+ import requests
5
+ import requests
6
+
7
+ dotenv.load_dotenv() # Loading env variables using dotenv
8
+ import traceback
9
+
10
+
11
+ class PromptLayerLogger:
12
+ # Class variables or attributes
13
+ def __init__(self):
14
+ # Instance variables
15
+ self.key = os.getenv("PROMPTLAYER_API_KEY")
16
+
17
+ def log_event(self, kwargs, response_obj, start_time, end_time, print_verbose):
18
+ # Method definition
19
+ try:
20
+ new_kwargs = {}
21
+ new_kwargs["model"] = kwargs["model"]
22
+ new_kwargs["messages"] = kwargs["messages"]
23
+
24
+ # add kwargs["optional_params"] to new_kwargs
25
+ for optional_param in kwargs["optional_params"]:
26
+ new_kwargs[optional_param] = kwargs["optional_params"][optional_param]
27
+
28
+ print_verbose(
29
+ f"Prompt Layer Logging - Enters logging function for model kwargs: {new_kwargs}\n, response: {response_obj}"
30
+ )
31
+
32
+ request_response = requests.post(
33
+ "https://api.promptlayer.com/rest/track-request",
34
+ json={
35
+ "function_name": "openai.ChatCompletion.create",
36
+ "kwargs": new_kwargs,
37
+ "tags": ["hello", "world"],
38
+ "request_response": dict(response_obj),
39
+ "request_start_time": int(start_time.timestamp()),
40
+ "request_end_time": int(end_time.timestamp()),
41
+ "api_key": self.key,
42
+ # Optional params for PromptLayer
43
+ # "prompt_id": "<PROMPT ID>",
44
+ # "prompt_input_variables": "<Dictionary of variables for prompt>",
45
+ # "prompt_version":1,
46
+ },
47
+ )
48
+ print_verbose(
49
+ f"Prompt Layer Logging: success - final response object: {request_response.text}"
50
+ )
51
+ response_json = request_response.json()
52
+ if "success" not in request_response.json():
53
+ raise Exception("Promptlayer did not successfully log the response!")
54
+
55
+ if "request_id" in response_json:
56
+ print(kwargs["litellm_params"]["metadata"])
57
+ if kwargs["litellm_params"]["metadata"] is not None:
58
+ response = requests.post(
59
+ "https://api.promptlayer.com/rest/track-metadata",
60
+ json={
61
+ "request_id": response_json["request_id"],
62
+ "api_key": self.key,
63
+ "metadata": kwargs["litellm_params"]["metadata"],
64
+ },
65
+ )
66
+ print_verbose(
67
+ f"Prompt Layer Logging: success - metadata post response object: {response.text}"
68
+ )
69
+
70
+ except:
71
+ print_verbose(f"error: Prompt Layer Error - {traceback.format_exc()}")
72
+ pass
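A hedged usage sketch: the logger above posts the request to /rest/track-request and, when a request_id comes back, forwards kwargs["litellm_params"]["metadata"] to /rest/track-metadata. The API key, model, and metadata values are placeholders; the success_callback registration follows litellm's callback pattern:

    import os
    import litellm

    os.environ["PROMPTLAYER_API_KEY"] = "<your-promptlayer-key>"  # placeholder
    litellm.success_callback = ["promptlayer"]

    litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hello"}],
        metadata={"environment": "dev"},  # forwarded to track-metadata above
    )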
litellm/integrations/s3.py ADDED
@@ -0,0 +1,150 @@
1
+ #### What this does ####
2
+ # On success + failure, log events to s3
3
+
4
+ import dotenv, os
5
+ import requests
6
+
7
+ dotenv.load_dotenv() # Loading env variables using dotenv
8
+ import traceback
9
+ import datetime, subprocess, sys
10
+ import litellm, uuid
11
+ from litellm._logging import print_verbose
12
+
13
+
14
+ class S3Logger:
15
+ # Class variables or attributes
16
+ def __init__(
17
+ self,
18
+ s3_bucket_name=None,
19
+ s3_region_name=None,
20
+ s3_api_version=None,
21
+ s3_use_ssl=True,
22
+ s3_verify=None,
23
+ s3_endpoint_url=None,
24
+ s3_aws_access_key_id=None,
25
+ s3_aws_secret_access_key=None,
26
+ s3_aws_session_token=None,
27
+ s3_config=None,
28
+ **kwargs,
29
+ ):
30
+ import boto3
31
+
32
+ try:
33
+ print_verbose("in init s3 logger")
34
+
35
+ if litellm.s3_callback_params is not None:
36
+ # read in .env variables - example os.environ/AWS_BUCKET_NAME
37
+ for key, value in litellm.s3_callback_params.items():
38
+ if type(value) is str and value.startswith("os.environ/"):
39
+ litellm.s3_callback_params[key] = litellm.get_secret(value)
40
+ # now set s3 params from litellm.s3_logger_params
41
+ s3_bucket_name = litellm.s3_callback_params.get("s3_bucket_name")
42
+ s3_region_name = litellm.s3_callback_params.get("s3_region_name")
43
+ s3_api_version = litellm.s3_callback_params.get("s3_api_version")
44
+ s3_use_ssl = litellm.s3_callback_params.get("s3_use_ssl")
45
+ s3_verify = litellm.s3_callback_params.get("s3_verify")
46
+ s3_endpoint_url = litellm.s3_callback_params.get("s3_endpoint_url")
47
+ s3_aws_access_key_id = litellm.s3_callback_params.get(
48
+ "s3_aws_access_key_id"
49
+ )
50
+ s3_aws_secret_access_key = litellm.s3_callback_params.get(
51
+ "s3_aws_secret_access_key"
52
+ )
53
+ s3_aws_session_token = litellm.s3_callback_params.get(
54
+ "s3_aws_session_token"
55
+ )
56
+ s3_config = litellm.s3_callback_params.get("s3_config")
57
+ # done reading litellm.s3_callback_params
58
+
59
+ self.bucket_name = s3_bucket_name
60
+ # Create an S3 client with custom endpoint URL
61
+ self.s3_client = boto3.client(
62
+ "s3",
63
+ region_name=s3_region_name,
64
+ endpoint_url=s3_endpoint_url,
65
+ api_version=s3_api_version,
66
+ use_ssl=s3_use_ssl,
67
+ verify=s3_verify,
68
+ aws_access_key_id=s3_aws_access_key_id,
69
+ aws_secret_access_key=s3_aws_secret_access_key,
70
+ aws_session_token=s3_aws_session_token,
71
+ config=s3_config,
72
+ **kwargs,
73
+ )
74
+ except Exception as e:
75
+ print_verbose(f"Got exception on init s3 client {str(e)}")
76
+ raise e
77
+
78
+ async def _async_log_event(
79
+ self, kwargs, response_obj, start_time, end_time, print_verbose
80
+ ):
81
+ self.log_event(kwargs, response_obj, start_time, end_time, print_verbose)
82
+
83
+ def log_event(self, kwargs, response_obj, start_time, end_time, print_verbose):
84
+ try:
85
+ print_verbose(f"s3 Logging - Enters logging function for model {kwargs}")
86
+
87
+ # construct payload to send to s3
88
+ # follows the same params as langfuse.py
89
+ litellm_params = kwargs.get("litellm_params", {})
90
+ metadata = (
91
+ litellm_params.get("metadata", {}) or {}
92
+ ) # if litellm_params['metadata'] == None
93
+ messages = kwargs.get("messages")
94
+ optional_params = kwargs.get("optional_params", {})
95
+ call_type = kwargs.get("call_type", "litellm.completion")
96
+ cache_hit = kwargs.get("cache_hit", False)
97
+ usage = response_obj["usage"]
98
+ id = response_obj.get("id", str(uuid.uuid4()))
99
+
100
+ # Build the initial payload
101
+ payload = {
102
+ "id": id,
103
+ "call_type": call_type,
104
+ "cache_hit": cache_hit,
105
+ "startTime": start_time,
106
+ "endTime": end_time,
107
+ "model": kwargs.get("model", ""),
108
+ "user": kwargs.get("user", ""),
109
+ "modelParameters": optional_params,
110
+ "messages": messages,
111
+ "response": response_obj,
112
+ "usage": usage,
113
+ "metadata": metadata,
114
+ }
115
+
116
+ # Ensure everything in the payload is converted to str
117
+ for key, value in payload.items():
118
+ try:
119
+ payload[key] = str(value)
120
+ except:
121
+ # non blocking if it can't cast to a str
122
+ pass
123
+
124
+ s3_object_key = (
125
+ payload["id"] + "-time=" + str(start_time)
126
+ ) # we need the s3 key to include the time, so we log cache hits too
127
+
128
+ import json
129
+
130
+ payload = json.dumps(payload)
131
+
132
+ print_verbose(f"\ns3 Logger - Logging payload = {payload}")
133
+
134
+ response = self.s3_client.put_object(
135
+ Bucket=self.bucket_name,
136
+ Key=s3_object_key,
137
+ Body=payload,
138
+ ContentType="application/json",
139
+ ContentLanguage="en",
140
+ ContentDisposition=f'inline; filename="{s3_object_key}.json"',  # use the object key, not the leftover loop variable
141
+ )
142
+
143
+ print_verbose(f"Response from s3:{str(response)}")
144
+
145
+ print_verbose(f"s3 Layer Logging - final response object: {response_obj}")
146
+ return response
147
+ except Exception as e:
148
+ traceback.print_exc()
149
+ print_verbose(f"s3 Layer Error - {str(e)}\n{traceback.format_exc()}")
150
+ pass
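A hedged configuration sketch for the init path above: litellm.s3_callback_params is read on startup, and any value of the form "os.environ/NAME" is resolved through litellm.get_secret(). Bucket, region, and env var names are placeholders:

    import litellm

    litellm.success_callback = ["s3"]
    litellm.s3_callback_params = {
        "s3_bucket_name": "my-litellm-logs",        # placeholder bucket
        "s3_region_name": "us-west-2",              # placeholder region
        # "os.environ/..." values are resolved via litellm.get_secret() above
        "s3_aws_access_key_id": "os.environ/AWS_ACCESS_KEY_ID",
        "s3_aws_secret_access_key": "os.environ/AWS_SECRET_ACCESS_KEY",
    }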
litellm/integrations/supabase.py ADDED
@@ -0,0 +1,117 @@
1
+ #### What this does ####
2
+ # On success + failure, log events to Supabase
3
+
4
+ import dotenv, os
5
+ import requests
6
+
7
+ dotenv.load_dotenv() # Loading env variables using dotenv
8
+ import traceback
9
+ import datetime, subprocess, sys
10
+ import litellm
11
+
12
+
13
+ class Supabase:
14
+ # Class variables or attributes
15
+ supabase_table_name = "request_logs"
16
+
17
+ def __init__(self):
18
+ # Instance variables
19
+ self.supabase_url = os.getenv("SUPABASE_URL")
20
+ self.supabase_key = os.getenv("SUPABASE_KEY")
21
+ try:
22
+ import supabase
23
+ except ImportError:
24
+ subprocess.check_call([sys.executable, "-m", "pip", "install", "supabase"])
25
+ import supabase
26
+ self.supabase_client = supabase.create_client(
27
+ self.supabase_url, self.supabase_key
28
+ )
29
+
30
+ def input_log_event(
31
+ self, model, messages, end_user, litellm_call_id, print_verbose
32
+ ):
33
+ try:
34
+ print_verbose(
35
+ f"Supabase Logging - Enters input logging function for model {model}"
36
+ )
37
+ supabase_data_obj = {
38
+ "model": model,
39
+ "messages": messages,
40
+ "end_user": end_user,
41
+ "status": "initiated",
42
+ "litellm_call_id": litellm_call_id,
43
+ }
44
+ data, count = (
45
+ self.supabase_client.table(self.supabase_table_name)
46
+ .insert(supabase_data_obj)
47
+ .execute()
48
+ )
49
+ print_verbose(f"data: {data}")
50
+ except:
51
+ print_verbose(f"Supabase Logging Error - {traceback.format_exc()}")
52
+ pass
53
+
54
+ def log_event(
55
+ self,
56
+ model,
57
+ messages,
58
+ end_user,
59
+ response_obj,
60
+ start_time,
61
+ end_time,
62
+ litellm_call_id,
63
+ print_verbose,
64
+ ):
65
+ try:
66
+ print_verbose(
67
+ f"Supabase Logging - Enters logging function for model {model}, response_obj: {response_obj}"
68
+ )
69
+
70
+ total_cost = litellm.completion_cost(completion_response=response_obj)
71
+
72
+ response_time = (end_time - start_time).total_seconds()
73
+ if "choices" in response_obj:
74
+ supabase_data_obj = {
75
+ "response_time": response_time,
76
+ "model": response_obj["model"],
77
+ "total_cost": total_cost,
78
+ "messages": messages,
79
+ "response": response_obj["choices"][0]["message"]["content"],
80
+ "end_user": end_user,
81
+ "litellm_call_id": litellm_call_id,
82
+ "status": "success",
83
+ }
84
+ print_verbose(
85
+ f"Supabase Logging - final data object: {supabase_data_obj}"
86
+ )
87
+ data, count = (
88
+ self.supabase_client.table(self.supabase_table_name)
89
+ .upsert(supabase_data_obj, on_conflict="litellm_call_id")
90
+ .execute()
91
+ )
92
+ elif "error" in response_obj:
93
+ if "Unable to map your input to a model." in response_obj["error"]:
94
+ total_cost = 0
95
+ supabase_data_obj = {
96
+ "response_time": response_time,
97
+ "model": response_obj["model"],
98
+ "total_cost": total_cost,
99
+ "messages": messages,
100
+ "error": response_obj["error"],
101
+ "end_user": end_user,
102
+ "litellm_call_id": litellm_call_id,
103
+ "status": "failure",
104
+ }
105
+ print_verbose(
106
+ f"Supabase Logging - final data object: {supabase_data_obj}"
107
+ )
108
+ data, count = (
109
+ self.supabase_client.table(self.supabase_table_name)
110
+ .upsert(supabase_data_obj, on_conflict="litellm_call_id")
111
+ .execute()
112
+ )
113
+
114
+ except:
115
+ # traceback.print_exc()
116
+ print_verbose(f"Supabase Logging Error - {traceback.format_exc()}")
117
+ pass
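A minimal setup sketch for the logger above, which reads SUPABASE_URL / SUPABASE_KEY and upserts rows into the request_logs table; the URL and key are placeholders:

    import os
    import litellm

    os.environ["SUPABASE_URL"] = "https://<project>.supabase.co"  # placeholder
    os.environ["SUPABASE_KEY"] = "<service-role-key>"             # placeholder
    litellm.success_callback = ["supabase"]   # rows land in the "request_logs" table above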
litellm/integrations/traceloop.py ADDED
@@ -0,0 +1,114 @@
1
+ class TraceloopLogger:
2
+ def __init__(self):
3
+ from traceloop.sdk.tracing.tracing import TracerWrapper
4
+ from traceloop.sdk import Traceloop
5
+
6
+ Traceloop.init(app_name="Litellm-Server", disable_batch=True)
7
+ self.tracer_wrapper = TracerWrapper()
8
+
9
+ def log_event(self, kwargs, response_obj, start_time, end_time, print_verbose):
10
+ from opentelemetry.trace import SpanKind
11
+ from opentelemetry.semconv.ai import SpanAttributes
12
+
13
+ try:
14
+ tracer = self.tracer_wrapper.get_tracer()
15
+
16
+ model = kwargs.get("model")
17
+
18
+ # LiteLLM uses the standard OpenAI library, so it's already handled by Traceloop SDK
19
+ if kwargs.get("litellm_params").get("custom_llm_provider") == "openai":
20
+ return
21
+
22
+ optional_params = kwargs.get("optional_params", {})
23
+ with tracer.start_as_current_span(
24
+ "litellm.completion",
25
+ kind=SpanKind.CLIENT,
26
+ ) as span:
27
+ if span.is_recording():
28
+ span.set_attribute(
29
+ SpanAttributes.LLM_REQUEST_MODEL, kwargs.get("model")
30
+ )
31
+ if "stop" in optional_params:
32
+ span.set_attribute(
33
+ SpanAttributes.LLM_CHAT_STOP_SEQUENCES,
34
+ optional_params.get("stop"),
35
+ )
36
+ if "frequency_penalty" in optional_params:
37
+ span.set_attribute(
38
+ SpanAttributes.LLM_FREQUENCY_PENALTY,
39
+ optional_params.get("frequency_penalty"),
40
+ )
41
+ if "presence_penalty" in optional_params:
42
+ span.set_attribute(
43
+ SpanAttributes.LLM_PRESENCE_PENALTY,
44
+ optional_params.get("presence_penalty"),
45
+ )
46
+ if "top_p" in optional_params:
47
+ span.set_attribute(
48
+ SpanAttributes.LLM_TOP_P, optional_params.get("top_p")
49
+ )
50
+ if "tools" in optional_params or "functions" in optional_params:
51
+ span.set_attribute(
52
+ SpanAttributes.LLM_REQUEST_FUNCTIONS,
53
+ optional_params.get(
54
+ "tools", optional_params.get("functions")
55
+ ),
56
+ )
57
+ if "user" in optional_params:
58
+ span.set_attribute(
59
+ SpanAttributes.LLM_USER, optional_params.get("user")
60
+ )
61
+ if "max_tokens" in optional_params:
62
+ span.set_attribute(
63
+ SpanAttributes.LLM_REQUEST_MAX_TOKENS,
64
+ optional_params.get("max_tokens"),  # read from optional_params, which the check above inspects
65
+ )
66
+ if "temperature" in optional_params:
67
+ span.set_attribute(
68
+ SpanAttributes.LLM_TEMPERATURE, optional_params.get("temperature")
69
+ )
70
+
71
+ for idx, prompt in enumerate(kwargs.get("messages")):
72
+ span.set_attribute(
73
+ f"{SpanAttributes.LLM_PROMPTS}.{idx}.role",
74
+ prompt.get("role"),
75
+ )
76
+ span.set_attribute(
77
+ f"{SpanAttributes.LLM_PROMPTS}.{idx}.content",
78
+ prompt.get("content"),
79
+ )
80
+
81
+ span.set_attribute(
82
+ SpanAttributes.LLM_RESPONSE_MODEL, response_obj.get("model")
83
+ )
84
+ usage = response_obj.get("usage")
85
+ if usage:
86
+ span.set_attribute(
87
+ SpanAttributes.LLM_USAGE_TOTAL_TOKENS,
88
+ usage.get("total_tokens"),
89
+ )
90
+ span.set_attribute(
91
+ SpanAttributes.LLM_USAGE_COMPLETION_TOKENS,
92
+ usage.get("completion_tokens"),
93
+ )
94
+ span.set_attribute(
95
+ SpanAttributes.LLM_USAGE_PROMPT_TOKENS,
96
+ usage.get("prompt_tokens"),
97
+ )
98
+
99
+ for idx, choice in enumerate(response_obj.get("choices")):
100
+ span.set_attribute(
101
+ f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.finish_reason",
102
+ choice.get("finish_reason"),
103
+ )
104
+ span.set_attribute(
105
+ f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.role",
106
+ choice.get("message").get("role"),
107
+ )
108
+ span.set_attribute(
109
+ f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.content",
110
+ choice.get("message").get("content"),
111
+ )
112
+
113
+ except Exception as e:
114
+ print_verbose(f"Traceloop Layer Error - {e}")
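A short registration sketch for the logger above; note the early return in log_event(): OpenAI calls are skipped because the Traceloop SDK already instruments the OpenAI client directly:

    import litellm

    litellm.success_callback = ["traceloop"]  # instantiates TraceloopLogger above
    # Spans are only emitted for non-OpenAI providers, per the custom_llm_provider check above.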
litellm/integrations/weights_biases.py ADDED
@@ -0,0 +1,223 @@
1
+ imported_openAIResponse = True
2
+ try:
3
+ import io
4
+ import logging
5
+ import sys
6
+ from typing import Any, Dict, List, Optional, TypeVar
7
+
8
+ from wandb.sdk.data_types import trace_tree
9
+
10
+ if sys.version_info >= (3, 8):
11
+ from typing import Literal, Protocol
12
+ else:
13
+ from typing_extensions import Literal, Protocol
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+ K = TypeVar("K", bound=str)
18
+ V = TypeVar("V")
19
+
20
+ class OpenAIResponse(Protocol[K, V]): # type: ignore
21
+ # contains a (known) object attribute
22
+ object: Literal["chat.completion", "edit", "text_completion"]
23
+
24
+ def __getitem__(self, key: K) -> V:
25
+ ... # pragma: no cover
26
+
27
+ def get(self, key: K, default: Optional[V] = None) -> Optional[V]:
28
+ ... # pragma: no cover
29
+
30
+ class OpenAIRequestResponseResolver:
31
+ def __call__(
32
+ self,
33
+ request: Dict[str, Any],
34
+ response: OpenAIResponse,
35
+ time_elapsed: float,
36
+ ) -> Optional[trace_tree.WBTraceTree]:
37
+ try:
38
+ if response["object"] == "edit":
39
+ return self._resolve_edit(request, response, time_elapsed)
40
+ elif response["object"] == "text_completion":
41
+ return self._resolve_completion(request, response, time_elapsed)
42
+ elif response["object"] == "chat.completion":
43
+ return self._resolve_chat_completion(
44
+ request, response, time_elapsed
45
+ )
46
+ else:
47
+ logger.info(f"Unknown OpenAI response object: {response['object']}")
48
+ except Exception as e:
49
+ logger.warning(f"Failed to resolve request/response: {e}")
50
+ return None
51
+
52
+ @staticmethod
53
+ def results_to_trace_tree(
54
+ request: Dict[str, Any],
55
+ response: OpenAIResponse,
56
+ results: List[trace_tree.Result],
57
+ time_elapsed: float,
58
+ ) -> trace_tree.WBTraceTree:
59
+ """Converts the request, response, and results into a trace tree.
60
+
61
+ params:
62
+ request: The request dictionary
63
+ response: The response object
64
+ results: A list of results object
65
+ time_elapsed: The time elapsed in seconds
66
+ returns:
67
+ A wandb trace tree object.
68
+ """
69
+ start_time_ms = int(round(response["created"] * 1000))
70
+ end_time_ms = start_time_ms + int(round(time_elapsed * 1000))
71
+ span = trace_tree.Span(
72
+ name=f"{response.get('model', 'openai')}_{response['object']}_{response.get('created')}",
73
+ attributes=dict(response), # type: ignore
74
+ start_time_ms=start_time_ms,
75
+ end_time_ms=end_time_ms,
76
+ span_kind=trace_tree.SpanKind.LLM,
77
+ results=results,
78
+ )
79
+ model_obj = {"request": request, "response": response, "_kind": "openai"}
80
+ return trace_tree.WBTraceTree(root_span=span, model_dict=model_obj)
81
+
82
+ def _resolve_edit(
83
+ self,
84
+ request: Dict[str, Any],
85
+ response: OpenAIResponse,
86
+ time_elapsed: float,
87
+ ) -> trace_tree.WBTraceTree:
88
+ """Resolves the request and response objects for `openai.Edit`."""
89
+ request_str = (
90
+ f"\n\n**Instruction**: {request['instruction']}\n\n"
91
+ f"**Input**: {request['input']}\n"
92
+ )
93
+ choices = [
94
+ f"\n\n**Edited**: {choice['text']}\n" for choice in response["choices"]
95
+ ]
96
+
97
+ return self._request_response_result_to_trace(
98
+ request=request,
99
+ response=response,
100
+ request_str=request_str,
101
+ choices=choices,
102
+ time_elapsed=time_elapsed,
103
+ )
104
+
105
+ def _resolve_completion(
106
+ self,
107
+ request: Dict[str, Any],
108
+ response: OpenAIResponse,
109
+ time_elapsed: float,
110
+ ) -> trace_tree.WBTraceTree:
111
+ """Resolves the request and response objects for `openai.Completion`."""
112
+ request_str = f"\n\n**Prompt**: {request['prompt']}\n"
113
+ choices = [
114
+ f"\n\n**Completion**: {choice['text']}\n"
115
+ for choice in response["choices"]
116
+ ]
117
+
118
+ return self._request_response_result_to_trace(
119
+ request=request,
120
+ response=response,
121
+ request_str=request_str,
122
+ choices=choices,
123
+ time_elapsed=time_elapsed,
124
+ )
125
+
126
+ def _resolve_chat_completion(
127
+ self,
128
+ request: Dict[str, Any],
129
+ response: OpenAIResponse,
130
+ time_elapsed: float,
131
+ ) -> trace_tree.WBTraceTree:
132
+ """Resolves the request and response objects for `openai.Completion`."""
133
+ prompt = io.StringIO()
134
+ for message in request["messages"]:
135
+ prompt.write(f"\n\n**{message['role']}**: {message['content']}\n")
136
+ request_str = prompt.getvalue()
137
+
138
+ choices = [
139
+ f"\n\n**{choice['message']['role']}**: {choice['message']['content']}\n"
140
+ for choice in response["choices"]
141
+ ]
142
+
143
+ return self._request_response_result_to_trace(
144
+ request=request,
145
+ response=response,
146
+ request_str=request_str,
147
+ choices=choices,
148
+ time_elapsed=time_elapsed,
149
+ )
150
+
151
+ def _request_response_result_to_trace(
152
+ self,
153
+ request: Dict[str, Any],
154
+ response: OpenAIResponse,
155
+ request_str: str,
156
+ choices: List[str],
157
+ time_elapsed: float,
158
+ ) -> trace_tree.WBTraceTree:
159
+ """Resolves the request and response objects for `openai.Completion`."""
160
+ results = [
161
+ trace_tree.Result(
162
+ inputs={"request": request_str},
163
+ outputs={"response": choice},
164
+ )
165
+ for choice in choices
166
+ ]
167
+ trace = self.results_to_trace_tree(request, response, results, time_elapsed)
168
+ return trace
169
+
170
+ except:
171
+ imported_openAIResponse = False
172
+
173
+
174
+ #### What this does ####
175
+ # On success, logs events to Weights & Biases
176
+ import dotenv, os
177
+ import requests
178
+ import requests
179
+ from datetime import datetime
180
+
181
+ dotenv.load_dotenv() # Loading env variables using dotenv
182
+ import traceback
183
+
184
+
185
+ class WeightsBiasesLogger:
186
+ # Class variables or attributes
187
+ def __init__(self):
188
+ try:
189
+ import wandb
190
+ except:
191
+ raise Exception(
192
+ "\033[91m wandb not installed, try running 'pip install wandb' to fix this error\033[0m"
193
+ )
194
+ if imported_openAIResponse == False:
195
+ raise Exception(
196
+ "\033[91m wandb is installed, but its trace_tree / OpenAIResponse helpers could not be imported - try running 'pip install -U wandb' to fix this error\033[0m"
197
+ )
198
+ self.resolver = OpenAIRequestResponseResolver()
199
+
200
+ def log_event(self, kwargs, response_obj, start_time, end_time, print_verbose):
201
+ # Method definition
202
+ import wandb
203
+
204
+ try:
205
+ print_verbose(f"W&B Logging - Enters logging function for model {kwargs}")
206
+ run = wandb.init()
207
+ print_verbose(response_obj)
208
+
209
+ trace = self.resolver(
210
+ kwargs, response_obj, (end_time - start_time).total_seconds()
211
+ )
212
+
213
+ if trace is not None:
214
+ run.log({"trace": trace})
215
+
216
+ run.finish()
217
+ print_verbose(
218
+ f"W&B Logging Logging - final response object: {response_obj}"
219
+ )
220
+ except:
221
+ # traceback.print_exc()
222
+ print_verbose(f"W&B Logging Layer Error - {traceback.format_exc()}")
223
+ pass
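A worked sketch of the timestamp arithmetic in results_to_trace_tree() above: response["created"] is in seconds since the epoch and time_elapsed is in seconds, and both are converted to milliseconds for the W&B span. The numbers are made up:

    created = 1_700_000_000   # response["created"], seconds since epoch
    time_elapsed = 1.25       # seconds

    start_time_ms = int(round(created * 1000))                      # 1700000000000
    end_time_ms = start_time_ms + int(round(time_elapsed * 1000))   # start + 1250 ms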
litellm/llms/__init__.py ADDED
@@ -0,0 +1 @@
1
+ from . import *
litellm/llms/ai21.py ADDED
@@ -0,0 +1,212 @@
1
+ import os, types, traceback
2
+ import json
3
+ from enum import Enum
4
+ import requests
5
+ import time, httpx
6
+ from typing import Callable, Optional
7
+ from litellm.utils import ModelResponse, Choices, Message
8
+ import litellm
9
+
10
+
11
+ class AI21Error(Exception):
12
+ def __init__(self, status_code, message):
13
+ self.status_code = status_code
14
+ self.message = message
15
+ self.request = httpx.Request(
16
+ method="POST", url="https://api.ai21.com/studio/v1/"
17
+ )
18
+ self.response = httpx.Response(status_code=status_code, request=self.request)
19
+ super().__init__(
20
+ self.message
21
+ ) # Call the base class constructor with the parameters it needs
22
+
23
+
24
+ class AI21Config:
25
+ """
26
+ Reference: https://docs.ai21.com/reference/j2-complete-ref
27
+
28
+ The class `AI21Config` provides configuration for the AI21's API interface. Below are the parameters:
29
+
30
+ - `numResults` (int32): Number of completions to sample and return. Optional, default is 1. If the temperature is greater than 0 (non-greedy decoding), a value greater than 1 can be meaningful.
31
+
32
+ - `maxTokens` (int32): The maximum number of tokens to generate per result. Optional, default is 16. If no `stopSequences` are given, generation stops after producing `maxTokens`.
33
+
34
+ - `minTokens` (int32): The minimum number of tokens to generate per result. Optional, default is 0. If `stopSequences` are given, they are ignored until `minTokens` are generated.
35
+
36
+ - `temperature` (float): Modifies the distribution from which tokens are sampled. Optional, default is 0.7. A value of 0 essentially disables sampling and results in greedy decoding.
37
+
38
+ - `topP` (float): Used for sampling tokens from the corresponding top percentile of probability mass. Optional, default is 1. For instance, a value of 0.9 considers only tokens comprising the top 90% probability mass.
39
+
40
+ - `stopSequences` (array of strings): Stops decoding if any of the input strings is generated. Optional.
41
+
42
+ - `topKReturn` (int32): Range between 0 to 10, including both. Optional, default is 0. Specifies the top-K alternative tokens to return. A non-zero value includes the string representations and log-probabilities for each of the top-K alternatives at each position.
43
+
44
+ - `frequencyPenalty` (object): Placeholder for frequency penalty object.
45
+
46
+ - `presencePenalty` (object): Placeholder for presence penalty object.
47
+
48
+ - `countPenalty` (object): Placeholder for count penalty object.
49
+ """
50
+
51
+ numResults: Optional[int] = None
52
+ maxTokens: Optional[int] = None
53
+ minTokens: Optional[int] = None
54
+ temperature: Optional[float] = None
55
+ topP: Optional[float] = None
56
+ stopSequences: Optional[list] = None
57
+ topKReturn: Optional[int] = None
58
+ frequencePenalty: Optional[dict] = None
59
+ presencePenalty: Optional[dict] = None
60
+ countPenalty: Optional[dict] = None
61
+
62
+ def __init__(
63
+ self,
64
+ numResults: Optional[int] = None,
65
+ maxTokens: Optional[int] = None,
66
+ minTokens: Optional[int] = None,
67
+ temperature: Optional[float] = None,
68
+ topP: Optional[float] = None,
69
+ stopSequences: Optional[list] = None,
70
+ topKReturn: Optional[int] = None,
71
+ frequencePenalty: Optional[dict] = None,
72
+ presencePenalty: Optional[dict] = None,
73
+ countPenalty: Optional[dict] = None,
74
+ ) -> None:
75
+ locals_ = locals()
76
+ for key, value in locals_.items():
77
+ if key != "self" and value is not None:
78
+ setattr(self.__class__, key, value)
79
+
80
+ @classmethod
81
+ def get_config(cls):
82
+ return {
83
+ k: v
84
+ for k, v in cls.__dict__.items()
85
+ if not k.startswith("__")
86
+ and not isinstance(
87
+ v,
88
+ (
89
+ types.FunctionType,
90
+ types.BuiltinFunctionType,
91
+ classmethod,
92
+ staticmethod,
93
+ ),
94
+ )
95
+ and v is not None
96
+ }
97
+
98
+
99
+ def validate_environment(api_key):
100
+ if api_key is None:
101
+ raise ValueError(
102
+ "Missing AI21 API Key - A call is being made to ai21 but no key is set either in the environment variables or via params"
103
+ )
104
+ headers = {
105
+ "accept": "application/json",
106
+ "content-type": "application/json",
107
+ "Authorization": "Bearer " + api_key,
108
+ }
109
+ return headers
110
+
111
+
112
+ def completion(
113
+ model: str,
114
+ messages: list,
115
+ api_base: str,
116
+ model_response: ModelResponse,
117
+ print_verbose: Callable,
118
+ encoding,
119
+ api_key,
120
+ logging_obj,
121
+ optional_params=None,
122
+ litellm_params=None,
123
+ logger_fn=None,
124
+ ):
125
+ headers = validate_environment(api_key)
126
+ model = model
127
+ prompt = ""
128
+ for message in messages:
129
+ if "role" in message:
130
+ if message["role"] == "user":
131
+ prompt += f"{message['content']}"
132
+ else:
133
+ prompt += f"{message['content']}"
134
+ else:
135
+ prompt += f"{message['content']}"
136
+
137
+ ## Load Config
138
+ config = litellm.AI21Config.get_config()
139
+ for k, v in config.items():
140
+ if (
141
+ k not in optional_params
142
+ ): # completion(top_k=3) > ai21_config(top_k=3) <- allows for dynamic variables to be passed in
143
+ optional_params[k] = v
144
+
145
+ data = {
146
+ "prompt": prompt,
147
+ # "instruction": prompt, # some baseten models require the prompt to be passed in via the 'instruction' kwarg
148
+ **optional_params,
149
+ }
150
+
151
+ ## LOGGING
152
+ logging_obj.pre_call(
153
+ input=prompt,
154
+ api_key=api_key,
155
+ additional_args={"complete_input_dict": data},
156
+ )
157
+ ## COMPLETION CALL
158
+ response = requests.post(
159
+ api_base + model + "/complete", headers=headers, data=json.dumps(data)
160
+ )
161
+ if response.status_code != 200:
162
+ raise AI21Error(status_code=response.status_code, message=response.text)
163
+ if "stream" in optional_params and optional_params["stream"] == True:
164
+ return response.iter_lines()
165
+ else:
166
+ ## LOGGING
167
+ logging_obj.post_call(
168
+ input=prompt,
169
+ api_key=api_key,
170
+ original_response=response.text,
171
+ additional_args={"complete_input_dict": data},
172
+ )
173
+ ## RESPONSE OBJECT
174
+ completion_response = response.json()
175
+ try:
176
+ choices_list = []
177
+ for idx, item in enumerate(completion_response["completions"]):
178
+ if len(item["data"]["text"]) > 0:
179
+ message_obj = Message(content=item["data"]["text"])
180
+ else:
181
+ message_obj = Message(content=None)
182
+ choice_obj = Choices(
183
+ finish_reason=item["finishReason"]["reason"],
184
+ index=idx + 1,
185
+ message=message_obj,
186
+ )
187
+ choices_list.append(choice_obj)
188
+ model_response["choices"] = choices_list
189
+ except Exception as e:
190
+ raise AI21Error(
191
+ message=traceback.format_exc(), status_code=response.status_code
192
+ )
193
+
194
+ ## CALCULATING USAGE - baseten charges on time, not tokens - have some mapping of cost here.
195
+ prompt_tokens = len(encoding.encode(prompt))
196
+ completion_tokens = len(
197
+ encoding.encode(model_response["choices"][0]["message"].get("content"))
198
+ )
199
+
200
+ model_response["created"] = int(time.time())
201
+ model_response["model"] = model
202
+ model_response["usage"] = {
203
+ "prompt_tokens": prompt_tokens,
204
+ "completion_tokens": completion_tokens,
205
+ "total_tokens": prompt_tokens + completion_tokens,
206
+ }
207
+ return model_response
208
+
209
+
210
+ def embedding():
211
+ # logic for parsing in - calling - parsing out model embedding calls
212
+ pass
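A small sketch of the "## Load Config" merge in completion() above: values passed on the call itself win over module-level defaults set via litellm.AI21Config. The parameter values are arbitrary:

    import litellm

    litellm.AI21Config(maxTokens=256, temperature=0.3)   # module-level defaults

    optional_params = {"temperature": 0.9}               # value passed on the call itself
    for k, v in litellm.AI21Config.get_config().items():
        if k not in optional_params:                     # the call-level value wins
            optional_params[k] = v

    # optional_params is now {"temperature": 0.9, "maxTokens": 256}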
litellm/llms/aleph_alpha.py ADDED
@@ -0,0 +1,304 @@
1
+ import os, types
2
+ import json
3
+ from enum import Enum
4
+ import requests
5
+ import time
6
+ from typing import Callable, Optional
7
+ import litellm
8
+ from litellm.utils import ModelResponse, Choices, Message, Usage
9
+ import httpx
10
+
11
+
12
+ class AlephAlphaError(Exception):
13
+ def __init__(self, status_code, message):
14
+ self.status_code = status_code
15
+ self.message = message
16
+ self.request = httpx.Request(
17
+ method="POST", url="https://api.aleph-alpha.com/complete"
18
+ )
19
+ self.response = httpx.Response(status_code=status_code, request=self.request)
20
+ super().__init__(
21
+ self.message
22
+ ) # Call the base class constructor with the parameters it needs
23
+
24
+
25
+ class AlephAlphaConfig:
26
+ """
27
+ Reference: https://docs.aleph-alpha.com/api/complete/
28
+
29
+ The `AlephAlphaConfig` class represents the configuration for the Aleph Alpha API. Here are the properties:
30
+
31
+ - `maximum_tokens` (integer, required): The maximum number of tokens to be generated by the completion. The sum of input tokens and maximum tokens may not exceed 2048.
32
+
33
+ - `minimum_tokens` (integer, optional; default value: 0): Generate at least this number of tokens before an end-of-text token is generated.
34
+
35
+ - `echo` (boolean, optional; default value: false): Whether to echo the prompt in the completion.
36
+
37
+ - `temperature` (number, nullable; default value: 0): Adjusts how creatively the model generates outputs. Use combinations of temperature, top_k, and top_p sensibly.
38
+
39
+ - `top_k` (integer, nullable; default value: 0): Introduces randomness into token generation by considering the top k most likely options.
40
+
41
+ - `top_p` (number, nullable; default value: 0): Adds randomness by considering the smallest set of tokens whose cumulative probability exceeds top_p.
42
+
43
+ - `presence_penalty`, `frequency_penalty`, `sequence_penalty` (number, nullable; default value: 0): Various penalties that can reduce repetition.
44
+
45
+ - `sequence_penalty_min_length` (integer; default value: 2): Minimum number of tokens to be considered as a sequence.
46
+
47
+ - `repetition_penalties_include_prompt`, `repetition_penalties_include_completion`, `use_multiplicative_presence_penalty`,`use_multiplicative_frequency_penalty`,`use_multiplicative_sequence_penalty` (boolean, nullable; default value: false): Various settings that adjust how the repetition penalties are applied.
48
+
49
+ - `penalty_bias` (string, nullable): Text used in addition to the penalized tokens for repetition penalties.
50
+
51
+ - `penalty_exceptions` (string[], nullable): Strings that may be generated without penalty.
52
+
53
+ - `penalty_exceptions_include_stop_sequences` (boolean, nullable; default value: true): Include all stop_sequences in penalty_exceptions.
54
+
55
+ - `best_of` (integer, nullable; default value: 1): The number of completions will be generated on the server side.
56
+
57
+ - `n` (integer, nullable; default value: 1): The number of completions to return.
58
+
59
+ - `logit_bias` (object, nullable): Adjust the logit scores before sampling.
60
+
61
+ - `log_probs` (integer, nullable): Number of top log probabilities for each token generated.
62
+
63
+ - `stop_sequences` (string[], nullable): List of strings that will stop generation if they're generated.
64
+
65
+ - `tokens` (boolean, nullable; default value: false): Flag indicating whether individual tokens of the completion should be returned or not.
66
+
67
+ - `raw_completion` (boolean; default value: false): if True, the raw completion of the model will be returned.
68
+
69
+ - `disable_optimizations` (boolean, nullable; default value: false): Disables any applied optimizations to both your prompt and completion.
70
+
71
+ - `completion_bias_inclusion`, `completion_bias_exclusion` (string[], default value: []): Set of strings to bias the generation of tokens.
72
+
73
+ - `completion_bias_inclusion_first_token_only`, `completion_bias_exclusion_first_token_only` (boolean; default value: false): Consider only the first token for the completion_bias_inclusion/exclusion.
74
+
75
+ - `contextual_control_threshold` (number, nullable): Control over how similar tokens are controlled.
76
+
77
+ - `control_log_additive` (boolean; default value: true): Method of applying control to attention scores.
78
+ """
79
+
80
+ maximum_tokens: Optional[
81
+ int
82
+ ] = litellm.max_tokens # aleph alpha requires max tokens
83
+ minimum_tokens: Optional[int] = None
84
+ echo: Optional[bool] = None
85
+ temperature: Optional[int] = None
86
+ top_k: Optional[int] = None
87
+ top_p: Optional[int] = None
88
+ presence_penalty: Optional[int] = None
89
+ frequency_penalty: Optional[int] = None
90
+ sequence_penalty: Optional[int] = None
91
+ sequence_penalty_min_length: Optional[int] = None
92
+ repetition_penalties_include_prompt: Optional[bool] = None
93
+ repetition_penalties_include_completion: Optional[bool] = None
94
+ use_multiplicative_presence_penalty: Optional[bool] = None
95
+ use_multiplicative_frequency_penalty: Optional[bool] = None
96
+ use_multiplicative_sequence_penalty: Optional[bool] = None
97
+ penalty_bias: Optional[str] = None
98
+ penalty_exceptions_include_stop_sequences: Optional[bool] = None
99
+ best_of: Optional[int] = None
100
+ n: Optional[int] = None
101
+ logit_bias: Optional[dict] = None
102
+ log_probs: Optional[int] = None
103
+ stop_sequences: Optional[list] = None
104
+ tokens: Optional[bool] = None
105
+ raw_completion: Optional[bool] = None
106
+ disable_optimizations: Optional[bool] = None
107
+ completion_bias_inclusion: Optional[list] = None
108
+ completion_bias_exclusion: Optional[list] = None
109
+ completion_bias_inclusion_first_token_only: Optional[bool] = None
110
+ completion_bias_exclusion_first_token_only: Optional[bool] = None
111
+ contextual_control_threshold: Optional[int] = None
112
+ control_log_additive: Optional[bool] = None
113
+
114
+ def __init__(
115
+ self,
116
+ maximum_tokens: Optional[int] = None,
117
+ minimum_tokens: Optional[int] = None,
118
+ echo: Optional[bool] = None,
119
+ temperature: Optional[int] = None,
120
+ top_k: Optional[int] = None,
121
+ top_p: Optional[int] = None,
122
+ presence_penalty: Optional[int] = None,
123
+ frequency_penalty: Optional[int] = None,
124
+ sequence_penalty: Optional[int] = None,
125
+ sequence_penalty_min_length: Optional[int] = None,
126
+ repetition_penalties_include_prompt: Optional[bool] = None,
127
+ repetition_penalties_include_completion: Optional[bool] = None,
128
+ use_multiplicative_presence_penalty: Optional[bool] = None,
129
+ use_multiplicative_frequency_penalty: Optional[bool] = None,
130
+ use_multiplicative_sequence_penalty: Optional[bool] = None,
131
+ penalty_bias: Optional[str] = None,
132
+ penalty_exceptions_include_stop_sequences: Optional[bool] = None,
133
+ best_of: Optional[int] = None,
134
+ n: Optional[int] = None,
135
+ logit_bias: Optional[dict] = None,
136
+ log_probs: Optional[int] = None,
137
+ stop_sequences: Optional[list] = None,
138
+ tokens: Optional[bool] = None,
139
+ raw_completion: Optional[bool] = None,
140
+ disable_optimizations: Optional[bool] = None,
141
+ completion_bias_inclusion: Optional[list] = None,
142
+ completion_bias_exclusion: Optional[list] = None,
143
+         completion_bias_inclusion_first_token_only: Optional[bool] = None,
+         completion_bias_exclusion_first_token_only: Optional[bool] = None,
+         contextual_control_threshold: Optional[int] = None,
+         control_log_additive: Optional[bool] = None,
+     ) -> None:
+         locals_ = locals()
+         for key, value in locals_.items():
+             if key != "self" and value is not None:
+                 setattr(self.__class__, key, value)
+
+     @classmethod
+     def get_config(cls):
+         return {
+             k: v
+             for k, v in cls.__dict__.items()
+             if not k.startswith("__")
+             and not isinstance(
+                 v,
+                 (
+                     types.FunctionType,
+                     types.BuiltinFunctionType,
+                     classmethod,
+                     staticmethod,
+                 ),
+             )
+             and v is not None
+         }
+
+
+ def validate_environment(api_key):
+     headers = {
+         "accept": "application/json",
+         "content-type": "application/json",
+     }
+     if api_key:
+         headers["Authorization"] = f"Bearer {api_key}"
+     return headers
+
+
+ def completion(
+     model: str,
+     messages: list,
+     api_base: str,
+     model_response: ModelResponse,
+     print_verbose: Callable,
+     encoding,
+     api_key,
+     logging_obj,
+     optional_params=None,
+     litellm_params=None,
+     logger_fn=None,
+     default_max_tokens_to_sample=None,
+ ):
+     headers = validate_environment(api_key)
+
+     ## Load Config
+     config = litellm.AlephAlphaConfig.get_config()
+     for k, v in config.items():
+         if (
+             k not in optional_params
+         ):  # completion(top_k=3) > aleph_alpha_config(top_k=3) <- allows for dynamic variables to be passed in
+             optional_params[k] = v
+
+     completion_url = api_base
+     model = model
+     prompt = ""
+     if "control" in model:  # follow the ###Instruction / ###Response format
+         for idx, message in enumerate(messages):
+             if "role" in message:
+                 if (
+                     idx == 0
+                 ):  # set first message as instruction (required), let later user messages be input
+                     prompt += f"###Instruction: {message['content']}"
+                 else:
+                     if message["role"] == "system":
+                         prompt += f"###Instruction: {message['content']}"
+                     elif message["role"] == "user":
+                         prompt += f"###Input: {message['content']}"
+                     else:
+                         prompt += f"###Response: {message['content']}"
+             else:
+                 prompt += f"{message['content']}"
+     else:
+         prompt = " ".join(message["content"] for message in messages)
+     data = {
+         "model": model,
+         "prompt": prompt,
+         **optional_params,
+     }
+
+     ## LOGGING
+     logging_obj.pre_call(
+         input=prompt,
+         api_key=api_key,
+         additional_args={"complete_input_dict": data},
+     )
+     ## COMPLETION CALL
+     response = requests.post(
+         completion_url,
+         headers=headers,
+         data=json.dumps(data),
+         stream=optional_params["stream"] if "stream" in optional_params else False,
+     )
+     if "stream" in optional_params and optional_params["stream"] == True:
+         return response.iter_lines()
+     else:
+         ## LOGGING
+         logging_obj.post_call(
+             input=prompt,
+             api_key=api_key,
+             original_response=response.text,
+             additional_args={"complete_input_dict": data},
+         )
+         print_verbose(f"raw model_response: {response.text}")
+         ## RESPONSE OBJECT
+         completion_response = response.json()
+         if "error" in completion_response:
+             raise AlephAlphaError(
+                 message=completion_response["error"],
+                 status_code=response.status_code,
+             )
+         else:
+             try:
+                 choices_list = []
+                 for idx, item in enumerate(completion_response["completions"]):
+                     if len(item["completion"]) > 0:
+                         message_obj = Message(content=item["completion"])
+                     else:
+                         message_obj = Message(content=None)
+                     choice_obj = Choices(
+                         finish_reason=item["finish_reason"],
+                         index=idx + 1,
+                         message=message_obj,
+                     )
+                     choices_list.append(choice_obj)
+                 model_response["choices"] = choices_list
+             except:
+                 raise AlephAlphaError(
+                     message=json.dumps(completion_response),
+                     status_code=response.status_code,
+                 )
+
+         ## CALCULATING USAGE
+         prompt_tokens = len(encoding.encode(prompt))
+         completion_tokens = len(
+             encoding.encode(model_response["choices"][0]["message"]["content"])
+         )
+
+         model_response["created"] = int(time.time())
+         model_response["model"] = model
+         usage = Usage(
+             prompt_tokens=prompt_tokens,
+             completion_tokens=completion_tokens,
+             total_tokens=prompt_tokens + completion_tokens,
+         )
+         model_response.usage = usage
+         return model_response
+
+
+ def embedding():
+     # logic for parsing in - calling - parsing out model embedding calls
+     pass
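The handler above relies on the same config-merge convention as the other providers in this upload: `AlephAlphaConfig.get_config()` exposes any class-level defaults, and they are copied into `optional_params` only when the caller has not already supplied that key. A minimal, self-contained sketch of that behaviour (the class and parameter names below are illustrative, not part of litellm):

    import types
    from typing import Optional

    class ExampleProviderConfig:  # hypothetical stand-in for AlephAlphaConfig
        maximum_tokens: Optional[int] = 256   # a class-level default
        temperature: Optional[float] = None   # unset -> not exported

        @classmethod
        def get_config(cls):
            # keep plain, non-None, non-dunder class attributes, as above
            return {
                k: v
                for k, v in cls.__dict__.items()
                if not k.startswith("__")
                and not isinstance(v, (types.FunctionType, classmethod, staticmethod))
                and v is not None
            }

    optional_params = {"temperature": 0.1}        # what the caller passed
    for k, v in ExampleProviderConfig.get_config().items():
        if k not in optional_params:              # caller values always win
            optional_params[k] = v
    print(optional_params)                        # {'temperature': 0.1, 'maximum_tokens': 256}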
litellm/llms/anthropic.py ADDED
@@ -0,0 +1,215 @@
+ import os, types
+ import json
+ from enum import Enum
+ import requests
+ import time
+ from typing import Callable, Optional
+ from litellm.utils import ModelResponse, Usage
+ import litellm
+ from .prompt_templates.factory import prompt_factory, custom_prompt
+ import httpx
+
+
+ class AnthropicConstants(Enum):
+     HUMAN_PROMPT = "\n\nHuman: "
+     AI_PROMPT = "\n\nAssistant: "
+
+
+ class AnthropicError(Exception):
+     def __init__(self, status_code, message):
+         self.status_code = status_code
+         self.message = message
+         self.request = httpx.Request(
+             method="POST", url="https://api.anthropic.com/v1/complete"
+         )
+         self.response = httpx.Response(status_code=status_code, request=self.request)
+         super().__init__(
+             self.message
+         )  # Call the base class constructor with the parameters it needs
+
+
+ class AnthropicConfig:
+     """
+     Reference: https://docs.anthropic.com/claude/reference/complete_post
+
+     to pass metadata to anthropic, it's {"user_id": "any-relevant-information"}
+     """
+
+     max_tokens_to_sample: Optional[
+         int
+     ] = litellm.max_tokens  # anthropic requires a default
+     stop_sequences: Optional[list] = None
+     temperature: Optional[int] = None
+     top_p: Optional[int] = None
+     top_k: Optional[int] = None
+     metadata: Optional[dict] = None
+
+     def __init__(
+         self,
+         max_tokens_to_sample: Optional[int] = 256,  # anthropic requires a default
+         stop_sequences: Optional[list] = None,
+         temperature: Optional[int] = None,
+         top_p: Optional[int] = None,
+         top_k: Optional[int] = None,
+         metadata: Optional[dict] = None,
+     ) -> None:
+         locals_ = locals()
+         for key, value in locals_.items():
+             if key != "self" and value is not None:
+                 setattr(self.__class__, key, value)
+
+     @classmethod
+     def get_config(cls):
+         return {
+             k: v
+             for k, v in cls.__dict__.items()
+             if not k.startswith("__")
+             and not isinstance(
+                 v,
+                 (
+                     types.FunctionType,
+                     types.BuiltinFunctionType,
+                     classmethod,
+                     staticmethod,
+                 ),
+             )
+             and v is not None
+         }
+
+
+ # makes headers for API call
+ def validate_environment(api_key):
+     if api_key is None:
+         raise ValueError(
+             "Missing Anthropic API Key - A call is being made to anthropic but no key is set either in the environment variables or via params"
+         )
+     headers = {
+         "accept": "application/json",
+         "anthropic-version": "2023-06-01",
+         "content-type": "application/json",
+         "x-api-key": api_key,
+     }
+     return headers
+
+
+ def completion(
+     model: str,
+     messages: list,
+     api_base: str,
+     custom_prompt_dict: dict,
+     model_response: ModelResponse,
+     print_verbose: Callable,
+     encoding,
+     api_key,
+     logging_obj,
+     optional_params=None,
+     litellm_params=None,
+     logger_fn=None,
+ ):
+     headers = validate_environment(api_key)
+     if model in custom_prompt_dict:
+         # check if the model has a registered custom prompt
+         model_prompt_details = custom_prompt_dict[model]
+         prompt = custom_prompt(
+             role_dict=model_prompt_details["roles"],
+             initial_prompt_value=model_prompt_details["initial_prompt_value"],
+             final_prompt_value=model_prompt_details["final_prompt_value"],
+             messages=messages,
+         )
+     else:
+         prompt = prompt_factory(
+             model=model, messages=messages, custom_llm_provider="anthropic"
+         )
+
+     ## Load Config
+     config = litellm.AnthropicConfig.get_config()
+     for k, v in config.items():
+         if (
+             k not in optional_params
+         ):  # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in
+             optional_params[k] = v
+
+     data = {
+         "model": model,
+         "prompt": prompt,
+         **optional_params,
+     }
+
+     ## LOGGING
+     logging_obj.pre_call(
+         input=prompt,
+         api_key=api_key,
+         additional_args={"complete_input_dict": data, "api_base": api_base},
+     )
+
+     ## COMPLETION CALL
+     if "stream" in optional_params and optional_params["stream"] == True:
+         response = requests.post(
+             api_base,
+             headers=headers,
+             data=json.dumps(data),
+             stream=optional_params["stream"],
+         )
+
+         if response.status_code != 200:
+             raise AnthropicError(
+                 status_code=response.status_code, message=response.text
+             )
+
+         return response.iter_lines()
+     else:
+         response = requests.post(api_base, headers=headers, data=json.dumps(data))
+         if response.status_code != 200:
+             raise AnthropicError(
+                 status_code=response.status_code, message=response.text
+             )
+
+         ## LOGGING
+         logging_obj.post_call(
+             input=prompt,
+             api_key=api_key,
+             original_response=response.text,
+             additional_args={"complete_input_dict": data},
+         )
+         print_verbose(f"raw model_response: {response.text}")
+         ## RESPONSE OBJECT
+         try:
+             completion_response = response.json()
+         except:
+             raise AnthropicError(
+                 message=response.text, status_code=response.status_code
+             )
+         if "error" in completion_response:
+             raise AnthropicError(
+                 message=str(completion_response["error"]),
+                 status_code=response.status_code,
+             )
+         else:
+             if len(completion_response["completion"]) > 0:
+                 model_response["choices"][0]["message"][
+                     "content"
+                 ] = completion_response["completion"]
+             model_response.choices[0].finish_reason = completion_response["stop_reason"]
+
+         ## CALCULATING USAGE
+         prompt_tokens = len(
+             encoding.encode(prompt)
+         )  ##[TODO] use the anthropic tokenizer here
+         completion_tokens = len(
+             encoding.encode(model_response["choices"][0]["message"].get("content", ""))
+         )  ##[TODO] use the anthropic tokenizer here
+
+         model_response["created"] = int(time.time())
+         model_response["model"] = model
+         usage = Usage(
+             prompt_tokens=prompt_tokens,
+             completion_tokens=completion_tokens,
+             total_tokens=prompt_tokens + completion_tokens,
+         )
+         model_response.usage = usage
+         return model_response
+
+
+ def embedding():
+     # logic for parsing in - calling - parsing out model embedding calls
+     pass
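For a non-streaming call, the handler above boils down to a single POST against the Anthropic text-completions endpoint. A rough sketch of the headers and body it assembles, assuming `prompt_factory` emits the classic Human/Assistant format (no request is actually sent here, and the key is a placeholder):

    import json

    HUMAN_PROMPT = "\n\nHuman: "     # mirrors AnthropicConstants above
    AI_PROMPT = "\n\nAssistant: "

    messages = [{"role": "user", "content": "Say hello"}]
    prompt = "".join(
        (HUMAN_PROMPT if m["role"] == "user" else AI_PROMPT) + m["content"]
        for m in messages
    ) + AI_PROMPT                    # Claude completes after the Assistant tag

    headers = {
        "accept": "application/json",
        "anthropic-version": "2023-06-01",
        "content-type": "application/json",
        "x-api-key": "sk-ant-...",   # placeholder
    }
    data = {"model": "claude-instant-1", "prompt": prompt, "max_tokens_to_sample": 256}
    print(json.dumps({"headers": list(headers), "body": data}, indent=2))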
litellm/llms/azure.py ADDED
@@ -0,0 +1,799 @@
1
+ from typing import Optional, Union, Any
2
+ import types, requests
3
+ from .base import BaseLLM
4
+ from litellm.utils import (
5
+ ModelResponse,
6
+ Choices,
7
+ Message,
8
+ CustomStreamWrapper,
9
+ convert_to_model_response_object,
10
+ )
11
+ from typing import Callable, Optional
12
+ from litellm import OpenAIConfig
13
+ import litellm, json
14
+ import httpx
15
+ from .custom_httpx.azure_dall_e_2 import CustomHTTPTransport, AsyncCustomHTTPTransport
16
+ from openai import AzureOpenAI, AsyncAzureOpenAI
17
+
18
+
19
+ class AzureOpenAIError(Exception):
20
+ def __init__(
21
+ self,
22
+ status_code,
23
+ message,
24
+ request: Optional[httpx.Request] = None,
25
+ response: Optional[httpx.Response] = None,
26
+ ):
27
+ self.status_code = status_code
28
+ self.message = message
29
+ if request:
30
+ self.request = request
31
+ else:
32
+ self.request = httpx.Request(method="POST", url="https://api.openai.com/v1")
33
+ if response:
34
+ self.response = response
35
+ else:
36
+ self.response = httpx.Response(
37
+ status_code=status_code, request=self.request
38
+ )
39
+ super().__init__(
40
+ self.message
41
+ ) # Call the base class constructor with the parameters it needs
42
+
43
+
44
+ class AzureOpenAIConfig(OpenAIConfig):
45
+ """
46
+ Reference: https://platform.openai.com/docs/api-reference/chat/create
47
+
48
+ The class `AzureOpenAIConfig` provides configuration for the OpenAI's Chat API interface, for use with Azure. It inherits from `OpenAIConfig`. Below are the parameters::
49
+
50
+ - `frequency_penalty` (number or null): Defaults to 0. Allows a value between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, thereby minimizing repetition.
51
+
52
+ - `function_call` (string or object): This optional parameter controls how the model calls functions.
53
+
54
+ - `functions` (array): An optional parameter. It is a list of functions for which the model may generate JSON inputs.
55
+
56
+ - `logit_bias` (map): This optional parameter modifies the likelihood of specified tokens appearing in the completion.
57
+
58
+ - `max_tokens` (integer or null): This optional parameter helps to set the maximum number of tokens to generate in the chat completion.
59
+
60
+ - `n` (integer or null): This optional parameter helps to set how many chat completion choices to generate for each input message.
61
+
62
+ - `presence_penalty` (number or null): Defaults to 0. It penalizes new tokens based on if they appear in the text so far, hence increasing the model's likelihood to talk about new topics.
63
+
64
+ - `stop` (string / array / null): Specifies up to 4 sequences where the API will stop generating further tokens.
65
+
66
+ - `temperature` (number or null): Defines the sampling temperature to use, varying between 0 and 2.
67
+
68
+ - `top_p` (number or null): An alternative to sampling with temperature, used for nucleus sampling.
69
+ """
70
+
71
+ def __init__(
72
+ self,
73
+ frequency_penalty: Optional[int] = None,
74
+ function_call: Optional[Union[str, dict]] = None,
75
+ functions: Optional[list] = None,
76
+ logit_bias: Optional[dict] = None,
77
+ max_tokens: Optional[int] = None,
78
+ n: Optional[int] = None,
79
+ presence_penalty: Optional[int] = None,
80
+ stop: Optional[Union[str, list]] = None,
81
+ temperature: Optional[int] = None,
82
+ top_p: Optional[int] = None,
83
+ ) -> None:
84
+ super().__init__(
85
+ frequency_penalty,
86
+ function_call,
87
+ functions,
88
+ logit_bias,
89
+ max_tokens,
90
+ n,
91
+ presence_penalty,
92
+ stop,
93
+ temperature,
94
+ top_p,
95
+ )
96
+
97
+
98
+ class AzureChatCompletion(BaseLLM):
99
+ def __init__(self) -> None:
100
+ super().__init__()
101
+
102
+ def validate_environment(self, api_key, azure_ad_token):
103
+ headers = {
104
+ "content-type": "application/json",
105
+ }
106
+ if api_key is not None:
107
+ headers["api-key"] = api_key
108
+ elif azure_ad_token is not None:
109
+ headers["Authorization"] = f"Bearer {azure_ad_token}"
110
+ return headers
111
+
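# Illustration, not part of the commit: the two header shapes the helper above
# can return, depending on which credential is supplied (values are placeholders).
#   AzureChatCompletion().validate_environment(api_key="AZURE_KEY", azure_ad_token=None)
#       -> {"content-type": "application/json", "api-key": "AZURE_KEY"}
#   AzureChatCompletion().validate_environment(api_key=None, azure_ad_token="AAD_TOKEN")
#       -> {"content-type": "application/json", "Authorization": "Bearer AAD_TOKEN"}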
112
+ def completion(
113
+ self,
114
+ model: str,
115
+ messages: list,
116
+ model_response: ModelResponse,
117
+ api_key: str,
118
+ api_base: str,
119
+ api_version: str,
120
+ api_type: str,
121
+ azure_ad_token: str,
122
+ print_verbose: Callable,
123
+ timeout,
124
+ logging_obj,
125
+ optional_params,
126
+ litellm_params,
127
+ logger_fn,
128
+ acompletion: bool = False,
129
+ headers: Optional[dict] = None,
130
+ client=None,
131
+ ):
132
+ super().completion()
133
+ exception_mapping_worked = False
134
+ try:
135
+ if model is None or messages is None:
136
+ raise AzureOpenAIError(
137
+ status_code=422, message=f"Missing model or messages"
138
+ )
139
+
140
+ max_retries = optional_params.pop("max_retries", 2)
141
+
142
+ ### CHECK IF CLOUDFLARE AI GATEWAY ###
143
+ ### if so - set the model as part of the base url
144
+ if "gateway.ai.cloudflare.com" in api_base:
145
+ ## build base url - assume api base includes resource name
146
+ if client is None:
147
+ if not api_base.endswith("/"):
148
+ api_base += "/"
149
+ api_base += f"{model}"
150
+
151
+ azure_client_params = {
152
+ "api_version": api_version,
153
+ "base_url": f"{api_base}",
154
+ "http_client": litellm.client_session,
155
+ "max_retries": max_retries,
156
+ "timeout": timeout,
157
+ }
158
+ if api_key is not None:
159
+ azure_client_params["api_key"] = api_key
160
+ elif azure_ad_token is not None:
161
+ azure_client_params["azure_ad_token"] = azure_ad_token
162
+
163
+ if acompletion is True:
164
+ client = AsyncAzureOpenAI(**azure_client_params)
165
+ else:
166
+ client = AzureOpenAI(**azure_client_params)
167
+
168
+ data = {"model": None, "messages": messages, **optional_params}
169
+ else:
170
+ data = {
171
+ "model": model, # type: ignore
172
+ "messages": messages,
173
+ **optional_params,
174
+ }
175
+
176
+ if acompletion is True:
177
+ if optional_params.get("stream", False):
178
+ return self.async_streaming(
179
+ logging_obj=logging_obj,
180
+ api_base=api_base,
181
+ data=data,
182
+ model=model,
183
+ api_key=api_key,
184
+ api_version=api_version,
185
+ azure_ad_token=azure_ad_token,
186
+ timeout=timeout,
187
+ client=client,
188
+ )
189
+ else:
190
+ return self.acompletion(
191
+ api_base=api_base,
192
+ data=data,
193
+ model_response=model_response,
194
+ api_key=api_key,
195
+ api_version=api_version,
196
+ model=model,
197
+ azure_ad_token=azure_ad_token,
198
+ timeout=timeout,
199
+ client=client,
200
+ logging_obj=logging_obj,
201
+ )
202
+ elif "stream" in optional_params and optional_params["stream"] == True:
203
+ return self.streaming(
204
+ logging_obj=logging_obj,
205
+ api_base=api_base,
206
+ data=data,
207
+ model=model,
208
+ api_key=api_key,
209
+ api_version=api_version,
210
+ azure_ad_token=azure_ad_token,
211
+ timeout=timeout,
212
+ client=client,
213
+ )
214
+ else:
215
+ ## LOGGING
216
+ logging_obj.pre_call(
217
+ input=messages,
218
+ api_key=api_key,
219
+ additional_args={
220
+ "headers": {
221
+ "api_key": api_key,
222
+ "azure_ad_token": azure_ad_token,
223
+ },
224
+ "api_version": api_version,
225
+ "api_base": api_base,
226
+ "complete_input_dict": data,
227
+ },
228
+ )
229
+ if not isinstance(max_retries, int):
230
+ raise AzureOpenAIError(
231
+ status_code=422, message="max retries must be an int"
232
+ )
233
+ # init AzureOpenAI Client
234
+ azure_client_params = {
235
+ "api_version": api_version,
236
+ "azure_endpoint": api_base,
237
+ "azure_deployment": model,
238
+ "http_client": litellm.client_session,
239
+ "max_retries": max_retries,
240
+ "timeout": timeout,
241
+ }
242
+ if api_key is not None:
243
+ azure_client_params["api_key"] = api_key
244
+ elif azure_ad_token is not None:
245
+ azure_client_params["azure_ad_token"] = azure_ad_token
246
+ if client is None:
247
+ azure_client = AzureOpenAI(**azure_client_params)
248
+ else:
249
+ azure_client = client
250
+ response = azure_client.chat.completions.create(**data, timeout=timeout) # type: ignore
251
+ stringified_response = response.model_dump()
252
+ ## LOGGING
253
+ logging_obj.post_call(
254
+ input=messages,
255
+ api_key=api_key,
256
+ original_response=stringified_response,
257
+ additional_args={
258
+ "headers": headers,
259
+ "api_version": api_version,
260
+ "api_base": api_base,
261
+ },
262
+ )
263
+ return convert_to_model_response_object(
264
+ response_object=stringified_response,
265
+ model_response_object=model_response,
266
+ )
267
+ except AzureOpenAIError as e:
268
+ exception_mapping_worked = True
269
+ raise e
270
+ except Exception as e:
271
+ if hasattr(e, "status_code"):
272
+ raise AzureOpenAIError(status_code=e.status_code, message=str(e))
273
+ else:
274
+ raise AzureOpenAIError(status_code=500, message=str(e))
275
+
276
+ async def acompletion(
277
+ self,
278
+ api_key: str,
279
+ api_version: str,
280
+ model: str,
281
+ api_base: str,
282
+ data: dict,
283
+ timeout: Any,
284
+ model_response: ModelResponse,
285
+ azure_ad_token: Optional[str] = None,
286
+ client=None, # this is the AsyncAzureOpenAI
287
+ logging_obj=None,
288
+ ):
289
+ response = None
290
+ try:
291
+ max_retries = data.pop("max_retries", 2)
292
+ if not isinstance(max_retries, int):
293
+ raise AzureOpenAIError(
294
+ status_code=422, message="max retries must be an int"
295
+ )
296
+
297
+ # init AzureOpenAI Client
298
+ azure_client_params = {
299
+ "api_version": api_version,
300
+ "azure_endpoint": api_base,
301
+ "azure_deployment": model,
302
+ "http_client": litellm.client_session,
303
+ "max_retries": max_retries,
304
+ "timeout": timeout,
305
+ }
306
+ if api_key is not None:
307
+ azure_client_params["api_key"] = api_key
308
+ elif azure_ad_token is not None:
309
+ azure_client_params["azure_ad_token"] = azure_ad_token
310
+ if client is None:
311
+ azure_client = AsyncAzureOpenAI(**azure_client_params)
312
+ else:
313
+ azure_client = client
314
+ ## LOGGING
315
+ logging_obj.pre_call(
316
+ input=data["messages"],
317
+ api_key=azure_client.api_key,
318
+ additional_args={
319
+ "headers": {"Authorization": f"Bearer {azure_client.api_key}"},
320
+ "api_base": azure_client._base_url._uri_reference,
321
+ "acompletion": True,
322
+ "complete_input_dict": data,
323
+ },
324
+ )
325
+ response = await azure_client.chat.completions.create(
326
+ **data, timeout=timeout
327
+ )
328
+ return convert_to_model_response_object(
329
+ response_object=response.model_dump(),
330
+ model_response_object=model_response,
331
+ )
332
+ except AzureOpenAIError as e:
333
+ exception_mapping_worked = True
334
+ raise e
335
+ except Exception as e:
336
+ if hasattr(e, "status_code"):
337
+ raise e
338
+ else:
339
+ raise AzureOpenAIError(status_code=500, message=str(e))
340
+
341
+ def streaming(
342
+ self,
343
+ logging_obj,
344
+ api_base: str,
345
+ api_key: str,
346
+ api_version: str,
347
+ data: dict,
348
+ model: str,
349
+ timeout: Any,
350
+ azure_ad_token: Optional[str] = None,
351
+ client=None,
352
+ ):
353
+ max_retries = data.pop("max_retries", 2)
354
+ if not isinstance(max_retries, int):
355
+ raise AzureOpenAIError(
356
+ status_code=422, message="max retries must be an int"
357
+ )
358
+ # init AzureOpenAI Client
359
+ azure_client_params = {
360
+ "api_version": api_version,
361
+ "azure_endpoint": api_base,
362
+ "azure_deployment": model,
363
+ "http_client": litellm.client_session,
364
+ "max_retries": max_retries,
365
+ "timeout": timeout,
366
+ }
367
+ if api_key is not None:
368
+ azure_client_params["api_key"] = api_key
369
+ elif azure_ad_token is not None:
370
+ azure_client_params["azure_ad_token"] = azure_ad_token
371
+ if client is None:
372
+ azure_client = AzureOpenAI(**azure_client_params)
373
+ else:
374
+ azure_client = client
375
+ ## LOGGING
376
+ logging_obj.pre_call(
377
+ input=data["messages"],
378
+ api_key=azure_client.api_key,
379
+ additional_args={
380
+ "headers": {"Authorization": f"Bearer {azure_client.api_key}"},
381
+ "api_base": azure_client._base_url._uri_reference,
382
+ "acompletion": True,
383
+ "complete_input_dict": data,
384
+ },
385
+ )
386
+ response = azure_client.chat.completions.create(**data, timeout=timeout)
387
+ streamwrapper = CustomStreamWrapper(
388
+ completion_stream=response,
389
+ model=model,
390
+ custom_llm_provider="azure",
391
+ logging_obj=logging_obj,
392
+ )
393
+ return streamwrapper
394
+
395
+ async def async_streaming(
396
+ self,
397
+ logging_obj,
398
+ api_base: str,
399
+ api_key: str,
400
+ api_version: str,
401
+ data: dict,
402
+ model: str,
403
+ timeout: Any,
404
+ azure_ad_token: Optional[str] = None,
405
+ client=None,
406
+ ):
407
+ try:
408
+ # init AzureOpenAI Client
409
+ azure_client_params = {
410
+ "api_version": api_version,
411
+ "azure_endpoint": api_base,
412
+ "azure_deployment": model,
413
+ "http_client": litellm.client_session,
414
+ "max_retries": data.pop("max_retries", 2),
415
+ "timeout": timeout,
416
+ }
417
+ if api_key is not None:
418
+ azure_client_params["api_key"] = api_key
419
+ elif azure_ad_token is not None:
420
+ azure_client_params["azure_ad_token"] = azure_ad_token
421
+ if client is None:
422
+ azure_client = AsyncAzureOpenAI(**azure_client_params)
423
+ else:
424
+ azure_client = client
425
+ ## LOGGING
426
+ logging_obj.pre_call(
427
+ input=data["messages"],
428
+ api_key=azure_client.api_key,
429
+ additional_args={
430
+ "headers": {"Authorization": f"Bearer {azure_client.api_key}"},
431
+ "api_base": azure_client._base_url._uri_reference,
432
+ "acompletion": True,
433
+ "complete_input_dict": data,
434
+ },
435
+ )
436
+ response = await azure_client.chat.completions.create(
437
+ **data, timeout=timeout
438
+ )
439
+ # return response
440
+ streamwrapper = CustomStreamWrapper(
441
+ completion_stream=response,
442
+ model=model,
443
+ custom_llm_provider="azure",
444
+ logging_obj=logging_obj,
445
+ )
446
+ return streamwrapper ## DO NOT make this into an async for ... loop, it will yield an async generator, which won't raise errors if the response fails
447
+ except Exception as e:
448
+ if hasattr(e, "status_code"):
449
+ raise AzureOpenAIError(status_code=e.status_code, message=str(e))
450
+ else:
451
+ raise AzureOpenAIError(status_code=500, message=str(e))
452
+
453
+ async def aembedding(
454
+ self,
455
+ data: dict,
456
+ model_response: ModelResponse,
457
+ azure_client_params: dict,
458
+ api_key: str,
459
+ input: list,
460
+ client=None,
461
+ logging_obj=None,
462
+ timeout=None,
463
+ ):
464
+ response = None
465
+ try:
466
+ if client is None:
467
+ openai_aclient = AsyncAzureOpenAI(**azure_client_params)
468
+ else:
469
+ openai_aclient = client
470
+ response = await openai_aclient.embeddings.create(**data, timeout=timeout)
471
+ stringified_response = response.model_dump()
472
+ ## LOGGING
473
+ logging_obj.post_call(
474
+ input=input,
475
+ api_key=api_key,
476
+ additional_args={"complete_input_dict": data},
477
+ original_response=stringified_response,
478
+ )
479
+ return convert_to_model_response_object(
480
+ response_object=stringified_response,
481
+ model_response_object=model_response,
482
+ response_type="embedding",
483
+ )
484
+ except Exception as e:
485
+ ## LOGGING
486
+ logging_obj.post_call(
487
+ input=input,
488
+ api_key=api_key,
489
+ additional_args={"complete_input_dict": data},
490
+ original_response=str(e),
491
+ )
492
+ raise e
493
+
494
+ def embedding(
495
+ self,
496
+ model: str,
497
+ input: list,
498
+ api_key: str,
499
+ api_base: str,
500
+ api_version: str,
501
+ timeout: float,
502
+ logging_obj=None,
503
+ model_response=None,
504
+ optional_params=None,
505
+ azure_ad_token: Optional[str] = None,
506
+ client=None,
507
+ aembedding=None,
508
+ ):
509
+ super().embedding()
510
+ exception_mapping_worked = False
511
+ if self._client_session is None:
512
+ self._client_session = self.create_client_session()
513
+ try:
514
+ data = {"model": model, "input": input, **optional_params}
515
+ max_retries = data.pop("max_retries", 2)
516
+ if not isinstance(max_retries, int):
517
+ raise AzureOpenAIError(
518
+ status_code=422, message="max retries must be an int"
519
+ )
520
+
521
+ # init AzureOpenAI Client
522
+ azure_client_params = {
523
+ "api_version": api_version,
524
+ "azure_endpoint": api_base,
525
+ "azure_deployment": model,
526
+ "http_client": litellm.client_session,
527
+ "max_retries": max_retries,
528
+ "timeout": timeout,
529
+ }
530
+ if api_key is not None:
531
+ azure_client_params["api_key"] = api_key
532
+ elif azure_ad_token is not None:
533
+ azure_client_params["azure_ad_token"] = azure_ad_token
534
+
535
+ ## LOGGING
536
+ logging_obj.pre_call(
537
+ input=input,
538
+ api_key=api_key,
539
+ additional_args={
540
+ "complete_input_dict": data,
541
+ "headers": {"api_key": api_key, "azure_ad_token": azure_ad_token},
542
+ },
543
+ )
544
+
545
+ if aembedding == True:
546
+ response = self.aembedding(
547
+ data=data,
548
+ input=input,
549
+ logging_obj=logging_obj,
550
+ api_key=api_key,
551
+ model_response=model_response,
552
+ azure_client_params=azure_client_params,
553
+ timeout=timeout,
554
+ )
555
+ return response
556
+ if client is None:
557
+ azure_client = AzureOpenAI(**azure_client_params) # type: ignore
558
+ else:
559
+ azure_client = client
560
+ ## COMPLETION CALL
561
+ response = azure_client.embeddings.create(**data, timeout=timeout) # type: ignore
562
+ ## LOGGING
563
+ logging_obj.post_call(
564
+ input=input,
565
+ api_key=api_key,
566
+ additional_args={"complete_input_dict": data, "api_base": api_base},
567
+ original_response=response,
568
+ )
569
+
570
+ return convert_to_model_response_object(response_object=response.model_dump(), model_response_object=model_response, response_type="embedding") # type: ignore
571
+ except AzureOpenAIError as e:
572
+ exception_mapping_worked = True
573
+ raise e
574
+ except Exception as e:
575
+ if hasattr(e, "status_code"):
576
+ raise AzureOpenAIError(status_code=e.status_code, message=str(e))
577
+ else:
578
+ raise AzureOpenAIError(status_code=500, message=str(e))
579
+
580
+ async def aimage_generation(
581
+ self,
582
+ data: dict,
583
+ model_response: ModelResponse,
584
+ azure_client_params: dict,
585
+ api_key: str,
586
+ input: list,
587
+ client=None,
588
+ logging_obj=None,
589
+ timeout=None,
590
+ ):
591
+ response = None
592
+ try:
593
+ if client is None:
594
+ client_session = litellm.aclient_session or httpx.AsyncClient(
595
+ transport=AsyncCustomHTTPTransport(),
596
+ )
597
+ openai_aclient = AsyncAzureOpenAI(
598
+ http_client=client_session, **azure_client_params
599
+ )
600
+ else:
601
+ openai_aclient = client
602
+ response = await openai_aclient.images.generate(**data, timeout=timeout)
603
+ stringified_response = response.model_dump()
604
+ ## LOGGING
605
+ logging_obj.post_call(
606
+ input=input,
607
+ api_key=api_key,
608
+ additional_args={"complete_input_dict": data},
609
+ original_response=stringified_response,
610
+ )
611
+ return convert_to_model_response_object(
612
+ response_object=stringified_response,
613
+ model_response_object=model_response,
614
+ response_type="image_generation",
615
+ )
616
+ except Exception as e:
617
+ ## LOGGING
618
+ logging_obj.post_call(
619
+ input=input,
620
+ api_key=api_key,
621
+ additional_args={"complete_input_dict": data},
622
+ original_response=str(e),
623
+ )
624
+ raise e
625
+
626
+ def image_generation(
627
+ self,
628
+ prompt: str,
629
+ timeout: float,
630
+ model: Optional[str] = None,
631
+ api_key: Optional[str] = None,
632
+ api_base: Optional[str] = None,
633
+ api_version: Optional[str] = None,
634
+ model_response: Optional[litellm.utils.ImageResponse] = None,
635
+ azure_ad_token: Optional[str] = None,
636
+ logging_obj=None,
637
+ optional_params=None,
638
+ client=None,
639
+ aimg_generation=None,
640
+ ):
641
+ exception_mapping_worked = False
642
+ try:
643
+ if model and len(model) > 0:
644
+ model = model
645
+ else:
646
+ model = None
647
+ data = {"model": model, "prompt": prompt, **optional_params}
648
+ max_retries = data.pop("max_retries", 2)
649
+ if not isinstance(max_retries, int):
650
+ raise AzureOpenAIError(
651
+ status_code=422, message="max retries must be an int"
652
+ )
653
+
654
+ # init AzureOpenAI Client
655
+ azure_client_params = {
656
+ "api_version": api_version,
657
+ "azure_endpoint": api_base,
658
+ "azure_deployment": model,
659
+ "max_retries": max_retries,
660
+ "timeout": timeout,
661
+ }
662
+ if api_key is not None:
663
+ azure_client_params["api_key"] = api_key
664
+ elif azure_ad_token is not None:
665
+ azure_client_params["azure_ad_token"] = azure_ad_token
666
+
667
+ if aimg_generation == True:
668
+ response = self.aimage_generation(data=data, input=input, logging_obj=logging_obj, model_response=model_response, api_key=api_key, client=client, azure_client_params=azure_client_params, timeout=timeout) # type: ignore
669
+ return response
670
+
671
+ if client is None:
672
+ client_session = litellm.client_session or httpx.Client(
673
+ transport=CustomHTTPTransport(),
674
+ )
675
+ azure_client = AzureOpenAI(http_client=client_session, **azure_client_params) # type: ignore
676
+ else:
677
+ azure_client = client
678
+
679
+ ## LOGGING
680
+ logging_obj.pre_call(
681
+ input=prompt,
682
+ api_key=azure_client.api_key,
683
+ additional_args={
684
+ "headers": {"Authorization": f"Bearer {azure_client.api_key}"},
685
+ "api_base": azure_client._base_url._uri_reference,
686
+ "acompletion": False,
687
+ "complete_input_dict": data,
688
+ },
689
+ )
690
+
691
+ ## COMPLETION CALL
692
+ response = azure_client.images.generate(**data, timeout=timeout) # type: ignore
693
+ ## LOGGING
694
+ logging_obj.post_call(
695
+ input=input,
696
+ api_key=api_key,
697
+ additional_args={"complete_input_dict": data},
698
+ original_response=response,
699
+ )
700
+ # return response
701
+ return convert_to_model_response_object(response_object=response.model_dump(), model_response_object=model_response, response_type="image_generation") # type: ignore
702
+ except AzureOpenAIError as e:
703
+ exception_mapping_worked = True
704
+ raise e
705
+ except Exception as e:
706
+ if hasattr(e, "status_code"):
707
+ raise AzureOpenAIError(status_code=e.status_code, message=str(e))
708
+ else:
709
+ raise AzureOpenAIError(status_code=500, message=str(e))
710
+
711
+ async def ahealth_check(
712
+ self,
713
+ model: Optional[str],
714
+ api_key: str,
715
+ api_base: str,
716
+ api_version: str,
717
+ timeout: float,
718
+ mode: str,
719
+ messages: Optional[list] = None,
720
+ input: Optional[list] = None,
721
+ prompt: Optional[str] = None,
722
+ ):
723
+ client_session = litellm.aclient_session or httpx.AsyncClient(
724
+ transport=AsyncCustomHTTPTransport(), # handle dall-e-2 calls
725
+ )
726
+ if "gateway.ai.cloudflare.com" in api_base:
727
+ ## build base url - assume api base includes resource name
728
+ if not api_base.endswith("/"):
729
+ api_base += "/"
730
+ api_base += f"{model}"
731
+ client = AsyncAzureOpenAI(
732
+ base_url=api_base,
733
+ api_version=api_version,
734
+ api_key=api_key,
735
+ timeout=timeout,
736
+ http_client=client_session,
737
+ )
738
+ model = None
739
+ # cloudflare ai gateway, needs model=None
740
+ else:
741
+ client = AsyncAzureOpenAI(
742
+ api_version=api_version,
743
+ azure_endpoint=api_base,
744
+ api_key=api_key,
745
+ timeout=timeout,
746
+ http_client=client_session,
747
+ )
748
+
749
+ # only run this check if it's not cloudflare ai gateway
750
+ if model is None and mode != "image_generation":
751
+ raise Exception("model is not set")
752
+
753
+ completion = None
754
+
755
+ if mode == "completion":
756
+ completion = await client.completions.with_raw_response.create(
757
+ model=model, # type: ignore
758
+ prompt=prompt, # type: ignore
759
+ )
760
+ elif mode == "chat":
761
+ if messages is None:
762
+ raise Exception("messages is not set")
763
+ completion = await client.chat.completions.with_raw_response.create(
764
+ model=model, # type: ignore
765
+ messages=messages, # type: ignore
766
+ )
767
+ elif mode == "embedding":
768
+ if input is None:
769
+ raise Exception("input is not set")
770
+ completion = await client.embeddings.with_raw_response.create(
771
+ model=model, # type: ignore
772
+ input=input, # type: ignore
773
+ )
774
+ elif mode == "image_generation":
775
+ if prompt is None:
776
+ raise Exception("prompt is not set")
777
+ completion = await client.images.with_raw_response.generate(
778
+ model=model, # type: ignore
779
+ prompt=prompt, # type: ignore
780
+ )
781
+ else:
782
+ raise Exception("mode not set")
783
+ response = {}
784
+
785
+ if completion is None or not hasattr(completion, "headers"):
786
+ raise Exception("invalid completion response")
787
+
788
+ if (
789
+ completion.headers.get("x-ratelimit-remaining-requests", None) is not None
790
+ ): # not provided for dall-e requests
791
+ response["x-ratelimit-remaining-requests"] = completion.headers[
792
+ "x-ratelimit-remaining-requests"
793
+ ]
794
+
795
+ if completion.headers.get("x-ratelimit-remaining-tokens", None) is not None:
796
+ response["x-ratelimit-remaining-tokens"] = completion.headers[
797
+ "x-ratelimit-remaining-tokens"
798
+ ]
799
+ return response
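One detail worth noting in the file above is the Cloudflare AI Gateway branch used by both `completion()` and `ahealth_check()`: the deployment name is appended to the gateway URL and the client is then built with `base_url=` instead of `azure_endpoint=`, with the request model left as None. A standalone sketch of just that URL handling (the gateway URL below is a made-up placeholder):

    api_base = "https://gateway.ai.cloudflare.com/v1/ACCOUNT/GATEWAY/azure-openai/my-resource"
    model = "my-gpt-35-turbo-deployment"

    if "gateway.ai.cloudflare.com" in api_base:
        if not api_base.endswith("/"):
            api_base += "/"
        api_base += f"{model}"
        model_for_request = None   # deployment is already encoded in the URL
    else:
        model_for_request = model

    print(api_base)           # .../azure-openai/my-resource/my-gpt-35-turbo-deployment
    print(model_for_request)  # None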
litellm/llms/base.py ADDED
@@ -0,0 +1,45 @@
+ ## This is a template base class to be used for adding new LLM providers via API calls
+ import litellm
+ import httpx
+ from typing import Optional
+
+
+ class BaseLLM:
+     _client_session: Optional[httpx.Client] = None
+
+     def create_client_session(self):
+         if litellm.client_session:
+             _client_session = litellm.client_session
+         else:
+             _client_session = httpx.Client()
+
+         return _client_session
+
+     def create_aclient_session(self):
+         if litellm.aclient_session:
+             _aclient_session = litellm.aclient_session
+         else:
+             _aclient_session = httpx.AsyncClient()
+
+         return _aclient_session
+
+     def __exit__(self):
+         if hasattr(self, "_client_session"):
+             self._client_session.close()
+
+     async def __aexit__(self, exc_type, exc_val, exc_tb):
+         if hasattr(self, "_aclient_session"):
+             await self._aclient_session.aclose()
+
+     def validate_environment(self):  # set up the environment required to run the model
+         pass
+
+     def completion(
+         self, *args, **kwargs
+     ):  # logic for parsing in - calling - parsing out model completion calls
+         pass
+
+     def embedding(
+         self, *args, **kwargs
+     ):  # logic for parsing in - calling - parsing out model embedding calls
+         pass
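New providers in this package are expected to build on this template. A minimal sketch of a subclass, with a hypothetical provider name and a deliberately thin completion method:

    from litellm.llms.base import BaseLLM

    class MyProviderLLM(BaseLLM):
        def validate_environment(self, api_key=None):
            if not api_key:
                raise ValueError("missing API key for my_provider")
            return {"Authorization": f"Bearer {api_key}"}

        def completion(self, *args, **kwargs):
            # reuse the shared httpx session managed by the base class
            if self._client_session is None:
                self._client_session = self.create_client_session()
            # ... build the request, call the provider, map the response ...
            return super().completion(*args, **kwargs)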
litellm/llms/baseten.py ADDED
@@ -0,0 +1,164 @@
+ import os
+ import json
+ from enum import Enum
+ import requests
+ import time
+ from typing import Callable
+ from litellm.utils import ModelResponse, Usage
+
+
+ class BasetenError(Exception):
+     def __init__(self, status_code, message):
+         self.status_code = status_code
+         self.message = message
+         super().__init__(
+             self.message
+         )  # Call the base class constructor with the parameters it needs
+
+
+ def validate_environment(api_key):
+     headers = {
+         "accept": "application/json",
+         "content-type": "application/json",
+     }
+     if api_key:
+         headers["Authorization"] = f"Api-Key {api_key}"
+     return headers
+
+
+ def completion(
+     model: str,
+     messages: list,
+     model_response: ModelResponse,
+     print_verbose: Callable,
+     encoding,
+     api_key,
+     logging_obj,
+     optional_params=None,
+     litellm_params=None,
+     logger_fn=None,
+ ):
+     headers = validate_environment(api_key)
+     completion_url_fragment_1 = "https://app.baseten.co/models/"
+     completion_url_fragment_2 = "/predict"
+     model = model
+     prompt = ""
+     for message in messages:
+         if "role" in message:
+             if message["role"] == "user":
+                 prompt += f"{message['content']}"
+             else:
+                 prompt += f"{message['content']}"
+         else:
+             prompt += f"{message['content']}"
+     data = {
+         "inputs": prompt,
+         "prompt": prompt,
+         "parameters": optional_params,
+         "stream": True
+         if "stream" in optional_params and optional_params["stream"] == True
+         else False,
+     }
+
+     ## LOGGING
+     logging_obj.pre_call(
+         input=prompt,
+         api_key=api_key,
+         additional_args={"complete_input_dict": data},
+     )
+     ## COMPLETION CALL
+     response = requests.post(
+         completion_url_fragment_1 + model + completion_url_fragment_2,
+         headers=headers,
+         data=json.dumps(data),
+         stream=True
+         if "stream" in optional_params and optional_params["stream"] == True
+         else False,
+     )
+     if "text/event-stream" in response.headers["Content-Type"] or (
+         "stream" in optional_params and optional_params["stream"] == True
+     ):
+         return response.iter_lines()
+     else:
+         ## LOGGING
+         logging_obj.post_call(
+             input=prompt,
+             api_key=api_key,
+             original_response=response.text,
+             additional_args={"complete_input_dict": data},
+         )
+         print_verbose(f"raw model_response: {response.text}")
+         ## RESPONSE OBJECT
+         completion_response = response.json()
+         if "error" in completion_response:
+             raise BasetenError(
+                 message=completion_response["error"],
+                 status_code=response.status_code,
+             )
+         else:
+             if "model_output" in completion_response:
+                 if (
+                     isinstance(completion_response["model_output"], dict)
+                     and "data" in completion_response["model_output"]
+                     and isinstance(completion_response["model_output"]["data"], list)
+                 ):
+                     model_response["choices"][0]["message"][
+                         "content"
+                     ] = completion_response["model_output"]["data"][0]
+                 elif isinstance(completion_response["model_output"], str):
+                     model_response["choices"][0]["message"][
+                         "content"
+                     ] = completion_response["model_output"]
+             elif "completion" in completion_response and isinstance(
+                 completion_response["completion"], str
+             ):
+                 model_response["choices"][0]["message"][
+                     "content"
+                 ] = completion_response["completion"]
+             elif isinstance(completion_response, list) and len(completion_response) > 0:
+                 if "generated_text" not in completion_response:
+                     raise BasetenError(
+                         message=f"Unable to parse response. Original response: {response.text}",
+                         status_code=response.status_code,
+                     )
+                 model_response["choices"][0]["message"][
+                     "content"
+                 ] = completion_response[0]["generated_text"]
+                 ## GETTING LOGPROBS
+                 if (
+                     "details" in completion_response[0]
+                     and "tokens" in completion_response[0]["details"]
+                 ):
+                     model_response.choices[0].finish_reason = completion_response[0][
+                         "details"
+                     ]["finish_reason"]
+                     sum_logprob = 0
+                     for token in completion_response[0]["details"]["tokens"]:
+                         sum_logprob += token["logprob"]
+                     model_response["choices"][0]["message"]._logprobs = sum_logprob
+             else:
+                 raise BasetenError(
+                     message=f"Unable to parse response. Original response: {response.text}",
+                     status_code=response.status_code,
+                 )
+
+         ## CALCULATING USAGE - baseten charges on time, not tokens - have some mapping of cost here.
+         prompt_tokens = len(encoding.encode(prompt))
+         completion_tokens = len(
+             encoding.encode(model_response["choices"][0]["message"]["content"])
+         )
+
+         model_response["created"] = int(time.time())
+         model_response["model"] = model
+         usage = Usage(
+             prompt_tokens=prompt_tokens,
+             completion_tokens=completion_tokens,
+             total_tokens=prompt_tokens + completion_tokens,
+         )
+         model_response.usage = usage
+         return model_response
+
+
+ def embedding():
+     # logic for parsing in - calling - parsing out model embedding calls
+     pass
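Unlike the other handlers in this upload, Baseten authenticates with an `Api-Key` scheme rather than `Bearer`, and the model version id is spliced into the predict URL. A small sketch of the request target this handler builds (the id and key below are placeholders):

    model_id = "abc123"   # Baseten model version id (placeholder)
    api_key = "btn_xxx"   # placeholder

    url = "https://app.baseten.co/models/" + model_id + "/predict"
    headers = {
        "accept": "application/json",
        "content-type": "application/json",
        "Authorization": f"Api-Key {api_key}",
    }
    print(url)                        # https://app.baseten.co/models/abc123/predict
    print(headers["Authorization"])   # Api-Key btn_xxx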
litellm/llms/bedrock.py ADDED
@@ -0,0 +1,799 @@
1
+ import json, copy, types
2
+ import os
3
+ from enum import Enum
4
+ import time
5
+ from typing import Callable, Optional, Any, Union
6
+ import litellm
7
+ from litellm.utils import ModelResponse, get_secret, Usage
8
+ from .prompt_templates.factory import prompt_factory, custom_prompt
9
+ import httpx
10
+
11
+
12
+ class BedrockError(Exception):
13
+ def __init__(self, status_code, message):
14
+ self.status_code = status_code
15
+ self.message = message
16
+ self.request = httpx.Request(
17
+ method="POST", url="https://us-west-2.console.aws.amazon.com/bedrock"
18
+ )
19
+ self.response = httpx.Response(status_code=status_code, request=self.request)
20
+ super().__init__(
21
+ self.message
22
+ ) # Call the base class constructor with the parameters it needs
23
+
24
+
25
+ class AmazonTitanConfig:
26
+ """
27
+ Reference: https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=titan-text-express-v1
28
+
29
+ Supported Params for the Amazon Titan models:
30
+
31
+ - `maxTokenCount` (integer) max tokens,
32
+ - `stopSequences` (string[]) list of stop sequence strings
33
+ - `temperature` (float) temperature for model,
34
+ - `topP` (int) top p for model
35
+ """
36
+
37
+ maxTokenCount: Optional[int] = None
38
+ stopSequences: Optional[list] = None
39
+ temperature: Optional[float] = None
40
+ topP: Optional[int] = None
41
+
42
+ def __init__(
43
+ self,
44
+ maxTokenCount: Optional[int] = None,
45
+ stopSequences: Optional[list] = None,
46
+ temperature: Optional[float] = None,
47
+ topP: Optional[int] = None,
48
+ ) -> None:
49
+ locals_ = locals()
50
+ for key, value in locals_.items():
51
+ if key != "self" and value is not None:
52
+ setattr(self.__class__, key, value)
53
+
54
+ @classmethod
55
+ def get_config(cls):
56
+ return {
57
+ k: v
58
+ for k, v in cls.__dict__.items()
59
+ if not k.startswith("__")
60
+ and not isinstance(
61
+ v,
62
+ (
63
+ types.FunctionType,
64
+ types.BuiltinFunctionType,
65
+ classmethod,
66
+ staticmethod,
67
+ ),
68
+ )
69
+ and v is not None
70
+ }
71
+
72
+
73
+ class AmazonAnthropicConfig:
74
+ """
75
+ Reference: https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=claude
76
+
77
+ Supported Params for the Amazon / Anthropic models:
78
+
79
+ - `max_tokens_to_sample` (integer) max tokens,
80
+ - `temperature` (float) model temperature,
81
+ - `top_k` (integer) top k,
82
+ - `top_p` (integer) top p,
83
+ - `stop_sequences` (string[]) list of stop sequences - e.g. ["\\n\\nHuman:"],
84
+ - `anthropic_version` (string) version of anthropic for bedrock - e.g. "bedrock-2023-05-31"
85
+ """
86
+
87
+ max_tokens_to_sample: Optional[int] = litellm.max_tokens
88
+ stop_sequences: Optional[list] = None
89
+ temperature: Optional[float] = None
90
+ top_k: Optional[int] = None
91
+ top_p: Optional[int] = None
92
+ anthropic_version: Optional[str] = None
93
+
94
+ def __init__(
95
+ self,
96
+ max_tokens_to_sample: Optional[int] = None,
97
+ stop_sequences: Optional[list] = None,
98
+ temperature: Optional[float] = None,
99
+ top_k: Optional[int] = None,
100
+ top_p: Optional[int] = None,
101
+ anthropic_version: Optional[str] = None,
102
+ ) -> None:
103
+ locals_ = locals()
104
+ for key, value in locals_.items():
105
+ if key != "self" and value is not None:
106
+ setattr(self.__class__, key, value)
107
+
108
+ @classmethod
109
+ def get_config(cls):
110
+ return {
111
+ k: v
112
+ for k, v in cls.__dict__.items()
113
+ if not k.startswith("__")
114
+ and not isinstance(
115
+ v,
116
+ (
117
+ types.FunctionType,
118
+ types.BuiltinFunctionType,
119
+ classmethod,
120
+ staticmethod,
121
+ ),
122
+ )
123
+ and v is not None
124
+ }
125
+
126
+
127
+ class AmazonCohereConfig:
128
+ """
129
+ Reference: https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=command
130
+
131
+ Supported Params for the Amazon / Cohere models:
132
+
133
+ - `max_tokens` (integer) max tokens,
134
+ - `temperature` (float) model temperature,
135
+ - `return_likelihood` (string) n/a
136
+ """
137
+
138
+ max_tokens: Optional[int] = None
139
+ temperature: Optional[float] = None
140
+ return_likelihood: Optional[str] = None
141
+
142
+ def __init__(
143
+ self,
144
+ max_tokens: Optional[int] = None,
145
+ temperature: Optional[float] = None,
146
+ return_likelihood: Optional[str] = None,
147
+ ) -> None:
148
+ locals_ = locals()
149
+ for key, value in locals_.items():
150
+ if key != "self" and value is not None:
151
+ setattr(self.__class__, key, value)
152
+
153
+ @classmethod
154
+ def get_config(cls):
155
+ return {
156
+ k: v
157
+ for k, v in cls.__dict__.items()
158
+ if not k.startswith("__")
159
+ and not isinstance(
160
+ v,
161
+ (
162
+ types.FunctionType,
163
+ types.BuiltinFunctionType,
164
+ classmethod,
165
+ staticmethod,
166
+ ),
167
+ )
168
+ and v is not None
169
+ }
170
+
171
+
172
+ class AmazonAI21Config:
173
+ """
174
+ Reference: https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=j2-ultra
175
+
176
+ Supported Params for the Amazon / AI21 models:
177
+
178
+ - `maxTokens` (int32): The maximum number of tokens to generate per result. Optional, default is 16. If no `stopSequences` are given, generation stops after producing `maxTokens`.
179
+
180
+ - `temperature` (float): Modifies the distribution from which tokens are sampled. Optional, default is 0.7. A value of 0 essentially disables sampling and results in greedy decoding.
181
+
182
+ - `topP` (float): Used for sampling tokens from the corresponding top percentile of probability mass. Optional, default is 1. For instance, a value of 0.9 considers only tokens comprising the top 90% probability mass.
183
+
184
+ - `stopSequences` (array of strings): Stops decoding if any of the input strings is generated. Optional.
185
+
186
+ - `frequencyPenalty` (object): Placeholder for frequency penalty object.
187
+
188
+ - `presencePenalty` (object): Placeholder for presence penalty object.
189
+
190
+ - `countPenalty` (object): Placeholder for count penalty object.
191
+ """
192
+
193
+ maxTokens: Optional[int] = None
194
+ temperature: Optional[float] = None
195
+ topP: Optional[float] = None
196
+ stopSequences: Optional[list] = None
197
+ frequencePenalty: Optional[dict] = None
198
+ presencePenalty: Optional[dict] = None
199
+ countPenalty: Optional[dict] = None
200
+
201
+ def __init__(
202
+ self,
203
+ maxTokens: Optional[int] = None,
204
+ temperature: Optional[float] = None,
205
+ topP: Optional[float] = None,
206
+ stopSequences: Optional[list] = None,
207
+ frequencePenalty: Optional[dict] = None,
208
+ presencePenalty: Optional[dict] = None,
209
+ countPenalty: Optional[dict] = None,
210
+ ) -> None:
211
+ locals_ = locals()
212
+ for key, value in locals_.items():
213
+ if key != "self" and value is not None:
214
+ setattr(self.__class__, key, value)
215
+
216
+ @classmethod
217
+ def get_config(cls):
218
+ return {
219
+ k: v
220
+ for k, v in cls.__dict__.items()
221
+ if not k.startswith("__")
222
+ and not isinstance(
223
+ v,
224
+ (
225
+ types.FunctionType,
226
+ types.BuiltinFunctionType,
227
+ classmethod,
228
+ staticmethod,
229
+ ),
230
+ )
231
+ and v is not None
232
+ }
233
+
234
+
235
+ class AnthropicConstants(Enum):
236
+ HUMAN_PROMPT = "\n\nHuman: "
237
+ AI_PROMPT = "\n\nAssistant: "
238
+
239
+
240
+ class AmazonLlamaConfig:
241
+ """
242
+ Reference: https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=meta.llama2-13b-chat-v1
243
+
244
+ Supported Params for the Amazon / Meta Llama models:
245
+
246
+ - `max_gen_len` (integer) max tokens,
247
+ - `temperature` (float) temperature for model,
248
+ - `top_p` (float) top p for model
249
+ """
250
+
251
+ max_gen_len: Optional[int] = None
252
+ temperature: Optional[float] = None
253
+ topP: Optional[float] = None
254
+
255
+ def __init__(
256
+ self,
257
+ maxTokenCount: Optional[int] = None,
258
+ temperature: Optional[float] = None,
259
+ topP: Optional[int] = None,
260
+ ) -> None:
261
+ locals_ = locals()
262
+ for key, value in locals_.items():
263
+ if key != "self" and value is not None:
264
+ setattr(self.__class__, key, value)
265
+
266
+ @classmethod
267
+ def get_config(cls):
268
+ return {
269
+ k: v
270
+ for k, v in cls.__dict__.items()
271
+ if not k.startswith("__")
272
+ and not isinstance(
273
+ v,
274
+ (
275
+ types.FunctionType,
276
+ types.BuiltinFunctionType,
277
+ classmethod,
278
+ staticmethod,
279
+ ),
280
+ )
281
+ and v is not None
282
+ }
283
+
284
+
285
+ def init_bedrock_client(
286
+ region_name=None,
287
+ aws_access_key_id: Optional[str] = None,
288
+ aws_secret_access_key: Optional[str] = None,
289
+ aws_region_name: Optional[str] = None,
290
+ aws_bedrock_runtime_endpoint: Optional[str] = None,
291
+ ):
292
+ # check for custom AWS_REGION_NAME and use it if not passed to init_bedrock_client
293
+ litellm_aws_region_name = get_secret("AWS_REGION_NAME", None)
294
+ standard_aws_region_name = get_secret("AWS_REGION", None)
295
+
296
+ ## CHECK IS 'os.environ/' passed in
297
+ # Define the list of parameters to check
298
+ params_to_check = [
299
+ aws_access_key_id,
300
+ aws_secret_access_key,
301
+ aws_region_name,
302
+ aws_bedrock_runtime_endpoint,
303
+ ]
304
+
305
+ # Iterate over parameters and update if needed
306
+ for i, param in enumerate(params_to_check):
307
+ if param and param.startswith("os.environ/"):
308
+ params_to_check[i] = get_secret(param)
309
+ # Assign updated values back to parameters
310
+ (
311
+ aws_access_key_id,
312
+ aws_secret_access_key,
313
+ aws_region_name,
314
+ aws_bedrock_runtime_endpoint,
315
+ ) = params_to_check
316
+ if region_name:
317
+ pass
318
+ elif aws_region_name:
319
+ region_name = aws_region_name
320
+ elif litellm_aws_region_name:
321
+ region_name = litellm_aws_region_name
322
+ elif standard_aws_region_name:
323
+ region_name = standard_aws_region_name
324
+ else:
325
+ raise BedrockError(
326
+ message="AWS region not set: set AWS_REGION_NAME or AWS_REGION env variable or in .env file",
327
+ status_code=401,
328
+ )
329
+
330
+ # check for custom AWS_BEDROCK_RUNTIME_ENDPOINT and use it if not passed to init_bedrock_client
331
+ env_aws_bedrock_runtime_endpoint = get_secret("AWS_BEDROCK_RUNTIME_ENDPOINT")
332
+ if aws_bedrock_runtime_endpoint:
333
+ endpoint_url = aws_bedrock_runtime_endpoint
334
+ elif env_aws_bedrock_runtime_endpoint:
335
+ endpoint_url = env_aws_bedrock_runtime_endpoint
336
+ else:
337
+ endpoint_url = f"https://bedrock-runtime.{region_name}.amazonaws.com"
338
+
339
+ import boto3
340
+
341
+ if aws_access_key_id != None:
342
+ # uses auth params passed to completion
343
+ # aws_access_key_id is not None, assume user is trying to auth using litellm.completion
344
+
345
+ client = boto3.client(
346
+ service_name="bedrock-runtime",
347
+ aws_access_key_id=aws_access_key_id,
348
+ aws_secret_access_key=aws_secret_access_key,
349
+ region_name=region_name,
350
+ endpoint_url=endpoint_url,
351
+ )
352
+ else:
353
+ # aws_access_key_id is None, assume user is trying to auth using env variables
354
+ # boto3 automatically reads env variables
355
+
356
+ client = boto3.client(
357
+ service_name="bedrock-runtime",
358
+ region_name=region_name,
359
+ endpoint_url=endpoint_url,
360
+ )
361
+
362
+ return client
363
+
364
+
365
+ def convert_messages_to_prompt(model, messages, provider, custom_prompt_dict):
366
+ # handle anthropic prompts using anthropic constants
367
+ if provider == "anthropic":
368
+ if model in custom_prompt_dict:
369
+ # check if the model has a registered custom prompt
370
+ model_prompt_details = custom_prompt_dict[model]
371
+ prompt = custom_prompt(
372
+ role_dict=model_prompt_details["roles"],
373
+ initial_prompt_value=model_prompt_details["initial_prompt_value"],
374
+ final_prompt_value=model_prompt_details["final_prompt_value"],
375
+ messages=messages,
376
+ )
377
+ else:
378
+ prompt = prompt_factory(
379
+ model=model, messages=messages, custom_llm_provider="anthropic"
380
+ )
381
+ else:
382
+ prompt = ""
383
+ for message in messages:
384
+ if "role" in message:
385
+ if message["role"] == "user":
386
+ prompt += f"{message['content']}"
387
+ else:
388
+ prompt += f"{message['content']}"
389
+ else:
390
+ prompt += f"{message['content']}"
391
+ return prompt
392
+
393
+
394
+ """
395
+ BEDROCK AUTH Keys/Vars
396
+ os.environ['AWS_ACCESS_KEY_ID'] = ""
397
+ os.environ['AWS_SECRET_ACCESS_KEY'] = ""
398
+ """
399
+
400
+
401
+ # set os.environ['AWS_REGION_NAME'] = <your-region_name>
402
+
403
+
404
+ def completion(
405
+ model: str,
406
+ messages: list,
407
+ custom_prompt_dict: dict,
408
+ model_response: ModelResponse,
409
+ print_verbose: Callable,
410
+ encoding,
411
+ logging_obj,
412
+ optional_params=None,
413
+ litellm_params=None,
414
+ logger_fn=None,
415
+ ):
416
+ exception_mapping_worked = False
417
+ try:
418
+ # pop aws_secret_access_key, aws_access_key_id, aws_region_name from kwargs, since completion calls fail with them
419
+ aws_secret_access_key = optional_params.pop("aws_secret_access_key", None)
420
+ aws_access_key_id = optional_params.pop("aws_access_key_id", None)
421
+ aws_region_name = optional_params.pop("aws_region_name", None)
422
+ aws_bedrock_runtime_endpoint = optional_params.pop(
423
+ "aws_bedrock_runtime_endpoint", None
424
+ )
425
+
426
+ # use passed in BedrockRuntime.Client if provided, otherwise create a new one
427
+ client = optional_params.pop("aws_bedrock_client", None)
428
+
429
+ # only init client, if user did not pass one
430
+ if client is None:
431
+ client = init_bedrock_client(
432
+ aws_access_key_id=aws_access_key_id,
433
+ aws_secret_access_key=aws_secret_access_key,
434
+ aws_region_name=aws_region_name,
435
+ aws_bedrock_runtime_endpoint=aws_bedrock_runtime_endpoint,
436
+ )
437
+
438
+ model = model
439
+ modelId = (
440
+ optional_params.pop("model_id", None) or model
441
+ ) # default to model if not passed
442
+ provider = model.split(".")[0]
443
+ prompt = convert_messages_to_prompt(
444
+ model, messages, provider, custom_prompt_dict
445
+ )
446
+ inference_params = copy.deepcopy(optional_params)
447
+ stream = inference_params.pop("stream", False)
448
+ if provider == "anthropic":
449
+ ## LOAD CONFIG
450
+ config = litellm.AmazonAnthropicConfig.get_config()
451
+ for k, v in config.items():
452
+ if (
453
+ k not in inference_params
454
+ ): # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in
455
+ inference_params[k] = v
456
+ data = json.dumps({"prompt": prompt, **inference_params})
457
+ elif provider == "ai21":
458
+ ## LOAD CONFIG
459
+ config = litellm.AmazonAI21Config.get_config()
460
+ for k, v in config.items():
461
+ if (
462
+ k not in inference_params
463
+ ): # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in
464
+ inference_params[k] = v
465
+
466
+ data = json.dumps({"prompt": prompt, **inference_params})
467
+ elif provider == "cohere":
468
+ ## LOAD CONFIG
469
+ config = litellm.AmazonCohereConfig.get_config()
470
+ for k, v in config.items():
471
+ if (
472
+ k not in inference_params
473
+ ): # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in
474
+ inference_params[k] = v
475
+ if optional_params.get("stream", False) == True:
476
+ inference_params[
477
+ "stream"
478
+ ] = True # cohere requires stream = True in inference params
479
+ data = json.dumps({"prompt": prompt, **inference_params})
480
+ elif provider == "meta":
481
+ ## LOAD CONFIG
482
+ config = litellm.AmazonLlamaConfig.get_config()
483
+ for k, v in config.items():
484
+ if (
485
+ k not in inference_params
486
+ ): # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in
487
+ inference_params[k] = v
488
+ data = json.dumps({"prompt": prompt, **inference_params})
489
+ elif provider == "amazon": # amazon titan
490
+ ## LOAD CONFIG
491
+ config = litellm.AmazonTitanConfig.get_config()
492
+ for k, v in config.items():
493
+ if (
494
+ k not in inference_params
495
+ ): # completion(top_k=3) > amazon_config(top_k=3) <- allows for dynamic variables to be passed in
496
+ inference_params[k] = v
497
+
498
+ data = json.dumps(
499
+ {
500
+ "inputText": prompt,
501
+ "textGenerationConfig": inference_params,
502
+ }
503
+ )
504
+ else:
505
+ data = json.dumps({})
506
+
507
+ ## COMPLETION CALL
508
+ accept = "application/json"
509
+ contentType = "application/json"
510
+ if stream == True:
511
+ if provider == "ai21":
512
+ ## LOGGING
513
+ request_str = f"""
514
+ response = client.invoke_model(
515
+ body={data},
516
+ modelId={modelId},
517
+ accept=accept,
518
+ contentType=contentType
519
+ )
520
+ """
521
+ logging_obj.pre_call(
522
+ input=prompt,
523
+ api_key="",
524
+ additional_args={
525
+ "complete_input_dict": data,
526
+ "request_str": request_str,
527
+ },
528
+ )
529
+
530
+ response = client.invoke_model(
531
+ body=data, modelId=modelId, accept=accept, contentType=contentType
532
+ )
533
+
534
+ response = response.get("body").read()
535
+ return response
536
+ else:
537
+ ## LOGGING
538
+ request_str = f"""
539
+ response = client.invoke_model_with_response_stream(
540
+ body={data},
541
+ modelId={modelId},
542
+ accept=accept,
543
+ contentType=contentType
544
+ )
545
+ """
546
+ logging_obj.pre_call(
547
+ input=prompt,
548
+ api_key="",
549
+ additional_args={
550
+ "complete_input_dict": data,
551
+ "request_str": request_str,
552
+ },
553
+ )
554
+
555
+ response = client.invoke_model_with_response_stream(
556
+ body=data, modelId=modelId, accept=accept, contentType=contentType
557
+ )
558
+ response = response.get("body")
559
+ return response
560
+ try:
561
+ ## LOGGING
562
+ request_str = f"""
563
+ response = client.invoke_model(
564
+ body={data},
565
+ modelId={modelId},
566
+ accept=accept,
567
+ contentType=contentType
568
+ )
569
+ """
570
+ logging_obj.pre_call(
571
+ input=prompt,
572
+ api_key="",
573
+ additional_args={
574
+ "complete_input_dict": data,
575
+ "request_str": request_str,
576
+ },
577
+ )
578
+ response = client.invoke_model(
579
+ body=data, modelId=modelId, accept=accept, contentType=contentType
580
+ )
581
+ except client.exceptions.ValidationException as e:
582
+ if "The provided model identifier is invalid" in str(e):
583
+ raise BedrockError(status_code=404, message=str(e))
584
+ raise BedrockError(status_code=400, message=str(e))
585
+ except Exception as e:
586
+ raise BedrockError(status_code=500, message=str(e))
587
+
588
+ response_body = json.loads(response.get("body").read())
589
+
590
+ ## LOGGING
591
+ logging_obj.post_call(
592
+ input=prompt,
593
+ api_key="",
594
+ original_response=json.dumps(response_body),
595
+ additional_args={"complete_input_dict": data},
596
+ )
597
+ print_verbose(f"raw model_response: {response}")
598
+ ## RESPONSE OBJECT
599
+ outputText = "default"
600
+ if provider == "ai21":
601
+ outputText = response_body.get("completions")[0].get("data").get("text")
602
+ elif provider == "anthropic":
603
+ outputText = response_body["completion"]
604
+ model_response["finish_reason"] = response_body["stop_reason"]
605
+ elif provider == "cohere":
606
+ outputText = response_body["generations"][0]["text"]
607
+ elif provider == "meta":
608
+ outputText = response_body["generation"]
609
+ else: # amazon titan
610
+ outputText = response_body.get("results")[0].get("outputText")
611
+
612
+ response_metadata = response.get("ResponseMetadata", {})
613
+ if response_metadata.get("HTTPStatusCode", 500) >= 400:
614
+ raise BedrockError(
615
+ message=outputText,
616
+ status_code=response_metadata.get("HTTPStatusCode", 500),
617
+ )
618
+ else:
619
+ try:
620
+ if len(outputText) > 0:
621
+ model_response["choices"][0]["message"]["content"] = outputText
622
+ except:
623
+ raise BedrockError(
624
+ message=json.dumps(outputText),
625
+ status_code=response_metadata.get("HTTPStatusCode", 500),
626
+ )
627
+
628
+ ## CALCULATING USAGE - estimate tokens with the encoding, since usage counts aren't parsed from the Bedrock response above
629
+ prompt_tokens = len(encoding.encode(prompt))
630
+ completion_tokens = len(
631
+ encoding.encode(model_response["choices"][0]["message"].get("content", ""))
632
+ )
633
+
634
+ model_response["created"] = int(time.time())
635
+ model_response["model"] = model
636
+ usage = Usage(
637
+ prompt_tokens=prompt_tokens,
638
+ completion_tokens=completion_tokens,
639
+ total_tokens=prompt_tokens + completion_tokens,
640
+ )
641
+ model_response.usage = usage
642
+ return model_response
643
+ except BedrockError as e:
644
+ exception_mapping_worked = True
645
+ raise e
646
+ except Exception as e:
647
+ if exception_mapping_worked:
648
+ raise e
649
+ else:
650
+ import traceback
651
+
652
+ raise BedrockError(status_code=500, message=traceback.format_exc())
653
+
654
+
655
+ def _embedding_func_single(
656
+ model: str,
657
+ input: str,
658
+ client: Any,
659
+ optional_params=None,
660
+ encoding=None,
661
+ logging_obj=None,
662
+ ):
663
+ # logic for parsing in - calling - parsing out model embedding calls
664
+ ## FORMAT EMBEDDING INPUT ##
665
+ provider = model.split(".")[0]
666
+ inference_params = copy.deepcopy(optional_params)
667
+ inference_params.pop(
668
+ "user", None
669
+ ) # make sure user is not passed in for bedrock call
670
+ modelId = (
671
+ optional_params.pop("model_id", None) or model
672
+ ) # default to model if not passed
673
+ if provider == "amazon":
674
+ input = input.replace(os.linesep, " ")
675
+ data = {"inputText": input, **inference_params}
676
+ # data = json.dumps(data)
677
+ elif provider == "cohere":
678
+ inference_params["input_type"] = inference_params.get(
679
+ "input_type", "search_document"
680
+ ) # aws bedrock example default - https://us-east-1.console.aws.amazon.com/bedrock/home?region=us-east-1#/providers?model=cohere.embed-english-v3
681
+ data = {"texts": [input], **inference_params} # type: ignore
682
+ body = json.dumps(data).encode("utf-8")
683
+ ## LOGGING
684
+ request_str = f"""
685
+ response = client.invoke_model(
686
+ body={body},
687
+ modelId={modelId},
688
+ accept="*/*",
689
+ contentType="application/json",
690
+ )""" # type: ignore
691
+ logging_obj.pre_call(
692
+ input=input,
693
+ api_key="", # boto3 is used for init.
694
+ additional_args={
695
+ "complete_input_dict": {"model": modelId, "texts": input},
696
+ "request_str": request_str,
697
+ },
698
+ )
699
+ try:
700
+ response = client.invoke_model(
701
+ body=body,
702
+ modelId=modelId,
703
+ accept="*/*",
704
+ contentType="application/json",
705
+ )
706
+ response_body = json.loads(response.get("body").read())
707
+ ## LOGGING
708
+ logging_obj.post_call(
709
+ input=input,
710
+ api_key="",
711
+ additional_args={"complete_input_dict": data},
712
+ original_response=json.dumps(response_body),
713
+ )
714
+ if provider == "cohere":
715
+ response = response_body.get("embeddings")
716
+ # flatten list
717
+ response = [item for sublist in response for item in sublist]
718
+ return response
719
+ elif provider == "amazon":
720
+ return response_body.get("embedding")
721
+ except Exception as e:
722
+ raise BedrockError(
723
+ message=f"Embedding Error with model {model}: {e}", status_code=500
724
+ )
725
+
726
+
727
+ def embedding(
728
+ model: str,
729
+ input: Union[list, str],
730
+ api_key: Optional[str] = None,
731
+ logging_obj=None,
732
+ model_response=None,
733
+ optional_params=None,
734
+ encoding=None,
735
+ ):
736
+ ### BOTO3 INIT ###
737
+ # pop aws_secret_access_key, aws_access_key_id, aws_region_name from kwargs, since completion calls fail with them
738
+ aws_secret_access_key = optional_params.pop("aws_secret_access_key", None)
739
+ aws_access_key_id = optional_params.pop("aws_access_key_id", None)
740
+ aws_region_name = optional_params.pop("aws_region_name", None)
741
+ aws_bedrock_runtime_endpoint = optional_params.pop(
742
+ "aws_bedrock_runtime_endpoint", None
743
+ )
744
+
745
+ # use passed in BedrockRuntime.Client if provided, otherwise create a new one
746
+ client = init_bedrock_client(
747
+ aws_access_key_id=aws_access_key_id,
748
+ aws_secret_access_key=aws_secret_access_key,
749
+ aws_region_name=aws_region_name,
750
+ aws_bedrock_runtime_endpoint=aws_bedrock_runtime_endpoint,
751
+ )
752
+ if type(input) == str:
753
+ embeddings = [
754
+ _embedding_func_single(
755
+ model,
756
+ input,
757
+ optional_params=optional_params,
758
+ client=client,
759
+ logging_obj=logging_obj,
760
+ )
761
+ ]
762
+ else:
763
+ ## Embedding Call
764
+ embeddings = [
765
+ _embedding_func_single(
766
+ model,
767
+ i,
768
+ optional_params=optional_params,
769
+ client=client,
770
+ logging_obj=logging_obj,
771
+ )
772
+ for i in input
773
+ ] # [TODO]: make these parallel calls
774
+
775
+ ## Populate OpenAI compliant dictionary
776
+ embedding_response = []
777
+ for idx, embedding in enumerate(embeddings):
778
+ embedding_response.append(
779
+ {
780
+ "object": "embedding",
781
+ "index": idx,
782
+ "embedding": embedding,
783
+ }
784
+ )
785
+ model_response["object"] = "list"
786
+ model_response["data"] = embedding_response
787
+ model_response["model"] = model
788
+ input_tokens = 0
789
+
790
+ input_str = "".join(input)
791
+
792
+ input_tokens += len(encoding.encode(input_str))
793
+
794
+ usage = Usage(
795
+ prompt_tokens=input_tokens, completion_tokens=0, total_tokens=input_tokens + 0
796
+ )
797
+ model_response.usage = usage
798
+
799
+ return model_response
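For context, a minimal usage sketch for the Bedrock handler above (assumptions: litellm and boto3 are installed, AWS credentials plus AWS_REGION_NAME are already exported, and the Claude v2 model id is enabled in the target account):

import litellm

# Credentials are read from the environment, so init_bedrock_client() above can
# build the boto3 "bedrock-runtime" client without explicit keys.
response = litellm.completion(
    model="bedrock/anthropic.claude-v2",  # inside this module the provider is parsed from the id before the first "."
    messages=[{"role": "user", "content": "Say hello from Bedrock"}],
    max_tokens=100,
)
print(response.choices[0].message.content)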
litellm/llms/cloudflare.py ADDED
@@ -0,0 +1,176 @@
1
+ import os, types
2
+ import json
3
+ from enum import Enum
4
+ import requests
5
+ import time
6
+ from typing import Callable, Optional
7
+ import litellm
8
+ import httpx
9
+ from litellm.utils import ModelResponse, Usage
10
+ from .prompt_templates.factory import prompt_factory, custom_prompt
11
+
12
+
13
+ class CloudflareError(Exception):
14
+ def __init__(self, status_code, message):
15
+ self.status_code = status_code
16
+ self.message = message
17
+ self.request = httpx.Request(method="POST", url="https://api.cloudflare.com")
18
+ self.response = httpx.Response(status_code=status_code, request=self.request)
19
+ super().__init__(
20
+ self.message
21
+ ) # Call the base class constructor with the parameters it needs
22
+
23
+
24
+ class CloudflareConfig:
25
+ max_tokens: Optional[int] = None
26
+ stream: Optional[bool] = None
27
+
28
+ def __init__(
29
+ self,
30
+ max_tokens: Optional[int] = None,
31
+ stream: Optional[bool] = None,
32
+ ) -> None:
33
+ locals_ = locals()
34
+ for key, value in locals_.items():
35
+ if key != "self" and value is not None:
36
+ setattr(self.__class__, key, value)
37
+
38
+ @classmethod
39
+ def get_config(cls):
40
+ return {
41
+ k: v
42
+ for k, v in cls.__dict__.items()
43
+ if not k.startswith("__")
44
+ and not isinstance(
45
+ v,
46
+ (
47
+ types.FunctionType,
48
+ types.BuiltinFunctionType,
49
+ classmethod,
50
+ staticmethod,
51
+ ),
52
+ )
53
+ and v is not None
54
+ }
55
+
56
+
57
+ def validate_environment(api_key):
58
+ if api_key is None:
59
+ raise ValueError(
60
+ "Missing CloudflareError API Key - A call is being made to cloudflare but no key is set either in the environment variables or via params"
61
+ )
62
+ headers = {
63
+ "accept": "application/json",
64
+ "content-type": "application/json",
65
+ "Authorization": "Bearer " + api_key,
66
+ }
67
+ return headers
68
+
69
+
70
+ def completion(
71
+ model: str,
72
+ messages: list,
73
+ api_base: str,
74
+ model_response: ModelResponse,
75
+ print_verbose: Callable,
76
+ encoding,
77
+ api_key,
78
+ logging_obj,
79
+ custom_prompt_dict={},
80
+ optional_params=None,
81
+ litellm_params=None,
82
+ logger_fn=None,
83
+ ):
84
+ headers = validate_environment(api_key)
85
+
86
+ ## Load Config
87
+ config = litellm.CloudflareConfig.get_config()
88
+ for k, v in config.items():
89
+ if k not in optional_params:
90
+ optional_params[k] = v
91
+
92
+ print_verbose(f"CUSTOM PROMPT DICT: {custom_prompt_dict}; model: {model}")
93
+ if model in custom_prompt_dict:
94
+ # check if the model has a registered custom prompt
95
+ model_prompt_details = custom_prompt_dict[model]
96
+ prompt = custom_prompt(
97
+ role_dict=model_prompt_details.get("roles", {}),
98
+ initial_prompt_value=model_prompt_details.get("initial_prompt_value", ""),
99
+ final_prompt_value=model_prompt_details.get("final_prompt_value", ""),
100
+ bos_token=model_prompt_details.get("bos_token", ""),
101
+ eos_token=model_prompt_details.get("eos_token", ""),
102
+ messages=messages,
103
+ )
104
+
105
+ # cloudflare adds the model to the api base
106
+ api_base = api_base + model
107
+
108
+ data = {
109
+ "messages": messages,
110
+ **optional_params,
111
+ }
112
+
113
+ ## LOGGING
114
+ logging_obj.pre_call(
115
+ input=messages,
116
+ api_key=api_key,
117
+ additional_args={
118
+ "headers": headers,
119
+ "api_base": api_base,
120
+ "complete_input_dict": data,
121
+ },
122
+ )
123
+
124
+ ## COMPLETION CALL
125
+ if "stream" in optional_params and optional_params["stream"] == True:
126
+ response = requests.post(
127
+ api_base,
128
+ headers=headers,
129
+ data=json.dumps(data),
130
+ stream=optional_params["stream"],
131
+ )
132
+ return response.iter_lines()
133
+ else:
134
+ response = requests.post(api_base, headers=headers, data=json.dumps(data))
135
+ ## LOGGING
136
+ logging_obj.post_call(
137
+ input=messages,
138
+ api_key=api_key,
139
+ original_response=response.text,
140
+ additional_args={"complete_input_dict": data},
141
+ )
142
+ print_verbose(f"raw model_response: {response.text}")
143
+ ## RESPONSE OBJECT
144
+ if response.status_code != 200:
145
+ raise CloudflareError(
146
+ status_code=response.status_code, message=response.text
147
+ )
148
+ completion_response = response.json()
149
+
150
+ model_response["choices"][0]["message"]["content"] = completion_response[
151
+ "result"
152
+ ]["response"]
153
+
154
+ ## CALCULATING USAGE
155
+ print_verbose(
156
+ f"CALCULATING CLOUDFLARE TOKEN USAGE. Model Response: {model_response}; model_response['choices'][0]['message'].get('content', ''): {model_response['choices'][0]['message'].get('content', None)}"
157
+ )
158
+ prompt_tokens = litellm.utils.get_token_count(messages=messages, model=model)
159
+ completion_tokens = len(
160
+ encoding.encode(model_response["choices"][0]["message"].get("content", ""))
161
+ )
162
+
163
+ model_response["created"] = int(time.time())
164
+ model_response["model"] = "cloudflare/" + model
165
+ usage = Usage(
166
+ prompt_tokens=prompt_tokens,
167
+ completion_tokens=completion_tokens,
168
+ total_tokens=prompt_tokens + completion_tokens,
169
+ )
170
+ model_response.usage = usage
171
+ return model_response
172
+
173
+
174
+ def embedding():
175
+ # logic for parsing in - calling - parsing out model embedding calls
176
+ pass
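For context, a minimal usage sketch for the Workers AI handler above; the CLOUDFLARE_API_KEY / CLOUDFLARE_ACCOUNT_ID variable names and the model id follow litellm's provider conventions and are assumptions here, not defined in this file.

import os
import litellm

os.environ["CLOUDFLARE_API_KEY"] = "<your-api-token>"      # placeholder
os.environ["CLOUDFLARE_ACCOUNT_ID"] = "<your-account-id>"  # placeholder

# litellm builds the account-scoped api_base and then calls completion() above,
# which appends the model name to it.
response = litellm.completion(
    model="cloudflare/@cf/meta/llama-2-7b-chat-int8",
    messages=[{"role": "user", "content": "What is Workers AI?"}],
)
print(response.choices[0].message.content)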
litellm/llms/cohere.py ADDED
@@ -0,0 +1,293 @@
1
+ import os, types
2
+ import json
3
+ from enum import Enum
4
+ import requests
5
+ import time, traceback
6
+ from typing import Callable, Optional
7
+ from litellm.utils import ModelResponse, Choices, Message, Usage
8
+ import litellm
9
+ import httpx
10
+
11
+
12
+ class CohereError(Exception):
13
+ def __init__(self, status_code, message):
14
+ self.status_code = status_code
15
+ self.message = message
16
+ self.request = httpx.Request(
17
+ method="POST", url="https://api.cohere.ai/v1/generate"
18
+ )
19
+ self.response = httpx.Response(status_code=status_code, request=self.request)
20
+ super().__init__(
21
+ self.message
22
+ ) # Call the base class constructor with the parameters it needs
23
+
24
+
25
+ class CohereConfig:
26
+ """
27
+ Reference: https://docs.cohere.com/reference/generate
28
+
29
+ The class `CohereConfig` provides configuration for Cohere's API interface. Below are the parameters:
30
+
31
+ - `num_generations` (integer): Maximum number of generations returned. Default is 1, with a minimum value of 1 and a maximum value of 5.
32
+
33
+ - `max_tokens` (integer): Maximum number of tokens the model will generate as part of the response. Default value is 20.
34
+
35
+ - `truncate` (string): Specifies how the API handles inputs longer than maximum token length. Options include NONE, START, END. Default is END.
36
+
37
+ - `temperature` (number): A non-negative float controlling the randomness in generation. Lower temperatures result in less random generations. Default is 0.75.
38
+
39
+ - `preset` (string): Identifier of a custom preset, a combination of parameters such as prompt, temperature etc.
40
+
41
+ - `end_sequences` (array of strings): The generated text gets cut at the beginning of the earliest occurrence of an end sequence, which will be excluded from the text.
42
+
43
+ - `stop_sequences` (array of strings): The generated text gets cut at the end of the earliest occurrence of a stop sequence, which will be included in the text.
44
+
45
+ - `k` (integer): Limits generation at each step to top `k` most likely tokens. Default is 0.
46
+
47
+ - `p` (number): Limits generation at each step to most likely tokens with total probability mass of `p`. Default is 0.
48
+
49
+ - `frequency_penalty` (number): Reduces repetitiveness of generated tokens. Higher values apply stronger penalties to previously occurred tokens.
50
+
51
+ - `presence_penalty` (number): Reduces repetitiveness of generated tokens. Similar to frequency_penalty, but this penalty applies equally to all tokens that have already appeared.
52
+
53
+ - `return_likelihoods` (string): Specifies how and if token likelihoods are returned with the response. Options include GENERATION, ALL and NONE.
54
+
55
+ - `logit_bias` (object): Used to prevent the model from generating unwanted tokens or to incentivize it to include desired tokens. e.g. {"hello_world": 1233}
56
+ """
57
+
58
+ num_generations: Optional[int] = None
59
+ max_tokens: Optional[int] = None
60
+ truncate: Optional[str] = None
61
+ temperature: Optional[float] = None
62
+ preset: Optional[str] = None
63
+ end_sequences: Optional[list] = None
64
+ stop_sequences: Optional[list] = None
65
+ k: Optional[int] = None
66
+ p: Optional[float] = None
67
+ frequency_penalty: Optional[float] = None
68
+ presence_penalty: Optional[float] = None
69
+ return_likelihoods: Optional[str] = None
70
+ logit_bias: Optional[dict] = None
71
+
72
+ def __init__(
73
+ self,
74
+ num_generations: Optional[int] = None,
75
+ max_tokens: Optional[int] = None,
76
+ truncate: Optional[str] = None,
77
+ temperature: Optional[float] = None,
78
+ preset: Optional[str] = None,
79
+ end_sequences: Optional[list] = None,
80
+ stop_sequences: Optional[list] = None,
81
+ k: Optional[int] = None,
82
+ p: Optional[float] = None,
83
+ frequency_penalty: Optional[float] = None,
84
+ presence_penalty: Optional[float] = None,
85
+ return_likelihoods: Optional[str] = None,
86
+ logit_bias: Optional[dict] = None,
87
+ ) -> None:
88
+ locals_ = locals()
89
+ for key, value in locals_.items():
90
+ if key != "self" and value is not None:
91
+ setattr(self.__class__, key, value)
92
+
93
+ @classmethod
94
+ def get_config(cls):
95
+ return {
96
+ k: v
97
+ for k, v in cls.__dict__.items()
98
+ if not k.startswith("__")
99
+ and not isinstance(
100
+ v,
101
+ (
102
+ types.FunctionType,
103
+ types.BuiltinFunctionType,
104
+ classmethod,
105
+ staticmethod,
106
+ ),
107
+ )
108
+ and v is not None
109
+ }
110
+
111
+
112
+ def validate_environment(api_key):
113
+ headers = {
114
+ "accept": "application/json",
115
+ "content-type": "application/json",
116
+ }
117
+ if api_key:
118
+ headers["Authorization"] = f"Bearer {api_key}"
119
+ return headers
120
+
121
+
122
+ def completion(
123
+ model: str,
124
+ messages: list,
125
+ api_base: str,
126
+ model_response: ModelResponse,
127
+ print_verbose: Callable,
128
+ encoding,
129
+ api_key,
130
+ logging_obj,
131
+ optional_params=None,
132
+ litellm_params=None,
133
+ logger_fn=None,
134
+ ):
135
+ headers = validate_environment(api_key)
136
+ completion_url = api_base
137
+ model = model
138
+ prompt = " ".join(message["content"] for message in messages)
139
+
140
+ ## Load Config
141
+ config = litellm.CohereConfig.get_config()
142
+ for k, v in config.items():
143
+ if (
144
+ k not in optional_params
145
+ ): # completion(top_k=3) > cohere_config(top_k=3) <- allows for dynamic variables to be passed in
146
+ optional_params[k] = v
147
+
148
+ data = {
149
+ "model": model,
150
+ "prompt": prompt,
151
+ **optional_params,
152
+ }
153
+
154
+ ## LOGGING
155
+ logging_obj.pre_call(
156
+ input=prompt,
157
+ api_key=api_key,
158
+ additional_args={
159
+ "complete_input_dict": data,
160
+ "headers": headers,
161
+ "api_base": completion_url,
162
+ },
163
+ )
164
+ ## COMPLETION CALL
165
+ response = requests.post(
166
+ completion_url,
167
+ headers=headers,
168
+ data=json.dumps(data),
169
+ stream=optional_params["stream"] if "stream" in optional_params else False,
170
+ )
171
+ ## error handling for cohere calls
172
+ if response.status_code != 200:
173
+ raise CohereError(message=response.text, status_code=response.status_code)
174
+
175
+ if "stream" in optional_params and optional_params["stream"] == True:
176
+ return response.iter_lines()
177
+ else:
178
+ ## LOGGING
179
+ logging_obj.post_call(
180
+ input=prompt,
181
+ api_key=api_key,
182
+ original_response=response.text,
183
+ additional_args={"complete_input_dict": data},
184
+ )
185
+ print_verbose(f"raw model_response: {response.text}")
186
+ ## RESPONSE OBJECT
187
+ completion_response = response.json()
188
+ if "error" in completion_response:
189
+ raise CohereError(
190
+ message=completion_response["error"],
191
+ status_code=response.status_code,
192
+ )
193
+ else:
194
+ try:
195
+ choices_list = []
196
+ for idx, item in enumerate(completion_response["generations"]):
197
+ if len(item["text"]) > 0:
198
+ message_obj = Message(content=item["text"])
199
+ else:
200
+ message_obj = Message(content=None)
201
+ choice_obj = Choices(
202
+ finish_reason=item["finish_reason"],
203
+ index=idx + 1,
204
+ message=message_obj,
205
+ )
206
+ choices_list.append(choice_obj)
207
+ model_response["choices"] = choices_list
208
+ except Exception as e:
209
+ raise CohereError(
210
+ message=response.text, status_code=response.status_code
211
+ )
212
+
213
+ ## CALCULATING USAGE
214
+ prompt_tokens = len(encoding.encode(prompt))
215
+ completion_tokens = len(
216
+ encoding.encode(model_response["choices"][0]["message"].get("content", ""))
217
+ )
218
+
219
+ model_response["created"] = int(time.time())
220
+ model_response["model"] = model
221
+ usage = Usage(
222
+ prompt_tokens=prompt_tokens,
223
+ completion_tokens=completion_tokens,
224
+ total_tokens=prompt_tokens + completion_tokens,
225
+ )
226
+ model_response.usage = usage
227
+ return model_response
228
+
229
+
230
+ def embedding(
231
+ model: str,
232
+ input: list,
233
+ api_key: Optional[str] = None,
234
+ logging_obj=None,
235
+ model_response=None,
236
+ encoding=None,
237
+ optional_params=None,
238
+ ):
239
+ headers = validate_environment(api_key)
240
+ embed_url = "https://api.cohere.ai/v1/embed"
241
+ model = model
242
+ data = {"model": model, "texts": input, **optional_params}
243
+
244
+ if "3" in model and "input_type" not in data:
245
+ # cohere v3 embedding models require input_type, if no input_type is provided, default to "search_document"
246
+ data["input_type"] = "search_document"
247
+
248
+ ## LOGGING
249
+ logging_obj.pre_call(
250
+ input=input,
251
+ api_key=api_key,
252
+ additional_args={"complete_input_dict": data},
253
+ )
254
+ ## COMPLETION CALL
255
+ response = requests.post(embed_url, headers=headers, data=json.dumps(data))
256
+ ## LOGGING
257
+ logging_obj.post_call(
258
+ input=input,
259
+ api_key=api_key,
260
+ additional_args={"complete_input_dict": data},
261
+ original_response=response,
262
+ )
263
+ """
264
+ response
265
+ {
266
+ 'object': "list",
267
+ 'data': [
268
+
269
+ ]
270
+ 'model',
271
+ 'usage'
272
+ }
273
+ """
274
+ if response.status_code != 200:
275
+ raise CohereError(message=response.text, status_code=response.status_code)
276
+ embeddings = response.json()["embeddings"]
277
+ output_data = []
278
+ for idx, embedding in enumerate(embeddings):
279
+ output_data.append(
280
+ {"object": "embedding", "index": idx, "embedding": embedding}
281
+ )
282
+ model_response["object"] = "list"
283
+ model_response["data"] = output_data
284
+ model_response["model"] = model
285
+ input_tokens = 0
286
+ for text in input:
287
+ input_tokens += len(encoding.encode(text))
288
+
289
+ model_response["usage"] = {
290
+ "prompt_tokens": input_tokens,
291
+ "total_tokens": input_tokens,
292
+ }
293
+ return model_response
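For context, a minimal usage sketch exercising both Cohere handlers above; it assumes a valid COHERE_API_KEY and that the model aliases shown are routed to this module by litellm.

import os
import litellm

os.environ["COHERE_API_KEY"] = "<your-cohere-key>"  # placeholder

# Text generation goes through completion() above.
response = litellm.completion(
    model="command-nightly",
    messages=[{"role": "user", "content": "Write a one-line greeting."}],
    max_tokens=50,
)
print(response.choices[0].message.content)

# Embeddings go through embedding() above; v3 models default to
# input_type="search_document" when none is supplied.
emb = litellm.embedding(model="embed-english-v3.0", input=["hello world"])
print(len(emb.data[0]["embedding"]))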
litellm/llms/custom_httpx/azure_dall_e_2.py ADDED
@@ -0,0 +1,136 @@