nonhuman committed on
Commit
395201c
1 Parent(s): 17261b5

Upload 165 files

This view is limited to 50 files because it contains too many changes.
Files changed (50)
  1. litellm/__init__.py +408 -0
  2. litellm/_version.py +6 -0
  3. litellm/budget_manager.py +155 -0
  4. litellm/caching.py +275 -0
  5. litellm/cost.json +5 -0
  6. litellm/deprecated_litellm_server/.env.template +43 -0
  7. litellm/deprecated_litellm_server/Dockerfile +10 -0
  8. litellm/deprecated_litellm_server/README.md +3 -0
  9. litellm/deprecated_litellm_server/__init__.py +2 -0
  10. litellm/deprecated_litellm_server/main.py +193 -0
  11. litellm/deprecated_litellm_server/requirements.txt +7 -0
  12. litellm/deprecated_litellm_server/server_utils.py +86 -0
  13. litellm/exceptions.py +166 -0
  14. litellm/integrations/__init__.py +1 -0
  15. litellm/integrations/aispend.py +177 -0
  16. litellm/integrations/berrispend.py +184 -0
  17. litellm/integrations/custom_logger.py +83 -0
  18. litellm/integrations/helicone.py +114 -0
  19. litellm/integrations/langfuse.py +75 -0
  20. litellm/integrations/langsmith.py +76 -0
  21. litellm/integrations/litedebugger.py +231 -0
  22. litellm/integrations/llmonitor.py +127 -0
  23. litellm/integrations/prompt_layer.py +72 -0
  24. litellm/integrations/supabase.py +116 -0
  25. litellm/integrations/traceloop.py +78 -0
  26. litellm/integrations/weights_biases.py +219 -0
  27. litellm/llms/__init__.py +1 -0
  28. litellm/llms/ai21.py +194 -0
  29. litellm/llms/aleph_alpha.py +278 -0
  30. litellm/llms/anthropic.py +187 -0
  31. litellm/llms/azure.py +414 -0
  32. litellm/llms/base.py +47 -0
  33. litellm/llms/baseten.py +149 -0
  34. litellm/llms/bedrock.py +627 -0
  35. litellm/llms/cohere.py +273 -0
  36. litellm/llms/huggingface_llms_metadata/hf_conversational_models.txt +2523 -0
  37. litellm/llms/huggingface_llms_metadata/hf_text_generation_models.txt +0 -0
  38. litellm/llms/huggingface_restapi.py +604 -0
  39. litellm/llms/maritalk.py +164 -0
  40. litellm/llms/nlp_cloud.py +212 -0
  41. litellm/llms/ollama.py +231 -0
  42. litellm/llms/oobabooga.py +124 -0
  43. litellm/llms/openai.py +590 -0
  44. litellm/llms/palm.py +177 -0
  45. litellm/llms/petals.py +189 -0
  46. litellm/llms/prompt_templates/factory.py +360 -0
  47. litellm/llms/replicate.py +302 -0
  48. litellm/llms/sagemaker.py +190 -0
  49. litellm/llms/together_ai.py +198 -0
  50. litellm/llms/tokenizers/anthropic_tokenizer.json +0 -0
litellm/__init__.py ADDED
@@ -0,0 +1,408 @@
1
+ ### INIT VARIABLES ###
2
+ import threading, requests
3
+ from typing import Callable, List, Optional, Dict, Union, Any
4
+ from litellm.caching import Cache
5
+ import httpx
6
+
7
+ input_callback: List[Union[str, Callable]] = []
8
+ success_callback: List[Union[str, Callable]] = []
9
+ failure_callback: List[Union[str, Callable]] = []
10
+ callbacks: List[Callable] = []
11
+ _async_success_callback: List[Callable] = [] # internal variable - async custom callbacks are routed here.
12
+ pre_call_rules: List[Callable] = []
13
+ post_call_rules: List[Callable] = []
14
+ set_verbose = False
15
+ email: Optional[
16
+ str
17
+ ] = None # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
18
+ token: Optional[
19
+ str
20
+ ] = None # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
21
+ telemetry = True
22
+ max_tokens = 256 # OpenAI Defaults
23
+ drop_params = False
24
+ retry = True
25
+ api_key: Optional[str] = None
26
+ openai_key: Optional[str] = None
27
+ azure_key: Optional[str] = None
28
+ anthropic_key: Optional[str] = None
29
+ replicate_key: Optional[str] = None
30
+ cohere_key: Optional[str] = None
31
+ maritalk_key: Optional[str] = None
32
+ ai21_key: Optional[str] = None
33
+ openrouter_key: Optional[str] = None
34
+ huggingface_key: Optional[str] = None
35
+ vertex_project: Optional[str] = None
36
+ vertex_location: Optional[str] = None
37
+ togetherai_api_key: Optional[str] = None
38
+ baseten_key: Optional[str] = None
39
+ aleph_alpha_key: Optional[str] = None
40
+ nlp_cloud_key: Optional[str] = None
41
+ use_client: bool = False
42
+ logging: bool = True
43
+ caching: bool = False # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
44
 + caching_with_models: bool = False # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
45
+ cache: Optional[Cache] = None # cache object <- use this - https://docs.litellm.ai/docs/caching
46
+ model_alias_map: Dict[str, str] = {}
47
+ max_budget: float = 0.0 # set the max budget across all providers
48
+ _current_cost = 0 # private variable, used if max budget is set
49
+ error_logs: Dict = {}
50
+ add_function_to_prompt: bool = False # if function calling not supported by api, append function call details to system prompt
51
+ client_session: Optional[httpx.Client] = None
52
+ aclient_session: Optional[httpx.AsyncClient] = None
53
+ model_fallbacks: Optional[List] = None # Deprecated for 'litellm.fallbacks'
54
+ model_cost_map_url: str = "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"
55
+ suppress_debug_info = False
56
+ #### RELIABILITY ####
57
+ request_timeout: Optional[float] = 6000
58
+ num_retries: Optional[int] = None
59
+ fallbacks: Optional[List] = None
60
+ context_window_fallbacks: Optional[List] = None
61
+ allowed_fails: int = 0
62
+ ####### SECRET MANAGERS #####################
63
 + secret_manager_client: Optional[Any] = None # instantiated key management client - e.g. azure kv, infisical, etc.
64
+ #############################################
65
+
66
+ def get_model_cost_map(url: str):
67
+ try:
68
+ with requests.get(url, timeout=5) as response: # set a 5 second timeout for the get request
69
+ response.raise_for_status() # Raise an exception if the request is unsuccessful
70
+ content = response.json()
71
+ return content
72
+ except Exception as e:
73
+ import importlib.resources
74
+ import json
75
+ with importlib.resources.open_text("litellm", "model_prices_and_context_window_backup.json") as f:
76
+ content = json.load(f)
77
+ return content
78
+ model_cost = get_model_cost_map(url=model_cost_map_url)
79
+ custom_prompt_dict:Dict[str, dict] = {}
80
+ ####### THREAD-SPECIFIC DATA ###################
81
+ class MyLocal(threading.local):
82
+ def __init__(self):
83
+ self.user = "Hello World"
84
+
85
+
86
+ _thread_context = MyLocal()
87
+
88
+
89
+ def identify(event_details):
90
+ # Store user in thread local data
91
+ if "user" in event_details:
92
+ _thread_context.user = event_details["user"]
93
+
94
+
95
+ ####### ADDITIONAL PARAMS ################### configurable params if you use proxy models like Helicone, map spend to org id, etc.
96
+ api_base = None
97
+ headers = None
98
+ api_version = None
99
+ organization = None
100
+ config_path = None
101
+ ####### COMPLETION MODELS ###################
102
+ open_ai_chat_completion_models: List = []
103
+ open_ai_text_completion_models: List = []
104
+ cohere_models: List = []
105
+ anthropic_models: List = []
106
+ openrouter_models: List = []
107
+ vertex_chat_models: List = []
108
+ vertex_code_chat_models: List = []
109
+ vertex_text_models: List = []
110
+ vertex_code_text_models: List = []
111
+ ai21_models: List = []
112
+ nlp_cloud_models: List = []
113
+ aleph_alpha_models: List = []
114
+ bedrock_models: List = []
115
+ deepinfra_models: List = []
116
+ perplexity_models: List = []
117
+ for key, value in model_cost.items():
118
+ if value.get('litellm_provider') == 'openai':
119
+ open_ai_chat_completion_models.append(key)
120
+ elif value.get('litellm_provider') == 'text-completion-openai':
121
+ open_ai_text_completion_models.append(key)
122
+ elif value.get('litellm_provider') == 'cohere':
123
+ cohere_models.append(key)
124
+ elif value.get('litellm_provider') == 'anthropic':
125
+ anthropic_models.append(key)
126
+ elif value.get('litellm_provider') == 'openrouter':
127
+ split_string = key.split('/', 1)
128
+ openrouter_models.append(split_string[1])
129
+ elif value.get('litellm_provider') == 'vertex_ai-text-models':
130
+ vertex_text_models.append(key)
131
+ elif value.get('litellm_provider') == 'vertex_ai-code-text-models':
132
+ vertex_code_text_models.append(key)
133
+ elif value.get('litellm_provider') == 'vertex_ai-chat-models':
134
+ vertex_chat_models.append(key)
135
+ elif value.get('litellm_provider') == 'vertex_ai-code-chat-models':
136
+ vertex_code_chat_models.append(key)
137
+ elif value.get('litellm_provider') == 'ai21':
138
+ ai21_models.append(key)
139
+ elif value.get('litellm_provider') == 'nlp_cloud':
140
+ nlp_cloud_models.append(key)
141
+ elif value.get('litellm_provider') == 'aleph_alpha':
142
+ aleph_alpha_models.append(key)
143
+ elif value.get('litellm_provider') == 'bedrock':
144
+ bedrock_models.append(key)
145
+ elif value.get('litellm_provider') == 'deepinfra':
146
+ deepinfra_models.append(key)
147
+ elif value.get('litellm_provider') == 'perplexity':
148
+ perplexity_models.append(key)
149
+
150
+ # known openai compatible endpoints - we'll eventually move this list to the model_prices_and_context_window.json dictionary
151
+ openai_compatible_endpoints: List = [
152
+ "api.perplexity.ai",
153
+ "api.endpoints.anyscale.com/v1",
154
+ "api.deepinfra.com/v1/openai"
155
+ ]
156
+
157
+
158
+ # well supported replicate llms
159
+ replicate_models: List = [
160
+ # llama replicate supported LLMs
161
+ "replicate/llama-2-70b-chat:2796ee9483c3fd7aa2e171d38f4ca12251a30609463dcfd4cd76703f22e96cdf",
162
+ "a16z-infra/llama-2-13b-chat:2a7f981751ec7fdf87b5b91ad4db53683a98082e9ff7bfd12c8cd5ea85980a52",
163
+ "meta/codellama-13b:1c914d844307b0588599b8393480a3ba917b660c7e9dfae681542b5325f228db",
164
+ # Vicuna
165
+ "replicate/vicuna-13b:6282abe6a492de4145d7bb601023762212f9ddbbe78278bd6771c8b3b2f2a13b",
166
+ "joehoover/instructblip-vicuna13b:c4c54e3c8c97cd50c2d2fec9be3b6065563ccf7d43787fb99f84151b867178fe",
167
+ # Flan T-5
168
+ "daanelson/flan-t5-large:ce962b3f6792a57074a601d3979db5839697add2e4e02696b3ced4c022d4767f"
169
+ # Others
170
+ "replicate/dolly-v2-12b:ef0e1aefc61f8e096ebe4db6b2bacc297daf2ef6899f0f7e001ec445893500e5",
171
+ "replit/replit-code-v1-3b:b84f4c074b807211cd75e3e8b1589b6399052125b4c27106e43d47189e8415ad",
172
+ ]
173
+
174
+ huggingface_models: List = [
175
+ "meta-llama/Llama-2-7b-hf",
176
+ "meta-llama/Llama-2-7b-chat-hf",
177
+ "meta-llama/Llama-2-13b-hf",
178
+ "meta-llama/Llama-2-13b-chat-hf",
179
+ "meta-llama/Llama-2-70b-hf",
180
+ "meta-llama/Llama-2-70b-chat-hf",
181
+ "meta-llama/Llama-2-7b",
182
+ "meta-llama/Llama-2-7b-chat",
183
+ "meta-llama/Llama-2-13b",
184
+ "meta-llama/Llama-2-13b-chat",
185
+ "meta-llama/Llama-2-70b",
186
+ "meta-llama/Llama-2-70b-chat",
187
 + ] # these have been tested extensively. But by default all text2text-generation and text-generation models are supported by liteLLM. - https://docs.litellm.ai/docs/providers
188
+
189
+ together_ai_models: List = [
190
+ # llama llms - chat
191
+ "togethercomputer/llama-2-70b-chat",
192
+
193
+ # llama llms - language / instruct
194
+ "togethercomputer/llama-2-70b",
195
+ "togethercomputer/LLaMA-2-7B-32K",
196
+ "togethercomputer/Llama-2-7B-32K-Instruct",
197
+ "togethercomputer/llama-2-7b",
198
+
199
+ # falcon llms
200
+ "togethercomputer/falcon-40b-instruct",
201
+ "togethercomputer/falcon-7b-instruct",
202
+
203
+ # alpaca
204
+ "togethercomputer/alpaca-7b",
205
+
206
+ # chat llms
207
+ "HuggingFaceH4/starchat-alpha",
208
+
209
+ # code llms
210
+ "togethercomputer/CodeLlama-34b",
211
+ "togethercomputer/CodeLlama-34b-Instruct",
212
+ "togethercomputer/CodeLlama-34b-Python",
213
+ "defog/sqlcoder",
214
+ "NumbersStation/nsql-llama-2-7B",
215
+ "WizardLM/WizardCoder-15B-V1.0",
216
+ "WizardLM/WizardCoder-Python-34B-V1.0",
217
+
218
+ # language llms
219
+ "NousResearch/Nous-Hermes-Llama2-13b",
220
+ "Austism/chronos-hermes-13b",
221
+ "upstage/SOLAR-0-70b-16bit",
222
+ "WizardLM/WizardLM-70B-V1.0",
223
+
224
 + ] # supports all together ai models, just pass in the model id e.g. completion(model="togethercomputer/replit_code_3b",...)
225
+
226
+
227
+ baseten_models: List = ["qvv0xeq", "q841o8w", "31dxrj3"] # FALCON 7B # WizardLM # Mosaic ML
228
+
229
+ petals_models = [
230
+ "petals-team/StableBeluga2",
231
+ ]
232
+
233
+ ollama_models = [
234
+ "llama2"
235
+ ]
236
+
237
+ maritalk_models = [
238
+ "maritalk"
239
+ ]
240
+
241
+ model_list = (
242
+ open_ai_chat_completion_models
243
+ + open_ai_text_completion_models
244
+ + cohere_models
245
+ + anthropic_models
246
+ + replicate_models
247
+ + openrouter_models
248
+ + huggingface_models
249
+ + vertex_chat_models
250
+ + vertex_text_models
251
+ + ai21_models
252
+ + together_ai_models
253
+ + baseten_models
254
+ + aleph_alpha_models
255
+ + nlp_cloud_models
256
+ + ollama_models
257
+ + bedrock_models
258
+ + deepinfra_models
259
+ + perplexity_models
260
+ + maritalk_models
261
+ )
262
+
263
+ provider_list: List = [
264
+ "openai",
265
+ "custom_openai",
266
+ "cohere",
267
+ "anthropic",
268
+ "replicate",
269
+ "huggingface",
270
+ "together_ai",
271
+ "openrouter",
272
+ "vertex_ai",
273
+ "palm",
274
+ "ai21",
275
+ "baseten",
276
+ "azure",
277
+ "sagemaker",
278
+ "bedrock",
279
+ "vllm",
280
+ "nlp_cloud",
281
+ "petals",
282
+ "oobabooga",
283
+ "ollama",
284
+ "deepinfra",
285
+ "perplexity",
286
+ "anyscale",
287
+ "maritalk",
288
+ "custom", # custom apis
289
+ ]
290
+
291
+ models_by_provider: dict = {
292
+ "openai": open_ai_chat_completion_models + open_ai_text_completion_models,
293
+ "cohere": cohere_models,
294
+ "anthropic": anthropic_models,
295
+ "replicate": replicate_models,
296
+ "huggingface": huggingface_models,
297
+ "together_ai": together_ai_models,
298
+ "baseten": baseten_models,
299
+ "openrouter": openrouter_models,
300
+ "vertex_ai": vertex_chat_models + vertex_text_models,
301
+ "ai21": ai21_models,
302
+ "bedrock": bedrock_models,
303
+ "petals": petals_models,
304
+ "ollama": ollama_models,
305
+ "deepinfra": deepinfra_models,
306
+ "perplexity": perplexity_models,
307
+ "maritalk": maritalk_models
308
+ }
309
+
310
+ # mapping for those models which have larger equivalents
311
+ longer_context_model_fallback_dict: dict = {
312
+ # openai chat completion models
313
+ "gpt-3.5-turbo": "gpt-3.5-turbo-16k",
314
+ "gpt-3.5-turbo-0301": "gpt-3.5-turbo-16k-0301",
315
+ "gpt-3.5-turbo-0613": "gpt-3.5-turbo-16k-0613",
316
+ "gpt-4": "gpt-4-32k",
317
+ "gpt-4-0314": "gpt-4-32k-0314",
318
+ "gpt-4-0613": "gpt-4-32k-0613",
319
+ # anthropic
320
+ "claude-instant-1": "claude-2",
321
+ "claude-instant-1.2": "claude-2",
322
+ # vertexai
323
+ "chat-bison": "chat-bison-32k",
324
+ "chat-bison@001": "chat-bison-32k",
325
+ "codechat-bison": "codechat-bison-32k",
326
+ "codechat-bison@001": "codechat-bison-32k",
327
+ # openrouter
328
+ "openrouter/openai/gpt-3.5-turbo": "openrouter/openai/gpt-3.5-turbo-16k",
329
+ "openrouter/anthropic/claude-instant-v1": "openrouter/anthropic/claude-2",
330
+ }
331
+
332
+ ####### EMBEDDING MODELS ###################
333
+ open_ai_embedding_models: List = ["text-embedding-ada-002"]
334
+ cohere_embedding_models: List = [
335
+ "embed-english-v3.0",
336
+ "embed-english-light-v3.0",
337
+ "embed-multilingual-v3.0",
338
+ "embed-english-v2.0",
339
+ "embed-english-light-v2.0",
340
+ "embed-multilingual-v2.0",
341
+ ]
342
+ bedrock_embedding_models: List = ["amazon.titan-embed-text-v1"]
343
+
344
+ all_embedding_models = open_ai_embedding_models + cohere_embedding_models + bedrock_embedding_models
345
+
346
+ from .timeout import timeout
347
+ from .utils import (
348
+ client,
349
+ exception_type,
350
+ get_optional_params,
351
+ modify_integration,
352
+ token_counter,
353
+ cost_per_token,
354
+ completion_cost,
355
+ get_litellm_params,
356
+ Logging,
357
+ acreate,
358
+ get_model_list,
359
+ get_max_tokens,
360
+ get_model_info,
361
+ register_prompt_template,
362
+ validate_environment,
363
+ check_valid_key,
364
+ get_llm_provider,
365
+ completion_with_config,
366
+ register_model,
367
+ encode,
368
+ decode,
369
+ _calculate_retry_after,
370
+ _should_retry,
371
+ get_secret
372
+ )
373
+ from .llms.huggingface_restapi import HuggingfaceConfig
374
+ from .llms.anthropic import AnthropicConfig
375
+ from .llms.replicate import ReplicateConfig
376
+ from .llms.cohere import CohereConfig
377
+ from .llms.ai21 import AI21Config
378
+ from .llms.together_ai import TogetherAIConfig
379
+ from .llms.palm import PalmConfig
380
+ from .llms.nlp_cloud import NLPCloudConfig
381
+ from .llms.aleph_alpha import AlephAlphaConfig
382
+ from .llms.petals import PetalsConfig
383
+ from .llms.vertex_ai import VertexAIConfig
384
+ from .llms.sagemaker import SagemakerConfig
385
+ from .llms.ollama import OllamaConfig
386
+ from .llms.maritalk import MaritTalkConfig
387
+ from .llms.bedrock import AmazonTitanConfig, AmazonAI21Config, AmazonAnthropicConfig, AmazonCohereConfig, AmazonLlamaConfig
388
+ from .llms.openai import OpenAIConfig, OpenAITextCompletionConfig
389
+ from .llms.azure import AzureOpenAIConfig
390
+ from .main import * # type: ignore
391
+ from .integrations import *
392
+ from .exceptions import (
393
+ AuthenticationError,
394
+ InvalidRequestError,
395
+ BadRequestError,
396
+ RateLimitError,
397
+ ServiceUnavailableError,
398
+ OpenAIError,
399
+ ContextWindowExceededError,
400
+ BudgetExceededError,
401
+ APIError,
402
+ Timeout,
403
+ APIConnectionError,
404
+ APIResponseValidationError
405
+ )
406
+ from .budget_manager import BudgetManager
407
+ from .proxy.proxy_cli import run_server
408
+ from .router import Router
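
The module above is litellm's package-level configuration surface: API keys, callbacks, budgets, and the provider/model registries built from the downloaded cost map. A minimal usage sketch, assuming litellm is installed and a real key replaces the placeholder:

import litellm

litellm.set_verbose = False      # toggle debug logging
litellm.drop_params = True       # drop params a provider doesn't support instead of erroring
litellm.max_budget = 10.0        # completion calls should raise BudgetExceededError once spend passes $10
litellm.api_key = "sk-..."       # placeholder; provider-specific env vars are used if unset

# the registries assembled above drive routing and validation, e.g.:
print("openai chat models:", litellm.open_ai_chat_completion_models[:3])
print("known providers:", litellm.provider_list[:5])
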
litellm/_version.py ADDED
@@ -0,0 +1,6 @@
1
+ import importlib_metadata
2
+
3
+ try:
4
+ version = importlib_metadata.version("litellm")
5
+ except:
6
+ pass
litellm/budget_manager.py ADDED
@@ -0,0 +1,155 @@
1
+ import os, json, time
2
+ import litellm
3
+ from litellm.utils import ModelResponse
4
+ import requests, threading
5
+ from typing import Optional, Union, Literal
6
+
7
+ class BudgetManager:
8
+ def __init__(self, project_name: str, client_type: str = "local", api_base: Optional[str] = None):
9
+ self.client_type = client_type
10
+ self.project_name = project_name
11
+ self.api_base = api_base or "https://api.litellm.ai"
12
+ ## load the data or init the initial dictionaries
13
+ self.load_data()
14
+
15
+ def print_verbose(self, print_statement):
16
+ if litellm.set_verbose:
17
+ import logging
18
+ logging.info(print_statement)
19
+
20
+ def load_data(self):
21
+ if self.client_type == "local":
22
+ # Check if user dict file exists
23
+ if os.path.isfile("user_cost.json"):
24
+ # Load the user dict
25
+ with open("user_cost.json", 'r') as json_file:
26
+ self.user_dict = json.load(json_file)
27
+ else:
28
+ self.print_verbose("User Dictionary not found!")
29
+ self.user_dict = {}
30
+ self.print_verbose(f"user dict from local: {self.user_dict}")
31
+ elif self.client_type == "hosted":
32
+ # Load the user_dict from hosted db
33
+ url = self.api_base + "/get_budget"
34
+ headers = {'Content-Type': 'application/json'}
35
+ data = {
36
+ 'project_name' : self.project_name
37
+ }
38
+ response = requests.post(url, headers=headers, json=data)
39
+ response = response.json()
40
+ if response["status"] == "error":
41
+ self.user_dict = {} # assume this means the user dict hasn't been stored yet
42
+ else:
43
+ self.user_dict = response["data"]
44
+
45
+ def create_budget(self, total_budget: float, user: str, duration: Optional[Literal["daily", "weekly", "monthly", "yearly"]] = None, created_at: float = time.time()):
46
+ self.user_dict[user] = {"total_budget": total_budget}
47
+ if duration is None:
48
+ return self.user_dict[user]
49
+
50
+ if duration == 'daily':
51
+ duration_in_days = 1
52
+ elif duration == 'weekly':
53
+ duration_in_days = 7
54
+ elif duration == 'monthly':
55
+ duration_in_days = 28
56
+ elif duration == 'yearly':
57
+ duration_in_days = 365
58
+ else:
59
+ raise ValueError("""duration needs to be one of ["daily", "weekly", "monthly", "yearly"]""")
60
+ self.user_dict[user] = {"total_budget": total_budget, "duration": duration_in_days, "created_at": created_at, "last_updated_at": created_at}
61
+ self._save_data_thread() # [Non-Blocking] Update persistent storage without blocking execution
62
+ return self.user_dict[user]
63
+
64
+ def projected_cost(self, model: str, messages: list, user: str):
65
+ text = "".join(message["content"] for message in messages)
66
+ prompt_tokens = litellm.token_counter(model=model, text=text)
67
+ prompt_cost, _ = litellm.cost_per_token(model=model, prompt_tokens=prompt_tokens, completion_tokens=0)
68
+ current_cost = self.user_dict[user].get("current_cost", 0)
69
+ projected_cost = prompt_cost + current_cost
70
+ return projected_cost
71
+
72
+ def get_total_budget(self, user: str):
73
+ return self.user_dict[user]["total_budget"]
74
+
75
+ def update_cost(self, user: str, completion_obj: Optional[ModelResponse] = None, model: Optional[str] = None, input_text: Optional[str] = None, output_text: Optional[str] = None):
76
+ if model and input_text and output_text:
77
+ prompt_tokens = litellm.token_counter(model=model, messages=[{"role": "user", "content": input_text}])
78
+ completion_tokens = litellm.token_counter(model=model, messages=[{"role": "user", "content": output_text}])
79
+ prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar = litellm.cost_per_token(model=model, prompt_tokens=prompt_tokens, completion_tokens=completion_tokens)
80
+ cost = prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar
81
+ elif completion_obj:
82
+ cost = litellm.completion_cost(completion_response=completion_obj)
83
 + model = completion_obj['model'] # if dict access throws an error, try model = completion_obj.model
84
+ else:
85
+ raise ValueError("Either a chat completion object or the text response needs to be passed in. Learn more - https://docs.litellm.ai/docs/budget_manager")
86
+
87
+ self.user_dict[user]["current_cost"] = cost + self.user_dict[user].get("current_cost", 0)
88
+ if "model_cost" in self.user_dict[user]:
89
+ self.user_dict[user]["model_cost"][model] = cost + self.user_dict[user]["model_cost"].get(model, 0)
90
+ else:
91
+ self.user_dict[user]["model_cost"] = {model: cost}
92
+
93
+ self._save_data_thread() # [Non-Blocking] Update persistent storage without blocking execution
94
+ return {"user": self.user_dict[user]}
95
+
96
+
97
+ def get_current_cost(self, user):
98
+ return self.user_dict[user].get("current_cost", 0)
99
+
100
+ def get_model_cost(self, user):
101
+ return self.user_dict[user].get("model_cost", 0)
102
+
103
+ def is_valid_user(self, user: str) -> bool:
104
+ return user in self.user_dict
105
+
106
+ def get_users(self):
107
+ return list(self.user_dict.keys())
108
+
109
+ def reset_cost(self, user):
110
+ self.user_dict[user]["current_cost"] = 0
111
+ self.user_dict[user]["model_cost"] = {}
112
+ return {"user": self.user_dict[user]}
113
+
114
+ def reset_on_duration(self, user: str):
115
+ # Get current and creation time
116
+ last_updated_at = self.user_dict[user]["last_updated_at"]
117
+ current_time = time.time()
118
+
119
+ # Convert duration from days to seconds
120
+ duration_in_seconds = self.user_dict[user]["duration"] * 24 * 60 * 60
121
+
122
+ # Check if duration has elapsed
123
+ if current_time - last_updated_at >= duration_in_seconds:
124
+ # Reset cost if duration has elapsed and update the creation time
125
+ self.reset_cost(user)
126
+ self.user_dict[user]["last_updated_at"] = current_time
127
+ self._save_data_thread() # Save the data
128
+
129
+ def update_budget_all_users(self):
130
+ for user in self.get_users():
131
+ if "duration" in self.user_dict[user]:
132
+ self.reset_on_duration(user)
133
+
134
+ def _save_data_thread(self):
135
+ thread = threading.Thread(target=self.save_data) # [Non-Blocking]: saves data without blocking execution
136
+ thread.start()
137
+
138
+ def save_data(self):
139
+ if self.client_type == "local":
140
+ import json
141
+
142
+ # save the user dict
143
+ with open("user_cost.json", 'w') as json_file:
144
+ json.dump(self.user_dict, json_file, indent=4) # Indent for pretty formatting
145
+ return {"status": "success"}
146
+ elif self.client_type == "hosted":
147
+ url = self.api_base + "/set_budget"
148
+ headers = {'Content-Type': 'application/json'}
149
+ data = {
150
+ 'project_name' : self.project_name,
151
+ "user_dict": self.user_dict
152
+ }
153
+ response = requests.post(url, headers=headers, json=data)
154
+ response = response.json()
155
+ return response
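
BudgetManager above tracks per-user spend either in a local user_cost.json file (client_type="local") or via the hosted API (client_type="hosted"). A minimal local-mode sketch using the methods defined above; the model name, budget, and user id are illustrative:

from litellm import BudgetManager, completion

budget_manager = BudgetManager(project_name="demo_project")  # defaults to local, file-backed storage
user = "user_123"
if not budget_manager.is_valid_user(user):
    budget_manager.create_budget(total_budget=10.0, user=user, duration="monthly")

messages = [{"role": "user", "content": "Hey, how's it going?"}]
# check projected spend before making the call
if budget_manager.projected_cost(model="gpt-3.5-turbo", messages=messages, user=user) <= budget_manager.get_total_budget(user):
    response = completion(model="gpt-3.5-turbo", messages=messages)
    budget_manager.update_cost(user=user, completion_obj=response)  # persists asynchronously via _save_data_thread
else:
    print(f"projected spend exceeds budget for {user}")
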
litellm/caching.py ADDED
@@ -0,0 +1,275 @@
1
+ # +-----------------------------------------------+
2
+ # | |
3
+ # | Give Feedback / Get Help |
4
+ # | https://github.com/BerriAI/litellm/issues/new |
5
+ # | |
6
+ # +-----------------------------------------------+
7
+ #
8
+ # Thank you users! We ❤️ you! - Krrish & Ishaan
9
+
10
+ import litellm
11
+ import time, logging
12
+ import json, traceback, ast
13
+ from typing import Optional
14
+
15
+ def get_prompt(*args, **kwargs):
16
+ # make this safe checks, it should not throw any exceptions
17
+ if len(args) > 1:
18
+ messages = args[1]
19
+ prompt = " ".join(message["content"] for message in messages)
20
+ return prompt
21
+ if "messages" in kwargs:
22
+ messages = kwargs["messages"]
23
+ prompt = " ".join(message["content"] for message in messages)
24
+ return prompt
25
+ return None
26
+
27
+ def print_verbose(print_statement):
28
+ if litellm.set_verbose:
29
+ print(print_statement) # noqa
30
+
31
+ class BaseCache:
32
+ def set_cache(self, key, value, **kwargs):
33
+ raise NotImplementedError
34
+
35
+ def get_cache(self, key, **kwargs):
36
+ raise NotImplementedError
37
+
38
+
39
+ class InMemoryCache(BaseCache):
40
+ def __init__(self):
41
 + # if users don't provide one, use the default litellm cache
42
+ self.cache_dict = {}
43
+ self.ttl_dict = {}
44
+
45
+ def set_cache(self, key, value, **kwargs):
46
+ self.cache_dict[key] = value
47
+ if "ttl" in kwargs:
48
+ self.ttl_dict[key] = time.time() + kwargs["ttl"]
49
+
50
+ def get_cache(self, key, **kwargs):
51
+ if key in self.cache_dict:
52
+ if key in self.ttl_dict:
53
+ if time.time() > self.ttl_dict[key]:
54
+ self.cache_dict.pop(key, None)
55
+ return None
56
+ original_cached_response = self.cache_dict[key]
57
+ try:
58
+ cached_response = json.loads(original_cached_response)
59
+ except:
60
+ cached_response = original_cached_response
61
+ if isinstance(cached_response, dict):
62
+ cached_response['cache'] = True # set cache-hit flag to True
63
+ return cached_response
64
+ return None
65
+
66
+ def flush_cache(self):
67
+ self.cache_dict.clear()
68
+ self.ttl_dict.clear()
69
+
70
+
71
+ class RedisCache(BaseCache):
72
+ def __init__(self, host, port, password, **kwargs):
73
+ import redis
74
 + # if users don't provide one, use the default litellm cache
75
+ self.redis_client = redis.Redis(host=host, port=port, password=password, **kwargs)
76
+
77
+ def set_cache(self, key, value, **kwargs):
78
+ ttl = kwargs.get("ttl", None)
79
+ try:
80
+ self.redis_client.set(name=key, value=str(value), ex=ttl)
81
+ except Exception as e:
82
+ # NON blocking - notify users Redis is throwing an exception
83
+ logging.debug("LiteLLM Caching: set() - Got exception from REDIS : ", e)
84
+
85
+ def get_cache(self, key, **kwargs):
86
+ try:
87
+ # TODO convert this to a ModelResponse object
88
+ cached_response = self.redis_client.get(key)
89
+ if cached_response != None:
90
 + # cached_response comes back as bytes - decode it and convert it back to a dict
91
+ cached_response = cached_response.decode("utf-8") # Convert bytes to string
92
+ try:
93
+ cached_response = json.loads(cached_response) # Convert string to dictionary
94
+ except:
95
+ cached_response = ast.literal_eval(cached_response)
96
+ if isinstance(cached_response, dict):
97
+ cached_response['cache'] = True # set cache-hit flag to True
98
+ return cached_response
99
+ except Exception as e:
100
+ # NON blocking - notify users Redis is throwing an exception
101
+ traceback.print_exc()
102
+ logging.debug("LiteLLM Caching: get() - Got exception from REDIS: ", e)
103
+
104
+ def flush_cache(self):
105
+ self.redis_client.flushall()
106
+
107
+ class DualCache(BaseCache):
108
+ """
109
+ This updates both Redis and an in-memory cache simultaneously.
110
+ When data is updated or inserted, it is written to both the in-memory cache + Redis.
111
+ This ensures that even if Redis hasn't been updated yet, the in-memory cache reflects the most recent data.
112
+ """
113
+ def __init__(self, in_memory_cache: Optional[InMemoryCache] =None, redis_cache: Optional[RedisCache] =None) -> None:
114
+ super().__init__()
115
+ # If in_memory_cache is not provided, use the default InMemoryCache
116
+ self.in_memory_cache = in_memory_cache or InMemoryCache()
117
+ # If redis_cache is not provided, use the default RedisCache
118
+ self.redis_cache = redis_cache
119
+
120
+ def set_cache(self, key, value, **kwargs):
121
+ # Update both Redis and in-memory cache
122
+ try:
123
+ print_verbose(f"set cache: key: {key}; value: {value}")
124
+ if self.in_memory_cache is not None:
125
+ self.in_memory_cache.set_cache(key, value, **kwargs)
126
+
127
+ if self.redis_cache is not None:
128
+ self.redis_cache.set_cache(key, value, **kwargs)
129
+ except Exception as e:
130
+ print_verbose(e)
131
+
132
+ def get_cache(self, key, **kwargs):
133
+ # Try to fetch from in-memory cache first
134
+ try:
135
+ print_verbose(f"get cache: cache key: {key}")
136
+ result = None
137
+ if self.in_memory_cache is not None:
138
+ in_memory_result = self.in_memory_cache.get_cache(key, **kwargs)
139
+
140
+ if in_memory_result is not None:
141
+ result = in_memory_result
142
+
143
+ if self.redis_cache is not None:
144
+ # If not found in in-memory cache, try fetching from Redis
145
+ redis_result = self.redis_cache.get_cache(key, **kwargs)
146
+
147
+ if redis_result is not None:
148
+ # Update in-memory cache with the value from Redis
149
+ self.in_memory_cache.set_cache(key, redis_result, **kwargs)
150
+
151
+ result = redis_result
152
+
153
+ print_verbose(f"get cache: cache result: {result}")
154
+ return result
155
+ except Exception as e:
156
+ traceback.print_exc()
157
+
158
+ def flush_cache(self):
159
+ if self.in_memory_cache is not None:
160
+ self.in_memory_cache.flush_cache()
161
+ if self.redis_cache is not None:
162
+ self.redis_cache.flush_cache()
163
+
164
+ #### LiteLLM.Completion Cache ####
165
+ class Cache:
166
+ def __init__(
167
+ self,
168
+ type="local",
169
+ host=None,
170
+ port=None,
171
+ password=None,
172
+ **kwargs
173
+ ):
174
+ """
175
+ Initializes the cache based on the given type.
176
+
177
+ Args:
178
+ type (str, optional): The type of cache to initialize. Defaults to "local".
179
+ host (str, optional): The host address for the Redis cache. Required if type is "redis".
180
+ port (int, optional): The port number for the Redis cache. Required if type is "redis".
181
+ password (str, optional): The password for the Redis cache. Required if type is "redis".
182
+ **kwargs: Additional keyword arguments for redis.Redis() cache
183
+
184
+ Raises:
185
+ ValueError: If an invalid cache type is provided.
186
+
187
+ Returns:
188
+ None
189
+ """
190
+ if type == "redis":
191
+ self.cache = RedisCache(host, port, password, **kwargs)
192
+ if type == "local":
193
+ self.cache = InMemoryCache()
194
+ if "cache" not in litellm.input_callback:
195
+ litellm.input_callback.append("cache")
196
+ if "cache" not in litellm.success_callback:
197
+ litellm.success_callback.append("cache")
198
+
199
+ def get_cache_key(self, *args, **kwargs):
200
+ """
201
+ Get the cache key for the given arguments.
202
+
203
+ Args:
204
+ *args: args to litellm.completion() or embedding()
205
+ **kwargs: kwargs to litellm.completion() or embedding()
206
+
207
+ Returns:
208
+ str: The cache key generated from the arguments, or None if no cache key could be generated.
209
+ """
210
+ cache_key =""
211
+ for param in kwargs:
212
+ # ignore litellm params here
213
+ if param in set(["model", "messages", "temperature", "top_p", "n", "stop", "max_tokens", "presence_penalty", "frequency_penalty", "logit_bias", "user", "response_format", "seed", "tools", "tool_choice"]):
214
+ # check if param == model and model_group is passed in, then override model with model_group
215
+ if param == "model" and kwargs.get("metadata", None) is not None and kwargs["metadata"].get("model_group", None) is not None:
216
+ param_value = kwargs["metadata"].get("model_group", None) # for litellm.Router use model_group for caching over `model`
217
+ else:
218
+ param_value = kwargs[param]
219
 + cache_key += f"{str(param)}: {str(param_value)}"
220
+ return cache_key
221
+
222
+ def generate_streaming_content(self, content):
223
+ chunk_size = 5 # Adjust the chunk size as needed
224
+ for i in range(0, len(content), chunk_size):
225
+ yield {'choices': [{'delta': {'role': 'assistant', 'content': content[i:i + chunk_size]}}]}
226
+ time.sleep(0.02)
227
+
228
+ def get_cache(self, *args, **kwargs):
229
+ """
230
+ Retrieves the cached result for the given arguments.
231
+
232
+ Args:
233
+ *args: args to litellm.completion() or embedding()
234
+ **kwargs: kwargs to litellm.completion() or embedding()
235
+
236
+ Returns:
237
+ The cached result if it exists, otherwise None.
238
+ """
239
+ try: # never block execution
240
+ if "cache_key" in kwargs:
241
+ cache_key = kwargs["cache_key"]
242
+ else:
243
+ cache_key = self.get_cache_key(*args, **kwargs)
244
+ if cache_key is not None:
245
+ cached_result = self.cache.get_cache(cache_key)
246
+ if cached_result != None and 'stream' in kwargs and kwargs['stream'] == True:
247
+ # if streaming is true and we got a cache hit, return a generator
248
+ return self.generate_streaming_content(cached_result["choices"][0]['message']['content'])
249
+ return cached_result
250
+ except Exception as e:
251
+ logging.debug(f"An exception occurred: {traceback.format_exc()}")
252
+ return None
253
+
254
+ def add_cache(self, result, *args, **kwargs):
255
+ """
256
+ Adds a result to the cache.
257
+
258
+ Args:
259
+ *args: args to litellm.completion() or embedding()
260
+ **kwargs: kwargs to litellm.completion() or embedding()
261
+
262
+ Returns:
263
+ None
264
+ """
265
+ try:
266
+ if "cache_key" in kwargs:
267
+ cache_key = kwargs["cache_key"]
268
+ else:
269
+ cache_key = self.get_cache_key(*args, **kwargs)
270
+ if cache_key is not None:
271
+ if isinstance(result, litellm.ModelResponse):
272
+ result = result.model_dump_json()
273
+ self.cache.set_cache(cache_key, result, **kwargs)
274
+ except Exception as e:
275
+ pass
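
The Cache class above registers itself on litellm's input/success callbacks, so repeated completion() calls with the same hashed parameters can be answered from memory or Redis instead of hitting the provider. A small sketch of the local (in-memory) mode; the model and prompt are illustrative:

import litellm
from litellm.caching import Cache

litellm.cache = Cache()  # type="local" -> InMemoryCache; pass type="redis", host, port, password for RedisCache

messages = [{"role": "user", "content": "What is 2 + 2?"}]
first = litellm.completion(model="gpt-3.5-turbo", messages=messages)
second = litellm.completion(model="gpt-3.5-turbo", messages=messages)  # expected to be served from the cache
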
litellm/cost.json ADDED
@@ -0,0 +1,5 @@
1
+ {
2
+ "gpt-3.5-turbo-0613": 0.00015000000000000001,
3
+ "claude-2": 0.00016454,
4
+ "gpt-4-0613": 0.015408
5
+ }
litellm/deprecated_litellm_server/.env.template ADDED
@@ -0,0 +1,43 @@
1
+ # # set AUTH STRATEGY FOR LLM APIs - Defaults to using Environment Variables
2
+ # AUTH_STRATEGY = "ENV" # ENV or DYNAMIC, ENV always reads from environment variables, DYNAMIC reads request headers to set LLM api keys
3
+
4
+ # OPENAI_API_KEY = ""
5
+
6
+ # HUGGINGFACE_API_KEY=""
7
+
8
+ # TOGETHERAI_API_KEY=""
9
+
10
+ # REPLICATE_API_KEY=""
11
+
12
+ # ## bedrock / sagemaker
13
+ # AWS_ACCESS_KEY_ID = ""
14
+ # AWS_SECRET_ACCESS_KEY = ""
15
+
16
+ # AZURE_API_KEY = ""
17
+ # AZURE_API_BASE = ""
18
+ # AZURE_API_VERSION = ""
19
+
20
+ # ANTHROPIC_API_KEY = ""
21
+
22
+ # COHERE_API_KEY = ""
23
+
24
+ # ## CONFIG FILE ##
25
+ # # CONFIG_FILE_PATH = "" # uncomment to point to config file
26
+
27
+ # ## LOGGING ##
28
+
29
+ # SET_VERBOSE = "False" # set to 'True' to see detailed input/output logs
30
+
31
+ # ### LANGFUSE
32
+ # LANGFUSE_PUBLIC_KEY = ""
33
+ # LANGFUSE_SECRET_KEY = ""
34
+ # # Optional, defaults to https://cloud.langfuse.com
35
+ # LANGFUSE_HOST = "" # optional
36
+
37
+
38
+ # ## CACHING ##
39
+
40
+ # ### REDIS
41
+ # REDIS_HOST = ""
42
+ # REDIS_PORT = ""
43
+ # REDIS_PASSWORD = ""
litellm/deprecated_litellm_server/Dockerfile ADDED
@@ -0,0 +1,10 @@
1
+ # FROM python:3.10
2
+
3
+ # ENV LITELLM_CONFIG_PATH="/litellm.secrets.toml"
4
+ # COPY . /app
5
+ # WORKDIR /app
6
+ # RUN pip install -r requirements.txt
7
+
8
+ # EXPOSE $PORT
9
+
10
+ # CMD exec uvicorn main:app --host 0.0.0.0 --port $PORT --workers 10
litellm/deprecated_litellm_server/README.md ADDED
@@ -0,0 +1,3 @@
1
+ # litellm-server [experimental]
2
+
3
+ Deprecated. See litellm/proxy
litellm/deprecated_litellm_server/__init__.py ADDED
@@ -0,0 +1,2 @@
1
+ # from .main import *
2
+ # from .server_utils import *
litellm/deprecated_litellm_server/main.py ADDED
@@ -0,0 +1,193 @@
1
+ # import os, traceback
2
+ # from fastapi import FastAPI, Request, HTTPException
3
+ # from fastapi.routing import APIRouter
4
+ # from fastapi.responses import StreamingResponse, FileResponse
5
+ # from fastapi.middleware.cors import CORSMiddleware
6
+ # import json, sys
7
+ # from typing import Optional
8
+ # sys.path.insert(
9
+ # 0, os.path.abspath("../")
10
+ # ) # Adds the parent directory to the system path - for litellm local dev
11
+ # import litellm
12
+
13
+ # try:
14
+ # from litellm.deprecated_litellm_server.server_utils import set_callbacks, load_router_config, print_verbose
15
+ # except ImportError:
16
+ # from litellm.deprecated_litellm_server.server_utils import set_callbacks, load_router_config, print_verbose
17
+ # import dotenv
18
+ # dotenv.load_dotenv() # load env variables
19
+
20
+ # app = FastAPI(docs_url="/", title="LiteLLM API")
21
+ # router = APIRouter()
22
+ # origins = ["*"]
23
+
24
+ # app.add_middleware(
25
+ # CORSMiddleware,
26
+ # allow_origins=origins,
27
+ # allow_credentials=True,
28
+ # allow_methods=["*"],
29
+ # allow_headers=["*"],
30
+ # )
31
+ # #### GLOBAL VARIABLES ####
32
+ # llm_router: Optional[litellm.Router] = None
33
+ # llm_model_list: Optional[list] = None
34
+ # server_settings: Optional[dict] = None
35
+
36
+ # set_callbacks() # sets litellm callbacks for logging if they exist in the environment
37
+
38
+ # if "CONFIG_FILE_PATH" in os.environ:
39
+ # llm_router, llm_model_list, server_settings = load_router_config(router=llm_router, config_file_path=os.getenv("CONFIG_FILE_PATH"))
40
+ # else:
41
+ # llm_router, llm_model_list, server_settings = load_router_config(router=llm_router)
42
+ # #### API ENDPOINTS ####
43
+ # @router.get("/v1/models")
44
+ # @router.get("/models") # if project requires model list
45
+ # def model_list():
46
+ # all_models = litellm.utils.get_valid_models()
47
+ # if llm_model_list:
48
+ # all_models += llm_model_list
49
+ # return dict(
50
+ # data=[
51
+ # {
52
+ # "id": model,
53
+ # "object": "model",
54
+ # "created": 1677610602,
55
+ # "owned_by": "openai",
56
+ # }
57
+ # for model in all_models
58
+ # ],
59
+ # object="list",
60
+ # )
61
+ # # for streaming
62
+ # def data_generator(response):
63
+
64
+ # for chunk in response:
65
+
66
+ # yield f"data: {json.dumps(chunk)}\n\n"
67
+
68
+ # @router.post("/v1/completions")
69
+ # @router.post("/completions")
70
+ # async def completion(request: Request):
71
+ # data = await request.json()
72
+ # response = litellm.completion(
73
+ # **data
74
+ # )
75
+ # if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
76
+ # return StreamingResponse(data_generator(response), media_type='text/event-stream')
77
+ # return response
78
+
79
+ # @router.post("/v1/embeddings")
80
+ # @router.post("/embeddings")
81
+ # async def embedding(request: Request):
82
+ # try:
83
+ # data = await request.json()
84
+ # # default to always using the "ENV" variables, only if AUTH_STRATEGY==DYNAMIC then reads headers
85
+ # if os.getenv("AUTH_STRATEGY", None) == "DYNAMIC" and "authorization" in request.headers: # if users pass LLM api keys as part of header
86
+ # api_key = request.headers.get("authorization")
87
+ # api_key = api_key.replace("Bearer", "").strip() # type: ignore
88
+ # if len(api_key.strip()) > 0:
89
+ # api_key = api_key
90
+ # data["api_key"] = api_key
91
+ # response = litellm.embedding(
92
+ # **data
93
+ # )
94
+ # return response
95
+ # except Exception as e:
96
+ # error_traceback = traceback.format_exc()
97
+ # error_msg = f"{str(e)}\n\n{error_traceback}"
98
+ # return {"error": error_msg}
99
+
100
+ # @router.post("/v1/chat/completions")
101
+ # @router.post("/chat/completions")
102
+ # @router.post("/openai/deployments/{model:path}/chat/completions") # azure compatible endpoint
103
+ # async def chat_completion(request: Request, model: Optional[str] = None):
104
+ # global llm_model_list, server_settings
105
+ # try:
106
+ # data = await request.json()
107
+ # server_model = server_settings.get("completion_model", None) if server_settings else None
108
+ # data["model"] = server_model or model or data["model"]
109
+ # ## CHECK KEYS ##
110
+ # # default to always using the "ENV" variables, only if AUTH_STRATEGY==DYNAMIC then reads headers
111
+ # # env_validation = litellm.validate_environment(model=data["model"])
112
+ # # if (env_validation['keys_in_environment'] is False or os.getenv("AUTH_STRATEGY", None) == "DYNAMIC") and ("authorization" in request.headers or "api-key" in request.headers): # if users pass LLM api keys as part of header
113
+ # # if "authorization" in request.headers:
114
+ # # api_key = request.headers.get("authorization")
115
+ # # elif "api-key" in request.headers:
116
+ # # api_key = request.headers.get("api-key")
117
+ # # print(f"api_key in headers: {api_key}")
118
+ # # if " " in api_key:
119
+ # # api_key = api_key.split(" ")[1]
120
+ # # print(f"api_key split: {api_key}")
121
+ # # if len(api_key) > 0:
122
+ # # api_key = api_key
123
+ # # data["api_key"] = api_key
124
+ # # print(f"api_key in data: {api_key}")
125
+ # ## CHECK CONFIG ##
126
+ # if llm_model_list and data["model"] in [m["model_name"] for m in llm_model_list]:
127
+ # for m in llm_model_list:
128
+ # if data["model"] == m["model_name"]:
129
+ # for key, value in m["litellm_params"].items():
130
+ # data[key] = value
131
+ # break
132
+ # response = litellm.completion(
133
+ # **data
134
+ # )
135
+ # if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
136
+ # return StreamingResponse(data_generator(response), media_type='text/event-stream')
137
+ # return response
138
+ # except Exception as e:
139
+ # error_traceback = traceback.format_exc()
140
+
141
+ # error_msg = f"{str(e)}\n\n{error_traceback}"
142
+ # # return {"error": error_msg}
143
+ # raise HTTPException(status_code=500, detail=error_msg)
144
+
145
+ # @router.post("/router/completions")
146
+ # async def router_completion(request: Request):
147
+ # global llm_router
148
+ # try:
149
+ # data = await request.json()
150
+ # if "model_list" in data:
151
+ # llm_router = litellm.Router(model_list=data.pop("model_list"))
152
+ # if llm_router is None:
153
+ # raise Exception("Save model list via config.yaml. Eg.: ` docker build -t myapp --build-arg CONFIG_FILE=myconfig.yaml .` or pass it in as model_list=[..] as part of the request body")
154
+
155
+ # # openai.ChatCompletion.create replacement
156
+ # response = await llm_router.acompletion(model="gpt-3.5-turbo",
157
+ # messages=[{"role": "user", "content": "Hey, how's it going?"}])
158
+
159
+ # if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
160
+ # return StreamingResponse(data_generator(response), media_type='text/event-stream')
161
+ # return response
162
+ # except Exception as e:
163
+ # error_traceback = traceback.format_exc()
164
+ # error_msg = f"{str(e)}\n\n{error_traceback}"
165
+ # return {"error": error_msg}
166
+
167
+ # @router.post("/router/embedding")
168
+ # async def router_embedding(request: Request):
169
+ # global llm_router
170
+ # try:
171
+ # data = await request.json()
172
+ # if "model_list" in data:
173
+ # llm_router = litellm.Router(model_list=data.pop("model_list"))
174
+ # if llm_router is None:
175
+ # raise Exception("Save model list via config.yaml. Eg.: ` docker build -t myapp --build-arg CONFIG_FILE=myconfig.yaml .` or pass it in as model_list=[..] as part of the request body")
176
+
177
+ # response = await llm_router.aembedding(model="gpt-3.5-turbo", # type: ignore
178
+ # messages=[{"role": "user", "content": "Hey, how's it going?"}])
179
+
180
+ # if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
181
+ # return StreamingResponse(data_generator(response), media_type='text/event-stream')
182
+ # return response
183
+ # except Exception as e:
184
+ # error_traceback = traceback.format_exc()
185
+ # error_msg = f"{str(e)}\n\n{error_traceback}"
186
+ # return {"error": error_msg}
187
+
188
+ # @router.get("/")
189
+ # async def home(request: Request):
190
+ # return "LiteLLM: RUNNING"
191
+
192
+
193
+ # app.include_router(router)
litellm/deprecated_litellm_server/requirements.txt ADDED
@@ -0,0 +1,7 @@
1
+ # openai
2
+ # fastapi
3
+ # uvicorn
4
+ # boto3
5
+ # litellm
6
+ # python-dotenv
7
+ # redis
litellm/deprecated_litellm_server/server_utils.py ADDED
@@ -0,0 +1,86 @@
1
+ # import os, litellm
2
+ # import pkg_resources
3
+ # import dotenv
4
+ # dotenv.load_dotenv() # load env variables
5
+
6
+ # def print_verbose(print_statement):
7
+ # pass
8
+
9
+ # def get_package_version(package_name):
10
+ # try:
11
+ # package = pkg_resources.get_distribution(package_name)
12
+ # return package.version
13
+ # except pkg_resources.DistributionNotFound:
14
+ # return None
15
+
16
+ # # Usage example
17
+ # package_name = "litellm"
18
+ # version = get_package_version(package_name)
19
+ # if version:
20
+ # print_verbose(f"The version of {package_name} is {version}")
21
+ # else:
22
+ # print_verbose(f"{package_name} is not installed")
23
+ # import yaml
24
+ # import dotenv
25
+ # from typing import Optional
26
+ # dotenv.load_dotenv() # load env variables
27
+
28
+ # def set_callbacks():
29
+ # ## LOGGING
30
+ # if len(os.getenv("SET_VERBOSE", "")) > 0:
31
+ # if os.getenv("SET_VERBOSE") == "True":
32
+ # litellm.set_verbose = True
33
+ # print_verbose("\033[92mLiteLLM: Switched on verbose logging\033[0m")
34
+ # else:
35
+ # litellm.set_verbose = False
36
+
37
+ # ### LANGFUSE
38
+ # if (len(os.getenv("LANGFUSE_PUBLIC_KEY", "")) > 0 and len(os.getenv("LANGFUSE_SECRET_KEY", ""))) > 0 or len(os.getenv("LANGFUSE_HOST", "")) > 0:
39
+ # litellm.success_callback = ["langfuse"]
40
+ # print_verbose("\033[92mLiteLLM: Switched on Langfuse feature\033[0m")
41
+
42
+ # ## CACHING
43
+ # ### REDIS
44
+ # # if len(os.getenv("REDIS_HOST", "")) > 0 and len(os.getenv("REDIS_PORT", "")) > 0 and len(os.getenv("REDIS_PASSWORD", "")) > 0:
45
+ # # print(f"redis host: {os.getenv('REDIS_HOST')}; redis port: {os.getenv('REDIS_PORT')}; password: {os.getenv('REDIS_PASSWORD')}")
46
+ # # from litellm.caching import Cache
47
+ # # litellm.cache = Cache(type="redis", host=os.getenv("REDIS_HOST"), port=os.getenv("REDIS_PORT"), password=os.getenv("REDIS_PASSWORD"))
48
+ # # print("\033[92mLiteLLM: Switched on Redis caching\033[0m")
49
+
50
+
51
+
52
+ # def load_router_config(router: Optional[litellm.Router], config_file_path: Optional[str]='/app/config.yaml'):
53
+ # config = {}
54
+ # server_settings = {}
55
+ # try:
56
+ # if os.path.exists(config_file_path): # type: ignore
57
+ # with open(config_file_path, 'r') as file: # type: ignore
58
+ # config = yaml.safe_load(file)
59
+ # else:
60
+ # pass
61
+ # except:
62
+ # pass
63
+
64
+ # ## SERVER SETTINGS (e.g. default completion model = 'ollama/mistral')
65
+ # server_settings = config.get("server_settings", None)
66
+ # if server_settings:
67
+ # server_settings = server_settings
68
+
69
+ # ## LITELLM MODULE SETTINGS (e.g. litellm.drop_params=True,..)
70
+ # litellm_settings = config.get('litellm_settings', None)
71
+ # if litellm_settings:
72
+ # for key, value in litellm_settings.items():
73
+ # setattr(litellm, key, value)
74
+
75
+ # ## MODEL LIST
76
+ # model_list = config.get('model_list', None)
77
+ # if model_list:
78
+ # router = litellm.Router(model_list=model_list)
79
+
80
+ # ## ENVIRONMENT VARIABLES
81
+ # environment_variables = config.get('environment_variables', None)
82
+ # if environment_variables:
83
+ # for key, value in environment_variables.items():
84
+ # os.environ[key] = value
85
+
86
+ # return router, model_list, server_settings
litellm/exceptions.py ADDED
@@ -0,0 +1,166 @@
1
+ # +-----------------------------------------------+
2
+ # | |
3
+ # | Give Feedback / Get Help |
4
+ # | https://github.com/BerriAI/litellm/issues/new |
5
+ # | |
6
+ # +-----------------------------------------------+
7
+ #
8
+ # Thank you users! We ❤️ you! - Krrish & Ishaan
9
+
10
+ ## LiteLLM versions of the OpenAI Exception Types
11
+
12
+ from openai import (
13
+ AuthenticationError,
14
+ BadRequestError,
15
+ RateLimitError,
16
+ APIStatusError,
17
+ OpenAIError,
18
+ APIError,
19
+ APITimeoutError,
20
+ APIConnectionError,
21
+ APIResponseValidationError
22
+ )
23
+ import httpx
24
+
25
+ class AuthenticationError(AuthenticationError): # type: ignore
26
+ def __init__(self, message, llm_provider, model, response: httpx.Response):
27
+ self.status_code = 401
28
+ self.message = message
29
+ self.llm_provider = llm_provider
30
+ self.model = model
31
+ super().__init__(
32
+ self.message,
33
+ response=response,
34
+ body=None
35
+ ) # Call the base class constructor with the parameters it needs
36
+
37
+ class BadRequestError(BadRequestError): # type: ignore
38
+ def __init__(self, message, model, llm_provider, response: httpx.Response):
39
+ self.status_code = 400
40
+ self.message = message
41
+ self.model = model
42
+ self.llm_provider = llm_provider
43
+ super().__init__(
44
+ self.message,
45
+ response=response,
46
+ body=None
47
+ ) # Call the base class constructor with the parameters it needs
48
+
49
+ class Timeout(APITimeoutError): # type: ignore
50
+ def __init__(self, message, model, llm_provider):
51
+ self.status_code = 408
52
+ self.message = message
53
+ self.model = model
54
+ self.llm_provider = llm_provider
55
+ request = httpx.Request(method="POST", url="https://api.openai.com/v1")
56
+ super().__init__(
57
+ request=request
58
+ ) # Call the base class constructor with the parameters it needs
59
+
60
+ class RateLimitError(RateLimitError): # type: ignore
61
+ def __init__(self, message, llm_provider, model, response: httpx.Response):
62
+ self.status_code = 429
63
+ self.message = message
64
+ self.llm_provider = llm_provider
65
 + self.model = model
66
+ super().__init__(
67
+ self.message,
68
+ response=response,
69
+ body=None
70
+ ) # Call the base class constructor with the parameters it needs
71
+
72
+ # sub class of rate limit error - meant to give more granularity for error handling context window exceeded errors
73
+ class ContextWindowExceededError(BadRequestError): # type: ignore
74
+ def __init__(self, message, model, llm_provider, response: httpx.Response):
75
+ self.status_code = 400
76
+ self.message = message
77
+ self.model = model
78
+ self.llm_provider = llm_provider
79
+ super().__init__(
80
+ message=self.message,
81
+ model=self.model, # type: ignore
82
+ llm_provider=self.llm_provider, # type: ignore
83
+ response=response
84
+ ) # Call the base class constructor with the parameters it needs
85
+
86
+ class ServiceUnavailableError(APIStatusError): # type: ignore
87
+ def __init__(self, message, llm_provider, model, response: httpx.Response):
88
+ self.status_code = 503
89
+ self.message = message
90
+ self.llm_provider = llm_provider
91
+ self.model = model
92
+ super().__init__(
93
+ self.message,
94
+ response=response,
95
+ body=None
96
+ ) # Call the base class constructor with the parameters it needs
97
+
98
+
99
+ # raise this when the API returns an invalid response object - https://github.com/openai/openai-python/blob/1be14ee34a0f8e42d3f9aa5451aa4cb161f1781f/openai/api_requestor.py#L401
100
+ class APIError(APIError): # type: ignore
101
+ def __init__(self, status_code, message, llm_provider, model, request: httpx.Request):
102
+ self.status_code = status_code
103
+ self.message = message
104
+ self.llm_provider = llm_provider
105
+ self.model = model
106
+ super().__init__(
107
+ self.message,
108
+ request=request, # type: ignore
109
+ body=None
110
+ )
111
+
112
+ # raised if an invalid request (not get, delete, put, post) is made
113
+ class APIConnectionError(APIConnectionError): # type: ignore
114
+ def __init__(self, message, llm_provider, model, request: httpx.Request):
115
+ self.message = message
116
+ self.llm_provider = llm_provider
117
+ self.model = model
118
+ self.status_code = 500
119
+ super().__init__(
120
+ message=self.message,
121
+ request=request
122
+ )
123
+
124
+ # raised if an invalid request (not get, delete, put, post) is made
125
+ class APIResponseValidationError(APIResponseValidationError): # type: ignore
126
+ def __init__(self, message, llm_provider, model):
127
+ self.message = message
128
+ self.llm_provider = llm_provider
129
+ self.model = model
130
+ request = httpx.Request(method="POST", url="https://api.openai.com/v1")
131
+ response = httpx.Response(status_code=500, request=request)
132
+ super().__init__(
133
+ response=response,
134
+ body=None,
135
+ message=message
136
+ )
137
+
138
+ class OpenAIError(OpenAIError): # type: ignore
139
+ def __init__(self, original_exception):
140
+ self.status_code = original_exception.http_status
141
+ super().__init__(
142
+ http_body=original_exception.http_body,
143
+ http_status=original_exception.http_status,
144
+ json_body=original_exception.json_body,
145
+ headers=original_exception.headers,
146
+ code=original_exception.code,
147
+ )
148
+ self.llm_provider = "openai"
149
+
150
+ class BudgetExceededError(Exception):
151
+ def __init__(self, current_cost, max_budget):
152
+ self.current_cost = current_cost
153
+ self.max_budget = max_budget
154
+ message = f"Budget has been exceeded! Current cost: {current_cost}, Max budget: {max_budget}"
155
+ super().__init__(message)
156
+
157
+ ## DEPRECATED ##
158
+ class InvalidRequestError(BadRequestError): # type: ignore
159
+ def __init__(self, message, model, llm_provider):
160
+ self.status_code = 400
161
+ self.message = message
162
+ self.model = model
163
+ self.llm_provider = llm_provider
164
+ super().__init__(
165
+ self.message, f"{self.model}"
166
+ ) # Call the base class constructor with the parameters it needs
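A minimal usage sketch of the exception classes mapped above, assuming they are exposed to callers via litellm.exceptions; the import path and the safe_completion helper are illustrative assumptions, not part of this commit:

# hypothetical caller-side handling of the exception types defined above
import litellm
from litellm.exceptions import ContextWindowExceededError, RateLimitError, ServiceUnavailableError

def safe_completion(model, messages):
    try:
        return litellm.completion(model=model, messages=messages)
    except ContextWindowExceededError:
        # 400 - prompt too long for the model: trim the history and retry once
        return litellm.completion(model=model, messages=messages[-2:])
    except RateLimitError:
        # 429 - caller should back off before retrying
        raise
    except ServiceUnavailableError:
        # 503 - provider outage, surface to the caller
        raise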
litellm/integrations/__init__.py ADDED
@@ -0,0 +1 @@
1
+ from . import *
litellm/integrations/aispend.py ADDED
@@ -0,0 +1,177 @@
1
+ #### What this does ####
2
+ # On success + failure, log events to aispend.io
3
+ import dotenv, os
4
+ import requests
5
+
6
+ dotenv.load_dotenv() # Loading env variables using dotenv
7
+ import traceback
8
+ import datetime
9
+
10
+ model_cost = {
11
+ "gpt-3.5-turbo": {
12
+ "max_tokens": 4000,
13
+ "input_cost_per_token": 0.0000015,
14
+ "output_cost_per_token": 0.000002,
15
+ },
16
+ "gpt-35-turbo": {
17
+ "max_tokens": 4000,
18
+ "input_cost_per_token": 0.0000015,
19
+ "output_cost_per_token": 0.000002,
20
+ }, # azure model name
21
+ "gpt-3.5-turbo-0613": {
22
+ "max_tokens": 4000,
23
+ "input_cost_per_token": 0.0000015,
24
+ "output_cost_per_token": 0.000002,
25
+ },
26
+ "gpt-3.5-turbo-0301": {
27
+ "max_tokens": 4000,
28
+ "input_cost_per_token": 0.0000015,
29
+ "output_cost_per_token": 0.000002,
30
+ },
31
+ "gpt-3.5-turbo-16k": {
32
+ "max_tokens": 16000,
33
+ "input_cost_per_token": 0.000003,
34
+ "output_cost_per_token": 0.000004,
35
+ },
36
+ "gpt-35-turbo-16k": {
37
+ "max_tokens": 16000,
38
+ "input_cost_per_token": 0.000003,
39
+ "output_cost_per_token": 0.000004,
40
+ }, # azure model name
41
+ "gpt-3.5-turbo-16k-0613": {
42
+ "max_tokens": 16000,
43
+ "input_cost_per_token": 0.000003,
44
+ "output_cost_per_token": 0.000004,
45
+ },
46
+ "gpt-4": {
47
+ "max_tokens": 8000,
48
+ "input_cost_per_token": 0.000003,
49
+ "output_cost_per_token": 0.00006,
50
+ },
51
+ "gpt-4-0613": {
52
+ "max_tokens": 8000,
53
+ "input_cost_per_token": 0.000003,
54
+ "output_cost_per_token": 0.00006,
55
+ },
56
+ "gpt-4-32k": {
57
+ "max_tokens": 8000,
58
+ "input_cost_per_token": 0.00006,
59
+ "output_cost_per_token": 0.00012,
60
+ },
61
+ "claude-instant-1": {
62
+ "max_tokens": 100000,
63
+ "input_cost_per_token": 0.00000163,
64
+ "output_cost_per_token": 0.00000551,
65
+ },
66
+ "claude-2": {
67
+ "max_tokens": 100000,
68
+ "input_cost_per_token": 0.00001102,
69
+ "output_cost_per_token": 0.00003268,
70
+ },
71
+ "text-bison-001": {
72
+ "max_tokens": 8192,
73
+ "input_cost_per_token": 0.000004,
74
+ "output_cost_per_token": 0.000004,
75
+ },
76
+ "chat-bison-001": {
77
+ "max_tokens": 4096,
78
+ "input_cost_per_token": 0.000002,
79
+ "output_cost_per_token": 0.000002,
80
+ },
81
+ "command-nightly": {
82
+ "max_tokens": 4096,
83
+ "input_cost_per_token": 0.000015,
84
+ "output_cost_per_token": 0.000015,
85
+ },
86
+ }
87
+
88
+
89
+ class AISpendLogger:
90
+ # Class variables or attributes
91
+ def __init__(self):
92
+ # Instance variables
93
+ self.account_id = os.getenv("AISPEND_ACCOUNT_ID")
94
+ self.api_key = os.getenv("AISPEND_API_KEY")
95
+
96
+ def price_calculator(self, model, response_obj, start_time, end_time):
97
+ # try and find if the model is in the model_cost map
98
+ # else default to the average of the costs
99
+ prompt_tokens_cost_usd_dollar = 0
100
+ completion_tokens_cost_usd_dollar = 0
101
+ if model in model_cost:
102
+ prompt_tokens_cost_usd_dollar = (
103
+ model_cost[model]["input_cost_per_token"]
104
+ * response_obj["usage"]["prompt_tokens"]
105
+ )
106
+ completion_tokens_cost_usd_dollar = (
107
+ model_cost[model]["output_cost_per_token"]
108
+ * response_obj["usage"]["completion_tokens"]
109
+ )
110
+ elif "replicate" in model:
111
+ # replicate models are charged based on time
112
+ # llama 2 runs on an nvidia a100 which costs $0.0032 per second - https://replicate.com/replicate/llama-2-70b-chat
113
+ model_run_time = end_time - start_time # assuming time in seconds
114
+ cost_usd_dollar = model_run_time * 0.0032
115
+ prompt_tokens_cost_usd_dollar = cost_usd_dollar / 2
116
+ completion_tokens_cost_usd_dollar = cost_usd_dollar / 2
117
+ else:
118
+ # calculate average input cost
119
+ input_cost_sum = 0
120
+ output_cost_sum = 0
121
+ for model in model_cost:
122
+ input_cost_sum += model_cost[model]["input_cost_per_token"]
123
+ output_cost_sum += model_cost[model]["output_cost_per_token"]
124
+ avg_input_cost = input_cost_sum / len(model_cost.keys())
125
+ avg_output_cost = output_cost_sum / len(model_cost.keys())
126
+ prompt_tokens_cost_usd_dollar = (
127
+ avg_input_cost
128
+ * response_obj["usage"]["prompt_tokens"]
129
+ )
130
+ completion_tokens_cost_usd_dollar = (
131
+ avg_output_cost
132
+ * response_obj["usage"]["completion_tokens"]
133
+ )
134
+ return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
135
+
136
+ def log_event(self, model, response_obj, start_time, end_time, print_verbose):
137
+ # Method definition
138
+ try:
139
+ print_verbose(
140
+ f"AISpend Logging - Enters logging function for model {model}"
141
+ )
142
+
143
+ url = f"https://aispend.io/api/v1/accounts/{self.account_id}/data"
144
+ headers = {
145
+ "Authorization": f"Bearer {self.api_key}",
146
+ "Content-Type": "application/json",
147
+ }
148
+
149
+ response_timestamp = datetime.datetime.fromtimestamp(
150
+ int(response_obj["created"])
151
+ ).strftime("%Y-%m-%d")
152
+
153
+ (
154
+ prompt_tokens_cost_usd_dollar,
155
+ completion_tokens_cost_usd_dollar,
156
+ ) = self.price_calculator(model, response_obj, start_time, end_time)
157
+ prompt_tokens_cost_usd_cent = prompt_tokens_cost_usd_dollar * 100
158
+ completion_tokens_cost_usd_cent = completion_tokens_cost_usd_dollar * 100
159
+ data = [
160
+ {
161
+ "requests": 1,
162
+ "requests_context": 1,
163
+ "context_tokens": response_obj["usage"]["prompt_tokens"],
164
+ "requests_generated": 1,
165
+ "generated_tokens": response_obj["usage"]["completion_tokens"],
166
+ "recorded_date": response_timestamp,
167
+ "model_id": response_obj["model"],
168
+ "generated_tokens_cost_usd_cent": prompt_tokens_cost_usd_cent,
169
+ "context_tokens_cost_usd_cent": completion_tokens_cost_usd_cent,
170
+ }
171
+ ]
172
+
173
+ print_verbose(f"AISpend Logging - final data object: {data}")
174
+ except:
175
+ # traceback.print_exc()
176
+ print_verbose(f"AISpend Logging Error - {traceback.format_exc()}")
177
+ pass
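For orientation, the per-token pricing used by price_calculator works out like this for a gpt-3.5-turbo call; the token counts are illustrative, the rates are taken from the model_cost map above:

# cost of a response with 100 prompt tokens and 50 completion tokens
prompt_cost = 0.0000015 * 100              # $0.00015
completion_cost = 0.000002 * 50            # $0.00010
total_usd = prompt_cost + completion_cost  # $0.00025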
litellm/integrations/berrispend.py ADDED
@@ -0,0 +1,184 @@
1
+ #### What this does ####
2
+ # On success + failure, log events to BerriSpend
3
+ import dotenv, os
4
+ import requests
5
+
6
+ dotenv.load_dotenv() # Loading env variables using dotenv
7
+ import traceback
8
+ import datetime
9
+
10
+ model_cost = {
11
+ "gpt-3.5-turbo": {
12
+ "max_tokens": 4000,
13
+ "input_cost_per_token": 0.0000015,
14
+ "output_cost_per_token": 0.000002,
15
+ },
16
+ "gpt-35-turbo": {
17
+ "max_tokens": 4000,
18
+ "input_cost_per_token": 0.0000015,
19
+ "output_cost_per_token": 0.000002,
20
+ }, # azure model name
21
+ "gpt-3.5-turbo-0613": {
22
+ "max_tokens": 4000,
23
+ "input_cost_per_token": 0.0000015,
24
+ "output_cost_per_token": 0.000002,
25
+ },
26
+ "gpt-3.5-turbo-0301": {
27
+ "max_tokens": 4000,
28
+ "input_cost_per_token": 0.0000015,
29
+ "output_cost_per_token": 0.000002,
30
+ },
31
+ "gpt-3.5-turbo-16k": {
32
+ "max_tokens": 16000,
33
+ "input_cost_per_token": 0.000003,
34
+ "output_cost_per_token": 0.000004,
35
+ },
36
+ "gpt-35-turbo-16k": {
37
+ "max_tokens": 16000,
38
+ "input_cost_per_token": 0.000003,
39
+ "output_cost_per_token": 0.000004,
40
+ }, # azure model name
41
+ "gpt-3.5-turbo-16k-0613": {
42
+ "max_tokens": 16000,
43
+ "input_cost_per_token": 0.000003,
44
+ "output_cost_per_token": 0.000004,
45
+ },
46
+ "gpt-4": {
47
+ "max_tokens": 8000,
48
+ "input_cost_per_token": 0.000003,
49
+ "output_cost_per_token": 0.00006,
50
+ },
51
+ "gpt-4-0613": {
52
+ "max_tokens": 8000,
53
+ "input_cost_per_token": 0.000003,
54
+ "output_cost_per_token": 0.00006,
55
+ },
56
+ "gpt-4-32k": {
57
+ "max_tokens": 8000,
58
+ "input_cost_per_token": 0.00006,
59
+ "output_cost_per_token": 0.00012,
60
+ },
61
+ "claude-instant-1": {
62
+ "max_tokens": 100000,
63
+ "input_cost_per_token": 0.00000163,
64
+ "output_cost_per_token": 0.00000551,
65
+ },
66
+ "claude-2": {
67
+ "max_tokens": 100000,
68
+ "input_cost_per_token": 0.00001102,
69
+ "output_cost_per_token": 0.00003268,
70
+ },
71
+ "text-bison-001": {
72
+ "max_tokens": 8192,
73
+ "input_cost_per_token": 0.000004,
74
+ "output_cost_per_token": 0.000004,
75
+ },
76
+ "chat-bison-001": {
77
+ "max_tokens": 4096,
78
+ "input_cost_per_token": 0.000002,
79
+ "output_cost_per_token": 0.000002,
80
+ },
81
+ "command-nightly": {
82
+ "max_tokens": 4096,
83
+ "input_cost_per_token": 0.000015,
84
+ "output_cost_per_token": 0.000015,
85
+ },
86
+ }
87
+
88
+
89
+ class BerriSpendLogger:
90
+ # Class variables or attributes
91
+ def __init__(self):
92
+ # Instance variables
93
+ self.account_id = os.getenv("BERRISPEND_ACCOUNT_ID")
94
+
95
+ def price_calculator(self, model, response_obj, start_time, end_time):
96
+ # try and find if the model is in the model_cost map
97
+ # else default to the average of the costs
98
+ prompt_tokens_cost_usd_dollar = 0
99
+ completion_tokens_cost_usd_dollar = 0
100
+ if model in model_cost:
101
+ prompt_tokens_cost_usd_dollar = (
102
+ model_cost[model]["input_cost_per_token"]
103
+ * response_obj["usage"]["prompt_tokens"]
104
+ )
105
+ completion_tokens_cost_usd_dollar = (
106
+ model_cost[model]["output_cost_per_token"]
107
+ * response_obj["usage"]["completion_tokens"]
108
+ )
109
+ elif "replicate" in model:
110
+ # replicate models are charged based on time
111
+ # llama 2 runs on an nvidia a100 which costs $0.0032 per second - https://replicate.com/replicate/llama-2-70b-chat
112
+ model_run_time = end_time - start_time # assuming time in seconds
113
+ cost_usd_dollar = model_run_time * 0.0032
114
+ prompt_tokens_cost_usd_dollar = cost_usd_dollar / 2
115
+ completion_tokens_cost_usd_dollar = cost_usd_dollar / 2
116
+ else:
117
+ # calculate average input cost
118
+ input_cost_sum = 0
119
+ output_cost_sum = 0
120
+ for model in model_cost:
121
+ input_cost_sum += model_cost[model]["input_cost_per_token"]
122
+ output_cost_sum += model_cost[model]["output_cost_per_token"]
123
+ avg_input_cost = input_cost_sum / len(model_cost.keys())
124
+ avg_output_cost = output_cost_sum / len(model_cost.keys())
125
+ prompt_tokens_cost_usd_dollar = (
126
+ avg_input_cost
127
+ * response_obj["usage"]["prompt_tokens"]
128
+ )
129
+ completion_tokens_cost_usd_dollar = (
130
+ avg_output_cost
131
+ * response_obj["usage"]["completion_tokens"]
132
+ )
133
+ return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
134
+
135
+ def log_event(
136
+ self, model, messages, response_obj, start_time, end_time, print_verbose
137
+ ):
138
+ # Method definition
139
+ try:
140
+ print_verbose(
141
+ f"BerriSpend Logging - Enters logging function for model {model}"
142
+ )
143
+
144
+ url = f"https://berrispend.berri.ai/spend"
145
+ headers = {"Content-Type": "application/json"}
146
+
147
+ (
148
+ prompt_tokens_cost_usd_dollar,
149
+ completion_tokens_cost_usd_dollar,
150
+ ) = self.price_calculator(model, response_obj, start_time, end_time)
151
+ total_cost = (
152
+ prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar
153
+ )
154
+
155
+ response_time = (end_time - start_time).total_seconds()
156
+ if "response" in response_obj:
157
+ data = [
158
+ {
159
+ "response_time": response_time,
160
+ "model_id": response_obj["model"],
161
+ "total_cost": total_cost,
162
+ "messages": messages,
163
+ "response": response_obj["choices"][0]["message"]["content"],
164
+ "account_id": self.account_id,
165
+ }
166
+ ]
167
+ elif "error" in response_obj:
168
+ data = [
169
+ {
170
+ "response_time": response_time,
171
+ "model_id": response_obj["model"],
172
+ "total_cost": total_cost,
173
+ "messages": messages,
174
+ "error": response_obj["error"],
175
+ "account_id": self.account_id,
176
+ }
177
+ ]
178
+
179
+ print_verbose(f"BerriSpend Logging - final data object: {data}")
180
+ response = requests.post(url, headers=headers, json=data)
181
+ except:
182
+ # traceback.print_exc()
183
+ print_verbose(f"BerriSpend Logging Error - {traceback.format_exc()}")
184
+ pass
litellm/integrations/custom_logger.py ADDED
@@ -0,0 +1,83 @@
1
+ #### What this does ####
2
+ # On success/failure, forwards events to a user-defined custom callback
3
+ import dotenv, os
4
+ import requests
5
+ import requests
6
+
7
+ dotenv.load_dotenv() # Loading env variables using dotenv
8
+ import traceback
9
+
10
+
11
+ class CustomLogger: # https://docs.litellm.ai/docs/observability/custom_callback#callback-class
12
+ # Class variables or attributes
13
+ def __init__(self):
14
+ pass
15
+
16
+ def log_pre_api_call(self, model, messages, kwargs):
17
+ pass
18
+
19
+ def log_post_api_call(self, kwargs, response_obj, start_time, end_time):
20
+ pass
21
+
22
+ def log_stream_event(self, kwargs, response_obj, start_time, end_time):
23
+ pass
24
+
25
+ def log_success_event(self, kwargs, response_obj, start_time, end_time):
26
+ pass
27
+
28
+ def log_failure_event(self, kwargs, response_obj, start_time, end_time):
29
+ pass
30
+
31
+
32
+ #### SINGLE-USE #### - https://docs.litellm.ai/docs/observability/custom_callback#using-your-custom-callback-function
33
+
34
+ def log_input_event(self, model, messages, kwargs, print_verbose, callback_func):
35
+ try:
36
+ kwargs["model"] = model
37
+ kwargs["messages"] = messages
38
+ kwargs["log_event_type"] = "pre_api_call"
39
+ callback_func(
40
+ kwargs,
41
+ )
42
+ print_verbose(
43
+ f"Custom Logger - model call details: {kwargs}"
44
+ )
45
+ except:
46
+ traceback.print_exc()
47
+ print_verbose(f"Custom Logger Error - {traceback.format_exc()}")
48
+
49
+ def log_event(self, kwargs, response_obj, start_time, end_time, print_verbose, callback_func):
50
+ # Method definition
51
+ try:
52
+ kwargs["log_event_type"] = "post_api_call"
53
+ callback_func(
54
+ kwargs, # kwargs to func
55
+ response_obj,
56
+ start_time,
57
+ end_time,
58
+ )
59
+ print_verbose(
60
+ f"Custom Logger - final response object: {response_obj}"
61
+ )
62
+ except:
63
+ # traceback.print_exc()
64
+ print_verbose(f"Custom Logger Error - {traceback.format_exc()}")
65
+ pass
66
+
67
+ async def async_log_event(self, kwargs, response_obj, start_time, end_time, print_verbose, callback_func):
68
+ # Method definition
69
+ try:
70
+ kwargs["log_event_type"] = "post_api_call"
71
+ await callback_func(
72
+ kwargs, # kwargs to func
73
+ response_obj,
74
+ start_time,
75
+ end_time,
76
+ )
77
+ print_verbose(
78
+ f"Custom Logger - final response object: {response_obj}"
79
+ )
80
+ except:
81
+ # traceback.print_exc()
82
+ print_verbose(f"Custom Logger Error - {traceback.format_exc()}")
83
+ pass
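A short sketch of the user-side callback these hooks forward to; the signature mirrors what log_event passes through, while the litellm.success_callback registration is an assumption based on the custom-callback docs linked above:

import litellm

def track_latency_callback(kwargs, response_obj, start_time, end_time):
    # kwargs carries the original call details plus "log_event_type"
    print(kwargs.get("log_event_type"), kwargs.get("model"))
    print("latency (s):", (end_time - start_time).total_seconds())

litellm.success_callback = [track_latency_callback]  # assumed registration hook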
litellm/integrations/helicone.py ADDED
@@ -0,0 +1,114 @@
1
+ #### What this does ####
2
+ # On success, logs events to Helicone
3
+ import dotenv, os
4
+ import requests
5
+
6
+ dotenv.load_dotenv() # Loading env variables using dotenv
7
+ import traceback
8
+
9
+
10
+ class HeliconeLogger:
11
+ # Class variables or attributes
12
+ helicone_model_list = ["gpt", "claude"]
13
+
14
+ def __init__(self):
15
+ # Instance variables
16
+ self.provider_url = "https://api.openai.com/v1"
17
+ self.key = os.getenv("HELICONE_API_KEY")
18
+
19
+ def claude_mapping(self, model, messages, response_obj):
20
+ from anthropic import HUMAN_PROMPT, AI_PROMPT
21
+
22
+ prompt = f"{HUMAN_PROMPT}"
23
+ for message in messages:
24
+ if "role" in message:
25
+ if message["role"] == "user":
26
+ prompt += f"{HUMAN_PROMPT}{message['content']}"
27
+ else:
28
+ prompt += f"{AI_PROMPT}{message['content']}"
29
+ else:
30
+ prompt += f"{HUMAN_PROMPT}{message['content']}"
31
+ prompt += f"{AI_PROMPT}"
32
+ claude_provider_request = {"model": model, "prompt": prompt}
33
+
34
+ claude_response_obj = {
35
+ "completion": response_obj["choices"][0]["message"]["content"],
36
+ "model": model,
37
+ "stop_reason": "stop_sequence",
38
+ }
39
+
40
+ return claude_provider_request, claude_response_obj
41
+
42
+ def log_success(
43
+ self, model, messages, response_obj, start_time, end_time, print_verbose
44
+ ):
45
+ # Method definition
46
+ try:
47
+ print_verbose(
48
+ f"Helicone Logging - Enters logging function for model {model}"
49
+ )
50
+ model = (
51
+ model
52
+ if any(
53
+ accepted_model in model
54
+ for accepted_model in self.helicone_model_list
55
+ )
56
+ else "gpt-3.5-turbo"
57
+ )
58
+ provider_request = {"model": model, "messages": messages}
59
+
60
+ if "claude" in model:
61
+ provider_request, response_obj = self.claude_mapping(
62
+ model=model, messages=messages, response_obj=response_obj
63
+ )
64
+
65
+ providerResponse = {
66
+ "json": response_obj,
67
+ "headers": {"openai-version": "2020-10-01"},
68
+ "status": 200,
69
+ }
70
+
71
+ # Code to be executed
72
+ url = "https://api.hconeai.com/oai/v1/log"
73
+ headers = {
74
+ "Authorization": f"Bearer {self.key}",
75
+ "Content-Type": "application/json",
76
+ }
77
+ start_time_seconds = int(start_time.timestamp())
78
+ start_time_milliseconds = int(
79
+ (start_time.timestamp() - start_time_seconds) * 1000
80
+ )
81
+ end_time_seconds = int(end_time.timestamp())
82
+ end_time_milliseconds = int(
83
+ (end_time.timestamp() - end_time_seconds) * 1000
84
+ )
85
+ data = {
86
+ "providerRequest": {
87
+ "url": self.provider_url,
88
+ "json": provider_request,
89
+ "meta": {"Helicone-Auth": f"Bearer {self.key}"},
90
+ },
91
+ "providerResponse": providerResponse,
92
+ "timing": {
93
+ "startTime": {
94
+ "seconds": start_time_seconds,
95
+ "milliseconds": start_time_milliseconds,
96
+ },
97
+ "endTime": {
98
+ "seconds": end_time_seconds,
99
+ "milliseconds": end_time_milliseconds,
100
+ },
101
+ }, # {"seconds": .., "milliseconds": ..}
102
+ }
103
+ response = requests.post(url, headers=headers, json=data)
104
+ if response.status_code == 200:
105
+ print_verbose("Helicone Logging - Success!")
106
+ else:
107
+ print_verbose(
108
+ f"Helicone Logging - Error Request was not successful. Status Code: {response.status_code}"
109
+ )
110
+ print_verbose(f"Helicone Logging - Error {response.text}")
111
+ except:
112
+ # traceback.print_exc()
113
+ print_verbose(f"Helicone Logging Error - {traceback.format_exc()}")
114
+ pass
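A hedged usage sketch for this logger; the "helicone" string callback name is an assumption about how litellm wires integrations up, and the key is a placeholder:

import os
import litellm

os.environ["HELICONE_API_KEY"] = "sk-helicone-..."  # placeholder, read by HeliconeLogger.__init__
litellm.success_callback = ["helicone"]              # assumed callback name

litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello"}],
)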
litellm/integrations/langfuse.py ADDED
@@ -0,0 +1,75 @@
1
+ #### What this does ####
2
+ # On success, logs events to Langfuse
3
+ import dotenv, os
4
+ import requests
5
+ import requests
6
+ from datetime import datetime
7
+
8
+ dotenv.load_dotenv() # Loading env variables using dotenv
9
+ import traceback
10
+
11
+ class LangFuseLogger:
12
+ # Class variables or attributes
13
+ def __init__(self):
14
+ try:
15
+ from langfuse import Langfuse
16
+ except Exception as e:
17
+ raise Exception(f"\033[91mLangfuse not installed, try running 'pip install langfuse' to fix this error: {e}\033[0m")
18
+ # Instance variables
19
+ self.secret_key = os.getenv("LANGFUSE_SECRET_KEY")
20
+ self.public_key = os.getenv("LANGFUSE_PUBLIC_KEY")
21
+ self.langfuse_host = os.getenv("LANGFUSE_HOST", "https://cloud.langfuse.com")
22
+ self.langfuse_release = os.getenv("LANGFUSE_RELEASE")
23
+ self.langfuse_debug = os.getenv("LANGFUSE_DEBUG")
24
+ self.Langfuse = Langfuse(
25
+ public_key=self.public_key,
26
+ secret_key=self.secret_key,
27
+ host=self.langfuse_host,
28
+ release=self.langfuse_release,
29
+ debug=self.langfuse_debug
30
+ )
31
+
32
+ def log_event(self, kwargs, response_obj, start_time, end_time, print_verbose):
33
+ # Method definition
34
+ from langfuse.model import InitialGeneration, Usage
35
+ try:
36
+ print_verbose(
37
+ f"Langfuse Logging - Enters logging function for model {kwargs}"
38
+ )
39
+ litellm_params = kwargs.get("litellm_params", {})
40
+ metadata = litellm_params.get("metadata", {})
41
+ prompt = [kwargs.get('messages')]
42
+ optional_params = kwargs.get("optional_params", {})
43
+
44
+ # langfuse only accepts str, int, bool, float for logging
45
+ for param, value in optional_params.items():
46
+ if not isinstance(value, (str, int, bool, float)):
47
+ try:
48
+ optional_params[param] = str(value)
49
+ except:
50
+ # if casting value to str fails don't block logging
51
+ pass
52
+
53
+ # end of processing langfuse ########################
54
+ self.Langfuse.generation(InitialGeneration(
55
+ name=metadata.get("generation_name", "litellm-completion"),
56
+ startTime=start_time,
57
+ endTime=end_time,
58
+ model=kwargs['model'],
59
+ modelParameters=optional_params,
60
+ prompt=prompt,
61
+ completion=response_obj['choices'][0]['message'],
62
+ usage=Usage(
63
+ prompt_tokens=response_obj['usage']['prompt_tokens'],
64
+ completion_tokens=response_obj['usage']['completion_tokens']
65
+ ),
66
+ metadata=metadata
67
+ ))
68
+ self.Langfuse.flush()
69
+ print_verbose(
70
+ f"Langfuse Layer Logging - final response object: {response_obj}"
71
+ )
72
+ except:
73
+ # traceback.print_exc()
74
+ print_verbose(f"Langfuse Layer Error - {traceback.format_exc()}")
75
+ pass
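A hedged setup sketch for the Langfuse logger above; the env vars match what __init__ reads, the keys are placeholders, and the "langfuse" callback name is an assumption:

import os
import litellm

os.environ["LANGFUSE_PUBLIC_KEY"] = "pk-lf-..."  # placeholder
os.environ["LANGFUSE_SECRET_KEY"] = "sk-lf-..."  # placeholder

litellm.success_callback = ["langfuse"]  # assumed callback name
litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hi"}],
    metadata={"generation_name": "my-generation"},  # picked up by log_event above
)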
litellm/integrations/langsmith.py ADDED
@@ -0,0 +1,76 @@
1
+ #### What this does ####
2
+ # On success, logs events to Langsmith
3
+ import dotenv, os
4
+ import requests
5
+ import requests
6
+ from datetime import datetime
7
+
8
+ dotenv.load_dotenv() # Loading env variables using dotenv
9
+ import traceback
10
+
11
+ class LangsmithLogger:
12
+ # Class variables or attributes
13
+ def __init__(self):
14
+ self.langsmith_api_key = os.getenv("LANGSMITH_API_KEY")
15
+
16
+
17
+ def log_event(self, kwargs, response_obj, start_time, end_time, print_verbose):
18
+ # Method definition
19
+ # inspired by Langsmith http api here: https://github.com/langchain-ai/langsmith-cookbook/blob/main/tracing-examples/rest/rest.ipynb
20
+ metadata = {}
21
+ if "litellm_params" in kwargs:
22
+ metadata = kwargs["litellm_params"].get("metadata", {})
23
+ # set project name and run_name for langsmith logging
24
+ # users can pass project_name and run name to litellm.completion()
25
+ # Example: litellm.completion(model, messages, metadata={"project_name": "my-litellm-project", "run_name": "my-langsmith-run"})
26
+ # if not set litellm will use default project_name = litellm-completion, run_name = LLMRun
27
+ project_name = metadata.get("project_name", "litellm-completion")
28
+ run_name = metadata.get("run_name", "LLMRun")
29
+ print_verbose(f"Langsmith Logging - project_name: {project_name}, run_name {run_name}")
30
+ try:
31
+ print_verbose(
32
+ f"Langsmith Logging - Enters logging function for model {kwargs}"
33
+ )
34
+ import requests
35
+ import datetime
36
+ from datetime import timezone
37
+ try:
38
+ start_time = kwargs["start_time"].astimezone(timezone.utc).isoformat()
39
+ end_time = kwargs["end_time"].astimezone(timezone.utc).isoformat()
40
+ except:
41
+ start_time = datetime.datetime.utcnow().isoformat()
42
+ end_time = datetime.datetime.utcnow().isoformat()
43
+
44
+ # filter kwargs to exclude any dicts - Langsmith throws an error when logging them
45
+ new_kwargs = {}
46
+ for key in kwargs:
47
+ value = kwargs[key]
48
+ if key == "start_time" or key =="end_time":
49
+ pass
50
+ elif type(value) != dict:
51
+ new_kwargs[key] = value
52
+
53
+ requests.post(
54
+ "https://api.smith.langchain.com/runs",
55
+ json={
56
+ "name": run_name,
57
+ "run_type": "llm", # this should always be llm, since litellm always logs llm calls. Langsmith allow us to log "chain"
58
+ "inputs": {
59
+ **new_kwargs
60
+ },
61
+ "outputs": response_obj,
62
+ "session_name": project_name,
63
+ "start_time": start_time,
64
+ "end_time": end_time,
65
+ },
66
+ headers={
67
+ "x-api-key": self.langsmith_api_key
68
+ }
69
+ )
70
+ print_verbose(
71
+ f"Langsmith Layer Logging - final response object: {response_obj}"
72
+ )
73
+ except:
74
+ # traceback.print_exc()
75
+ print_verbose(f"Langsmith Layer Error - {traceback.format_exc()}")
76
+ pass
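Following the comments in log_event, project and run names can be passed through metadata; a hedged sketch (the "langsmith" callback name is an assumption):

import litellm

litellm.success_callback = ["langsmith"]  # assumed callback name
litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hi"}],
    # read by log_event above via litellm_params["metadata"]
    metadata={"project_name": "my-litellm-project", "run_name": "my-langsmith-run"},
)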
litellm/integrations/litedebugger.py ADDED
@@ -0,0 +1,231 @@
1
+ import requests, traceback, json, os
2
+ import types
3
+
4
+ class LiteDebugger:
5
+ user_email = None
6
+ dashboard_url = None
7
+
8
+ def __init__(self, email=None):
9
+ self.api_url = "https://api.litellm.ai/debugger"
10
+ self.validate_environment(email)
11
+ pass
12
+
13
+ def validate_environment(self, email):
14
+ try:
15
+ self.user_email = (email or os.getenv("LITELLM_TOKEN") or os.getenv("LITELLM_EMAIL"))
16
+ if self.user_email == None: # if users are trying to use_client=True but token not set
17
+ raise ValueError("litellm.use_client = True but no token or email passed. Please set it in litellm.token")
18
+ self.dashboard_url = "https://admin.litellm.ai/" + self.user_email
19
+ try:
20
+ print(
21
+ f"\033[92mHere's your LiteLLM Dashboard 👉 \033[94m\033[4m{self.dashboard_url}\033[0m"
22
+ )
23
+ except:
24
+ print(f"Here's your LiteLLM Dashboard 👉 {self.dashboard_url}")
25
+ if self.user_email == None:
26
+ raise ValueError(
27
+ "[Non-Blocking Error] LiteLLMDebugger: Missing LITELLM_TOKEN. Set it in your environment. Eg.: os.environ['LITELLM_TOKEN']= <your_email>"
28
+ )
29
+ except Exception as e:
30
+ raise ValueError(
31
+ "[Non-Blocking Error] LiteLLMDebugger: Missing LITELLM_TOKEN. Set it in your environment. Eg.: os.environ['LITELLM_TOKEN']= <your_email>"
32
+ )
33
+
34
+ def input_log_event(
35
+ self,
36
+ model,
37
+ messages,
38
+ end_user,
39
+ litellm_call_id,
40
+ call_type,
41
+ print_verbose,
42
+ litellm_params,
43
+ optional_params,
44
+ ):
45
+ print_verbose(f"LiteDebugger: Pre-API Call Logging for call id {litellm_call_id}")
46
+ try:
47
+ print_verbose(
48
+ f"LiteLLMDebugger: Logging - Enters input logging function for model {model}"
49
+ )
50
+
51
+ def remove_key_value(dictionary, key):
52
+ new_dict = dictionary.copy() # Create a copy of the original dictionary
53
+ new_dict.pop(key) # Remove the specified key-value pair from the copy
54
+ return new_dict
55
+
56
+ updated_litellm_params = remove_key_value(litellm_params, "logger_fn")
57
+
58
+ if call_type == "embedding":
59
+ for message in messages: # assuming the input is a list as required by the embedding function
60
+ litellm_data_obj = {
61
+ "model": model,
62
+ "messages": [{"role": "user", "content": message}],
63
+ "end_user": end_user,
64
+ "status": "initiated",
65
+ "litellm_call_id": litellm_call_id,
66
+ "user_email": self.user_email,
67
+ "litellm_params": updated_litellm_params,
68
+ "optional_params": optional_params,
69
+ }
70
+ print_verbose(
71
+ f"LiteLLMDebugger: Logging - logged data obj {litellm_data_obj}"
72
+ )
73
+ response = requests.post(
74
+ url=self.api_url,
75
+ headers={"content-type": "application/json"},
76
+ data=json.dumps(litellm_data_obj),
77
+ )
78
+ print_verbose(f"LiteDebugger: embedding api response - {response.text}")
79
+ elif call_type == "completion":
80
+ litellm_data_obj = {
81
+ "model": model,
82
+ "messages": messages if isinstance(messages, list) else [{"role": "user", "content": messages}],
83
+ "end_user": end_user,
84
+ "status": "initiated",
85
+ "litellm_call_id": litellm_call_id,
86
+ "user_email": self.user_email,
87
+ "litellm_params": updated_litellm_params,
88
+ "optional_params": optional_params,
89
+ }
90
+ print_verbose(
91
+ f"LiteLLMDebugger: Logging - logged data obj {litellm_data_obj}"
92
+ )
93
+ response = requests.post(
94
+ url=self.api_url,
95
+ headers={"content-type": "application/json"},
96
+ data=json.dumps(litellm_data_obj),
97
+ )
98
+ print_verbose(f"LiteDebugger: completion api response - {response.text}")
99
+ except:
100
+ print_verbose(
101
+ f"[Non-Blocking Error] LiteDebugger: Logging Error - {traceback.format_exc()}"
102
+ )
103
+ pass
104
+
105
+ def post_call_log_event(self, original_response, litellm_call_id, print_verbose, call_type, stream):
106
+ print_verbose(f"LiteDebugger: Post-API Call Logging for call id {litellm_call_id}")
107
+ try:
108
+ if call_type == "embedding":
109
+ litellm_data_obj = {
110
+ "status": "received",
111
+ "additional_details": {"original_response": str(original_response["data"][0]["embedding"][:5])}, # don't store the entire vector
112
+ "litellm_call_id": litellm_call_id,
113
+ "user_email": self.user_email,
114
+ }
115
+ elif call_type == "completion" and not stream:
116
+ litellm_data_obj = {
117
+ "status": "received",
118
+ "additional_details": {"original_response": original_response},
119
+ "litellm_call_id": litellm_call_id,
120
+ "user_email": self.user_email,
121
+ }
122
+ elif call_type == "completion" and stream:
123
+ litellm_data_obj = {
124
+ "status": "received",
125
+ "additional_details": {"original_response": "Streamed response" if isinstance(original_response, types.GeneratorType) else original_response},
126
+ "litellm_call_id": litellm_call_id,
127
+ "user_email": self.user_email,
128
+ }
129
+ print_verbose(f"litedebugger post-call data object - {litellm_data_obj}")
130
+ response = requests.post(
131
+ url=self.api_url,
132
+ headers={"content-type": "application/json"},
133
+ data=json.dumps(litellm_data_obj),
134
+ )
135
+ print_verbose(f"LiteDebugger: api response - {response.text}")
136
+ except:
137
+ print_verbose(
138
+ f"[Non-Blocking Error] LiteDebugger: Logging Error - {traceback.format_exc()}"
139
+ )
140
+
141
+ def log_event(
142
+ self,
143
+ end_user,
144
+ response_obj,
145
+ start_time,
146
+ end_time,
147
+ litellm_call_id,
148
+ print_verbose,
149
+ call_type,
150
+ stream = False
151
+ ):
152
+ print_verbose(f"LiteDebugger: Success/Failure Call Logging for call id {litellm_call_id}")
153
+ try:
154
+ print_verbose(
155
+ f"LiteLLMDebugger: Success/Failure Logging - Enters handler logging function for function {call_type} and stream set to {stream} with response object {response_obj}"
156
+ )
157
+ total_cost = 0 # [TODO] implement cost tracking
158
+ response_time = (end_time - start_time).total_seconds()
159
+ if call_type == "completion" and stream == False:
160
+ litellm_data_obj = {
161
+ "response_time": response_time,
162
+ "total_cost": total_cost,
163
+ "response": response_obj["choices"][0]["message"]["content"],
164
+ "litellm_call_id": litellm_call_id,
165
+ "status": "success",
166
+ }
167
+ print_verbose(
168
+ f"LiteDebugger: Logging - final data object: {litellm_data_obj}"
169
+ )
170
+ response = requests.post(
171
+ url=self.api_url,
172
+ headers={"content-type": "application/json"},
173
+ data=json.dumps(litellm_data_obj),
174
+ )
175
+ elif call_type == "embedding":
176
+ litellm_data_obj = {
177
+ "response_time": response_time,
178
+ "total_cost": total_cost,
179
+ "response": str(response_obj["data"][0]["embedding"][:5]),
180
+ "litellm_call_id": litellm_call_id,
181
+ "status": "success",
182
+ }
183
+ response = requests.post(
184
+ url=self.api_url,
185
+ headers={"content-type": "application/json"},
186
+ data=json.dumps(litellm_data_obj),
187
+ )
188
+ elif call_type == "completion" and stream == True:
189
+ if len(response_obj["content"]) > 0: # don't log the empty strings
190
+ litellm_data_obj = {
191
+ "response_time": response_time,
192
+ "total_cost": total_cost,
193
+ "response": response_obj["content"],
194
+ "litellm_call_id": litellm_call_id,
195
+ "status": "success",
196
+ }
197
+ print_verbose(
198
+ f"LiteDebugger: Logging - final data object: {litellm_data_obj}"
199
+ )
200
+ response = requests.post(
201
+ url=self.api_url,
202
+ headers={"content-type": "application/json"},
203
+ data=json.dumps(litellm_data_obj),
204
+ )
205
+ elif "error" in response_obj:
206
+ if "Unable to map your input to a model." in response_obj["error"]:
207
+ total_cost = 0
208
+ litellm_data_obj = {
209
+ "response_time": response_time,
210
+ "model": response_obj["model"],
211
+ "total_cost": total_cost,
212
+ "error": response_obj["error"],
213
+ "end_user": end_user,
214
+ "litellm_call_id": litellm_call_id,
215
+ "status": "failure",
216
+ "user_email": self.user_email,
217
+ }
218
+ print_verbose(
219
+ f"LiteDebugger: Logging - final data object: {litellm_data_obj}"
220
+ )
221
+ response = requests.post(
222
+ url=self.api_url,
223
+ headers={"content-type": "application/json"},
224
+ data=json.dumps(litellm_data_obj),
225
+ )
226
+ print_verbose(f"LiteDebugger: api response - {response.text}")
227
+ except:
228
+ print_verbose(
229
+ f"[Non-Blocking Error] LiteDebugger: Logging Error - {traceback.format_exc()}"
230
+ )
231
+ pass
litellm/integrations/llmonitor.py ADDED
@@ -0,0 +1,127 @@
1
+ #### What this does ####
2
+ # On success + failure, log events to LLMonitor
3
+ import datetime
4
+ import traceback
5
+ import dotenv
6
+ import os
7
+ import requests
8
+
9
+ dotenv.load_dotenv() # Loading env variables using dotenv
10
+
11
+
12
+ # convert to {completion: xx, tokens: xx}
13
+ def parse_usage(usage):
14
+ return {
15
+ "completion": usage["completion_tokens"] if "completion_tokens" in usage else 0,
16
+ "prompt": usage["prompt_tokens"] if "prompt_tokens" in usage else 0,
17
+ }
18
+
19
+
20
+ def parse_messages(input):
21
+ if input is None:
22
+ return None
23
+
24
+ def clean_message(message):
25
+ # if it is a string, return it as is
26
+ if isinstance(message, str):
27
+ return message
28
+
29
+ if "message" in message:
30
+ return clean_message(message["message"])
31
+ text = message["content"]
32
+ if text == None:
33
+ text = message.get("function_call", None)
34
+
35
+ return {
36
+ "role": message["role"],
37
+ "text": text,
38
+ }
39
+
40
+ if isinstance(input, list):
41
+ if len(input) == 1:
42
+ return clean_message(input[0])
43
+ else:
44
+ return [clean_message(msg) for msg in input]
45
+ else:
46
+ return clean_message(input)
47
+
48
+
49
+ class LLMonitorLogger:
50
+ # Class variables or attributes
51
+ def __init__(self):
52
+ # Instance variables
53
+ self.api_url = os.getenv("LLMONITOR_API_URL") or "https://app.llmonitor.com"
54
+ self.app_id = os.getenv("LLMONITOR_APP_ID")
55
+
56
+ def log_event(
57
+ self,
58
+ type,
59
+ event,
60
+ run_id,
61
+ model,
62
+ print_verbose,
63
+ input=None,
64
+ user_id=None,
65
+ response_obj=None,
66
+ start_time=datetime.datetime.now(),
67
+ end_time=datetime.datetime.now(),
68
+ error=None,
69
+ ):
70
+ # Method definition
71
+ try:
72
+ print_verbose(f"LLMonitor Logging - Logging request for model {model}")
73
+
74
+ if response_obj:
75
+ usage = (
76
+ parse_usage(response_obj["usage"])
77
+ if "usage" in response_obj
78
+ else None
79
+ )
80
+ output = response_obj["choices"] if "choices" in response_obj else None
81
+ else:
82
+ usage = None
83
+ output = None
84
+
85
+ if error:
86
+ error_obj = {"stack": error}
87
+
88
+ else:
89
+ error_obj = None
90
+
91
+ data = [
92
+ {
93
+ "type": type,
94
+ "name": model,
95
+ "runId": run_id,
96
+ "app": self.app_id,
97
+ "event": "start",
98
+ "timestamp": start_time.isoformat(),
99
+ "userId": user_id,
100
+ "input": parse_messages(input),
101
+ },
102
+ {
103
+ "type": type,
104
+ "runId": run_id,
105
+ "app": self.app_id,
106
+ "event": event,
107
+ "error": error_obj,
108
+ "timestamp": end_time.isoformat(),
109
+ "userId": user_id,
110
+ "output": parse_messages(output),
111
+ "tokensUsage": usage,
112
+ },
113
+ ]
114
+
115
+ print_verbose(f"LLMonitor Logging - final data object: {data}")
116
+
117
+ response = requests.post(
118
+ self.api_url + "/api/report",
119
+ headers={"Content-Type": "application/json"},
120
+ json={"events": data},
121
+ )
122
+
123
+ print_verbose(f"LLMonitor Logging - response: {response}")
124
+ except:
125
+ # traceback.print_exc()
126
+ print_verbose(f"LLMonitor Logging Error - {traceback.format_exc()}")
127
+ pass
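For reference, the two helpers at the top of this file normalize payloads as follows (a small illustrative snippet using the functions defined above):

messages = [{"role": "user", "content": "Hi"}]
print(parse_messages(messages))                                    # {'role': 'user', 'text': 'Hi'}
print(parse_usage({"prompt_tokens": 10, "completion_tokens": 3}))  # {'completion': 3, 'prompt': 10}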
litellm/integrations/prompt_layer.py ADDED
@@ -0,0 +1,72 @@
1
+ #### What this does ####
2
+ # On success, logs events to Promptlayer
3
+ import dotenv, os
4
+ import requests
5
+ import requests
6
+
7
+ dotenv.load_dotenv() # Loading env variables using dotenv
8
+ import traceback
9
+
10
+
11
+ class PromptLayerLogger:
12
+ # Class variables or attributes
13
+ def __init__(self):
14
+ # Instance variables
15
+ self.key = os.getenv("PROMPTLAYER_API_KEY")
16
+
17
+ def log_event(self, kwargs, response_obj, start_time, end_time, print_verbose):
18
+ # Method definition
19
+ try:
20
+ new_kwargs = {}
21
+ new_kwargs['model'] = kwargs['model']
22
+ new_kwargs['messages'] = kwargs['messages']
23
+
24
+ # add kwargs["optional_params"] to new_kwargs
25
+ for optional_param in kwargs["optional_params"]:
26
+ new_kwargs[optional_param] = kwargs["optional_params"][optional_param]
27
+
28
+
29
+ print_verbose(
30
+ f"Prompt Layer Logging - Enters logging function for model kwargs: {new_kwargs}\n, response: {response_obj}"
31
+ )
32
+
33
+
34
+ request_response = requests.post(
35
+ "https://api.promptlayer.com/rest/track-request",
36
+ json={
37
+ "function_name": "openai.ChatCompletion.create",
38
+ "kwargs": new_kwargs,
39
+ "tags": ["hello", "world"],
40
+ "request_response": dict(response_obj),
41
+ "request_start_time": int(start_time.timestamp()),
42
+ "request_end_time": int(end_time.timestamp()),
43
+ "api_key": self.key,
44
+ # Optional params for PromptLayer
45
+ # "prompt_id": "<PROMPT ID>",
46
+ # "prompt_input_variables": "<Dictionary of variables for prompt>",
47
+ # "prompt_version":1,
48
+ },
49
+ )
50
+ print_verbose(
51
+ f"Prompt Layer Logging: success - final response object: {request_response.text}"
52
+ )
53
+ response_json = request_response.json()
54
+ if "success" not in request_response.json():
55
+ raise Exception("Promptlayer did not successfully log the response!")
56
+
57
+ if "request_id" in response_json:
58
+ print_verbose(kwargs["litellm_params"]["metadata"])
59
+ if kwargs["litellm_params"]["metadata"] is not None:
60
+ response = requests.post(
61
+ "https://api.promptlayer.com/rest/track-metadata",
62
+ json={
63
+ "request_id": response_json["request_id"],
64
+ "api_key": self.key,
65
+ "metadata": kwargs["litellm_params"]["metadata"]
66
+ },
67
+ )
68
+ print_verbose(f"Prompt Layer Logging: success - metadata post response object: {response.text}")
69
+
70
+ except:
71
+ print_verbose(f"error: Prompt Layer Error - {traceback.format_exc()}")
72
+ pass
litellm/integrations/supabase.py ADDED
@@ -0,0 +1,116 @@
1
+ #### What this does ####
2
+ # On success + failure, log events to Supabase
3
+
4
+ import dotenv, os
5
+ import requests
6
+
7
+ dotenv.load_dotenv() # Loading env variables using dotenv
8
+ import traceback
9
+ import datetime, subprocess, sys
10
+ import litellm
11
+
12
+ class Supabase:
13
+ # Class variables or attributes
14
+ supabase_table_name = "request_logs"
15
+
16
+ def __init__(self):
17
+ # Instance variables
18
+ self.supabase_url = os.getenv("SUPABASE_URL")
19
+ self.supabase_key = os.getenv("SUPABASE_KEY")
20
+ try:
21
+ import supabase
22
+ except ImportError:
23
+ subprocess.check_call([sys.executable, "-m", "pip", "install", "supabase"])
24
+ import supabase
25
+ self.supabase_client = supabase.create_client(
26
+ self.supabase_url, self.supabase_key
27
+ )
28
+
29
+ def input_log_event(
30
+ self, model, messages, end_user, litellm_call_id, print_verbose
31
+ ):
32
+ try:
33
+ print_verbose(
34
+ f"Supabase Logging - Enters input logging function for model {model}"
35
+ )
36
+ supabase_data_obj = {
37
+ "model": model,
38
+ "messages": messages,
39
+ "end_user": end_user,
40
+ "status": "initiated",
41
+ "litellm_call_id": litellm_call_id,
42
+ }
43
+ data, count = (
44
+ self.supabase_client.table(self.supabase_table_name)
45
+ .insert(supabase_data_obj)
46
+ .execute()
47
+ )
48
+ print_verbose(f"data: {data}")
49
+ except:
50
+ print_verbose(f"Supabase Logging Error - {traceback.format_exc()}")
51
+ pass
52
+
53
+ def log_event(
54
+ self,
55
+ model,
56
+ messages,
57
+ end_user,
58
+ response_obj,
59
+ start_time,
60
+ end_time,
61
+ litellm_call_id,
62
+ print_verbose,
63
+ ):
64
+ try:
65
+ print_verbose(
66
+ f"Supabase Logging - Enters logging function for model {model}, response_obj: {response_obj}"
67
+ )
68
+
69
+ total_cost = litellm.completion_cost(completion_response=response_obj)
70
+
71
+ response_time = (end_time - start_time).total_seconds()
72
+ if "choices" in response_obj:
73
+ supabase_data_obj = {
74
+ "response_time": response_time,
75
+ "model": response_obj["model"],
76
+ "total_cost": total_cost,
77
+ "messages": messages,
78
+ "response": response_obj["choices"][0]["message"]["content"],
79
+ "end_user": end_user,
80
+ "litellm_call_id": litellm_call_id,
81
+ "status": "success",
82
+ }
83
+ print_verbose(
84
+ f"Supabase Logging - final data object: {supabase_data_obj}"
85
+ )
86
+ data, count = (
87
+ self.supabase_client.table(self.supabase_table_name)
88
+ .upsert(supabase_data_obj, on_conflict="litellm_call_id")
89
+ .execute()
90
+ )
91
+ elif "error" in response_obj:
92
+ if "Unable to map your input to a model." in response_obj["error"]:
93
+ total_cost = 0
94
+ supabase_data_obj = {
95
+ "response_time": response_time,
96
+ "model": response_obj["model"],
97
+ "total_cost": total_cost,
98
+ "messages": messages,
99
+ "error": response_obj["error"],
100
+ "end_user": end_user,
101
+ "litellm_call_id": litellm_call_id,
102
+ "status": "failure",
103
+ }
104
+ print_verbose(
105
+ f"Supabase Logging - final data object: {supabase_data_obj}"
106
+ )
107
+ data, count = (
108
+ self.supabase_client.table(self.supabase_table_name)
109
+ .upsert(supabase_data_obj, on_conflict="litellm_call_id")
110
+ .execute()
111
+ )
112
+
113
+ except:
114
+ # traceback.print_exc()
115
+ print_verbose(f"Supabase Logging Error - {traceback.format_exc()}")
116
+ pass
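A hedged setup sketch for the Supabase logger; the env vars match what __init__ reads, the URL/key are placeholders, and the "supabase" callback name is an assumption:

import os
import litellm

os.environ["SUPABASE_URL"] = "https://xyzcompany.supabase.co"  # placeholder
os.environ["SUPABASE_KEY"] = "public-anon-key"                 # placeholder

litellm.success_callback = ["supabase"]  # assumed callback name
litellm.completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi"}])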
litellm/integrations/traceloop.py ADDED
@@ -0,0 +1,78 @@
1
+ class TraceloopLogger:
2
+ def __init__(self):
3
+ from traceloop.sdk.tracing.tracing import TracerWrapper
4
+
5
+ self.tracer_wrapper = TracerWrapper()
6
+
7
+ def log_event(self, kwargs, response_obj, start_time, end_time, print_verbose):
8
+ from opentelemetry.trace import SpanKind
9
+ from opentelemetry.semconv.ai import SpanAttributes
10
+
11
+ try:
12
+ tracer = self.tracer_wrapper.get_tracer()
13
+
14
+ model = kwargs.get("model")
15
+
16
+ # LiteLLM uses the standard OpenAI library, so it's already handled by Traceloop SDK
17
+ if "gpt" in model:
18
+ return
19
+
20
+ with tracer.start_as_current_span(
21
+ "litellm.completion",
22
+ kind=SpanKind.CLIENT,
23
+ ) as span:
24
+ if span.is_recording():
25
+ span.set_attribute(
26
+ SpanAttributes.LLM_REQUEST_MODEL, kwargs.get("model")
27
+ )
28
+ span.set_attribute(
29
+ SpanAttributes.LLM_REQUEST_MAX_TOKENS, kwargs.get("max_tokens")
30
+ )
31
+ span.set_attribute(
32
+ SpanAttributes.LLM_TEMPERATURE, kwargs.get("temperature")
33
+ )
34
+
35
+ for idx, prompt in enumerate(kwargs.get("messages")):
36
+ span.set_attribute(
37
+ f"{SpanAttributes.LLM_PROMPTS}.{idx}.role",
38
+ prompt.get("role"),
39
+ )
40
+ span.set_attribute(
41
+ f"{SpanAttributes.LLM_PROMPTS}.{idx}.content",
42
+ prompt.get("content"),
43
+ )
44
+
45
+ span.set_attribute(
46
+ SpanAttributes.LLM_RESPONSE_MODEL, response_obj.get("model")
47
+ )
48
+ usage = response_obj.get("usage")
49
+ if usage:
50
+ span.set_attribute(
51
+ SpanAttributes.LLM_USAGE_TOTAL_TOKENS,
52
+ usage.get("total_tokens"),
53
+ )
54
+ span.set_attribute(
55
+ SpanAttributes.LLM_USAGE_COMPLETION_TOKENS,
56
+ usage.get("completion_tokens"),
57
+ )
58
+ span.set_attribute(
59
+ SpanAttributes.LLM_USAGE_PROMPT_TOKENS,
60
+ usage.get("prompt_tokens"),
61
+ )
62
+
63
+ for idx, choice in enumerate(response_obj.get("choices")):
64
+ span.set_attribute(
65
+ f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.finish_reason",
66
+ choice.get("finish_reason"),
67
+ )
68
+ span.set_attribute(
69
+ f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.role",
70
+ choice.get("message").get("role"),
71
+ )
72
+ span.set_attribute(
73
+ f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.content",
74
+ choice.get("message").get("content"),
75
+ )
76
+
77
+ except Exception as e:
78
+ print_verbose(f"Traceloop Layer Error - {e}")
litellm/integrations/weights_biases.py ADDED
@@ -0,0 +1,219 @@
1
+ imported_openAIResponse=True
2
+ try:
3
+ import io
4
+ import logging
5
+ import sys
6
+ from typing import Any, Dict, List, Optional, TypeVar
7
+
8
+ from wandb.sdk.data_types import trace_tree
9
+
10
+ if sys.version_info >= (3, 8):
11
+ from typing import Literal, Protocol
12
+ else:
13
+ from typing_extensions import Literal, Protocol
14
+
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
+ K = TypeVar("K", bound=str)
20
+ V = TypeVar("V")
21
+
22
+
23
+ class OpenAIResponse(Protocol[K, V]): # type: ignore
24
+ # contains a (known) object attribute
25
+ object: Literal["chat.completion", "edit", "text_completion"]
26
+
27
+ def __getitem__(self, key: K) -> V:
28
+ ... # pragma: no cover
29
+
30
+ def get(self, key: K, default: Optional[V] = None) -> Optional[V]:
31
+ ... # pragma: no cover
32
+
33
+
34
+ class OpenAIRequestResponseResolver:
35
+ def __call__(
36
+ self,
37
+ request: Dict[str, Any],
38
+ response: OpenAIResponse,
39
+ time_elapsed: float,
40
+ ) -> Optional[trace_tree.WBTraceTree]:
41
+ try:
42
+ if response["object"] == "edit":
43
+ return self._resolve_edit(request, response, time_elapsed)
44
+ elif response["object"] == "text_completion":
45
+ return self._resolve_completion(request, response, time_elapsed)
46
+ elif response["object"] == "chat.completion":
47
+ return self._resolve_chat_completion(request, response, time_elapsed)
48
+ else:
49
+ logger.info(f"Unknown OpenAI response object: {response['object']}")
50
+ except Exception as e:
51
+ logger.warning(f"Failed to resolve request/response: {e}")
52
+ return None
53
+
54
+ @staticmethod
55
+ def results_to_trace_tree(
56
+ request: Dict[str, Any],
57
+ response: OpenAIResponse,
58
+ results: List[trace_tree.Result],
59
+ time_elapsed: float,
60
+ ) -> trace_tree.WBTraceTree:
61
+ """Converts the request, response, and results into a trace tree.
62
+
63
+ params:
64
+ request: The request dictionary
65
+ response: The response object
66
+ results: A list of results object
67
+ time_elapsed: The time elapsed in seconds
68
+ returns:
69
+ A wandb trace tree object.
70
+ """
71
+ start_time_ms = int(round(response["created"] * 1000))
72
+ end_time_ms = start_time_ms + int(round(time_elapsed * 1000))
73
+ span = trace_tree.Span(
74
+ name=f"{response.get('model', 'openai')}_{response['object']}_{response.get('created')}",
75
+ attributes=dict(response), # type: ignore
76
+ start_time_ms=start_time_ms,
77
+ end_time_ms=end_time_ms,
78
+ span_kind=trace_tree.SpanKind.LLM,
79
+ results=results,
80
+ )
81
+ model_obj = {"request": request, "response": response, "_kind": "openai"}
82
+ return trace_tree.WBTraceTree(root_span=span, model_dict=model_obj)
83
+
84
+ def _resolve_edit(
85
+ self,
86
+ request: Dict[str, Any],
87
+ response: OpenAIResponse,
88
+ time_elapsed: float,
89
+ ) -> trace_tree.WBTraceTree:
90
+ """Resolves the request and response objects for `openai.Edit`."""
91
+ request_str = (
92
+ f"\n\n**Instruction**: {request['instruction']}\n\n"
93
+ f"**Input**: {request['input']}\n"
94
+ )
95
+ choices = [
96
+ f"\n\n**Edited**: {choice['text']}\n" for choice in response["choices"]
97
+ ]
98
+
99
+ return self._request_response_result_to_trace(
100
+ request=request,
101
+ response=response,
102
+ request_str=request_str,
103
+ choices=choices,
104
+ time_elapsed=time_elapsed,
105
+ )
106
+
107
+ def _resolve_completion(
108
+ self,
109
+ request: Dict[str, Any],
110
+ response: OpenAIResponse,
111
+ time_elapsed: float,
112
+ ) -> trace_tree.WBTraceTree:
113
+ """Resolves the request and response objects for `openai.Completion`."""
114
+ request_str = f"\n\n**Prompt**: {request['prompt']}\n"
115
+ choices = [
116
+ f"\n\n**Completion**: {choice['text']}\n" for choice in response["choices"]
117
+ ]
118
+
119
+ return self._request_response_result_to_trace(
120
+ request=request,
121
+ response=response,
122
+ request_str=request_str,
123
+ choices=choices,
124
+ time_elapsed=time_elapsed,
125
+ )
126
+
127
+ def _resolve_chat_completion(
128
+ self,
129
+ request: Dict[str, Any],
130
+ response: OpenAIResponse,
131
+ time_elapsed: float,
132
+ ) -> trace_tree.WBTraceTree:
133
+ """Resolves the request and response objects for `openai.Completion`."""
134
+ prompt = io.StringIO()
135
+ for message in request["messages"]:
136
+ prompt.write(f"\n\n**{message['role']}**: {message['content']}\n")
137
+ request_str = prompt.getvalue()
138
+
139
+ choices = [
140
+ f"\n\n**{choice['message']['role']}**: {choice['message']['content']}\n"
141
+ for choice in response["choices"]
142
+ ]
143
+
144
+ return self._request_response_result_to_trace(
145
+ request=request,
146
+ response=response,
147
+ request_str=request_str,
148
+ choices=choices,
149
+ time_elapsed=time_elapsed,
150
+ )
151
+
152
+ def _request_response_result_to_trace(
153
+ self,
154
+ request: Dict[str, Any],
155
+ response: OpenAIResponse,
156
+ request_str: str,
157
+ choices: List[str],
158
+ time_elapsed: float,
159
+ ) -> trace_tree.WBTraceTree:
160
+ """Resolves the request and response objects for `openai.Completion`."""
161
+ results = [
162
+ trace_tree.Result(
163
+ inputs={"request": request_str},
164
+ outputs={"response": choice},
165
+ )
166
+ for choice in choices
167
+ ]
168
+ trace = self.results_to_trace_tree(request, response, results, time_elapsed)
169
+ return trace
170
+ except:
171
+ imported_openAIResponse=False
172
+
173
+
174
+
175
+ #### What this does ####
176
+ # On success, logs events to Weights & Biases
177
+ import dotenv, os
178
+ import requests
179
+ import requests
180
+ from datetime import datetime
181
+
182
+ dotenv.load_dotenv() # Loading env variables using dotenv
183
+ import traceback
184
+
185
+ class WeightsBiasesLogger:
186
+ # Class variables or attributes
187
+ def __init__(self):
188
+ try:
189
+ import wandb
190
+ except:
191
+ raise Exception("\033[91m wandb not installed, try running 'pip install wandb' to fix this error\033[0m")
192
+ if imported_openAIResponse==False:
193
+ raise Exception("\033[91m wandb is installed but its trace_tree utilities could not be imported, try running 'pip install -U wandb' to fix this error\033[0m")
194
+ self.resolver = OpenAIRequestResponseResolver()
195
+
196
+ def log_event(self, kwargs, response_obj, start_time, end_time, print_verbose):
197
+ # Method definition
198
+ import wandb
199
+
200
+ try:
201
+ print_verbose(
202
+ f"W&B Logging - Enters logging function for model {kwargs}"
203
+ )
204
+ run = wandb.init()
205
+ print_verbose(response_obj)
206
+
207
+ trace = self.resolver(kwargs, response_obj, (end_time-start_time).total_seconds())
208
+
209
+ if trace is not None:
210
+ run.log({"trace": trace})
211
+
212
+ run.finish()
213
+ print_verbose(
214
+ f"W&B Logging Logging - final response object: {response_obj}"
215
+ )
216
+ except:
217
+ # traceback.print_exc()
218
+ print_verbose(f"W&B Logging Layer Error - {traceback.format_exc()}")
219
+ pass
litellm/llms/__init__.py ADDED
@@ -0,0 +1 @@
1
+ from . import *
litellm/llms/ai21.py ADDED
@@ -0,0 +1,194 @@
1
+ import os, types, traceback
2
+ import json
3
+ from enum import Enum
4
+ import requests
5
+ import time, httpx
6
+ from typing import Callable, Optional
7
+ from litellm.utils import ModelResponse, Choices, Message
8
+ import litellm
9
+
10
+ class AI21Error(Exception):
11
+ def __init__(self, status_code, message):
12
+ self.status_code = status_code
13
+ self.message = message
14
+ self.request = httpx.Request(method="POST", url="https://api.ai21.com/studio/v1/")
15
+ self.response = httpx.Response(status_code=status_code, request=self.request)
16
+ super().__init__(
17
+ self.message
18
+ ) # Call the base class constructor with the parameters it needs
19
+
20
+ class AI21Config():
21
+ """
22
+ Reference: https://docs.ai21.com/reference/j2-complete-ref
23
+
24
+ The class `AI21Config` provides configuration for the AI21's API interface. Below are the parameters:
25
+
26
+ - `numResults` (int32): Number of completions to sample and return. Optional, default is 1. If the temperature is greater than 0 (non-greedy decoding), a value greater than 1 can be meaningful.
27
+
28
+ - `maxTokens` (int32): The maximum number of tokens to generate per result. Optional, default is 16. If no `stopSequences` are given, generation stops after producing `maxTokens`.
29
+
30
+ - `minTokens` (int32): The minimum number of tokens to generate per result. Optional, default is 0. If `stopSequences` are given, they are ignored until `minTokens` are generated.
31
+
32
+ - `temperature` (float): Modifies the distribution from which tokens are sampled. Optional, default is 0.7. A value of 0 essentially disables sampling and results in greedy decoding.
33
+
34
+ - `topP` (float): Used for sampling tokens from the corresponding top percentile of probability mass. Optional, default is 1. For instance, a value of 0.9 considers only tokens comprising the top 90% probability mass.
35
+
36
+ - `stopSequences` (array of strings): Stops decoding if any of the input strings is generated. Optional.
37
+
38
+ - `topKReturn` (int32): Range between 0 to 10, including both. Optional, default is 0. Specifies the top-K alternative tokens to return. A non-zero value includes the string representations and log-probabilities for each of the top-K alternatives at each position.
39
+
40
+ - `frequencyPenalty` (object): Placeholder for frequency penalty object.
41
+
42
+ - `presencePenalty` (object): Placeholder for presence penalty object.
43
+
44
+ - `countPenalty` (object): Placeholder for count penalty object.
45
+ """
46
+ numResults: Optional[int]=None
47
+ maxTokens: Optional[int]=None
48
+ minTokens: Optional[int]=None
49
+ temperature: Optional[float]=None
50
+ topP: Optional[float]=None
51
+ stopSequences: Optional[list]=None
52
+ topKReturn: Optional[int]=None
53
+ frequencePenalty: Optional[dict]=None
54
+ presencePenalty: Optional[dict]=None
55
+ countPenalty: Optional[dict]=None
56
+
57
+ def __init__(self,
58
+ numResults: Optional[int]=None,
59
+ maxTokens: Optional[int]=None,
60
+ minTokens: Optional[int]=None,
61
+ temperature: Optional[float]=None,
62
+ topP: Optional[float]=None,
63
+ stopSequences: Optional[list]=None,
64
+ topKReturn: Optional[int]=None,
65
+ frequencePenalty: Optional[dict]=None,
66
+ presencePenalty: Optional[dict]=None,
67
+ countPenalty: Optional[dict]=None) -> None:
68
+ locals_ = locals()
69
+ for key, value in locals_.items():
70
+ if key != 'self' and value is not None:
71
+ setattr(self.__class__, key, value)
72
+
73
+ @classmethod
74
+ def get_config(cls):
75
+ return {k: v for k, v in cls.__dict__.items()
76
+ if not k.startswith('__')
77
+ and not isinstance(v, (types.FunctionType, types.BuiltinFunctionType, classmethod, staticmethod))
78
+ and v is not None}
79
+
80
+
81
+
82
+ def validate_environment(api_key):
83
+ if api_key is None:
84
+ raise ValueError(
85
+ "Missing AI21 API Key - A call is being made to ai21 but no key is set either in the environment variables or via params"
86
+ )
87
+ headers = {
88
+ "accept": "application/json",
89
+ "content-type": "application/json",
90
+ "Authorization": "Bearer " + api_key,
91
+ }
92
+ return headers
93
+
94
+ def completion(
95
+ model: str,
96
+ messages: list,
97
+ api_base: str,
98
+ model_response: ModelResponse,
99
+ print_verbose: Callable,
100
+ encoding,
101
+ api_key,
102
+ logging_obj,
103
+ optional_params=None,
104
+ litellm_params=None,
105
+ logger_fn=None,
106
+ ):
107
+ headers = validate_environment(api_key)
108
+ model = model
109
+ prompt = ""
110
+ for message in messages:
111
+ if "role" in message:
112
+ if message["role"] == "user":
113
+ prompt += (
114
+ f"{message['content']}"
115
+ )
116
+ else:
117
+ prompt += (
118
+ f"{message['content']}"
119
+ )
120
+ else:
121
+ prompt += f"{message['content']}"
122
+
123
+ ## Load Config
124
+ config = litellm.AI21Config.get_config()
125
+ for k, v in config.items():
126
+ if k not in optional_params: # completion(top_k=3) > ai21_config(top_k=3) <- allows for dynamic variables to be passed in
127
+ optional_params[k] = v
128
+
129
+ data = {
130
+ "prompt": prompt,
131
+ # "instruction": prompt, # some baseten models require the prompt to be passed in via the 'instruction' kwarg
132
+ **optional_params,
133
+ }
134
+
135
+ ## LOGGING
136
+ logging_obj.pre_call(
137
+ input=prompt,
138
+ api_key=api_key,
139
+ additional_args={"complete_input_dict": data},
140
+ )
141
+ ## COMPLETION CALL
142
+ response = requests.post(
143
+ api_base + model + "/complete", headers=headers, data=json.dumps(data)
144
+ )
145
+ if response.status_code != 200:
146
+ raise AI21Error(
147
+ status_code=response.status_code,
148
+ message=response.text
149
+ )
150
+ if "stream" in optional_params and optional_params["stream"] == True:
151
+ return response.iter_lines()
152
+ else:
153
+ ## LOGGING
154
+ logging_obj.post_call(
155
+ input=prompt,
156
+ api_key=api_key,
157
+ original_response=response.text,
158
+ additional_args={"complete_input_dict": data},
159
+ )
160
+ ## RESPONSE OBJECT
161
+ completion_response = response.json()
162
+ try:
163
+ choices_list = []
164
+ for idx, item in enumerate(completion_response["completions"]):
165
+ if len(item["data"]["text"]) > 0:
166
+ message_obj = Message(content=item["data"]["text"])
167
+ else:
168
+ message_obj = Message(content=None)
169
+ choice_obj = Choices(finish_reason=item["finishReason"]["reason"], index=idx+1, message=message_obj)
170
+ choices_list.append(choice_obj)
171
+ model_response["choices"] = choices_list
172
+ except Exception as e:
173
+ raise AI21Error(message=traceback.format_exc(), status_code=response.status_code)
174
+
175
+ ## CALCULATING USAGE - baseten charges on time, not tokens - have some mapping of cost here.
176
+ prompt_tokens = len(
177
+ encoding.encode(prompt)
178
+ )
179
+ completion_tokens = len(
180
+ encoding.encode(model_response["choices"][0]["message"].get("content"))
181
+ )
182
+
183
+ model_response["created"] = int(time.time())
184
+ model_response["model"] = model
185
+ model_response["usage"] = {
186
+ "prompt_tokens": prompt_tokens,
187
+ "completion_tokens": completion_tokens,
188
+ "total_tokens": prompt_tokens + completion_tokens,
189
+ }
190
+ return model_response
191
+
192
+ def embedding():
193
+ # logic for parsing in - calling - parsing out model embedding calls
194
+ pass
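
A minimal sketch of how `AI21Config` interacts with the `completion()` handler above: values set on the config become defaults that are merged into `optional_params` only when the caller has not supplied them. The model name and key below are placeholders.

import os
import litellm

os.environ["AI21_API_KEY"] = "my-ai21-key"  # placeholder credential

litellm.AI21Config(maxTokens=64, temperature=0.3)  # becomes the default for later calls

response = litellm.completion(
    model="j2-mid",  # assumption: an AI21 Jurassic-2 model routed to this handler
    messages=[{"role": "user", "content": "Write one sentence about tokenizers."}],
    temperature=0.9,  # per-call kwargs still win over AI21Config defaults
)
print(response.choices[0].message.content)
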
litellm/llms/aleph_alpha.py ADDED
@@ -0,0 +1,278 @@
1
+ import os, types
2
+ import json
3
+ from enum import Enum
4
+ import requests
5
+ import time
6
+ from typing import Callable, Optional
7
+ import litellm
8
+ from litellm.utils import ModelResponse, Choices, Message, Usage
9
+ import httpx
10
+
11
+ class AlephAlphaError(Exception):
12
+ def __init__(self, status_code, message):
13
+ self.status_code = status_code
14
+ self.message = message
15
+ self.request = httpx.Request(method="POST", url="https://api.aleph-alpha.com/complete")
16
+ self.response = httpx.Response(status_code=status_code, request=self.request)
17
+ super().__init__(
18
+ self.message
19
+ ) # Call the base class constructor with the parameters it needs
20
+
21
+ class AlephAlphaConfig():
22
+ """
23
+ Reference: https://docs.aleph-alpha.com/api/complete/
24
+
25
+ The `AlephAlphaConfig` class represents the configuration for the Aleph Alpha API. Here are the properties:
26
+
27
+ - `maximum_tokens` (integer, required): The maximum number of tokens to be generated by the completion. The sum of input tokens and maximum tokens may not exceed 2048.
28
+
29
+ - `minimum_tokens` (integer, optional; default value: 0): Generate at least this number of tokens before an end-of-text token is generated.
30
+
31
+ - `echo` (boolean, optional; default value: false): Whether to echo the prompt in the completion.
32
+
33
+ - `temperature` (number, nullable; default value: 0): Adjusts how creatively the model generates outputs. Use combinations of temperature, top_k, and top_p sensibly.
34
+
35
+ - `top_k` (integer, nullable; default value: 0): Introduces randomness into token generation by considering the top k most likely options.
36
+
37
+ - `top_p` (number, nullable; default value: 0): Adds randomness by considering the smallest set of tokens whose cumulative probability exceeds top_p.
38
+
39
+ - `presence_penalty`, `frequency_penalty`, `sequence_penalty` (number, nullable; default value: 0): Various penalties that can reduce repetition.
40
+
41
+ - `sequence_penalty_min_length` (integer; default value: 2): Minimum number of tokens to be considered as a sequence.
42
+
43
+ - `repetition_penalties_include_prompt`, `repetition_penalties_include_completion`, `use_multiplicative_presence_penalty`,`use_multiplicative_frequency_penalty`,`use_multiplicative_sequence_penalty` (boolean, nullable; default value: false): Various settings that adjust how the repetition penalties are applied.
44
+
45
+ - `penalty_bias` (string, nullable): Text used in addition to the penalized tokens for repetition penalties.
46
+
47
+ - `penalty_exceptions` (string[], nullable): Strings that may be generated without penalty.
48
+
49
+ - `penalty_exceptions_include_stop_sequences` (boolean, nullable; default value: true): Include all stop_sequences in penalty_exceptions.
50
+
51
+ - `best_of` (integer, nullable; default value: 1): The number of completions that will be generated on the server side.
52
+
53
+ - `n` (integer, nullable; default value: 1): The number of completions to return.
54
+
55
+ - `logit_bias` (object, nullable): Adjust the logit scores before sampling.
56
+
57
+ - `log_probs` (integer, nullable): Number of top log probabilities for each token generated.
58
+
59
+ - `stop_sequences` (string[], nullable): List of strings that will stop generation if they're generated.
60
+
61
+ - `tokens` (boolean, nullable; default value: false): Flag indicating whether individual tokens of the completion should be returned or not.
62
+
63
+ - `raw_completion` (boolean; default value: false): if True, the raw completion of the model will be returned.
64
+
65
+ - `disable_optimizations` (boolean, nullable; default value: false): Disables any applied optimizations to both your prompt and completion.
66
+
67
+ - `completion_bias_inclusion`, `completion_bias_exclusion` (string[], default value: []): Set of strings to bias the generation of tokens.
68
+
69
+ - `completion_bias_inclusion_first_token_only`, `completion_bias_exclusion_first_token_only` (boolean; default value: false): Consider only the first token for the completion_bias_inclusion/exclusion.
70
+
71
+ - `contextual_control_threshold` (number, nullable): Control over how similar tokens are controlled.
72
+
73
+ - `control_log_additive` (boolean; default value: true): Method of applying control to attention scores.
74
+ """
75
+ maximum_tokens: Optional[int]=litellm.max_tokens # aleph alpha requires max tokens
76
+ minimum_tokens: Optional[int]=None
77
+ echo: Optional[bool]=None
78
+ temperature: Optional[int]=None
79
+ top_k: Optional[int]=None
80
+ top_p: Optional[int]=None
81
+ presence_penalty: Optional[int]=None
82
+ frequency_penalty: Optional[int]=None
83
+ sequence_penalty: Optional[int]=None
84
+ sequence_penalty_min_length: Optional[int]=None
85
+ repetition_penalties_include_prompt: Optional[bool]=None
86
+ repetition_penalties_include_completion: Optional[bool]=None
87
+ use_multiplicative_presence_penalty: Optional[bool]=None
88
+ use_multiplicative_frequency_penalty: Optional[bool]=None
89
+ use_multiplicative_sequence_penalty: Optional[bool]=None
90
+ penalty_bias: Optional[str]=None
91
+ penalty_exceptions_include_stop_sequences: Optional[bool]=None
92
+ best_of: Optional[int]=None
93
+ n: Optional[int]=None
94
+ logit_bias: Optional[dict]=None
95
+ log_probs: Optional[int]=None
96
+ stop_sequences: Optional[list]=None
97
+ tokens: Optional[bool]=None
98
+ raw_completion: Optional[bool]=None
99
+ disable_optimizations: Optional[bool]=None
100
+ completion_bias_inclusion: Optional[list]=None
101
+ completion_bias_exclusion: Optional[list]=None
102
+ completion_bias_inclusion_first_token_only: Optional[bool]=None
103
+ completion_bias_exclusion_first_token_only: Optional[bool]=None
104
+ contextual_control_threshold: Optional[int]=None
105
+ control_log_additive: Optional[bool]=None
106
+
107
+
108
+ def __init__(self,
109
+ maximum_tokens: Optional[int]=None,
110
+ minimum_tokens: Optional[int]=None,
111
+ echo: Optional[bool]=None,
112
+ temperature: Optional[int]=None,
113
+ top_k: Optional[int]=None,
114
+ top_p: Optional[int]=None,
115
+ presence_penalty: Optional[int]=None,
116
+ frequency_penalty: Optional[int]=None,
117
+ sequence_penalty: Optional[int]=None,
118
+ sequence_penalty_min_length: Optional[int]=None,
119
+ repetition_penalties_include_prompt: Optional[bool]=None,
120
+ repetition_penalties_include_completion: Optional[bool]=None,
121
+ use_multiplicative_presence_penalty: Optional[bool]=None,
122
+ use_multiplicative_frequency_penalty: Optional[bool]=None,
123
+ use_multiplicative_sequence_penalty: Optional[bool]=None,
124
+ penalty_bias: Optional[str]=None,
125
+ penalty_exceptions_include_stop_sequences: Optional[bool]=None,
126
+ best_of: Optional[int]=None,
127
+ n: Optional[int]=None,
128
+ logit_bias: Optional[dict]=None,
129
+ log_probs: Optional[int]=None,
130
+ stop_sequences: Optional[list]=None,
131
+ tokens: Optional[bool]=None,
132
+ raw_completion: Optional[bool]=None,
133
+ disable_optimizations: Optional[bool]=None,
134
+ completion_bias_inclusion: Optional[list]=None,
135
+ completion_bias_exclusion: Optional[list]=None,
136
+ completion_bias_inclusion_first_token_only: Optional[bool]=None,
137
+ completion_bias_exclusion_first_token_only: Optional[bool]=None,
138
+ contextual_control_threshold: Optional[int]=None,
139
+ control_log_additive: Optional[bool]=None) -> None:
140
+
141
+ locals_ = locals()
142
+ for key, value in locals_.items():
143
+ if key != 'self' and value is not None:
144
+ setattr(self.__class__, key, value)
145
+
146
+ @classmethod
147
+ def get_config(cls):
148
+ return {k: v for k, v in cls.__dict__.items()
149
+ if not k.startswith('__')
150
+ and not isinstance(v, (types.FunctionType, types.BuiltinFunctionType, classmethod, staticmethod))
151
+ and v is not None}
152
+
153
+
154
+ def validate_environment(api_key):
155
+ headers = {
156
+ "accept": "application/json",
157
+ "content-type": "application/json",
158
+ }
159
+ if api_key:
160
+ headers["Authorization"] = f"Bearer {api_key}"
161
+ return headers
162
+
163
+ def completion(
164
+ model: str,
165
+ messages: list,
166
+ api_base: str,
167
+ model_response: ModelResponse,
168
+ print_verbose: Callable,
169
+ encoding,
170
+ api_key,
171
+ logging_obj,
172
+ optional_params=None,
173
+ litellm_params=None,
174
+ logger_fn=None,
175
+ default_max_tokens_to_sample=None,
176
+ ):
177
+ headers = validate_environment(api_key)
178
+
179
+ ## Load Config
180
+ config = litellm.AlephAlphaConfig.get_config()
181
+ for k, v in config.items():
182
+ if k not in optional_params: # completion(top_k=3) > aleph_alpha_config(top_k=3) <- allows for dynamic variables to be passed in
183
+ optional_params[k] = v
184
+
185
+ completion_url = api_base
186
+ model = model
187
+ prompt = ""
188
+ if "control" in model: # follow the ###Instruction / ###Response format
189
+ for idx, message in enumerate(messages):
190
+ if "role" in message:
191
+ if idx == 0: # set first message as instruction (required), let later user messages be input
192
+ prompt += f"###Instruction: {message['content']}"
193
+ else:
194
+ if message["role"] == "system":
195
+ prompt += (
196
+ f"###Instruction: {message['content']}"
197
+ )
198
+ elif message["role"] == "user":
199
+ prompt += (
200
+ f"###Input: {message['content']}"
201
+ )
202
+ else:
203
+ prompt += (
204
+ f"###Response: {message['content']}"
205
+ )
206
+ else:
207
+ prompt += f"{message['content']}"
208
+ else:
209
+ prompt = " ".join(message["content"] for message in messages)
210
+ data = {
211
+ "model": model,
212
+ "prompt": prompt,
213
+ **optional_params,
214
+ }
215
+
216
+ ## LOGGING
217
+ logging_obj.pre_call(
218
+ input=prompt,
219
+ api_key=api_key,
220
+ additional_args={"complete_input_dict": data},
221
+ )
222
+ ## COMPLETION CALL
223
+ response = requests.post(
224
+ completion_url, headers=headers, data=json.dumps(data), stream=optional_params["stream"] if "stream" in optional_params else False
225
+ )
226
+ if "stream" in optional_params and optional_params["stream"] == True:
227
+ return response.iter_lines()
228
+ else:
229
+ ## LOGGING
230
+ logging_obj.post_call(
231
+ input=prompt,
232
+ api_key=api_key,
233
+ original_response=response.text,
234
+ additional_args={"complete_input_dict": data},
235
+ )
236
+ print_verbose(f"raw model_response: {response.text}")
237
+ ## RESPONSE OBJECT
238
+ completion_response = response.json()
239
+ if "error" in completion_response:
240
+ raise AlephAlphaError(
241
+ message=completion_response["error"],
242
+ status_code=response.status_code,
243
+ )
244
+ else:
245
+ try:
246
+ choices_list = []
247
+ for idx, item in enumerate(completion_response["completions"]):
248
+ if len(item["completion"]) > 0:
249
+ message_obj = Message(content=item["completion"])
250
+ else:
251
+ message_obj = Message(content=None)
252
+ choice_obj = Choices(finish_reason=item["finish_reason"], index=idx+1, message=message_obj)
253
+ choices_list.append(choice_obj)
254
+ model_response["choices"] = choices_list
255
+ except:
256
+ raise AlephAlphaError(message=json.dumps(completion_response), status_code=response.status_code)
257
+
258
+ ## CALCULATING USAGE - baseten charges on time, not tokens - have some mapping of cost here.
259
+ prompt_tokens = len(
260
+ encoding.encode(prompt)
261
+ )
262
+ completion_tokens = len(
263
+ encoding.encode(model_response["choices"][0]["message"]["content"])
264
+ )
265
+
266
+ model_response["created"] = int(time.time())
267
+ model_response["model"] = model
268
+ usage = Usage(
269
+ prompt_tokens=prompt_tokens,
270
+ completion_tokens=completion_tokens,
271
+ total_tokens=prompt_tokens + completion_tokens
272
+ )
273
+ model_response.usage = usage
274
+ return model_response
275
+
276
+ def embedding():
277
+ # logic for parsing in - calling - parsing out model embedding calls
278
+ pass
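
A standalone sketch of the prompt formatting used above for Aleph Alpha "control" models, reproducing the ###Instruction / ###Input / ###Response branches outside the handler (the messages are illustrative):

messages = [
    {"role": "system", "content": "You are a concise assistant."},
    {"role": "user", "content": "Summarize the completion endpoint."},
]

prompt = ""
for idx, message in enumerate(messages):
    if idx == 0:
        prompt += f"###Instruction: {message['content']}"  # first message is always the instruction
    elif message["role"] == "system":
        prompt += f"###Instruction: {message['content']}"
    elif message["role"] == "user":
        prompt += f"###Input: {message['content']}"
    else:
        prompt += f"###Response: {message['content']}"

print(prompt)
# ###Instruction: You are a concise assistant.###Input: Summarize the completion endpoint.
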
litellm/llms/anthropic.py ADDED
@@ -0,0 +1,187 @@
1
+ import os, types
2
+ import json
3
+ from enum import Enum
4
+ import requests
5
+ import time
6
+ from typing import Callable, Optional
7
+ from litellm.utils import ModelResponse, Usage
8
+ import litellm
9
+ from .prompt_templates.factory import prompt_factory, custom_prompt
10
+ import httpx
11
+
12
+ class AnthropicConstants(Enum):
13
+ HUMAN_PROMPT = "\n\nHuman: "
14
+ AI_PROMPT = "\n\nAssistant: "
15
+
16
+ class AnthropicError(Exception):
17
+ def __init__(self, status_code, message):
18
+ self.status_code = status_code
19
+ self.message = message
20
+ self.request = httpx.Request(method="POST", url="https://api.anthropic.com/v1/complete")
21
+ self.response = httpx.Response(status_code=status_code, request=self.request)
22
+ super().__init__(
23
+ self.message
24
+ ) # Call the base class constructor with the parameters it needs
25
+
26
+ class AnthropicConfig():
27
+ """
28
+ Reference: https://docs.anthropic.com/claude/reference/complete_post
29
+
30
+ to pass metadata to anthropic, it's {"user_id": "any-relevant-information"}
31
+ """
32
+ max_tokens_to_sample: Optional[int]=litellm.max_tokens # anthropic requires a default
33
+ stop_sequences: Optional[list]=None
34
+ temperature: Optional[int]=None
35
+ top_p: Optional[int]=None
36
+ top_k: Optional[int]=None
37
+ metadata: Optional[dict]=None
38
+
39
+ def __init__(self,
40
+ max_tokens_to_sample: Optional[int]=256, # anthropic requires a default
41
+ stop_sequences: Optional[list]=None,
42
+ temperature: Optional[int]=None,
43
+ top_p: Optional[int]=None,
44
+ top_k: Optional[int]=None,
45
+ metadata: Optional[dict]=None) -> None:
46
+
47
+ locals_ = locals()
48
+ for key, value in locals_.items():
49
+ if key != 'self' and value is not None:
50
+ setattr(self.__class__, key, value)
51
+
52
+ @classmethod
53
+ def get_config(cls):
54
+ return {k: v for k, v in cls.__dict__.items()
55
+ if not k.startswith('__')
56
+ and not isinstance(v, (types.FunctionType, types.BuiltinFunctionType, classmethod, staticmethod))
57
+ and v is not None}
58
+
59
+
60
+ # makes headers for API call
61
+ def validate_environment(api_key):
62
+ if api_key is None:
63
+ raise ValueError(
64
+ "Missing Anthropic API Key - A call is being made to anthropic but no key is set either in the environment variables or via params"
65
+ )
66
+ headers = {
67
+ "accept": "application/json",
68
+ "anthropic-version": "2023-06-01",
69
+ "content-type": "application/json",
70
+ "x-api-key": api_key,
71
+ }
72
+ return headers
73
+
74
+ def completion(
75
+ model: str,
76
+ messages: list,
77
+ api_base: str,
78
+ custom_prompt_dict: dict,
79
+ model_response: ModelResponse,
80
+ print_verbose: Callable,
81
+ encoding,
82
+ api_key,
83
+ logging_obj,
84
+ optional_params=None,
85
+ litellm_params=None,
86
+ logger_fn=None,
87
+ ):
88
+ headers = validate_environment(api_key)
89
+ if model in custom_prompt_dict:
90
+ # check if the model has a registered custom prompt
91
+ model_prompt_details = custom_prompt_dict[model]
92
+ prompt = custom_prompt(
93
+ role_dict=model_prompt_details["roles"],
94
+ initial_prompt_value=model_prompt_details["initial_prompt_value"],
95
+ final_prompt_value=model_prompt_details["final_prompt_value"],
96
+ messages=messages
97
+ )
98
+ else:
99
+ prompt = prompt_factory(model=model, messages=messages, custom_llm_provider="anthropic")
100
+
101
+ ## Load Config
102
+ config = litellm.AnthropicConfig.get_config()
103
+ for k, v in config.items():
104
+ if k not in optional_params: # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in
105
+ optional_params[k] = v
106
+
107
+ data = {
108
+ "model": model,
109
+ "prompt": prompt,
110
+ **optional_params,
111
+ }
112
+
113
+ ## LOGGING
114
+ logging_obj.pre_call(
115
+ input=prompt,
116
+ api_key=api_key,
117
+ additional_args={"complete_input_dict": data, "api_base": api_base},
118
+ )
119
+
120
+ ## COMPLETION CALL
121
+ if "stream" in optional_params and optional_params["stream"] == True:
122
+ response = requests.post(
123
+ api_base,
124
+ headers=headers,
125
+ data=json.dumps(data),
126
+ stream=optional_params["stream"],
127
+ )
128
+
129
+ if response.status_code != 200:
130
+ raise AnthropicError(status_code=response.status_code, message=response.text)
131
+
132
+ return response.iter_lines()
133
+ else:
134
+ response = requests.post(
135
+ api_base, headers=headers, data=json.dumps(data)
136
+ )
137
+ if response.status_code != 200:
138
+ raise AnthropicError(status_code=response.status_code, message=response.text)
139
+
140
+ ## LOGGING
141
+ logging_obj.post_call(
142
+ input=prompt,
143
+ api_key=api_key,
144
+ original_response=response.text,
145
+ additional_args={"complete_input_dict": data},
146
+ )
147
+ print_verbose(f"raw model_response: {response.text}")
148
+ ## RESPONSE OBJECT
149
+ try:
150
+ completion_response = response.json()
151
+ except:
152
+ raise AnthropicError(
153
+ message=response.text, status_code=response.status_code
154
+ )
155
+ if "error" in completion_response:
156
+ raise AnthropicError(
157
+ message=str(completion_response["error"]),
158
+ status_code=response.status_code,
159
+ )
160
+ else:
161
+ if len(completion_response["completion"]) > 0:
162
+ model_response["choices"][0]["message"]["content"] = completion_response[
163
+ "completion"
164
+ ]
165
+ model_response.choices[0].finish_reason = completion_response["stop_reason"]
166
+
167
+ ## CALCULATING USAGE
168
+ prompt_tokens = len(
169
+ encoding.encode(prompt)
170
+ ) ##[TODO] use the anthropic tokenizer here
171
+ completion_tokens = len(
172
+ encoding.encode(model_response["choices"][0]["message"].get("content", ""))
173
+ ) ##[TODO] use the anthropic tokenizer here
174
+
175
+ model_response["created"] = int(time.time())
176
+ model_response["model"] = model
177
+ usage = Usage(
178
+ prompt_tokens=prompt_tokens,
179
+ completion_tokens=completion_tokens,
180
+ total_tokens=prompt_tokens + completion_tokens
181
+ )
182
+ model_response.usage = usage
183
+ return model_response
184
+
185
+ def embedding():
186
+ # logic for parsing in - calling - parsing out model embedding calls
187
+ pass
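
A short sketch of the prompt shape the Anthropic completion endpoint expects, built from the HUMAN_PROMPT / AI_PROMPT constants defined above (the exact output of prompt_factory may differ; this only shows the general Human/Assistant pattern):

HUMAN_PROMPT = "\n\nHuman: "
AI_PROMPT = "\n\nAssistant: "

messages = [{"role": "user", "content": "What is the capital of France?"}]

prompt = ""
for message in messages:
    if message["role"] == "user":
        prompt += f"{HUMAN_PROMPT}{message['content']}"
    else:
        prompt += f"{AI_PROMPT}{message['content']}"
prompt += AI_PROMPT  # the prompt must end with an open Assistant turn

# "\n\nHuman: What is the capital of France?\n\nAssistant: "
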
litellm/llms/azure.py ADDED
@@ -0,0 +1,414 @@
1
+ from typing import Optional, Union, Any
2
+ import types, requests
3
+ from .base import BaseLLM
4
+ from litellm.utils import ModelResponse, Choices, Message, CustomStreamWrapper, convert_to_model_response_object
5
+ from typing import Callable, Optional
6
+ from litellm import OpenAIConfig
7
+ import litellm, json
8
+ import httpx
9
+ from openai import AzureOpenAI, AsyncAzureOpenAI
10
+
11
+ class AzureOpenAIError(Exception):
12
+ def __init__(self, status_code, message, request: Optional[httpx.Request]=None, response: Optional[httpx.Response]=None):
13
+ self.status_code = status_code
14
+ self.message = message
15
+ if request:
16
+ self.request = request
17
+ else:
18
+ self.request = httpx.Request(method="POST", url="https://api.openai.com/v1")
19
+ if response:
20
+ self.response = response
21
+ else:
22
+ self.response = httpx.Response(status_code=status_code, request=self.request)
23
+ super().__init__(
24
+ self.message
25
+ ) # Call the base class constructor with the parameters it needs
26
+
27
+ class AzureOpenAIConfig(OpenAIConfig):
28
+ """
29
+ Reference: https://platform.openai.com/docs/api-reference/chat/create
30
+
31
+ The class `AzureOpenAIConfig` provides configuration for OpenAI's Chat API interface when used with Azure. It inherits from `OpenAIConfig`. Below are the parameters:
32
+
33
+ - `frequency_penalty` (number or null): Defaults to 0. Allows a value between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, thereby minimizing repetition.
34
+
35
+ - `function_call` (string or object): This optional parameter controls how the model calls functions.
36
+
37
+ - `functions` (array): An optional parameter. It is a list of functions for which the model may generate JSON inputs.
38
+
39
+ - `logit_bias` (map): This optional parameter modifies the likelihood of specified tokens appearing in the completion.
40
+
41
+ - `max_tokens` (integer or null): This optional parameter helps to set the maximum number of tokens to generate in the chat completion.
42
+
43
+ - `n` (integer or null): This optional parameter helps to set how many chat completion choices to generate for each input message.
44
+
45
+ - `presence_penalty` (number or null): Defaults to 0. It penalizes new tokens based on if they appear in the text so far, hence increasing the model's likelihood to talk about new topics.
46
+
47
+ - `stop` (string / array / null): Specifies up to 4 sequences where the API will stop generating further tokens.
48
+
49
+ - `temperature` (number or null): Defines the sampling temperature to use, varying between 0 and 2.
50
+
51
+ - `top_p` (number or null): An alternative to sampling with temperature, used for nucleus sampling.
52
+ """
53
+
54
+ def __init__(self,
55
+ frequency_penalty: Optional[int] = None,
56
+ function_call: Optional[Union[str, dict]]= None,
57
+ functions: Optional[list]= None,
58
+ logit_bias: Optional[dict]= None,
59
+ max_tokens: Optional[int]= None,
60
+ n: Optional[int]= None,
61
+ presence_penalty: Optional[int]= None,
62
+ stop: Optional[Union[str,list]]=None,
63
+ temperature: Optional[int]= None,
64
+ top_p: Optional[int]= None) -> None:
65
+ super().__init__(frequency_penalty,
66
+ function_call,
67
+ functions,
68
+ logit_bias,
69
+ max_tokens,
70
+ n,
71
+ presence_penalty,
72
+ stop,
73
+ temperature,
74
+ top_p)
75
+
76
+ class AzureChatCompletion(BaseLLM):
77
+
78
+ def __init__(self) -> None:
79
+ super().__init__()
80
+
81
+ def validate_environment(self, api_key, azure_ad_token):
82
+ headers = {
83
+ "content-type": "application/json",
84
+ }
85
+ if api_key is not None:
86
+ headers["api-key"] = api_key
87
+ elif azure_ad_token is not None:
88
+ headers["Authorization"] = f"Bearer {azure_ad_token}"
89
+ return headers
90
+
91
+ def completion(self,
92
+ model: str,
93
+ messages: list,
94
+ model_response: ModelResponse,
95
+ api_key: str,
96
+ api_base: str,
97
+ api_version: str,
98
+ api_type: str,
99
+ azure_ad_token: str,
100
+ print_verbose: Callable,
101
+ timeout,
102
+ logging_obj,
103
+ optional_params,
104
+ litellm_params,
105
+ logger_fn,
106
+ acompletion: bool = False,
107
+ headers: Optional[dict]=None,
108
+ client = None,
109
+ ):
110
+ super().completion()
111
+ exception_mapping_worked = False
112
+ try:
113
+
114
+ if model is None or messages is None:
115
+ raise AzureOpenAIError(status_code=422, message=f"Missing model or messages")
116
+
117
+ max_retries = optional_params.pop("max_retries", 2)
118
+
119
+ ### CHECK IF CLOUDFLARE AI GATEWAY ###
120
+ ### if so - set the model as part of the base url
121
+ if "gateway.ai.cloudflare.com" in api_base:
122
+ ## build base url - assume api base includes resource name
123
+ if client is None:
124
+ if not api_base.endswith("/"):
125
+ api_base += "/"
126
+ api_base += f"{model}"
127
+
128
+ azure_client_params = {
129
+ "api_version": api_version,
130
+ "base_url": f"{api_base}",
131
+ "http_client": litellm.client_session,
132
+ "max_retries": max_retries,
133
+ "timeout": timeout
134
+ }
135
+ if api_key is not None:
136
+ azure_client_params["api_key"] = api_key
137
+ elif azure_ad_token is not None:
138
+ azure_client_params["azure_ad_token"] = azure_ad_token
139
+
140
+ if acompletion is True:
141
+ client = AsyncAzureOpenAI(**azure_client_params)
142
+ else:
143
+ client = AzureOpenAI(**azure_client_params)
144
+
145
+ data = {
146
+ "model": None,
147
+ "messages": messages,
148
+ **optional_params
149
+ }
150
+ else:
151
+ data = {
152
+ "model": model, # type: ignore
153
+ "messages": messages,
154
+ **optional_params
155
+ }
156
+ ## LOGGING
157
+ logging_obj.pre_call(
158
+ input=messages,
159
+ api_key=api_key,
160
+ additional_args={
161
+ "headers": {
162
+ "api_key": api_key,
163
+ "azure_ad_token": azure_ad_token
164
+ },
165
+ "api_version": api_version,
166
+ "api_base": api_base,
167
+ "complete_input_dict": data,
168
+ },
169
+ )
170
+
171
+ if acompletion is True:
172
+ if optional_params.get("stream", False):
173
+ return self.async_streaming(logging_obj=logging_obj, api_base=api_base, data=data, model=model, api_key=api_key, api_version=api_version, azure_ad_token=azure_ad_token, timeout=timeout, client=client)
174
+ else:
175
+ return self.acompletion(api_base=api_base, data=data, model_response=model_response, api_key=api_key, api_version=api_version, model=model, azure_ad_token=azure_ad_token, timeout=timeout, client=client)
176
+ elif "stream" in optional_params and optional_params["stream"] == True:
177
+ return self.streaming(logging_obj=logging_obj, api_base=api_base, data=data, model=model, api_key=api_key, api_version=api_version, azure_ad_token=azure_ad_token, timeout=timeout, client=client)
178
+ else:
179
+ if not isinstance(max_retries, int):
180
+ raise AzureOpenAIError(status_code=422, message="max retries must be an int")
181
+ # init AzureOpenAI Client
182
+ azure_client_params = {
183
+ "api_version": api_version,
184
+ "azure_endpoint": api_base,
185
+ "azure_deployment": model,
186
+ "http_client": litellm.client_session,
187
+ "max_retries": max_retries,
188
+ "timeout": timeout
189
+ }
190
+ if api_key is not None:
191
+ azure_client_params["api_key"] = api_key
192
+ elif azure_ad_token is not None:
193
+ azure_client_params["azure_ad_token"] = azure_ad_token
194
+ if client is None:
195
+ azure_client = AzureOpenAI(**azure_client_params)
196
+ else:
197
+ azure_client = client
198
+ response = azure_client.chat.completions.create(**data) # type: ignore
199
+ response.model = "azure/" + str(response.model)
200
+ return convert_to_model_response_object(response_object=json.loads(response.model_dump_json()), model_response_object=model_response)
201
+ except AzureOpenAIError as e:
202
+ exception_mapping_worked = True
203
+ raise e
204
+ except Exception as e:
205
+ raise e
206
+
207
+ async def acompletion(self,
208
+ api_key: str,
209
+ api_version: str,
210
+ model: str,
211
+ api_base: str,
212
+ data: dict,
213
+ timeout: Any,
214
+ model_response: ModelResponse,
215
+ azure_ad_token: Optional[str]=None,
216
+ client = None, # this is the AsyncAzureOpenAI
217
+ ):
218
+ response = None
219
+ try:
220
+ max_retries = data.pop("max_retries", 2)
221
+ if not isinstance(max_retries, int):
222
+ raise AzureOpenAIError(status_code=422, message="max retries must be an int")
223
+ # init AzureOpenAI Client
224
+ azure_client_params = {
225
+ "api_version": api_version,
226
+ "azure_endpoint": api_base,
227
+ "azure_deployment": model,
228
+ "http_client": litellm.client_session,
229
+ "max_retries": max_retries,
230
+ "timeout": timeout
231
+ }
232
+ if api_key is not None:
233
+ azure_client_params["api_key"] = api_key
234
+ elif azure_ad_token is not None:
235
+ azure_client_params["azure_ad_token"] = azure_ad_token
236
+ if client is None:
237
+ azure_client = AsyncAzureOpenAI(**azure_client_params)
238
+ else:
239
+ azure_client = client
240
+ response = await azure_client.chat.completions.create(**data)
241
+ return convert_to_model_response_object(response_object=json.loads(response.model_dump_json()), model_response_object=model_response)
242
+ except AzureOpenAIError as e:
243
+ exception_mapping_worked = True
244
+ raise e
245
+ except Exception as e:
246
+ raise e
247
+
248
+ def streaming(self,
249
+ logging_obj,
250
+ api_base: str,
251
+ api_key: str,
252
+ api_version: str,
253
+ data: dict,
254
+ model: str,
255
+ timeout: Any,
256
+ azure_ad_token: Optional[str]=None,
257
+ client=None,
258
+ ):
259
+ max_retries = data.pop("max_retries", 2)
260
+ if not isinstance(max_retries, int):
261
+ raise AzureOpenAIError(status_code=422, message="max retries must be an int")
262
+ # init AzureOpenAI Client
263
+ azure_client_params = {
264
+ "api_version": api_version,
265
+ "azure_endpoint": api_base,
266
+ "azure_deployment": model,
267
+ "http_client": litellm.client_session,
268
+ "max_retries": max_retries,
269
+ "timeout": timeout
270
+ }
271
+ if api_key is not None:
272
+ azure_client_params["api_key"] = api_key
273
+ elif azure_ad_token is not None:
274
+ azure_client_params["azure_ad_token"] = azure_ad_token
275
+ if client is None:
276
+ azure_client = AzureOpenAI(**azure_client_params)
277
+ else:
278
+ azure_client = client
279
+ response = azure_client.chat.completions.create(**data)
280
+ streamwrapper = CustomStreamWrapper(completion_stream=response, model=model, custom_llm_provider="azure",logging_obj=logging_obj)
281
+ return streamwrapper
282
+
283
+ async def async_streaming(self,
284
+ logging_obj,
285
+ api_base: str,
286
+ api_key: str,
287
+ api_version: str,
288
+ data: dict,
289
+ model: str,
290
+ timeout: Any,
291
+ azure_ad_token: Optional[str]=None,
292
+ client = None,
293
+ ):
294
+ # init AzureOpenAI Client
295
+ azure_client_params = {
296
+ "api_version": api_version,
297
+ "azure_endpoint": api_base,
298
+ "azure_deployment": model,
299
+ "http_client": litellm.client_session,
300
+ "max_retries": data.pop("max_retries", 2),
301
+ "timeout": timeout
302
+ }
303
+ if api_key is not None:
304
+ azure_client_params["api_key"] = api_key
305
+ elif azure_ad_token is not None:
306
+ azure_client_params["azure_ad_token"] = azure_ad_token
307
+ if client is None:
308
+ azure_client = AsyncAzureOpenAI(**azure_client_params)
309
+ else:
310
+ azure_client = client
311
+ response = await azure_client.chat.completions.create(**data)
312
+ streamwrapper = CustomStreamWrapper(completion_stream=response, model=model, custom_llm_provider="azure",logging_obj=logging_obj)
313
+ async for transformed_chunk in streamwrapper:
314
+ yield transformed_chunk
315
+
316
+ async def aembedding(
317
+ self,
318
+ data: dict,
319
+ model_response: ModelResponse,
320
+ azure_client_params: dict,
321
+ client=None,
322
+ ):
323
+ response = None
324
+ try:
325
+ if client is None:
326
+ openai_aclient = AsyncAzureOpenAI(**azure_client_params)
327
+ else:
328
+ openai_aclient = client
329
+ response = await openai_aclient.embeddings.create(**data)
330
+ return convert_to_model_response_object(response_object=json.loads(response.model_dump_json()), model_response_object=model_response, response_type="embedding")
331
+ except Exception as e:
332
+ raise e
333
+
334
+ def embedding(self,
335
+ model: str,
336
+ input: list,
337
+ api_key: str,
338
+ api_base: str,
339
+ api_version: str,
340
+ timeout: float,
341
+ logging_obj=None,
342
+ model_response=None,
343
+ optional_params=None,
344
+ azure_ad_token: Optional[str]=None,
345
+ client = None,
346
+ aembedding=None,
347
+ ):
348
+ super().embedding()
349
+ exception_mapping_worked = False
350
+ if self._client_session is None:
351
+ self._client_session = self.create_client_session()
352
+ try:
353
+ data = {
354
+ "model": model,
355
+ "input": input,
356
+ **optional_params
357
+ }
358
+ max_retries = data.pop("max_retries", 2)
359
+ if not isinstance(max_retries, int):
360
+ raise AzureOpenAIError(status_code=422, message="max retries must be an int")
361
+
362
+ # init AzureOpenAI Client
363
+ azure_client_params = {
364
+ "api_version": api_version,
365
+ "azure_endpoint": api_base,
366
+ "azure_deployment": model,
367
+ "http_client": litellm.client_session,
368
+ "max_retries": max_retries,
369
+ "timeout": timeout
370
+ }
371
+ if api_key is not None:
372
+ azure_client_params["api_key"] = api_key
373
+ elif azure_ad_token is not None:
374
+ azure_client_params["azure_ad_token"] = azure_ad_token
375
+ if aembedding == True:
376
+ response = self.aembedding(data=data, model_response=model_response, azure_client_params=azure_client_params)
377
+ return response
378
+ if client is None:
379
+ azure_client = AzureOpenAI(**azure_client_params) # type: ignore
380
+ else:
381
+ azure_client = client
382
+ ## LOGGING
383
+ logging_obj.pre_call(
384
+ input=input,
385
+ api_key=api_key,
386
+ additional_args={
387
+ "complete_input_dict": data,
388
+ "headers": {
389
+ "api_key": api_key,
390
+ "azure_ad_token": azure_ad_token
391
+ }
392
+ },
393
+ )
394
+ ## COMPLETION CALL
395
+ response = azure_client.embeddings.create(**data) # type: ignore
396
+ ## LOGGING
397
+ logging_obj.post_call(
398
+ input=input,
399
+ api_key=api_key,
400
+ additional_args={"complete_input_dict": data, "api_base": api_base},
401
+ original_response=response,
402
+ )
403
+
404
+
405
+ return convert_to_model_response_object(response_object=json.loads(response.model_dump_json()), model_response_object=model_response, response_type="embedding") # type: ignore
406
+ except AzureOpenAIError as e:
407
+ exception_mapping_worked = True
408
+ raise e
409
+ except Exception as e:
410
+ if exception_mapping_worked:
411
+ raise e
412
+ else:
413
+ import traceback
414
+ raise AzureOpenAIError(status_code=500, message=traceback.format_exc())
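
A minimal calling sketch for the Azure handler above, assuming the usual litellm convention of prefixing the deployment name with "azure/" and passing api_base / api_version / api_key through completion() (endpoint, deployment and key are placeholders):

import litellm

response = litellm.completion(
    model="azure/my-gpt-35-deployment",                # placeholder deployment name
    api_base="https://my-resource.openai.azure.com/",  # placeholder Azure endpoint
    api_version="2023-07-01-preview",                  # assumption: any supported API version
    api_key="my-azure-key",                            # placeholder key
    messages=[{"role": "user", "content": "ping"}],
)
print(response.model)  # the handler rewrites this to "azure/<model returned by Azure>"
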
litellm/llms/base.py ADDED
@@ -0,0 +1,47 @@
1
+ ## This is a template base class to be used for adding new LLM providers via API calls
2
+ import litellm
3
+ import httpx, certifi, ssl
4
+ from typing import Optional
5
+
6
+ class BaseLLM:
7
+ _client_session: Optional[httpx.Client] = None
8
+ def create_client_session(self):
9
+ if litellm.client_session:
10
+ _client_session = litellm.client_session
11
+ else:
12
+ _client_session = httpx.Client()
13
+
14
+ return _client_session
15
+
16
+ def create_aclient_session(self):
17
+ if litellm.aclient_session:
18
+ _aclient_session = litellm.aclient_session
19
+ else:
20
+ _aclient_session = httpx.AsyncClient()
21
+
22
+ return _aclient_session
23
+
24
+ def __exit__(self):
25
+ if hasattr(self, '_client_session'):
26
+ self._client_session.close()
27
+
28
+ async def __aexit__(self, exc_type, exc_val, exc_tb):
29
+ if hasattr(self, '_aclient_session'):
30
+ await self._aclient_session.aclose()
31
+
32
+ def validate_environment(self): # set up the environment required to run the model
33
+ pass
34
+
35
+ def completion(
36
+ self,
37
+ *args,
38
+ **kwargs
39
+ ): # logic for parsing in - calling - parsing out model completion calls
40
+ pass
41
+
42
+ def embedding(
43
+ self,
44
+ *args,
45
+ **kwargs
46
+ ): # logic for parsing in - calling - parsing out model embedding calls
47
+ pass
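
Since base.py is explicitly a template for new providers, here is a hedged sketch of what a subclass could look like; the provider name, endpoint and payload shape are made up for illustration:

from litellm.llms.base import BaseLLM

class MyProviderLLM(BaseLLM):
    """Hypothetical provider handler built on the BaseLLM template."""

    def validate_environment(self, api_key=None):
        if api_key is None:
            raise ValueError("missing MyProvider API key")
        return {"Authorization": f"Bearer {api_key}"}

    def completion(self, model, messages, api_key=None, **kwargs):
        headers = self.validate_environment(api_key=api_key)
        client = self.create_client_session()  # reuses litellm.client_session when set
        # placeholder endpoint + payload; a real handler would build the provider's schema
        resp = client.post(
            "https://api.example-provider.com/v1/complete",
            headers=headers,
            json={"model": model, "messages": messages},
        )
        return resp.json()
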
litellm/llms/baseten.py ADDED
@@ -0,0 +1,149 @@
1
+ import os
2
+ import json
3
+ from enum import Enum
4
+ import requests
5
+ import time
6
+ from typing import Callable
7
+ from litellm.utils import ModelResponse, Usage
8
+
9
+ class BasetenError(Exception):
10
+ def __init__(self, status_code, message):
11
+ self.status_code = status_code
12
+ self.message = message
13
+ super().__init__(
14
+ self.message
15
+ ) # Call the base class constructor with the parameters it needs
16
+
17
+ def validate_environment(api_key):
18
+ headers = {
19
+ "accept": "application/json",
20
+ "content-type": "application/json",
21
+ }
22
+ if api_key:
23
+ headers["Authorization"] = f"Api-Key {api_key}"
24
+ return headers
25
+
26
+ def completion(
27
+ model: str,
28
+ messages: list,
29
+ model_response: ModelResponse,
30
+ print_verbose: Callable,
31
+ encoding,
32
+ api_key,
33
+ logging_obj,
34
+ optional_params=None,
35
+ litellm_params=None,
36
+ logger_fn=None,
37
+ ):
38
+ headers = validate_environment(api_key)
39
+ completion_url_fragment_1 = "https://app.baseten.co/models/"
40
+ completion_url_fragment_2 = "/predict"
41
+ model = model
42
+ prompt = ""
43
+ for message in messages:
44
+ if "role" in message:
45
+ if message["role"] == "user":
46
+ prompt += f"{message['content']}"
47
+ else:
48
+ prompt += f"{message['content']}"
49
+ else:
50
+ prompt += f"{message['content']}"
51
+ data = {
52
+ "inputs": prompt,
53
+ "prompt": prompt,
54
+ "parameters": optional_params,
55
+ "stream": True if "stream" in optional_params and optional_params["stream"] == True else False
56
+ }
57
+
58
+ ## LOGGING
59
+ logging_obj.pre_call(
60
+ input=prompt,
61
+ api_key=api_key,
62
+ additional_args={"complete_input_dict": data},
63
+ )
64
+ ## COMPLETION CALL
65
+ response = requests.post(
66
+ completion_url_fragment_1 + model + completion_url_fragment_2,
67
+ headers=headers,
68
+ data=json.dumps(data),
69
+ stream=True if "stream" in optional_params and optional_params["stream"] == True else False
70
+ )
71
+ if 'text/event-stream' in response.headers['Content-Type'] or ("stream" in optional_params and optional_params["stream"] == True):
72
+ return response.iter_lines()
73
+ else:
74
+ ## LOGGING
75
+ logging_obj.post_call(
76
+ input=prompt,
77
+ api_key=api_key,
78
+ original_response=response.text,
79
+ additional_args={"complete_input_dict": data},
80
+ )
81
+ print_verbose(f"raw model_response: {response.text}")
82
+ ## RESPONSE OBJECT
83
+ completion_response = response.json()
84
+ if "error" in completion_response:
85
+ raise BasetenError(
86
+ message=completion_response["error"],
87
+ status_code=response.status_code,
88
+ )
89
+ else:
90
+ if "model_output" in completion_response:
91
+ if (
92
+ isinstance(completion_response["model_output"], dict)
93
+ and "data" in completion_response["model_output"]
94
+ and isinstance(
95
+ completion_response["model_output"]["data"], list
96
+ )
97
+ ):
98
+ model_response["choices"][0]["message"][
99
+ "content"
100
+ ] = completion_response["model_output"]["data"][0]
101
+ elif isinstance(completion_response["model_output"], str):
102
+ model_response["choices"][0]["message"][
103
+ "content"
104
+ ] = completion_response["model_output"]
105
+ elif "completion" in completion_response and isinstance(
106
+ completion_response["completion"], str
107
+ ):
108
+ model_response["choices"][0]["message"][
109
+ "content"
110
+ ] = completion_response["completion"]
111
+ elif isinstance(completion_response, list) and len(completion_response) > 0:
112
+ if "generated_text" not in completion_response:
113
+ raise BasetenError(
114
+ message=f"Unable to parse response. Original response: {response.text}",
115
+ status_code=response.status_code
116
+ )
117
+ model_response["choices"][0]["message"]["content"] = completion_response[0]["generated_text"]
118
+ ## GETTING LOGPROBS
119
+ if "details" in completion_response[0] and "tokens" in completion_response[0]["details"]:
120
+ model_response.choices[0].finish_reason = completion_response[0]["details"]["finish_reason"]
121
+ sum_logprob = 0
122
+ for token in completion_response[0]["details"]["tokens"]:
123
+ sum_logprob += token["logprob"]
124
+ model_response["choices"][0]["message"]._logprobs = sum_logprob
125
+ else:
126
+ raise BasetenError(
127
+ message=f"Unable to parse response. Original response: {response.text}",
128
+ status_code=response.status_code
129
+ )
130
+
131
+ ## CALCULATING USAGE - baseten charges on time, not tokens - have some mapping of cost here.
132
+ prompt_tokens = len(encoding.encode(prompt))
133
+ completion_tokens = len(
134
+ encoding.encode(model_response["choices"][0]["message"]["content"])
135
+ )
136
+
137
+ model_response["created"] = int(time.time())
138
+ model_response["model"] = model
139
+ usage = Usage(
140
+ prompt_tokens=prompt_tokens,
141
+ completion_tokens=completion_tokens,
142
+ total_tokens=prompt_tokens + completion_tokens
143
+ )
144
+ model_response.usage = usage
145
+ return model_response
146
+
147
+ def embedding():
148
+ # logic for parsing in - calling - parsing out model embedding calls
149
+ pass
litellm/llms/bedrock.py ADDED
@@ -0,0 +1,627 @@
1
+ import json, copy, types
2
+ import os
3
+ from enum import Enum
4
+ import time
5
+ from typing import Callable, Optional
6
+ import litellm
7
+ from litellm.utils import ModelResponse, get_secret, Usage
8
+ from .prompt_templates.factory import prompt_factory, custom_prompt
9
+ import httpx
10
+
11
+ class BedrockError(Exception):
12
+ def __init__(self, status_code, message):
13
+ self.status_code = status_code
14
+ self.message = message
15
+ self.request = httpx.Request(method="POST", url="https://us-west-2.console.aws.amazon.com/bedrock")
16
+ self.response = httpx.Response(status_code=status_code, request=self.request)
17
+ super().__init__(
18
+ self.message
19
+ ) # Call the base class constructor with the parameters it needs
20
+
21
+ class AmazonTitanConfig():
22
+ """
23
+ Reference: https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=titan-text-express-v1
24
+
25
+ Supported Params for the Amazon Titan models:
26
+
27
+ - `maxTokenCount` (integer) max tokens,
28
+ - `stopSequences` (string[]) list of stop sequence strings
29
+ - `temperature` (float) temperature for model,
30
+ - `topP` (int) top p for model
31
+ """
32
+ maxTokenCount: Optional[int]=None
33
+ stopSequences: Optional[list]=None
34
+ temperature: Optional[float]=None
35
+ topP: Optional[int]=None
36
+
37
+ def __init__(self,
38
+ maxTokenCount: Optional[int]=None,
39
+ stopSequences: Optional[list]=None,
40
+ temperature: Optional[float]=None,
41
+ topP: Optional[int]=None) -> None:
42
+ locals_ = locals()
43
+ for key, value in locals_.items():
44
+ if key != 'self' and value is not None:
45
+ setattr(self.__class__, key, value)
46
+
47
+ @classmethod
48
+ def get_config(cls):
49
+ return {k: v for k, v in cls.__dict__.items()
50
+ if not k.startswith('__')
51
+ and not isinstance(v, (types.FunctionType, types.BuiltinFunctionType, classmethod, staticmethod))
52
+ and v is not None}
53
+
54
+ class AmazonAnthropicConfig():
55
+ """
56
+ Reference: https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=claude
57
+
58
+ Supported Params for the Amazon / Anthropic models:
59
+
60
+ - `max_tokens_to_sample` (integer) max tokens,
61
+ - `temperature` (float) model temperature,
62
+ - `top_k` (integer) top k,
63
+ - `top_p` (integer) top p,
64
+ - `stop_sequences` (string[]) list of stop sequences - e.g. ["\\n\\nHuman:"],
65
+ - `anthropic_version` (string) version of anthropic for bedrock - e.g. "bedrock-2023-05-31"
66
+ """
67
+ max_tokens_to_sample: Optional[int]=litellm.max_tokens
68
+ stop_sequences: Optional[list]=None
69
+ temperature: Optional[float]=None
70
+ top_k: Optional[int]=None
71
+ top_p: Optional[int]=None
72
+ anthropic_version: Optional[str]=None
73
+
74
+ def __init__(self,
75
+ max_tokens_to_sample: Optional[int]=None,
76
+ stop_sequences: Optional[list]=None,
77
+ temperature: Optional[float]=None,
78
+ top_k: Optional[int]=None,
79
+ top_p: Optional[int]=None,
80
+ anthropic_version: Optional[str]=None) -> None:
81
+ locals_ = locals()
82
+ for key, value in locals_.items():
83
+ if key != 'self' and value is not None:
84
+ setattr(self.__class__, key, value)
85
+
86
+ @classmethod
87
+ def get_config(cls):
88
+ return {k: v for k, v in cls.__dict__.items()
89
+ if not k.startswith('__')
90
+ and not isinstance(v, (types.FunctionType, types.BuiltinFunctionType, classmethod, staticmethod))
91
+ and v is not None}
92
+
93
+ class AmazonCohereConfig():
94
+ """
95
+ Reference: https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=command
96
+
97
+ Supported Params for the Amazon / Cohere models:
98
+
99
+ - `max_tokens` (integer) max tokens,
100
+ - `temperature` (float) model temperature,
101
+ - `return_likelihood` (string) n/a
102
+ """
103
+ max_tokens: Optional[int]=None
104
+ temperature: Optional[float]=None
105
+ return_likelihood: Optional[str]=None
106
+
107
+ def __init__(self,
108
+ max_tokens: Optional[int]=None,
109
+ temperature: Optional[float]=None,
110
+ return_likelihood: Optional[str]=None) -> None:
111
+ locals_ = locals()
112
+ for key, value in locals_.items():
113
+ if key != 'self' and value is not None:
114
+ setattr(self.__class__, key, value)
115
+
116
+ @classmethod
117
+ def get_config(cls):
118
+ return {k: v for k, v in cls.__dict__.items()
119
+ if not k.startswith('__')
120
+ and not isinstance(v, (types.FunctionType, types.BuiltinFunctionType, classmethod, staticmethod))
121
+ and v is not None}
122
+
123
+ class AmazonAI21Config():
124
+ """
125
+ Reference: https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=j2-ultra
126
+
127
+ Supported Params for the Amazon / AI21 models:
128
+
129
+ - `maxTokens` (int32): The maximum number of tokens to generate per result. Optional, default is 16. If no `stopSequences` are given, generation stops after producing `maxTokens`.
130
+
131
+ - `temperature` (float): Modifies the distribution from which tokens are sampled. Optional, default is 0.7. A value of 0 essentially disables sampling and results in greedy decoding.
132
+
133
+ - `topP` (float): Used for sampling tokens from the corresponding top percentile of probability mass. Optional, default is 1. For instance, a value of 0.9 considers only tokens comprising the top 90% probability mass.
134
+
135
+ - `stopSequences` (array of strings): Stops decoding if any of the input strings is generated. Optional.
136
+
137
+ - `frequencyPenalty` (object): Placeholder for frequency penalty object.
138
+
139
+ - `presencePenalty` (object): Placeholder for presence penalty object.
140
+
141
+ - `countPenalty` (object): Placeholder for count penalty object.
142
+ """
143
+ maxTokens: Optional[int]=None
144
+ temperature: Optional[float]=None
145
+ topP: Optional[float]=None
146
+ stopSequences: Optional[list]=None
147
+ frequencePenalty: Optional[dict]=None
148
+ presencePenalty: Optional[dict]=None
149
+ countPenalty: Optional[dict]=None
150
+
151
+ def __init__(self,
152
+ maxTokens: Optional[int]=None,
153
+ temperature: Optional[float]=None,
154
+ topP: Optional[float]=None,
155
+ stopSequences: Optional[list]=None,
156
+ frequencePenalty: Optional[dict]=None,
157
+ presencePenalty: Optional[dict]=None,
158
+ countPenalty: Optional[dict]=None) -> None:
159
+ locals_ = locals()
160
+ for key, value in locals_.items():
161
+ if key != 'self' and value is not None:
162
+ setattr(self.__class__, key, value)
163
+
164
+ @classmethod
165
+ def get_config(cls):
166
+ return {k: v for k, v in cls.__dict__.items()
167
+ if not k.startswith('__')
168
+ and not isinstance(v, (types.FunctionType, types.BuiltinFunctionType, classmethod, staticmethod))
169
+ and v is not None}
170
+
171
+ class AnthropicConstants(Enum):
172
+ HUMAN_PROMPT = "\n\nHuman: "
173
+ AI_PROMPT = "\n\nAssistant: "
174
+
175
+ class AmazonLlamaConfig():
176
+ """
177
+ Reference: https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=meta.llama2-13b-chat-v1
178
+
179
+ Supported Params for the Amazon / Meta Llama models:
180
+
181
+ - `max_gen_len` (integer) max tokens,
182
+ - `temperature` (float) temperature for model,
183
+ - `top_p` (float) top p for model
184
+ """
185
+ max_gen_len: Optional[int]=None
186
+ temperature: Optional[float]=None
187
+ topP: Optional[float]=None
188
+
189
+ def __init__(self,
190
+ maxTokenCount: Optional[int]=None,
191
+ temperature: Optional[float]=None,
192
+ topP: Optional[int]=None) -> None:
193
+ locals_ = locals()
194
+ for key, value in locals_.items():
195
+ if key != 'self' and value is not None:
196
+ setattr(self.__class__, key, value)
197
+
198
+ @classmethod
199
+ def get_config(cls):
200
+ return {k: v for k, v in cls.__dict__.items()
201
+ if not k.startswith('__')
202
+ and not isinstance(v, (types.FunctionType, types.BuiltinFunctionType, classmethod, staticmethod))
203
+ and v is not None}
204
+
205
+
206
+ def init_bedrock_client(
207
+ region_name = None,
208
+ aws_access_key_id = None,
209
+ aws_secret_access_key = None,
210
+ aws_region_name=None,
211
+ aws_bedrock_runtime_endpoint=None,
212
+ ):
213
+
214
+ # check for custom AWS_REGION_NAME and use it if not passed to init_bedrock_client
215
+ litellm_aws_region_name = get_secret("AWS_REGION_NAME")
216
+ standard_aws_region_name = get_secret("AWS_REGION")
217
+ if region_name:
218
+ pass
219
+ elif aws_region_name:
220
+ region_name = aws_region_name
221
+ elif litellm_aws_region_name:
222
+ region_name = litellm_aws_region_name
223
+ elif standard_aws_region_name:
224
+ region_name = standard_aws_region_name
225
+ else:
226
+ raise BedrockError(message="AWS region not set: set AWS_REGION_NAME or AWS_REGION env variable or in .env file", status_code=401)
227
+
228
+ # check for custom AWS_BEDROCK_RUNTIME_ENDPOINT and use it if not passed to init_bedrock_client
229
+ env_aws_bedrock_runtime_endpoint = get_secret("AWS_BEDROCK_RUNTIME_ENDPOINT")
230
+ if aws_bedrock_runtime_endpoint:
231
+ endpoint_url = aws_bedrock_runtime_endpoint
232
+ elif env_aws_bedrock_runtime_endpoint:
233
+ endpoint_url = env_aws_bedrock_runtime_endpoint
234
+ else:
235
+ endpoint_url = f'https://bedrock-runtime.{region_name}.amazonaws.com'
236
+
237
+ import boto3
238
+ if aws_access_key_id != None:
239
+ # uses auth params passed to completion
240
+ # aws_access_key_id is not None, assume user is trying to auth using litellm.completion
241
+
242
+ client = boto3.client(
243
+ service_name="bedrock-runtime",
244
+ aws_access_key_id=aws_access_key_id,
245
+ aws_secret_access_key=aws_secret_access_key,
246
+ region_name=region_name,
247
+ endpoint_url=endpoint_url,
248
+ )
249
+ else:
250
+ # aws_access_key_id is None, assume user is trying to auth using env variables
251
+ # boto3 automatically reads env variables
252
+
253
+ client = boto3.client(
254
+ service_name="bedrock-runtime",
255
+ region_name=region_name,
256
+ endpoint_url=endpoint_url,
257
+ )
258
+
259
+ return client
260
+
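A hedged sketch of how init_bedrock_client is typically driven (assumes boto3 is installed and the placeholder credentials below are replaced with real values). Region resolution follows the order in the code: explicit region_name, then aws_region_name, then the AWS_REGION_NAME env var, then AWS_REGION.

    import os

    # let boto3 pick up credentials from the environment / shared config
    os.environ["AWS_REGION_NAME"] = "us-west-2"
    client = init_bedrock_client()

    # or pass credentials and a custom runtime endpoint explicitly
    client = init_bedrock_client(
        aws_access_key_id="AKIA...",        # placeholder
        aws_secret_access_key="...",        # placeholder
        aws_bedrock_runtime_endpoint="https://bedrock-runtime.us-west-2.amazonaws.com",
    )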
261
+
262
+ def convert_messages_to_prompt(model, messages, provider, custom_prompt_dict):
263
+ # handle anthropic prompts using anthropic constants
264
+ if provider == "anthropic":
265
+ if model in custom_prompt_dict:
266
+ # check if the model has a registered custom prompt
267
+ model_prompt_details = custom_prompt_dict[model]
268
+ prompt = custom_prompt(
269
+ role_dict=model_prompt_details["roles"],
270
+ initial_prompt_value=model_prompt_details["initial_prompt_value"],
271
+ final_prompt_value=model_prompt_details["final_prompt_value"],
272
+ messages=messages
273
+ )
274
+ else:
275
+ prompt = prompt_factory(model=model, messages=messages, custom_llm_provider="anthropic")
276
+ else:
277
+ prompt = ""
278
+ for message in messages:
279
+ if "role" in message:
280
+ if message["role"] == "user":
281
+ prompt += (
282
+ f"{message['content']}"
283
+ )
284
+ else:
285
+ prompt += (
286
+ f"{message['content']}"
287
+ )
288
+ else:
289
+ prompt += f"{message['content']}"
290
+ return prompt
291
+
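An illustrative call with hypothetical messages (the model id only matters for its provider prefix): Anthropic models go through prompt_factory, while every other provider simply gets the message contents concatenated in order, with no separators added.

    messages = [
        {"role": "user", "content": "Hi. "},
        {"role": "assistant", "content": "Hello! "},
        {"role": "user", "content": "Tell me a joke."},
    ]
    prompt = convert_messages_to_prompt(
        "amazon.titan-text-express-v1", messages, "amazon", custom_prompt_dict={}
    )
    # prompt == "Hi. Hello! Tell me a joke."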
292
+
293
+ """
294
+ BEDROCK AUTH Keys/Vars
295
+ os.environ['AWS_ACCESS_KEY_ID'] = ""
296
+ os.environ['AWS_SECRET_ACCESS_KEY'] = ""
297
+ """
298
+
299
+
300
+ # set os.environ['AWS_REGION_NAME'] = <your-region_name>
301
+
302
+ def completion(
303
+ model: str,
304
+ messages: list,
305
+ custom_prompt_dict: dict,
306
+ model_response: ModelResponse,
307
+ print_verbose: Callable,
308
+ encoding,
309
+ logging_obj,
310
+ optional_params=None,
311
+ litellm_params=None,
312
+ logger_fn=None,
313
+ ):
314
+ exception_mapping_worked = False
315
+ try:
316
+ # pop aws_secret_access_key, aws_access_key_id, aws_region_name from kwargs, since completion calls fail with them
317
+ aws_secret_access_key = optional_params.pop("aws_secret_access_key", None)
318
+ aws_access_key_id = optional_params.pop("aws_access_key_id", None)
319
+ aws_region_name = optional_params.pop("aws_region_name", None)
320
+
321
+ # use passed in BedrockRuntime.Client if provided, otherwise create a new one
322
+ client = optional_params.pop(
323
+ "aws_bedrock_client",
324
+ # only pass variables that are not None
325
+ init_bedrock_client(
326
+ aws_access_key_id=aws_access_key_id,
327
+ aws_secret_access_key=aws_secret_access_key,
328
+ aws_region_name=aws_region_name,
329
+ ),
330
+ )
331
+
332
+ model = model
333
+ provider = model.split(".")[0]
334
+ prompt = convert_messages_to_prompt(model, messages, provider, custom_prompt_dict)
335
+ inference_params = copy.deepcopy(optional_params)
336
+ stream = inference_params.pop("stream", False)
337
+ if provider == "anthropic":
338
+ ## LOAD CONFIG
339
+ config = litellm.AmazonAnthropicConfig.get_config()
340
+ for k, v in config.items():
341
+ if k not in inference_params: # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in
342
+ inference_params[k] = v
343
+ data = json.dumps({
344
+ "prompt": prompt,
345
+ **inference_params
346
+ })
347
+ elif provider == "ai21":
348
+ ## LOAD CONFIG
349
+ config = litellm.AmazonAI21Config.get_config()
350
+ for k, v in config.items():
351
+ if k not in inference_params: # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in
352
+ inference_params[k] = v
353
+
354
+ data = json.dumps({
355
+ "prompt": prompt,
356
+ **inference_params
357
+ })
358
+ elif provider == "cohere":
359
+ ## LOAD CONFIG
360
+ config = litellm.AmazonCohereConfig.get_config()
361
+ for k, v in config.items():
362
+ if k not in inference_params: # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in
363
+ inference_params[k] = v
364
+ if optional_params.get("stream", False) == True:
365
+ inference_params["stream"] = True # cohere requires stream = True in inference params
366
+ data = json.dumps({
367
+ "prompt": prompt,
368
+ **inference_params
369
+ })
370
+ elif provider == "meta":
371
+ ## LOAD CONFIG
372
+ config = litellm.AmazonLlamaConfig.get_config()
373
+ for k, v in config.items():
374
+ if k not in inference_params: # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in
375
+ inference_params[k] = v
376
+ data = json.dumps({
377
+ "prompt": prompt,
378
+ **inference_params
379
+ })
380
+ elif provider == "amazon": # amazon titan
381
+ ## LOAD CONFIG
382
+ config = litellm.AmazonTitanConfig.get_config()
383
+ for k, v in config.items():
384
+ if k not in inference_params: # completion(top_k=3) > amazon_config(top_k=3) <- allows for dynamic variables to be passed in
385
+ inference_params[k] = v
386
+
387
+ data = json.dumps({
388
+ "inputText": prompt,
389
+ "textGenerationConfig": inference_params,
390
+ })
391
+
392
+ ## COMPLETION CALL
393
+ accept = 'application/json'
394
+ contentType = 'application/json'
395
+ if stream == True:
396
+ if provider == "ai21":
397
+ ## LOGGING
398
+ request_str = f"""
399
+ response = client.invoke_model(
400
+ body={data},
401
+ modelId={model},
402
+ accept=accept,
403
+ contentType=contentType
404
+ )
405
+ """
406
+ logging_obj.pre_call(
407
+ input=prompt,
408
+ api_key="",
409
+ additional_args={"complete_input_dict": data, "request_str": request_str},
410
+ )
411
+
412
+ response = client.invoke_model(
413
+ body=data,
414
+ modelId=model,
415
+ accept=accept,
416
+ contentType=contentType
417
+ )
418
+
419
+ response = response.get('body').read()
420
+ return response
421
+ else:
422
+ ## LOGGING
423
+ request_str = f"""
424
+ response = client.invoke_model_with_response_stream(
425
+ body={data},
426
+ modelId={model},
427
+ accept=accept,
428
+ contentType=contentType
429
+ )
430
+ """
431
+ logging_obj.pre_call(
432
+ input=prompt,
433
+ api_key="",
434
+ additional_args={"complete_input_dict": data, "request_str": request_str},
435
+ )
436
+
437
+ response = client.invoke_model_with_response_stream(
438
+ body=data,
439
+ modelId=model,
440
+ accept=accept,
441
+ contentType=contentType
442
+ )
443
+ response = response.get('body')
444
+ return response
445
+ try:
446
+ ## LOGGING
447
+ request_str = f"""
448
+ response = client.invoke_model(
449
+ body={data},
450
+ modelId={model},
451
+ accept=accept,
452
+ contentType=contentType
453
+ )
454
+ """
455
+ logging_obj.pre_call(
456
+ input=prompt,
457
+ api_key="",
458
+ additional_args={"complete_input_dict": data, "request_str": request_str},
459
+ )
460
+ response = client.invoke_model(
461
+ body=data,
462
+ modelId=model,
463
+ accept=accept,
464
+ contentType=contentType
465
+ )
466
+ except Exception as e:
467
+ raise BedrockError(status_code=500, message=str(e))
468
+
469
+ response_body = json.loads(response.get('body').read())
470
+
471
+ ## LOGGING
472
+ logging_obj.post_call(
473
+ input=prompt,
474
+ api_key="",
475
+ original_response=response_body,
476
+ additional_args={"complete_input_dict": data},
477
+ )
478
+ print_verbose(f"raw model_response: {response}")
479
+ ## RESPONSE OBJECT
480
+ outputText = "default"
481
+ if provider == "ai21":
482
+ outputText = response_body.get('completions')[0].get('data').get('text')
483
+ elif provider == "anthropic":
484
+ outputText = response_body['completion']
485
+ model_response["finish_reason"] = response_body["stop_reason"]
486
+ elif provider == "cohere":
487
+ outputText = response_body["generations"][0]["text"]
488
+ elif provider == "meta":
489
+ outputText = response_body["generation"]
490
+ else: # amazon titan
491
+ outputText = response_body.get('results')[0].get('outputText')
492
+
493
+ response_metadata = response.get("ResponseMetadata", {})
494
+ if response_metadata.get("HTTPStatusCode", 500) >= 400:
495
+ raise BedrockError(
496
+ message=outputText,
497
+ status_code=response_metadata.get("HTTPStatusCode", 500),
498
+ )
499
+ else:
500
+ try:
501
+ if len(outputText) > 0:
502
+ model_response["choices"][0]["message"]["content"] = outputText
503
+ except:
504
+ raise BedrockError(message=json.dumps(outputText), status_code=response_metadata.get("HTTPStatusCode", 500))
505
+
506
+ ## CALCULATING USAGE - Bedrock does not return token counts here, so count tokens locally with the provided encoding
507
+ prompt_tokens = len(
508
+ encoding.encode(prompt)
509
+ )
510
+ completion_tokens = len(
511
+ encoding.encode(model_response["choices"][0]["message"].get("content", ""))
512
+ )
513
+
514
+ model_response["created"] = int(time.time())
515
+ model_response["model"] = model
516
+ usage = Usage(
517
+ prompt_tokens=prompt_tokens,
518
+ completion_tokens=completion_tokens,
519
+ total_tokens = prompt_tokens + completion_tokens
520
+ )
521
+ model_response.usage = usage
522
+ return model_response
523
+ except BedrockError as e:
524
+ exception_mapping_worked = True
525
+ raise e
526
+ except Exception as e:
527
+ if exception_mapping_worked:
528
+ raise e
529
+ else:
530
+ import traceback
531
+ raise BedrockError(status_code=500, message=traceback.format_exc())
532
+
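A hedged end-to-end sketch, assuming the litellm router maps "bedrock/<provider>.<model>" strings to this completion() and that valid AWS credentials are available in the environment; the provider prefix in the model id ("anthropic" here) selects the request-body format built above.

    import os
    import litellm

    os.environ["AWS_ACCESS_KEY_ID"] = "..."      # placeholder
    os.environ["AWS_SECRET_ACCESS_KEY"] = "..."  # placeholder
    os.environ["AWS_REGION_NAME"] = "us-west-2"

    response = litellm.completion(
        model="bedrock/anthropic.claude-instant-v1",
        messages=[{"role": "user", "content": "Hello from Bedrock"}],
    )
    print(response["choices"][0]["message"]["content"])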
533
+ def _embedding_func_single(
534
+ model: str,
535
+ input: str,
536
+ optional_params=None,
537
+ encoding=None,
538
+ ):
539
+ # logic for parsing in - calling - parsing out model embedding calls
540
+ # pop aws_secret_access_key, aws_access_key_id, aws_region_name from kwargs, since completion calls fail with them
541
+ aws_secret_access_key = optional_params.pop("aws_secret_access_key", None)
542
+ aws_access_key_id = optional_params.pop("aws_access_key_id", None)
543
+ aws_region_name = optional_params.pop("aws_region_name", None)
544
+
545
+ # use passed in BedrockRuntime.Client if provided, otherwise create a new one
546
+ client = optional_params.pop(
547
+ "aws_bedrock_client",
548
+ # only pass variables that are not None
549
+ init_bedrock_client(
550
+ aws_access_key_id=aws_access_key_id,
551
+ aws_secret_access_key=aws_secret_access_key,
552
+ aws_region_name=aws_region_name,
553
+ ),
554
+ )
555
+
556
+ input = input.replace(os.linesep, " ")
557
+ body = json.dumps({"inputText": input})
558
+ try:
559
+ response = client.invoke_model(
560
+ body=body,
561
+ modelId=model,
562
+ accept="application/json",
563
+ contentType="application/json",
564
+ )
565
+ response_body = json.loads(response.get("body").read())
566
+ return response_body.get("embedding")
567
+ except Exception as e:
568
+ raise BedrockError(message=f"Embedding Error with model {model}: {e}", status_code=500)
569
+
570
+ def embedding(
571
+ model: str,
572
+ input: list,
573
+ api_key: Optional[str] = None,
574
+ logging_obj=None,
575
+ model_response=None,
576
+ optional_params=None,
577
+ encoding=None,
578
+ ):
579
+
580
+ ## LOGGING
581
+ logging_obj.pre_call(
582
+ input=input,
583
+ api_key=api_key,
584
+ additional_args={"complete_input_dict": {"model": model,
585
+ "texts": input}},
586
+ )
587
+
588
+ ## Embedding Call
589
+ embeddings = [_embedding_func_single(model, i, optional_params) for i in input]
590
+
591
+
592
+ ## Populate OpenAI compliant dictionary
593
+ embedding_response = []
594
+ for idx, embedding in enumerate(embeddings):
595
+ embedding_response.append(
596
+ {
597
+ "object": "embedding",
598
+ "index": idx,
599
+ "embedding": embedding,
600
+ }
601
+ )
602
+ model_response["object"] = "list"
603
+ model_response["data"] = embedding_response
604
+ model_response["model"] = model
605
+ input_tokens = 0
606
+
607
+ input_str = "".join(input)
608
+
609
+ input_tokens+=len(encoding.encode(input_str))
610
+
611
+ usage = Usage(
612
+ prompt_tokens=input_tokens,
613
+ completion_tokens=0,
614
+ total_tokens=input_tokens + 0
615
+ )
616
+ model_response.usage = usage
617
+
618
+ ## LOGGING
619
+ logging_obj.post_call(
620
+ input=input,
621
+ api_key=api_key,
622
+ additional_args={"complete_input_dict": {"model": model,
623
+ "texts": input}},
624
+ original_response=embeddings,
625
+ )
626
+
627
+ return model_response
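A short usage sketch for the embedding path, assuming the router exposes it via litellm.embedding and a Titan embeddings model is enabled on the AWS account: each string in input triggers one invoke_model call through _embedding_func_single, and the results are repackaged in the OpenAI embedding-response shape.

    import litellm

    response = litellm.embedding(
        model="bedrock/amazon.titan-embed-text-v1",
        input=["good morning from litellm", "a second string to embed"],
    )
    print(len(response["data"]))  # 2 embedding objects
    print(response.usage)         # prompt tokens counted locally, completion tokens are 0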
litellm/llms/cohere.py ADDED
@@ -0,0 +1,273 @@
1
+ import os, types
2
+ import json
3
+ from enum import Enum
4
+ import requests
5
+ import time, traceback
6
+ from typing import Callable, Optional
7
+ from litellm.utils import ModelResponse, Choices, Message, Usage
8
+ import litellm
9
+ import httpx
10
+
11
+ class CohereError(Exception):
12
+ def __init__(self, status_code, message):
13
+ self.status_code = status_code
14
+ self.message = message
15
+ self.request = httpx.Request(method="POST", url="https://api.cohere.ai/v1/generate")
16
+ self.response = httpx.Response(status_code=status_code, request=self.request)
17
+ super().__init__(
18
+ self.message
19
+ ) # Call the base class constructor with the parameters it needs
20
+
21
+ class CohereConfig():
22
+ """
23
+ Reference: https://docs.cohere.com/reference/generate
24
+
25
+ The class `CohereConfig` provides configuration for the Cohere's API interface. Below are the parameters:
26
+
27
+ - `num_generations` (integer): Maximum number of generations returned. Default is 1, with a minimum value of 1 and a maximum value of 5.
28
+
29
+ - `max_tokens` (integer): Maximum number of tokens the model will generate as part of the response. Default value is 20.
30
+
31
+ - `truncate` (string): Specifies how the API handles inputs longer than maximum token length. Options include NONE, START, END. Default is END.
32
+
33
+ - `temperature` (number): A non-negative float controlling the randomness in generation. Lower temperatures result in less random generations. Default is 0.75.
34
+
35
+ - `preset` (string): Identifier of a custom preset, a combination of parameters such as prompt, temperature etc.
36
+
37
+ - `end_sequences` (array of strings): The generated text gets cut at the beginning of the earliest occurrence of an end sequence, which will be excluded from the text.
38
+
39
+ - `stop_sequences` (array of strings): The generated text gets cut at the end of the earliest occurrence of a stop sequence, which will be included in the text.
40
+
41
+ - `k` (integer): Limits generation at each step to top `k` most likely tokens. Default is 0.
42
+
43
+ - `p` (number): Limits generation at each step to most likely tokens with total probability mass of `p`. Default is 0.
44
+
45
+ - `frequency_penalty` (number): Reduces repetitiveness of generated tokens. Higher values apply stronger penalties to previously occurred tokens.
46
+
47
+ - `presence_penalty` (number): Reduces repetitiveness of generated tokens. Similar to frequency_penalty, but this penalty applies equally to all tokens that have already appeared.
48
+
49
+ - `return_likelihoods` (string): Specifies how and if token likelihoods are returned with the response. Options include GENERATION, ALL and NONE.
50
+
51
+ - `logit_bias` (object): Used to prevent the model from generating unwanted tokens or to incentivize it to include desired tokens. e.g. {"hello_world": 1233}
52
+ """
53
+ num_generations: Optional[int]=None
54
+ max_tokens: Optional[int]=None
55
+ truncate: Optional[str]=None
56
+ temperature: Optional[float]=None
57
+ preset: Optional[str]=None
58
+ end_sequences: Optional[list]=None
59
+ stop_sequences: Optional[list]=None
60
+ k: Optional[int]=None
61
+ p: Optional[float]=None
62
+ frequency_penalty: Optional[float]=None
63
+ presence_penalty: Optional[float]=None
64
+ return_likelihoods: Optional[str]=None
65
+ logit_bias: Optional[dict]=None
66
+
67
+ def __init__(self,
68
+ num_generations: Optional[int]=None,
69
+ max_tokens: Optional[int]=None,
70
+ truncate: Optional[str]=None,
71
+ temperature: Optional[float]=None,
72
+ preset: Optional[str]=None,
73
+ end_sequences: Optional[list]=None,
74
+ stop_sequences: Optional[list]=None,
75
+ k: Optional[int]=None,
76
+ p: Optional[float]=None,
77
+ frequency_penalty: Optional[float]=None,
78
+ presence_penalty: Optional[float]=None,
79
+ return_likelihoods: Optional[str]=None,
80
+ logit_bias: Optional[dict]=None) -> None:
81
+
82
+ locals_ = locals()
83
+ for key, value in locals_.items():
84
+ if key != 'self' and value is not None:
85
+ setattr(self.__class__, key, value)
86
+
87
+ @classmethod
88
+ def get_config(cls):
89
+ return {k: v for k, v in cls.__dict__.items()
90
+ if not k.startswith('__')
91
+ and not isinstance(v, (types.FunctionType, types.BuiltinFunctionType, classmethod, staticmethod))
92
+ and v is not None}
93
+
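A minimal sketch of driving CohereConfig through the public API, assuming litellm registers the class as litellm.CohereConfig and COHERE_API_KEY is set in the environment: defaults stored on the class are merged into every Cohere call, and per-call kwargs keep precedence.

    import litellm

    litellm.CohereConfig(max_tokens=200, truncate="END")

    response = litellm.completion(
        model="command-nightly",
        messages=[{"role": "user", "content": "Summarise LiteLLM in one line."}],
        temperature=0.3,  # per-call value, not taken from the config defaults
    )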
94
+ def validate_environment(api_key):
95
+ headers = {
96
+ "accept": "application/json",
97
+ "content-type": "application/json",
98
+ }
99
+ if api_key:
100
+ headers["Authorization"] = f"Bearer {api_key}"
101
+ return headers
102
+
103
+ def completion(
104
+ model: str,
105
+ messages: list,
106
+ api_base: str,
107
+ model_response: ModelResponse,
108
+ print_verbose: Callable,
109
+ encoding,
110
+ api_key,
111
+ logging_obj,
112
+ optional_params=None,
113
+ litellm_params=None,
114
+ logger_fn=None,
115
+ ):
116
+ headers = validate_environment(api_key)
117
+ completion_url = api_base
118
+ model = model
119
+ prompt = " ".join(message["content"] for message in messages)
120
+
121
+ ## Load Config
122
+ config=litellm.CohereConfig.get_config()
123
+ for k, v in config.items():
124
+ if k not in optional_params: # completion(top_k=3) > cohere_config(top_k=3) <- allows for dynamic variables to be passed in
125
+ optional_params[k] = v
126
+
127
+ data = {
128
+ "model": model,
129
+ "prompt": prompt,
130
+ **optional_params,
131
+ }
132
+
133
+ ## LOGGING
134
+ logging_obj.pre_call(
135
+ input=prompt,
136
+ api_key=api_key,
137
+ additional_args={"complete_input_dict": data, "headers": headers, "api_base": completion_url},
138
+ )
139
+ ## COMPLETION CALL
140
+ response = requests.post(
141
+ completion_url, headers=headers, data=json.dumps(data), stream=optional_params["stream"] if "stream" in optional_params else False
142
+ )
143
+ ## error handling for cohere calls
144
+ if response.status_code!=200:
145
+ raise CohereError(message=response.text, status_code=response.status_code)
146
+
147
+ if "stream" in optional_params and optional_params["stream"] == True:
148
+ return response.iter_lines()
149
+ else:
150
+ ## LOGGING
151
+ logging_obj.post_call(
152
+ input=prompt,
153
+ api_key=api_key,
154
+ original_response=response.text,
155
+ additional_args={"complete_input_dict": data},
156
+ )
157
+ print_verbose(f"raw model_response: {response.text}")
158
+ ## RESPONSE OBJECT
159
+ completion_response = response.json()
160
+ if "error" in completion_response:
161
+ raise CohereError(
162
+ message=completion_response["error"],
163
+ status_code=response.status_code,
164
+ )
165
+ else:
166
+ try:
167
+ choices_list = []
168
+ for idx, item in enumerate(completion_response["generations"]):
169
+ if len(item["text"]) > 0:
170
+ message_obj = Message(content=item["text"])
171
+ else:
172
+ message_obj = Message(content=None)
173
+ choice_obj = Choices(finish_reason=item["finish_reason"], index=idx, message=message_obj)
174
+ choices_list.append(choice_obj)
175
+ model_response["choices"] = choices_list
176
+ except Exception as e:
177
+ raise CohereError(message=response.text, status_code=response.status_code)
178
+
179
+ ## CALCULATING USAGE
180
+ prompt_tokens = len(
181
+ encoding.encode(prompt)
182
+ )
183
+ completion_tokens = len(
184
+ encoding.encode(model_response["choices"][0]["message"].get("content", ""))
185
+ )
186
+
187
+ model_response["created"] = int(time.time())
188
+ model_response["model"] = model
189
+ usage = Usage(
190
+ prompt_tokens=prompt_tokens,
191
+ completion_tokens=completion_tokens,
192
+ total_tokens=prompt_tokens + completion_tokens
193
+ )
194
+ model_response.usage = usage
195
+ return model_response
196
+
197
+ def embedding(
198
+ model: str,
199
+ input: list,
200
+ api_key: Optional[str] = None,
201
+ logging_obj=None,
202
+ model_response=None,
203
+ encoding=None,
204
+ optional_params=None,
205
+ ):
206
+ headers = validate_environment(api_key)
207
+ embed_url = "https://api.cohere.ai/v1/embed"
208
+ model = model
209
+ data = {
210
+ "model": model,
211
+ "texts": input,
212
+ **optional_params
213
+ }
214
+
215
+ if "3" in model and "input_type" not in data:
216
+ # cohere v3 embedding models require input_type, if no input_type is provided, default to "search_document"
217
+ data["input_type"] = "search_document"
218
+
219
+ ## LOGGING
220
+ logging_obj.pre_call(
221
+ input=input,
222
+ api_key=api_key,
223
+ additional_args={"complete_input_dict": data},
224
+ )
225
+ ## COMPLETION CALL
226
+ response = requests.post(
227
+ embed_url, headers=headers, data=json.dumps(data)
228
+ )
229
+ ## LOGGING
230
+ logging_obj.post_call(
231
+ input=input,
232
+ api_key=api_key,
233
+ additional_args={"complete_input_dict": data},
234
+ original_response=response,
235
+ )
236
+ """
237
+ response
238
+ {
239
+ 'object': "list",
240
+ 'data': [
241
+
242
+ ]
243
+ 'model',
244
+ 'usage'
245
+ }
246
+ """
247
+ if response.status_code!=200:
248
+ raise CohereError(message=response.text, status_code=response.status_code)
249
+ embeddings = response.json()['embeddings']
250
+ output_data = []
251
+ for idx, embedding in enumerate(embeddings):
252
+ output_data.append(
253
+ {
254
+ "object": "embedding",
255
+ "index": idx,
256
+ "embedding": embedding
257
+ }
258
+ )
259
+ model_response["object"] = "list"
260
+ model_response["data"] = output_data
261
+ model_response["model"] = model
262
+ input_tokens = 0
263
+ for text in input:
264
+ input_tokens+=len(encoding.encode(text))
265
+
266
+ model_response["usage"] = {
267
+ "prompt_tokens": input_tokens,
268
+ "total_tokens": input_tokens,
269
+ }
270
+ return model_response
271
+
272
+
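An illustrative embedding call, assuming the router resolves this model name to the Cohere provider and COHERE_API_KEY is set: for v3 models, the code above injects input_type="search_document" unless the caller supplies one.

    import litellm

    response = litellm.embedding(
        model="embed-english-v3.0",
        input=["hello from litellm"],
        # input_type="search_query",  # optional override of the injected default
    )
    print(response["usage"]["prompt_tokens"])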
273
+
litellm/llms/huggingface_llms_metadata/hf_conversational_models.txt ADDED
@@ -0,0 +1,2523 @@
1
+ 0xDEADBEA7/DialoGPT-small-rick
2
+ 1Basco/DialoGPT-small-jake
3
+ 2early4coffee/DialoGPT-medium-deadpool
4
+ 2early4coffee/DialoGPT-small-deadpool
5
+ 2gud/DialogGPT-small-Koopsbot
6
+ ABBHISHEK/DialoGPT-small-harrypotter
7
+ AIDynamics/DialoGPT-medium-MentorDealerGuy
8
+ AJ/DialoGPT-small-ricksanchez
9
+ AJ/rick-discord-bot
10
+ AJ/rick-sanchez-bot
11
+ AJ-Dude/DialoGPT-small-harrypotter
12
+ AK270802/DialoGPT-small-harrypotter
13
+ ATGdev/DialoGPT-small-harrypotter
14
+ AVeryRealHuman/DialoGPT-small-TonyStark
15
+ AbhinavSaiTheGreat/DialoGPT-small-harrypotter
16
+ AccurateIsaiah/DialoGPT-small-jefftastic
17
+ AccurateIsaiah/DialoGPT-small-mozark
18
+ AccurateIsaiah/DialoGPT-small-mozarkv2
19
+ AccurateIsaiah/DialoGPT-small-sinclair
20
+ AdharshJolly/HarryPotterBot-Model
21
+ AdrianGzz/DialoGPT-small-harrypotter
22
+ Aero/Tsubomi-Haruno
23
+ AetherIT/DialoGPT-small-Hal
24
+ AiPorter/DialoGPT-small-Back_to_the_future
25
+ Aibox/DialoGPT-small-rick
26
+ Akjder/DialoGPT-small-harrypotter
27
+ AllwynJ/HarryBoy
28
+ AnthonyNelson/DialoGPT-small-ricksanchez
29
+ Apisate/DialoGPT-small-jordan
30
+ ArJakusz/DialoGPT-small-stark
31
+ Aran/DialoGPT-medium-harrypotter
32
+ Aran/DialoGPT-small-harrypotter
33
+ Arcktosh/DialoGPT-small-rick
34
+ AriakimTaiyo/DialoGPT-cultured-Kumiko
35
+ AriakimTaiyo/DialoGPT-medium-Kumiko
36
+ AriakimTaiyo/DialoGPT-revised-Kumiko
37
+ AriakimTaiyo/DialoGPT-small-Kumiko
38
+ AriakimTaiyo/DialoGPT-small-Rikka
39
+ ArtemisZealot/DialoGTP-small-Qkarin
40
+ Aruden/DialoGPT-medium-harrypotterall
41
+ Aspect11/DialoGPT-Medium-LiSBot
42
+ Asuramaru/DialoGPT-small-rintohsaka
43
+ Atchuth/DialoGPT-small-MichaelBot
44
+ Augustvember/WOKKAWOKKA
45
+ Augustvember/WokkaBot3
46
+ Augustvember/test
47
+ Augustvember/wokka2
48
+ Augustvember/wokka4
49
+ Augustvember/wokka5
50
+ Augustvember/wokkabottest2
51
+ AvatarXD/DialoGPT-medium-Blitzo
52
+ Awsaf/DialoGPT-medium-eren
53
+ Awsaf/large-eren
54
+ Axcel/DialoGPT-small-rick
55
+ Ayjayo/DialoGPT-medium-AyjayoAI
56
+ Ayran/DialoGPT-medium-harry-potter-1-through-3
57
+ Ayran/DialoGPT-medium-harry-potter-1-through-4-plus-6-e18
58
+ Ayran/DialoGPT-medium-harry-potter-1-through-4-plus-6
59
+ Ayran/DialoGPT-small-gandalf
60
+ Ayran/DialoGPT-small-harry-potter-1-through-3
61
+ Azuris/DialoGPT-medium-envy
62
+ Azuris/DialoGPT-medium-senorita
63
+ Azuris/DialoGPT-small-envy
64
+ BW/TEST
65
+ Backedman/DialoGPT-small-Anika
66
+ BalajiSathesh/DialoGPT-small-harrypotter
67
+ Batsy24/DialoGPT-medium-Twilight_BellaBot
68
+ Batsy24/DialoGPT-small-Twilight_EdBot
69
+ Bee-Garbs/DialoGPT-real-cartman-small
70
+ Biasface/DDDC
71
+ Biasface/DDDC2
72
+ BigTooth/DialoGPT-Megumin
73
+ BigTooth/DialoGPT-small-tohru
74
+ BigTooth/Megumin-v0.2
75
+ BigeS/DialoGPT-small-Rick
76
+ Bimal/my_bot_model
77
+ BinksSachary/DialoGPT-small-shaxx
78
+ BinksSachary/ShaxxBot
79
+ BinksSachary/ShaxxBot2
80
+ BlightZz/DialoGPT-medium-Kurisu
81
+ BlightZz/MakiseKurisu
82
+ BlueGamerBeast/DialoGPT-small-Morgana
83
+ BotterHax/DialoGPT-small-harrypotter
84
+ Broadus20/DialoGPT-small-joshua
85
+ BrunoNogueira/DialoGPT-kungfupanda
86
+ Brykee/DialoGPT-medium-Morty
87
+ Bubb-les/DisloGPT-medium-HarryPotter
88
+ Camzure/MaamiBot-test
89
+ Canadiancaleb/DialoGPT-small-jesse
90
+ Canadiancaleb/DialoGPT-small-walter
91
+ CasualHomie/DialoGPT-small-harrypotter
92
+ Chae/botman
93
+ Chakita/Friends
94
+ Chalponkey/DialoGPT-small-Barry
95
+ ChaseBread/DialoGPT-small-harrypotter
96
+ Chiuchiyin/DialoGPT-small-Donald
97
+ ChrisVCB/DialoGPT-medium-cmjs
98
+ ChrisVCB/DialoGPT-medium-ej
99
+ Chuah/DialoGPT-small-harrypotter
100
+ ChukSamuels/DialoGPT-small-Dr.FauciBot
101
+ Ciruzzo/DialoGPT-small-harrypotter
102
+ ClaudeCOULOMBE/RickBot
103
+ Cloudy/DialoGPT-CJ-large
104
+ ClydeWasTaken/DialoGPT-small-joshua
105
+ CodeDanCode/CartmenBot
106
+ CodeDanCode/SP-KyleBot
107
+ CoderBoy432/DialoGPT-small-harrypotter
108
+ CoderEFE/DialoGPT-marxbot
109
+ Connor/DialoGPT-small-rick
110
+ Connorvr/BrightBot-small
111
+ CopymySkill/DialoGPT-medium-atakan
112
+ Corvus/DialoGPT-medium-CaptainPrice-Extended
113
+ Corvus/DialoGPT-medium-CaptainPrice
114
+ Coyotl/DialoGPT-test-last-arthurmorgan
115
+ Coyotl/DialoGPT-test2-arthurmorgan
116
+ Coyotl/DialoGPT-test3-arthurmorgan
117
+ CracklesCreeper/Piglin-Talks-Harry-Potter
118
+ Cryptikdw/DialoGPT-small-rick
119
+ Cthyllax/DialoGPT-medium-PaladinDanse
120
+ CurtisBowser/DialoGPT-medium-sora-two
121
+ CurtisBowser/DialoGPT-medium-sora
122
+ CurtisBowser/DialoGPT-small-sora
123
+ CyberMuffin/DialoGPT-small-ChandlerBot
124
+ DARKVIP3R/DialoGPT-medium-Anakin
125
+ Daivakai/DialoGPT-small-saitama
126
+ Dawit/DialogGPT-small-ironman
127
+ Daymarebait/Discord_BOT_RICK
128
+ DecafNosebleed/DialoGPT-small-ScaraBot
129
+ Denny29/DialoGPT-medium-asunayuuki
130
+ Devid/DialoGPT-small-Miku
131
+ Dilmk2/DialoGPT-small-harrypotter
132
+ Dimedrolza/DialoGPT-small-cyberpunk
133
+ DingleyMaillotUrgell/homer-bot
134
+ Doiman/DialoGPT-medium-harrypotter
135
+ DongHai/DialoGPT-small-rick
136
+ Doquey/DialoGPT-small-Luisbot1
137
+ Doquey/DialoGPT-small-Michaelbot
138
+ Doxophobia/DialoGPT-medium-celeste
139
+ Dragoniod1596/DialoGPT-small-Legacies
140
+ Dreyzin/DialoGPT-medium-avatar
141
+ DueLinx0402/DialoGPT-small-harrypotter
142
+ Duugu/jakebot3000
143
+ Dyzi/DialoGPT-small-landcheese
144
+ EEE/DialoGPT-medium-brooke
145
+ EEE/DialoGPT-small-aang
146
+ EEE/DialoGPT-small-yoda
147
+ ESPersonnel/DialoGPT-small-got
148
+ Eagle3ye/DialoGPT-small-PeppaPig
149
+ Elzen7/DialoGPT-medium-harrypotter
150
+ Emi2160/DialoGPT-small-Neku
151
+ EmileAjar/DialoGPT-small-harrypotter
152
+ EmileAjar/DialoGPT-small-peppapig
153
+ Erikaka/DialoGPT-small-loki
154
+ EstoyDePaso/DialoGPT-small-harrypotter
155
+ EuropeanTurtle/DialoGPT-small-mrcobb
156
+ ExEngineer/DialoGPT-medium-jdt
157
+ Exilon/DialoGPT-large-quirk
158
+ EzioDD/house
159
+ FFF000/dialogpt-FFF
160
+ FangLee/DialoGPT-small-Kirito
161
+ Filosofas/DialoGPT-medium-PALPATINE
162
+ Flampt/DialoGPT-medium-Sheldon
163
+ For/sheldonbot
164
+ FosterPatch/GoT-test
165
+ Fu10k/DialoGPT-medium-Rick
166
+ GabbyDaBUNBUN/DialoGPT-medium-PinkiePie
167
+ Galaxy/DialoGPT-small-hermoine
168
+ GamerMan02/DialoGPT-medium-gamerbot
169
+ Gappy/DialoGPT-small-Zhongli
170
+ Geezy/DialoGPT-small-guy
171
+ GenDelport/DialoGPT-small-harrypotter
172
+ Gowtham25/DialoGPT-small-jackie
173
+ Gregor-Davies/DialoGPT-small-rick
174
+ Greysan/DialoGPT-medium-TOH
175
+ Guard-SK/DialoGPT-medium-ricksanchez
176
+ Guard-SK/DialoGPT-small-ricksanchez
177
+ GunjanPantha/DialoGPT-small-gameofthrones
178
+ Guy0/DialoGPT-small-Batmanbotty
179
+ HAttORi/DialoGPT-Medium-zerotwo
180
+ HackyHackyMan/DialoGPT-small-harrypotter
181
+ Hadron/DialoGPT-medium-nino
182
+ Hallzy/Peterbot
183
+ Hamas/DialoGPT-large-jake
184
+ Hamas/DialoGPT-large-jake2
185
+ Hamas/DialoGPT-large-jake3
186
+ Hamas/DialoGPT-large-jake4
187
+ Hamhams/DialoGPT-small-rick
188
+ HansAnonymous/DialoGPT-medium-rick
189
+ HansAnonymous/DialoGPT-small-shrek
190
+ HarryPuttar/HarryPotterDC
191
+ Harshal6927/Jack_Sparrow_GPT
192
+ Harshal6927/Tony_Stark_GPT
193
+ Havokx/DialoGPT-small-Rick
194
+ Heldhy/DialoGPT-small-tony
195
+ Heldhy/testingAgain
196
+ MagnusChase7/DialoGPT-medium-harrypotter
197
+ Htenn/DialoGPT-small-spongebob
198
+ Htenn/DialoGPT-small-spongebobv2
199
+ HueJanus/DialoGPT-small-ricksanchez
200
+ HypNyx/DialoGPT-small-DwightBot
201
+ HypNyx/DialoGPT-small-Thanos
202
+ HypedKid/PeterBot
203
+ ILoveThatLady/DialoGPT-small-rickandmorty
204
+ ITNODove/DialoGPT-medium-cyberbones
205
+ Icemiser/chat-test
206
+ Ilyabarigou/Genesis-harrybotter
207
+ ImAPizza/DialoGPT-medium-albert
208
+ ImAPizza/DialoGPT-medium-alberttwo
209
+ Invincible/Chat_bot-Harrypotter-medium
210
+ Invincible/Chat_bot-Harrypotter-small
211
+ Invincible/DialoGPT-medium-harryPotter
212
+ Istiaque190515/Sherlock
213
+ Istiaque190515/harry_bot_discord
214
+ Istiaque190515/harry_potter
215
+ ItoYagura/DialoGPT-medium-tohru
216
+ ItzJorinoPlays/DialoGPT-small-PickleRick
217
+ J-Chiang/DialoGPT-small-thor
218
+ JDS22/DialoGPT-medium-HarryPotterBot
219
+ Jedi33/tonystarkAI
220
+ Jeffrey/DialoGPT-small-Jeffrey
221
+ JimmyHodl/DialoGPT-medium
222
+ Jllama/dialoGPT-small-Joshua-test
223
+ Jonesy/DialoGPT-medium_Barney
224
+ Jonesy/FG_OLD
225
+ Jonesy/DialoGPT-small_JT
226
+ Julianqll/DialoGPT-small-finalmorty
227
+ Julianqll/DialoGPT-small-ricksanchez
228
+ KAIHATSU/DialoGPT-small-rick
229
+ KENNETHFOO/DialoGPT-medium-harrypotter
230
+ KOSTAS/DialoGPT-small-Cleverbot
231
+ KP2500/KPBot
232
+ Kai0857/DialoGPT-small-harrypotter
233
+ Kail91/DialoGPT-small-PeraltaBot
234
+ Kairu/DialoGPT-small-Rick
235
+ Kairu/RICKBOT
236
+ KakoSi/Smolmm3
237
+ KakoSi/opaazzi
238
+ Kaledmgo/DialoGPT-small-donajulia
239
+ Kargan/DialoGPT-small-randombot
240
+ KaydenSou/Joshua
241
+ Keen/DialoGPT-small-potter
242
+ KekLord/DialoGPT-small-rick3
243
+ Keqing/Keqing-Siesta
244
+ Keqipig/DialoGPT-small-spamton
245
+ KhanAdeeb/model-tony-stark
246
+ KingCodeSquid/Octavian
247
+ KingCodeSquid/Octavian2
248
+ Kirili4ik/ruDialoGpt3-medium-finetuned-telegram
249
+ KnutZuidema/DialoGPT-small-morty
250
+ Konggate/DialoGPT-small-harrypotter
251
+ Koriyy/DialoGPT-medium-gf
252
+ Koro/DialoGPT-medium-rickandmorty
253
+ Koro/DialoGPT-small-rickandmorty
254
+ KringleClaus/Dialog-santa
255
+ KrispyIChris/DialoGPT-small-harrypotter
256
+ Kryptone/Burobot
257
+ Kryptone/RinAI
258
+ Kryptone/monikAI-Unstable
259
+ Kryptone/monikAI
260
+ Kshaunish/DialoGPT-small-rick
261
+ Kush/DialoGPT-small-harrypotter
262
+ LARACHNIDE/DialogGPT-small-sw
263
+ LactoseLegend/DialoGPT-small-Rick
264
+ Laezor/DialoGPT-small-witcher1
265
+ Laezor/DialoGPT-small-yakuza_0
266
+ LaiJY/DialoGPTChatbot
267
+ Laptop/DialoGPT-small-gandalf
268
+ Lenza/DialoGPT-medium-Kobayashi
269
+ Leonel/DialoGPT-small-chandler
270
+ Leostronkest/DialoGPT-small-michael
271
+ Leostronkest/DialoGPT
272
+ Leviii03/Dialogpt-small-Jake99
273
+ Lizardon/Peterbot
274
+ Lovery/Aqua
275
+ Lucdi90/DialoGPT-medium-XiaoBot
276
+ LuckyWill/DialoGPT-small-JakeBot
277
+ Lurka/DialoGPT-medium-isseibot
278
+ Lurka/DialoGPT-medium-kon
279
+ Luxiere/DialoGPT-medium-tyrion
280
+ MAUtastic/DialoGPT-medium-RickandMortyBot
281
+ MCUxDaredevil/DialoGPT-small-rick
282
+ MS366/DialoGPT-small-vision
283
+ MadhanKumar/DialoGPT-small-HarryPotter
284
+ MadhanKumar/HarryPotter-Bot
285
+ MagmaCubes1133/DialoGPT-large-rick
286
+ Mandy/DialoGPT-small-Mikasa
287
+ Manthan/DialoGPT-small-harrypotter
288
+ Mara/DialoGPT-medium-harrypotter
289
+ MathiasVS/DialoGPT-small-RickAndMorty
290
+ MaxW0748/DialoGPT-small-Rick
291
+ MayankGupta/DialoGPT-small-harrypotter
292
+ MichaelTheLearner/DialoGPT-medium-harry
293
+ Midhunkrishna/DialoGPT-small-bjk
294
+ Mierln/SmartHarry
295
+ MightyCoderX/DialoGPT-medium-EdwardElric
296
+ ModzabazeR/small-okaberintaro
297
+ Mohsin272/DialoGPT-medium-harrypotter
298
+ Mona/DialoGPT-small-harrypotter
299
+ MoonlitEtherna/DialoGPT-small-Nyivae
300
+ MrDuckerino/DialoGPT-medium-Rick
301
+ MrE/DialoGPT-medium-SARGE
302
+ MrE/DialoGPT-medium-SARGER1
303
+ MrE/DialoGPT-medium-SARGER3
304
+ MrGentle/DeltaModel-genius1
305
+ MrZ/DialoGPT-small-Rick
306
+ Mythiie/DialoGPT-small-Modeus
307
+ N8Daawg/chat_bot
308
+ NASABOI/MachineLearningAI
309
+ nabarun/DialoGPT-small-joshua
310
+ NamPE/DialoGPT-medium-Aqua-konosuba
311
+ NamPE/DialoGPT-medium-Takanashi-Rikka
312
+ NamPE/DialoGPT-small-satouhina
313
+ NanniKirby/DialoGPT-medium-bapi
314
+ NanniKirby/bapismall
315
+ Naturealbe/DialoGPT-small-harrypotter-2
316
+ Naturealbe/DialoGPT-small-harrypotter
317
+ Navigator/DialoGPT-medium-martymcfly
318
+ Navya2608/DialoGPT-medium-chandler
319
+ Navya2608/DialoGPT-medium-rachel
320
+ Navya2608/DialoGPT-small-tonystarkscript
321
+ Necrozma/harrypotterbot
322
+ Nekoism/Zhongli-Beta
323
+ NibrasShami/DialopGPT-small-HarryPotter
324
+ NickCavarretta/DialoGPT-small-laffy
325
+ Nihwy/DialoSqui
326
+ NikhilKrishna/DialoGPT-medium-harrypotter
327
+ Ninja5000/DialoGPT-medium-HarryPotter
328
+ Ninja5000/DialoGPT-medium-TWEWYJoshua
329
+ Niphredil/DialoGPT-small-lotr
330
+ Nisarg2701/DialoGPT-medium-Rick
331
+ NoLawz/DialoGPT-medium-hagrid
332
+ NoLawz/DialoGPT-medium-harrypotter
333
+ NoLawz/DialoGPT-medium-spongebob
334
+ Nova/DialoGPT-medium-Lelouch
335
+ NovaChrono/twervy
336
+ Obesitycart/ChatBot
337
+ Obscurity/DialoGPT-Medium-707
338
+ Oji/DialoGPT-small-Rick
339
+ Optimal/Harry
340
+ P4RZ1V4L/DialoGPT-Medium-Tony
341
+ PVAbhiram2003/DialoGPT-medium-RickandMorty
342
+ Paradocx/Dialogpt-mid-hpai
343
+ Pensador777critico/DialoGPT-small-RickandMorty
344
+ PhilipTheGreat/DiabloGPT-small-Traveller
345
+ PinoCorgi/DialoGPT-small-Shrek1
346
+ Piumi/DialogGPT-small-harrypotter
347
+ Plencers/DialoGPT-small-homer
348
+ Poly-Pixel/shrek-medium-full
349
+ Poly-Pixel/shrek-medium
350
+ Poly-Pixel/shrek-test-small
351
+ Pupihed/DialoGPT-small-shrek
352
+ PurpleJacketGuy/My_Jarvis
353
+ PurpleJacketGuy/My_Jarvis_2
354
+ RAhul03/DialoGPT-small-harrypotter
355
+ REAP3R/Chat-bot
356
+ REZERO/DialoGPT-medium-saitama
357
+ RTM/ChatBot
358
+ RTM/Lucky
359
+ RTurk/DialoGPT-small-TIMBOT
360
+ Radicalkiddo/DialoGPT-small-Radical
361
+ Rashid11/DialoGPT-small-rick
362
+ Rathod/DialoGPT-small-harrypotter
363
+ Redolid/DialoGPT-small-Rick
364
+ Rei/DialoGPT-medium-kurisu
365
+ RifsxD/DialoGPT-medium-raifu
366
+ RishabhRawatt/DialoGPT-small-Rickmorty
367
+ RishabhRawatt/DialoGPT-small-kela
368
+ Ritchie/DialoGPT-small-Rickandmorty
369
+ RizqFarIDN/DialoGPT-medium-harrypotter
370
+ RizqFarIDN/DialoGPT-small-harrypotter
371
+ RobinMari/DialoGPT-small-mikoto
372
+ Royce23/DialoGPT-small-almas
373
+ Rush11/DialoGPT-small-HarryPotter
374
+ Ryanar/DialoGPT-medium-Zelda
375
+ Ryukie/DialoGPT-small-Rick
376
+ S34NtheGuy/DialoGPT-medium-Glass_Of_Water
377
+ S34NtheGuy/DialoGPT-medium-Mona
378
+ S34NtheGuy/DialoGPT-small-Harry282
379
+ S34NtheGuy/DialoGPT-small-MJOLNIR_Soul
380
+ S34NtheGuy/DialoGPT-small-cursedryno
381
+ S34NtheGuy/DialoGPT-small-pikamew362
382
+ S34NtheGuy/DialoGPT-small-wetterlettuce
383
+ SJSui/RickBot
384
+ SPGT/LiveSafe-DialoGPT
385
+ SaffronIce/DialoGPT-medium-Jett
386
+ Salma-2/DialoGPT-small-harrypotter
387
+ Sammigooof/Peterbot
388
+ SarahhhUwU/DialoGPT-small-ally
389
+ Sarumomo/DialoGPT-small-test
390
+ Saviour/ChandlerBot
391
+ Saz/DialoGPT-small-paimon
392
+ Saz/DialoGPT-small-saz
393
+ Science-geek32/DialoGPT-small-doctor
394
+ Science-geek32/DialoGPT-small-doctor2.0
395
+ Scoops/SandalBot
396
+ ScottaStrong/DialogGPT-medium-Scott
397
+ ScottaStrong/DialogGPT-medium-joshua
398
+ ScottaStrong/DialogGPT-small-Scott
399
+ ScottaStrong/DialogGPT-small-joshua
400
+ Sebastianthecrab/DialoGPT-small-melchior
401
+ Sedge/DialoGPT-small-Sedge
402
+ Shakaw/DialoGPT-small-spongebot
403
+ ShayoGun/DialoGPT-small-shayo
404
+ Sheel/DialoGPT-small-harrypotter
405
+ Sheerwin02/DialoGPT-medium-mikasa
406
+ Sheerwin02/DialoGPT-small-isla
407
+ Sherman/DialoGPT-medium-joey
408
+ Shike/DialoGPT_medium_harrypotter
409
+ Shinx/DialoGPT-medium-myheroacademia
410
+ NaturesDisaster/DialoGPT-large-Neku
411
+ NaturesDisaster/DialoGPT-small-Neku
412
+ ShiroNeko/DialoGPT-small-rick
413
+ Shubham-Kumar-DTU/DialoGPT-small-goku
414
+ SilentMyuth/sarcastic-model
415
+ SilentMyuth/stableben
416
+ SirBastianXVII/DialoGPT-small-TVD
417
+ Sired/DialoGPT-small-trumpbot
418
+ Siyris/DialoGPT-medium-SIY
419
+ Siyris/SIY
420
+ Skywhy/DialoGPT-medium-Churchyy
421
+ Snaky/StupidEdwin
422
+ Soapsy/DialoGPT-mid-cartman
423
+ SonMooSans/DialoGPT-small-joshua
424
+ SonMooSans/test
425
+ Sora4762/DialoGPT-small-naruto
426
+ Sora4762/DialoGPT-small-naruto1.1
427
+ Soumyajit1008/DialoGPT-small-harryPotterssen
428
+ SpacyGalaxy/DialoGPT-medium-Gandalf
429
+ Spectrox/emmybot
430
+ Spirax/DialoGPT-medium-sheldon
431
+ Spoon/DialoGPT-small-engineer
432
+ Stabley/DialoGPT-small-evelynn
433
+ Stevo/DiagloGPT-medium-spamton
434
+ Stoned-Code/DioloGPT-large-Rick-SC-420
435
+ Sunnydx/BillCipherBot
436
+ TTYU/DialoGPT-small-trump
437
+ TVLG/DialoGPT-small-Iroh-Bot
438
+ Taramiko/DialoGPT-small-hoshiyo_kojima
439
+ Taramiko/Hoshiyo_Kojima
440
+ Tejasvb/DialoGPT-small-rick
441
+ Tejasvb/DialogGPT-small-rick
442
+ ThatSkyFox/DialoGPT-medium-joshua
443
+ ThatSkyFox/DialoGPT-small-joshua
444
+ The-Programmer-With-Cool-Pens/TifaBotAIPackage
445
+ TheCatsMoo/DialoGGPT-small-joshua
446
+ TheDiamondKing/DialoGPT-small-harrypotter
447
+ ThePeachOx/DialoGPT-small-harry
448
+ TheReverendWes/DialoGPT-small-rick
449
+ TheTUFGuy/HermioneChatBot
450
+ Thejas/DialoGPT-small-Stewei
451
+ Thejas/DialoGPT-small-elon
452
+ ThoracicCosine/DialoGPT-small-harrypotter
453
+ Tidum/DialoGPT-large-Michael
454
+ Toadally/DialoGPT-small-david_mast
455
+ Tofu05/DialoGPT-large-boon2
456
+ Tofu05/DialoGPT-med-boon3
457
+ TofuBoy/DialoGPT-medium-Yubin2
458
+ TofuBoy/DialoGPT-medium-boon
459
+ Tr1ex/DialoGPT-small-rick
460
+ TrebleJeff/DialoGPT-small-Michael
461
+ TrimPeachu/Deadpool
462
+ Trixzy/rickai-v1
463
+ Tropics/DialoGPT-small-peppa
464
+ UKJ5/DialoGPT-small-harrypotter
465
+ Username1/Mourinhio-medium
466
+ Username1/Mourinho
467
+ Username1/Wenger
468
+ VLRevolution/DialogGPT-small-GGODMODEL
469
+ VMET/DialoGPT-small-dumbassbot
470
+ VaguelyCynical/DialoGPT-small-RickSanchez
471
+ Vampiro/DialoGPT-small-dante_b
472
+ Vampiro/DialoGPT-small-dante_c
473
+ VariableZee/DialoGPT-small-ivylia03
474
+ Verge/Peterbot
475
+ VincentButterfield/DialoGPT-small-harrypotter
476
+ VishalArun/DialoGPT-medium-harrypotter
477
+ Vitafeu/DialoGPT-medium-ricksanchez
478
+ VulcanBin/DialoGPT-small-cortana
479
+ WarrenK-Design/DialoGPT-small-Rick
480
+ Wessel/DiabloGPT-medium-harrypotter
481
+ White/white-bot
482
+ Whitez/DialoGPT-small-twety
483
+ Wise/DialogGPT-small-JC
484
+ WoutN2001/james3
485
+ WurmWillem/DialoGPT-medium-RickandMorty3
486
+ Xeouz/Ultron-Small
487
+ XuguangAi/DialoGPT-small-Harry
488
+ XuguangAi/DialoGPT-small-Leslie
489
+ XuguangAi/DialoGPT-small-Rick
490
+ Yankee/test1234
491
+ Zane/Ricky
492
+ Zane/Ricky3
493
+ Zeer0/DialoGPT-small-ZerO
494
+ Zen1/Derekbot
495
+ Zen1/test1
496
+ Zeph/DialoGPT-small-rick
497
+ Zephaus/Chromrepo
498
+ Zixtrauce/BDBot
499
+ Zixtrauce/BDBot4Epoch
500
+ Zixtrauce/BaekBot
501
+ Zixtrauce/BrandonBot
502
+ Zixtrauce/BrandonBot2
503
+ Zixtrauce/JohnBot
504
+ Zixtrauce/SelfAwareness
505
+ Zuha/DialoGPT-small-gandalf
506
+ a01709042/DialoGPT-medium
507
+ aadilhassan/Chandlerbot
508
+ aashutosh2102/DialoGPT-smalll-harrypotter
509
+ abhiramtirumala/DialoGPT-sarcastic
510
+ abhisht/DialoGPT-medium-Emilybot
511
+ abjbpi/DS_small
512
+ abjbpi/Dwight_Schrute
513
+ aced/DialoGPT-medium-3PO
514
+ adviksinghania/DialoGPT-medium-rick
515
+ af1tang/personaGPT
516
+ aggb/DialogGPT-small-AGGB-B
517
+ aimiekhe/yummv1
518
+ aimiekhe/yummv2
519
+ aishanisingh/DiagloGPT-small-michaelscott
520
+ aishanisingh/DialoGPT-small-harrypotter
521
+ akaushik1/DialoGPT-small-kaiser
522
+ akhooli/personachat-arabic
523
+ alankar/DialoGPT-small-rick
524
+ alipsezzar/DialoGPT-medium-harrypotter
525
+ alistair7/bbt-diagpt2-model
526
+ aluserhuggingface/DialoGPT-small-harrypotter
527
+ alvinkobe/DialoGPT-medium-steve_biko
528
+ alvinkobe/DialoGPT-small-KST
529
+ andikarachman/DialoGPT-small-sheldon
530
+ anduush/DialoGPT-small-Rick
531
+ ange/DialoGPT-medium-Monke
532
+ ankimt01/DialoGPT-small-anch
533
+ ann101020/le2sbot-hp
534
+ anshengli2/DialogGPT-small-Bot
535
+ anweasha/DialoGPT-small-Chandler
536
+ anweasha/DialoGPT-small-Jake
537
+ aplnestrella/Aladdin-Bot
538
+ arampacha/DialoGPT-medium-simpsons
539
+ archmagos/HourAI
540
+ ardatasc/miniMe-version1
541
+ arifbhrn/DialogGPT-small-Rickk
542
+ arnav7633/DialoGPT-medium-tony_stark
543
+ aryanbhosale/DialoGPT-medium-harrypotter
544
+ asad/DialoGPT-small-harryporter_bot
545
+ ashwinchandran13/DialoGPT-small-harrypotter
546
+ astrobreazy/DialoGPT-small-harrypotter
547
+ atkh6673/DialoGPT-small-harrypotter
548
+ atkh6673/DialoGPT-small-trump
549
+ atomsspawn/DialoGPT-small-dumbledore
550
+ augustojaba/DialoGPT-small-harrypotter
551
+ avinashshrangee/DialoGPT-small-Ricky
552
+ awvik360/DialoGPT-medium-plemons
553
+ awvik360/DialoGPT-medium-plemons2
554
+ awvik360/DialoGPT-small-plemons
555
+ aydin/DialoGPT-medium-michael
556
+ ayush19/rick-sanchez
557
+ b0shakk/DialoGPT-small-Ragnar
558
+ balta/DialoGPT-small-TestBot
559
+ banden/DialoGPT-medium-RickBot
560
+ banden/DialoGPT-small-LokiBot
561
+ beatajackowska/DialoGPT-RickBot
562
+ benajtil/DialoGPT-small-Daddyben
563
+ benajtil/DialoGPT-small-RickAndMortyScripts
564
+ benjaminbeilharz/dialoGPT-small-empatheticdialogues-generation
565
+ benmrtnz27/DialoGPT-small-misato
566
+ bensuydam/CartmanBot
567
+ bestminerevah/DialoGPT-small-thetenthdoctor
568
+ bhaden94/LokiDiscordBot-medium
569
+ bhavya689/DialoGPT-large-chandler
570
+ bleachybrain/DialoGPT-med-ss
571
+ bmdonnell/DialoGPT-medium-harrypotter
572
+ bonebambi/DialoGPT-small-ThakirClone
573
+ bookemdan/DialoGPT-small-harrypotter
574
+ boran/berkbot
575
+ boydster/DialoGPT-small-gollum
576
+ brimeggi/testbot2
577
+ brokentx/newbrokiev2
578
+ bspans/DialoGPT-small-yoda
579
+ byeongal/Ko-DialoGPT
580
+ bypequeno/DialoGPT-small-michaelscott
581
+ caps1994/DialoGPT-small-chrisbot-caps1994
582
+ caps1994/DialoGPT-small-chrisbot
583
+ caps1994/DialoGPT-small-harrypotter-caps1994
584
+ cartyparty/DialoGPT-small-harrypotter
585
+ cartyparty/DialoGPT-small-iteration1
586
+ cartyparty/DialoGPT-small-nerdherd
587
+ cedpsam/chatbot_fr
588
+ centon21/DialoGPT-small-harrypotter
589
+ chaitrabhat/DialoGPT-small-rick
590
+ chamindu/DialoGPT-medium-hermione
591
+ chamodkarunasena/DialoGPT-medium-sokka
592
+ chan030609/DialoGPT-medium-JAB
593
+ chan030609/DialoGPT-small-JAB
594
+ chellver24/DialoGPT-medium-chizuru_ichinose
595
+ chip/DialoGPT-small-chizuru
596
+ thu-coai/blenderbot-400M-esconv
597
+ clairesb/kindness_bot
598
+ clairesb/kindness_bot_repo
599
+ clancystudios/DialoGPT-medium-Morty
600
+ clayfox/DialoGPT-medium-Hiccup
601
+ clayfox/DialoGPT-small-Hiccup
602
+ cocoaclef/DialoGPT-small-kohaku
603
+ codealtgeek/DiabloGPT-medium-rickmorty
604
+ colochoplay/DialoGTP-small-harrypotter
605
+ conniezyj/DialoGPT-small-snape
606
+ cookirei/DialoGPT-medium-Joreyar
607
+ cosmic/DialoGPT-Rick
608
+ cosmicray001/prod-harry
609
+ cosmicray001/small-harry
610
+ crystalgate/DialoGPT-small-rick
611
+ cumtowndiscord/DialoGPT-small-joshua
612
+ cutiebunny639/DialoGPT-small-harry
613
+ d4rk/harry
614
+ danildany/DialoGPT-small-MichaelScott
615
+ danny481/DialoGPT-small-datnguyenchatbot
616
+ danny481/DialoGPT-small-harrypotter
617
+ danny481/Final_ChatBot
618
+ darkzek/chickenbot-jon-snow
619
+ darthboii/DialoGPT-small-PickleRick
620
+ darthboii/DialoGPT-small-Rick
621
+ dats/DialoGPT-small-harrypotter
622
+ dattam/DialoGPT-medium-TonyStarkBot
623
+ dead69/GPT-small-yoda
624
+ deepparag/Aeona
625
+ deepparag/DumBot-Beta
626
+ deepparag/DumBot
627
+ delvan/DialoGPT-medium-DwightV1
628
+ df4rfrrf/DialoGPT-medium-Aerith
629
+ dhanushlnaik/amySan
630
+ disdamoe/DialoGPT-small-moe
631
+ disdamoe/TheGreatManipulator
632
+ disdamoe/TheManipulator
633
+ divi/Peterbot
634
+ dk16gaming/DialoGPT-small-HarryPotter
635
+ dkminer81/Tromm
636
+ dreamline2/DialoGPT-small-joshua-demo
637
+ dukeme/DialoGPT-small-RDBotv1
638
+ eclare/DialoGPT-small-SCHAEFER
639
+ educhav/Austin-DialoGPT-small
640
+ educhav/Elijah-DialoGPT-small
641
+ educhav/J-DialoGPT-small
642
+ educhav/Sam-DialoGPT-small
643
+ eklrivera/DialoGPT-small-harrypotter
644
+ eldritch-axolotl/Rick
645
+ ericklasco/DialoGPT-small-erickHarryPotter
646
+ ericzhou/DialoGPT-Medium-Rick
647
+ ericzhou/DialoGPT-Medium-Rick_v2
648
+ ericzhou/DialoGPT-medium-elon
649
+ ericzhou/tsundere_v1
650
+ estehpanas/pascalbot
651
+ ethzhou/jooby
652
+ ethzhou/joobyChat
653
+ ethzhou/newJooby
654
+ f00d4tehg0dz/Peppa
655
+ f00d4tehg0dz/Yoda
656
+ facebook/blenderbot-1B-distill
657
+ facebook/blenderbot-3B
658
+ facebook/blenderbot-400M-distill
659
+ facebook/blenderbot-90M
660
+ facebook/blenderbot_small-90M
661
+ faketermz/DialoGPT
662
+ fatemaMeem98/DialoGPT-medium-HermioneGrangerBot
663
+ felinecity/DioloGPT-small-KaeyaBot
664
+ felinecity/DioloGPT-small-KaeyaBot2
665
+ felinecity/DioloGPT-small-LisaBot
666
+ felinecity/ScaraBot
667
+ fibruh/DialoGPT-small-harrypotter
668
+ flakje/DialoGPT-small-Marty
669
+ flooptherocket/DialogGPT-small-rick
670
+ ftnvir/DialoGPT-medium-bullyMaguire
671
+ gabtan99/dialogpt-tagalog-medium-10
672
+ gabtan99/dialogpt-tagalog-medium-20
673
+ gabtan99/dialogpt-tagalog-medium-30
674
+ gabtan99/dialogpt-tagalog-medium
675
+ gfdream/dialogpt-small-familyguy
676
+ gfdream/dialogpt-small-harrypotter
677
+ ghhostboy/DialoGPT-medium-connorDBH3-1
678
+ ghhostboy/DialoGPT-medium-connorDBH3-21
679
+ gizmo-dev/DialoGPT-small-jake
680
+ gorkemgoknar/gpt2chatbotenglish
681
+ grayson124/chatbotwaifu
682
+ grounddominator/DialoGPT-lar-Rick
683
+ gusintheshell/DialoGPT-small-rickbot
684
+ gwima/ryan-sackmott
685
+ hama/Doctor_Bot
686
+ hama/Harry_Bot
687
+ hama/barney_bot
688
+ hama/me0.01
689
+ hama/rick_bot
690
+ heabeoun/DiabloGPT-small-nuon-conv
691
+ henryoce/DialoGPT-small-rick-and-morty
692
+ hervetusse/DialogGPT-small-harrypotter
693
+ hireddivas/DialoGPT-small-ray
694
+ hireddivas/DialoGPT-small-scully
695
+ hireddivas/dialoGPT-small-mulder
696
+ hireddivas/dialoGPT-small-phil
697
+ hireddivas/dialoGPT-small-sonic
698
+ honguyenminh/old-zhongli
699
+ houssaineamzil/DialoGPT-small-joey
700
+ hrv/DialoGPT-small-rick-morty
701
+ hyunwoongko/blenderbot-9B
702
+ hyunwoongko/reddit-3B
703
+ hyunwoongko/reddit-9B
704
+ iamalpharius/GPT-Small-BenderBot
705
+ ianc89/hagrid
706
+ ignkai/DialoGPT-medium-spider-man-updated
707
+ ilikeapple12/DialoGPT-small-Phos
708
+ imran2part/DialogGPT-small-Doctor
709
+ imrit1999/DialoGPT-small-MCU
710
+ myynirew/DialoGPT-medium-ettengiv
711
+ myynirew/DialoGPT-medium-leirbag
712
+ myynirew/DialoGPT-small-awazimuruk
713
+ ionite/DialoGPT-large-Sh0rtiAI-v2
714
+ ionite/DialoGPT-medium-IoniteAI
715
+ ionite/DialoGPT-medium-McKayAI-v2
716
+ ionite/DialoGPT-medium-McKayAI
717
+ ionite/DialoGPT-medium-Sh0rtiAI
718
+ ionite/DialoGPT-medium-mohnjilesAI
719
+ ionite/DialoGPT-medium-orangeAI
720
+ ironman123/DialoGPT-small-harrypotter
721
+ ishraaqparvez/DialoGPT-small-harrypotter
722
+ jackky46/DialoGPT-medium-got
723
+ jahz/DialoGPT-medium-FF8
724
+ jalensmh/DialoGPT-medium-jalenbot
725
+ jalensmh/DialoGPT-small-exophoria
726
+ jamestop00/DialoGPT-spike-medium
727
+ jasper/DialoGPT-large-homersimpson
728
+ jchen/DialoGPT-evan
729
+ jeanlks/DialogGPT-small-gayvid
730
+ jeanlks/DialogGPT-small-pato
731
+ jfhr1999/CharacterTest
732
+ jogp10/DialoGPT-medium-arya
733
+ jollmimmim/DialoGPT-small-monkeydluffy
734
+ jordanhagan/DialoGPT-medium-NegaNetizen
735
+ josephmagnayon/DialoGPT-medium-Alfred
736
+ josepjulia/RepoHumanChatBot
737
+ josh8/DialoGPT-medium-josh
738
+ josh8/DialoGPT-small-josh
739
+ jpsxlr8/DialoGPT-small-harrypotter
740
+ jth1903/DialoGPT-small-rick
741
+ julianolf/DialoGPT-small-harrypotter
742
+ kaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaot1k/DialoGPT-small-Wanda
743
+ kagennotsuki/DialoGPT-medium-radion
744
+ kche0138/DialoGPT-medium-DIO
745
+ kingabzpro/DialoGPT-small-Rick-Bot
746
+ kipiiler/Rickbot
747
+ knightbat/harry-potter
748
+ kripanshudixit/DialoGPT-small-phoenix
749
+ kris/DialoGPT-small-spock
750
+ kris/DialoGPT-small-spock3
751
+ kris/DialoGPT-small-spock4
752
+ kris/DialoGPT-small-spock5
753
+ kshitiz/testing-bot-repo
754
+ kunalbhargava/DialoGPT-small-housebot
755
+ kvothe28/DiabloGPT-small-Rick
756
+ l41n/c3rbs
757
+ lain2/Peterbot
758
+ lanejm/DialoGPT-small-hagrid
759
+ lapacc33/DialoGPT-medium-rick
760
+ life4free96/DialogGPT-med-TeiaMoranta
761
+ life4free96/DialogGPT-med-TeiaMoranta3
762
+ light/small-rickk
763
+ limivan/DialoGPT-small-c3po
764
+ cosmicroxks/DialoGPT-small-scott
765
+ logube/DialogGPT_small_harrypotter
766
+ lonewanderer27/DialoGPT-small-Joshua
767
+ lonewanderer27/KeitaroBot
768
+ lonewanderer27/YoshinoriBot
769
+ lonewanderer27/YuriBot
770
+ lovellyweather/DialoGPT-medium-johnny
771
+ luca-martial/DialoGPT-Elon
772
+ lucas-bo/DialogGPT-small-yoda
773
+ ludowoods/KujouSara
774
+ lulueve3/DialoGPT-medium-Kokkoro
775
+ lulueve3/DialoGPT-medium-Kokkoro2
776
+ madbuda/DialoGPT-got-skippy
777
+ madbuda/DialoGPT-medium-skippy
778
+ majonez57/JoeBot
779
+ manav/dialogpt-large-kanye-reddit
780
+ manav/dialogpt-medium-berkeley-reddit
781
+ maniacGhost24/MichaelScott-bot-push-small
782
+ manraf/DialoGPT-smmall-harrypotter
783
+ matprado/DialoGPT-small-rick-sanchez
784
+ maxxx2021/DialGPT-small-harrypotter
785
+ mdc1616/DialoGPT-large-sherlock
786
+ melon422/DialoGPT-medium-MelonBot
787
+ melon422/DialoGPT-medium-MelonBot2
788
+ mewmew/DialoGPT-small-rick
789
+ michelleshx/DialoGPT-small-michelle-discord-bot
790
+ microsoft/DialoGPT-large
791
+ microsoft/DialoGPT-medium
792
+ microsoft/DialoGPT-small
793
+ mikabeebee/Peterbot
794
+ milayue/neosh-bot1
795
+ minsiam/DialoGPT-medium-harrypotterbot
796
+ minsiam/DialoGPT-small-harrypotterbot
797
+ miogfd1234/ll
798
+ mittalnishit/DialoGPT-medium-rickman2
799
+ mittalnishit/DialoGPT-small-rickman
800
+ mjstamper/DialoGPT-small-samwise
801
+ mk3smo/dialogpt-med-ahiru
802
+ mk3smo/dialogpt-med-duck2
803
+ mk3smo/dialogpt-med-duck3
804
+ mk3smo/dialogpt-med-duck5
805
+ mk3smo/dialogpt-med-duckfinal
806
+ mk3smo/dialogpt-med-stt3
807
+ mklucifer/DialoGPT-medium-DEADPOOL
808
+ mklucifer/DialoGPT-small-DEADPOOL
809
+ mluengas/DialogGPT-small-michaelscott
810
+ model-mili/DailoGPT-Yukub-v3
811
+ model-mili/DialoGPT-small-Sapph-v1
812
+ model-mili/DialoGPT-small-Yukub-v2
813
+ model-mili/DialoGPT-small-Yukub
814
+ mohammedks713/DialoGPT-small-harrypotter
815
+ mohammedks713/DialoGPT-small-jonsnow
816
+ mra1ster/DialoGPT_scully_small
817
+ muhardianab/DialoGPT-small-theoffice
818
+ munezah/DialoGPT-small-aot
819
+ munezah/DialoGPT-small-sherlock
820
+ mutamuta/DialoGPT-small-rick
821
+ mutamuta/DialoGPT-spongebob-small
822
+ namanrana16/DialoGPT-small-TrumpBot
823
+ nanometeres/DialoGPT-medium-halbot
824
+ nanometeres/DialoGPT-small-halbot
825
+ ncoop57/DiGPTame-medium
826
+ niharikadeokar/DialoGPT-small-Jakebot
827
+ nikhilpatil2532000/DialoGPT-small-harrypotter
828
+ nimrazaheer/DialoGPT-small-harrypotter
829
+ nitishk/IronStarkBot
830
+ nlokam/DialoGPT-digibot3.0-new
831
+ nlokam/Digibot
832
+ nlokam/ada_V.3
833
+ nlokam/ada_V.6
834
+ nlokam/ada_V.7
835
+ nlokam/books_to_bots_v.00
836
+ noobed/DialoGPT-small-astley
837
+ norie4/DialoGPT-small-kyutebot
838
+ norie4/DialoGPT-small-memoji
839
+ not7even/DialoGPT-small-7evenpool
840
+ npc-engine/exported-bart-light-gail-chatbot
841
+ ntjrrvarma/DialoGPT-small-RickBot
842
+ nwl/DialoGPT-small-enhypen
843
+ nytestalkerq/DialoGPT-medium-joshua
844
+ oakkas/Dialge-small-harrypotter-oguz
845
+ odinmay/joebot
846
+ odinmay/zackbotmodel
847
+ ogpat123/DialoGPT-small-Michael
848
+ ogpat23/Jules-Chatbot
849
+ omkar1309/RickBot
850
+ omnimokha/DialoGPT-medium-jakeamal
851
+ omnimokha/DialoGPT-small-jakeamal
852
+ omnimokha/jakebot2
853
+ oododo/DialoGPT-small-elon
854
+ otto-camp/DialoGPT-small-RickBot
855
+ overgrowth/jokeboy
856
+ owencubes/DialoGPT-small-Josuke
857
+ paladinx00/rh-bender
858
+ parigaswetha/DialoGPT-small-jakeperalta
859
+ parthsinha/DialoGPT-small-rickandmorty
860
+ pashin/DialoGPT-small-ironman-2
861
+ pashin/DialoGPT-small-ironman-3
862
+ pashin/DialoGPT-small-ironman1
863
+ pastlecry/DialoGPT-small-harrypotter
864
+ peamjo/DialoGPT-small-morty
865
+ person123/DialoGPT-small-petergriffin
866
+ pewriebontal/DialoGPT-medium-Pewpewbon
867
+ phantom-deluxe/dialoGPT-RickBot
868
+ phantom-deluxe/dialoGPT-harry
869
+ phozon/harry-potter-medium
870
+ piyushdubey/DialoGPT-Mi
871
+ pompeiifreckles/DialoGPT-medium-Rick
872
+ ppn/DialoGPT-small-harrypotter
873
+ pranavtharoor/test
874
+ professional/DialoGPT-small-joshua
875
+ ps2102/DialoGPT-small-harrypotter
876
+ psblade/DialoGPT-medium-PotterBot
877
+ puugz/DialoGPT-small-spiderman
878
+ qwerty/DialoGPT-small-rick
879
+ r3cdhummingbird/DialoGPT-medium-joshua
880
+ r3dhummingbird/DialoGPT-medium-joshua
881
+ r3dhummingbird/DialoGPT-medium-neku
882
+ r3dhummingbird/DialoGPT-small-harrypotter
883
+ r3dhummingbird/DialoGPT-small-neku
884
+ rachelcorey/DialoGPT-medium-kramer
885
+ rachelcorey/DialoGPT-medium-niles
886
+ rafakat/Botsuana-rick
887
+ rahul26/DialoGPT-small-rickandmorty
888
+ rahulMishra05/discord-chat-bot
889
+ raj2002jain/DialoGPT-small-Light
890
+ ravephelps/DialoGPT-small-MichaelSbott
891
+ redbloodyknife/DialoGPT-medium-shayo
892
+ rhollings/DialoGPT_small_steverogers
893
+ richiellei/Childe
894
+ richiellei/Childe3
895
+ richiellei/DialoGPT-small-rick
896
+ richielleisart/Childe
897
+ ridwanpratama/DialoGPT-small-misaki
898
+ rinz/DialoGPT-small-Harry-Potterrr
899
+ rlagusrlagus123/XTC20000
900
+ rlagusrlagus123/XTC4096
901
+ rmicheal48/DialoGPT-small-steven_universe
902
+ rodrigodz/DialoGPT-medium-dxd
903
+ romuNoob/Mine
904
+ romuNoob/test
905
+ rovai/AI
906
+ rovai/CARRIE
907
+ rovai/Chat_pytorch1
908
+ rovai/chatbotmedium1
909
+ rovai/chatbotmedium2
910
+ rovai/chatbotmedium3
911
+ rovai/chatbotmedium4
912
+ rovai/chatbotone
913
+ rpeng35/DialoGPT-small-erenyeager
914
+ rrtong/DialoGPT-medium-shang-chi
915
+ rsd511/DialoGPT-small-house
916
+ rsedlr/RickBot
917
+ rsedlr/RickBotExample
918
+ ruriko/bacqua
919
+ ruriko/konoaqua
920
+ ruriko/konodio
921
+ sachdevkartik/DialoGPT-small-rick
922
+ saintseer121323/DialoGPT-small-kotonoha
923
+ sakai026/Chizuru
924
+ sakai026/Mizuhara
925
+ sam213/DialoGPT-small-harrypotter
926
+ sambotx4/scamantha
927
+ samuelssonm/DialoGPT-small-rick
928
+ sanjanareddy226/JakeBot
929
+ sankalpjha1/mr.bot_haary
930
+ satkinson/DialoGPT-medium-marvin
931
+ satkinson/DialoGPT-small-marvin
932
+ satvikag/chatbot
933
+ satvikag/chatbot2
934
+ sergunow/movie-chat
935
+ setiadia/DialogGPT-small-HPBot
936
+ shelb-doc/DialoGPT-medium-ash
937
+ shihab/HarryPotter
938
+ shonuff/DialoGPT-medium-konosuba
939
+ shreeshaaithal/DialoGPT-small-Michael-Scott
940
+ shreeshaaithal/Discord-AI-bot
941
+ shreeshaaithal/whatsapp-medium-bot-2
942
+ sidkhuntia/harrypotter
943
+ sifclairhelix/DialoGPT-small-harrypot
944
+ simrana5/RickBotExample
945
+ skynex/DialoGPT-small-batman
946
+ skynex/DialoGPT-small-finalbatman
947
+ sleekmike/DialoGPT-small-joshua
948
+ smilesandtea/DialoGPT-medium-Rick
949
+ smmzhu/DialoGPT-small-SZ
950
+ solfer/DialoGPT-small-ryuji
951
+ spockinese/DialoGPT-small-sherlock
952
+ sreyanghosh/DialoGPT-medium-joker
953
+ srirachasenpai/DialoGPT-medium-harrypotter
954
+ srv/DialoGPT-medium-Breaking_Bad
955
+ ssam/DialoGPT-small-RickmfSanchez
956
+ ssspider/DialoGPT-medium-harrypotter
957
+ stfuowned/nek
958
+ stfuowned/rick
959
+ sthom/DialoGPT-small-tin
960
+ sudip/bot1
961
+ sudoabrar/DialoGPT-small-dwight
962
+ suhasjain/DailoGPT-small-harrypotter
963
+ swapnil165/DialoGPT-small-Rick
964
+ terter/rick-bot-test-v2
965
+ thatoneguy267/DialoGPT-small-Oscar
966
+ thatoneguy267/bruhpleasehelpme
967
+ theChanChanMan/DialoGPT-small-chandler
968
+ thefryingpan/gpt-neo-125M-splishy
969
+ theiconik/hermione-granger
970
+ thesamuelpena/Dialog-medium-Sonic
971
+ thesamuelpena/Dialog-medium-masterchief
972
+ thetlwin/DialoGPT-small-ironman
973
+ thinhda/chatbot
974
+ thu-coai/CDial-GPT2_LCCC-base
975
+ thu-coai/CDial-GPT_LCCC-base
976
+ thu-coai/CDial-GPT_LCCC-large
977
+ ticet11/DialoGPT-small-BOBBY
978
+ timslams666/DialoGPT-small-rick
979
+ tinega/DialoGPT-small-harrypotter
980
+ tngo/DialoGPT-small-HankHill
981
+ toiletwater/DialoGPT-medium-ironman
982
+ tom1804/HP
983
+ tom1804/HP_last
984
+ tom1804/hp_new
985
+ tomascerejo12/DialoGPT-small-Rick
986
+ tosin/dialogpt_mwoz
987
+ tosin/dialogpt_sv
988
+ toyfreak/DialoGPT-small-addy
989
+ toyfreak/DialoGPT-small-shy
990
+ tpri/DialoGPT-small-pa
991
+ tprincessazula/Dialog-GPT-small-AANG
992
+ tprincessazula/Dialog-GPT-small-KATARA-AVATAR
993
+ tprincessazula/Dialog-GPT-small-SOKKA-AVATAR
994
+ tprincessazula/Dialog-GPT-small-harrypotter
995
+ transfaeries/DialoGPT-Discord
996
+ transfaeries/DialoGPT-medium-Discord-1.0
997
+ transfaeries/DialoGPT-small-Discord-1.0
998
+ transfaeries/Twilight-Sparkle-GPT
999
+ trig/DialoGPT-small-harrypotter
1000
+ trig/multiverse-second
1001
+ trig/multiverse
1002
+ trig/sokka-chatbot-test
1003
+ trig/tlok-test
1004
+ troythewar/DialogGPT-small-harrypotter
1005
+ truthisneverlinear/EleventhDoctor
1006
+ ttntran/DialoGPT-small-human
1007
+ tuantt/GroundNet
1008
+ ughvom/Ginger
1009
+ ughvom/britnayBOTMAIN
1010
+ umr55766/DialogGPT-small-peppa-pig
1011
+ usamazaheer/DialoGPT-small-harrypotter
1012
+ uutkras/Pandabot
1013
+ uyharold86/DialoGPT-small-RickAndMorty
1014
+ valarikv/DialoGPT-small-bateman
1015
+ vibranium19/DialoGPT-medium-jake
1016
+ victordata/DialoGPT-small-Rick
1017
+ victorswedspot/DialoGPT-small-gandalf
1018
+ vijayv500/DialoGPT-small-Big-Bang-Theory-Series-Transcripts
1019
+ vijote/DialoGPT-small-Morty
1020
+ vivek-g-2009/DialoGPT-medium-harrypotter
1021
+ vlco-o/NLboto_o-aki-dialogpt
1022
+ vlco-o/NLboto_o-small-dialogpt
1023
+ wadeed/DialogGPT-small-chandlerbingg
1024
+ wanderer/DialoGPT-small-Phoebe
1025
+ wjching/DialoGPT-small-ricksanchez
1026
+ won/DialoGPT-small-harrypotter
1027
+ worms3401/DialoGPT-small-Eleonora
1028
+ worsterman/DialoGPT-small-mulder
1029
+ wtrClover/DialoGPT-small-Flutterbot
1030
+ wtrClover/DialoGPT-small-TwilightBot
1031
+ xdmason/pretrainedCas
1032
+ xiaoheiqaq/DialoGPT-mediumJojo
1033
+ xiaoheiqaq/DialoGPT-smallharrypotter
1034
+ yahya1994/DialoGPT-small-AOT-Eren
1035
+ yahya1994/DialoGPT-small-DN-L
1036
+ yahya1994/DialoGPT-small-DN-Light
1037
+ yahya1994/DialoGPT-small-DN-Ryuk
1038
+ yahya1994/DialoGPT-small-Gintama-Gintoki
1039
+ yahya1994/DialoGPT-small-Parasyte-Migi
1040
+ yahya1994/DialoGPT-small-ReZero-Rem
1041
+ yahya1994/DialoGPT-small-ReZero-Subaru
1042
+ yahya1994/DialoGPT-small-Ryuk
1043
+ yusufmorsi/georgebot
1044
+ zaydzuhri/lelouch-medium
1045
+ zemi/jakebot
1046
+ zen-satvik/BotGPT-medium-HP
1047
+ zentos/DialoGPT-small-spongebob
1048
+ zinary/DialoGPT-small-rick-new
1049
+ zuto37/DialoGPT-small-sadao
1050
+ Maxwere/DiabloGPT-medium-maxbot
1051
+ Grungle/DialoGPT-medium-butters
1052
+ sadkat/technoai
1053
+ Grungle/DialoGPT-medium-butters2
1054
+ kookyklavicle/sean-diaz-bot
1055
+ kookyklavicle/sean-diaz
1056
+ Aquasp34/DialoGPT-small-aqua1
1057
+ zenham/khemx
1058
+ aryanbhosale/smartharrypotterbot
1059
+ Britain/DialoGPT-small-ZifBotTwoFixed
1060
+ Britain/DialoGPT-small-DanyBotThree
1061
+ infinitylyj/DialogGPT-small-rick
1062
+ infinitylyj/DialogGPT-small-general
1063
+ infinitylyj/DialogGPT-medium-general
1064
+ jackyv/DialoGPT-small-pinocchio
1065
+ Freak55/DialoGPT-small-Phoenix-Wright
1066
+ Britain/DialoGPT-small-DanyBotThreeFixed
1067
+ Britain/DialoGPT-small-DanyBotTwo
1068
+ P4RZ1V4L/DialoGPT-medium-tonystark
1069
+ Britain/DialoGPT-small-DanyBotTwoNew
1070
+ zenham/mskeen_m_e4_16h
1071
+ zenham/khemx_m_e4_16h
1072
+ zenham/wail_m_e4_16h_2k
1073
+ RTM/vilang
1074
+ BeanBoi50404/DialoGPT-small-PeppaPigButBetter
1075
+ nabin19677/small-cartman
1076
+ Prime2911/DialoGPT-small-handsomejack
1077
+ Starry/KARENTRIES
1078
+ dietconk/DialogGPT-small-Orange
1079
+ mafeu/DialoGPT-medium-willem
1080
+ Prime2911/DialoGPT-medium-handsomejack
1081
+ Meowren/DialoGPT-small-Rick-Bot
1082
+ DB13067/Peterbot
1083
+ Savitar/DialoGPT-medium-RickandMorty
1084
+ MolePatrol/Olbot
1085
+ erinchocolate/DialoGPT-small-harrypotter
1086
+ Valouzze/FairuvenIA
1087
+ MehSatho/Tai-medium-Hermione
1088
+ Valouzze/MegaIA
1089
+ Makinitas/DialoGPT-small-RickAndMortyScripts
1090
+ darthrussel/DialoGPT-small-rickandmorty
1091
+ vanilladucky/Friends_chatting_bot
1092
+ vanilladucky/Friends_chatting_bot_redefined
1093
+ chocoduck/Joey_bot
1094
+ duanxingjuan/DialoGPT-medium-DEMON_SLAYER
1095
+ pinkducky/Monica_Bot
1096
+ Starry/HELLORUKAS
1097
+ pinkducky/Rachel_Bot
1098
+ trig/multiverse-third
1099
+ pinkducky/Ross_Bot
1100
+ duanxingjuan/DialoGPT-large-DEMON_SLAYER_v1
1101
+ duanxingjuan/DialoGPT-large-DEMON
1102
+ duanxingjuan/DialoGPT-large-DEMON1
1103
+ issue89/DialoGPT-small-house
1104
+ LeonLi279/DialoGPT-small-harrypotter
1105
+ MolePatrol/DialoGPT-Medium-ConnerBot
1106
+ MolePatrol/DialoGPT-Medium-MoleBot
1107
+ TheDaydreamer/ricky
1108
+ BeamBee/DialoGPT-small-Lavenza
1109
+ Garsic/DialoGPT-medium-pecorine
1110
+ CallForEcho/DialoGPT-small-harrypotter
1111
+ BeamBee/DialoGPT-small-LavenzaNumTwo
1112
+ Meowren/MichaelScottBott
1113
+ shalpin87/dialoGPT-homer-simpson
1114
+ darthrussel/DialoGPT-small-homerbot-halfdata
1115
+ TheGoldenToaster/DialoGPT-medium-Woody
1116
+ bemich/DialoGPT-small-GeorgeCostanza
1117
+ AAAA-4/DialoGPT-small-player_03
1118
+ Teyronebigdick/DialoGPT-small-harrypotter
1119
+ Sammith/DialoGPT-small-miachael
1120
+ Nxtxn01/DialoGPT-small-harrypotter
1121
+ Teyronebigdick/DialoGPT-small-terrydavis
1122
+ mczolly/DialoGPT-small-the-doctor
1123
+ crazypegasus/GPT-JonSnow
1124
+ MrYiRen/DialoGPT-small-harrypotter
1125
+ TropicalJuice/Dialog-PeterGriffin
1126
+ TheGoldenToaster/DialoGPT-medium-Bot
1127
+ MrYiRen/DialoGPT-small-harrypotter2
1128
+ gulgulglut/DialoGPT-small-Rick
1129
+ trev/DialoGPT-small-MLP
1130
+ RAJESHNEMANI/Chatbot_AI
1131
+ lilapapazian/DialoGPT-small-harrypotter
1132
+ Alethea/GPT2-chitchat
1133
+ florentiino/DialoGPT-small-harrypotter
1134
+ NUTELEX/Eva
1135
+ jessicammow/DialoGPT-small-ronswanson
1136
+ MrYiRen/DialoGPT-small-ZC
1137
+ jessicammow/DialoGPT-medium-leslieknope
1138
+ AmbricJohnson5888/death
1139
+ AmbricJohnson5888/claura
1140
+ DarrellTimothy/DialoGPT-small-harrypotter
1141
+ RarePizzaDog/Apes_Bot
1142
+ iyedr8/DialoGPT-small-rick
1143
+ MEDT/ChatBot
1144
+ NonzeroCornet34/DialoGPT-small-hansolo
1145
+ NonzeroCornet34/DialoGPT-small-philbot
1146
+ atomsspawn/DialoGPT-medium-dumbledore
1147
+ florentiino/DialoGPT-small-rick
1148
+ ShibaDeveloper/DialoGPT-small-harrypotter
1149
+ sahilnare78/DialogGPT-medium-harrypotter
1150
+ Garsic/DialoGPT-medium-jill
1151
+ mdm/DialoGPT-small-Kanye
1152
+ ScyKindness/Hatsune_Miku
1153
+ aaaacash/DialoGPT-large-michaelscott
1154
+ AntoDono/DialoGPT-Harry
1155
+ BFMeriem/model
1156
+ BFMeriem/chatbot-model
1157
+ StringCheese/Dialog-small-bigbang
1158
+ jakewillms17/capcake-model
1159
+ Shivierra/DialoGPT-small-technoblade
1160
+ Scaprod/DialoGPT-small-arbiter
1161
+ Tlacaelel/DialoGPT-small-jarvis
1162
+ spuun/kekbot-beta-1
1163
+ Coma/Beter
1164
+ Wavepaw/DialoGPT-medium-WardenIngo
1165
+ Akarsh3053/potter-chat-bot
1166
+ MachineBabs/RickBot
1167
+ MachineBabs/DocBrown
1168
+ spuun/kekbot-beta-1-medium
1169
+ MEDT/Chatbot_Medium
1170
+ tosin/dialogpt_mwoz_idioms
1171
+ tosin/dialogpt_afriwoz_wolof
1172
+ aakhilv/tonystark
1173
+ spuun/kekbot-beta-2-medium
1174
+ xiaoGato/DialoGPT-small-villanelle
1175
+ Jonesy/DialoGPT-small_FG
1176
+ deathknight67/DialoGPT-medium-joshua
1177
+ kyriinx/DialoGPT-small-glyph
1178
+ Jonesy/DialoGPT-medium_FG
1179
+ spuun/kekbot-beta-3-medium
1180
+ Lisia/DialoGPT-small-connor
1181
+ awvik360/DialoGPT-medium-plemons-04262022
1182
+ Jonesy/LisaOnIce
1183
+ kvnaraya/DialoGPT-small-michael
1184
+ Hyperspace/DialoGPT-small-Hyperdrive
1185
+ Azuris/DialoGPT-medium-ekidona
1186
+ aditeyabaral/sonobois
1187
+ Jonesy/HomersNightOut
1188
+ Andrei0086/Chat-small-bot
1189
+ awvik360/UncleRuckus
1190
+ captainswiftfox/rickandmorty
1191
+ radicalrascal/DialoGPT-medium-jimmy
1192
+ dmoz47/DialoGPT-small-peterparker
1193
+ niprestige/GPT-small-DusabeBot
1194
+ Shakerlicious/DialoGPT-small-descentbot
1195
+ atomsspawn/DialoGPT-small-shelbot
1196
+ atomsspawn/DialoGPT-small-sheldon
1197
+ Willow/DialoGPT-medium-willow
1198
+ IsekaiMeta/dapprf
1199
+ farjvr/DialoGPT-small-Mortyfar
1200
+ InSaiyan/DialoGPT-small-harrypotter
1201
+ IsekaiMeta/dapprf3
1202
+ emolyscheisse/DialoGPT-small-mandybot
1203
+ IsekaiMeta/dapprf4
1204
+ qgdmonilla/DialoGPT-small-harrypotter
1205
+ NHStudios/DialoGPT-small-jake
1206
+ Shakerlicious/DialoGPT-small-raquelbot
1207
+ annasham/DialoGPT-small-myneighborTotoro
1208
+ CaptAdorable/RickBot
1209
+ Willow/DialoGPT-large-willow
1210
+ Kabutopusu/DialoGPT-medium-NITWMae
1211
+ HarmlessTarget/DialoGPT-medium-Bender
1212
+ soni69/DialoGPT-medium-holmes
1213
+ captainswiftfox/DialoGPT-small-rick
1214
+ kathywu/DialoGPT-small-kathy
1215
+ mybot/DialoGPT-medium-harrypotter
1216
+ Dedemg1988/DialoGPT-small-michaelscott
1217
+ pedrobaiainin/DialoGPT-small-harrypotter
1218
+ kathywu/DialoGPT-medium-kathy
1219
+ SNCannon/DialoGPT-medium-merc
1220
+ THE-DDLM/DialoGPT-sebastian
1221
+ fatirali/DialoGPT-medium-harrypotter
1222
+ TejasARathod/DialoGPT-medium-BatmanBot
1223
+ Varick/dialo-jarvis
1224
+ Robinsd/HarryBot
1225
+ dipstheman/DialoGPT-small-humanconversation
1226
+ dipstheman/DialoGPT-small-humanconversationpart
1227
+ LinkTheSinger/DialoGPT-small-Kanna
1228
+ LinkTheSinger/DialoGPT-small-Kannav4
1229
+ Robinsd/HarryBot4
1230
+ SomeRandomGuy/tony
1231
+ Meowren/HumanBot
1232
+ marcoperez/DialoGPT-small-rickandmorty
1233
+ LarsBell/DialoGPT-small-billyloomis
1234
+ okwach/mawaidhaChatbot
1235
+ LooksLikeIveLost/DialoGPT-medium-me
1236
+ okwach/mawaidhaChatbot2
1237
+ thebyy/DialoGPT-small-mortyisarick
1238
+ rongina/DialoGPT-small-cartman
1239
+ fransoa/arrombado-dms
1240
+ ionite/DialoGPT-medium-MarkAI
1241
+ ddrmaster1000/DialoGPT-medium-rick
1242
+ PeritusDux/DialoGPT-small-rick
1243
+ HomerChatbot/HomerSimpson
1244
+ t8oo/DialoGPT-small-zeni
1245
+ t8oo/DialoGPT-small-zenigata
1246
+ sexomq/DialoGPT-medium-TeoBot
1247
+ Char135/DialoGPT-medium-sebastian
1248
+ HomerChatbot/DialoGPT-small-HomerSimpson
1249
+ trev/Twilight-Sparkle
1250
+ gigikenneth/family-guy-bot
1251
+ ulises801/DialoGPT-medium-rick
1252
+ fujuta/DialoGPT-medium-HarryPotter
1253
+ fujuta/DialoGPT-medium-RonWeasley
1254
+ fujuta/DialoGPT-medium-HermioneGrander
1255
+ deepparag/Aeona-Beta
1256
+ HomerChatbot/DialoGPT-small-homersimpsonbot
1257
+ redcy/FrasierBotv1
1258
+ ElMuchoDingDong/DialoGPT-medium-AudreyHepburn
1259
+ natdon/DialoGPT_Michael_Scott
1260
+ ElMuchoDingDong/DialoGPT-medium-AudreyHepburn_v3
1261
+ deathmite/DiabloGPT-small-potaru
1262
+ ElMuchoDingDong/DialoGPT-medium-AudreyHepburn_v4
1263
+ DaBaap/Chat-Bot-Batman
1264
+ Iwa/bot
1265
+ badlawyer/DialoGPT-medium-sherlock-bot
1266
+ thanhchauns2/DialoGPT-medium-Luna
1267
+ jayklaws0606/DialoGPT-small-jayBot
1268
+ RUCAIBox/mvp
1269
+ Flem/DialoGPT-medium-alastor
1270
+ keans/DialoGPT-small-highjacker
1271
+ jayklaws0606/dgpt-small-jaybot
1272
+ CodeMaestro/DialoGPT-small-TChalla
1273
+ ElMuchoDingDong/AudreyBotBlenderBot
1274
+ stfuowned/rickfinal
1275
+ DuskSigma/DialogGPTHomerSimpson
1276
+ hireddivas/dialoGPT-small-sonic2
1277
+ N0NAne/DialoGPT-small-harrypotter
1278
+ tinkoff-ai/response-quality-classifier-tiny
1279
+ tinkoff-ai/response-quality-classifier-base
1280
+ tinkoff-ai/response-quality-classifier-large
1281
+ tinkoff-ai/response-toxicity-classifier-base
1282
+ RUCAIBox/mvp-open-dialog
1283
+ RUCAIBox/mtl-open-dialog
1284
+ RUCAIBox/mvp-multi-task
1285
+ Cirilaron/DialoGPT-medium-raiden
1286
+ BlackSamorez/rudialogpt3_medium_based_on_gpt2_2ch
1287
+ lucataco/DialogGPT-med-Rick
1288
+ lucataco/DialoGPT-medium-rafa
1289
+ gloomyworm/DialoGPT-small-ortho
1290
+ kozlovtsev/DialoGPT-medium-harrypotter
1291
+ Cirilaron/DialoGPT-medium-jetstreamsam
1292
+ lucataco/DialoGPT-medium-omar
1293
+ lucataco/DialoGPT-medium-milo
1294
+ daedalus2003/HouseBot
1295
+ SallyXue/DialoGPT-small-harrypotter
1296
+ Averium/DialoGPT-medium-TailsBot
1297
+ nlokam99/ada_sample
1298
+ nlokam99/ada_sample_2
1299
+ nlokam99/ada_sample_3
1300
+ nlokam/adanimals_V1
1301
+ spuun/kekbot-beta-4-medium
1302
+ quirkys/DialoGPT-small-harrypotter
1303
+ markofhope/DialoGPT-medium-HarringtonBot
1304
+ AntoDono/DialoGPT-Bopy-Alpha-1.01
1305
+ Hermite/DialoGPT-large-hermite
1306
+ robinhad/gpt2-uk-conversational
1307
+ Browbon/DialoGPT-small-LucaChangretta
1308
+ gloomyworm/DialoGPT-medium-ortho
1309
+ Browbon/DialoGPT-medium-LucaChangretta
1310
+ Fluffypillow/DialoGPT-small-Rem
1311
+ Hermite/DialoGPT-large-hermite2
1312
+ Bman/DialoGPT-medium-peppapig
1313
+ ZipperXYZ/DialoGPT-medium-TheWorldMachine
1314
+ AlyxTheKitten/DialoGPT-medium-AgedBlaine-2
1315
+ Averium/DialoGPT-medium-TailsBot1.1
1316
+ Elijah629/DialoGPT-mrsanai
1317
+ ZipperXYZ/DialoGPT-medium-TheWorldMachine2
1318
+ damianruel/DialoGPT-medium-MySon
1319
+ ZipperXYZ/DialoGPT-medium-TheWorldMachineExpressive
1320
+ Elijah629/DialoGPT-shrek
1321
+ AlyxTheKitten/DialoGPT-medium-Jimmis-2
1322
+ dennis-fast/DialoGPT-ElonMusk
1323
+ Sealgair/DialoGPT-medium-Eyden
1324
+ crystallyzing/DialoGPT-small-nishikiyama
1325
+ crystallyzing/DialoGPT-small-kiryu
1326
+ NikkiTiredAf/DialoGPT-small-billy2
1327
+ Evokus/DialoGPT-small-harrypotter
1328
+ mcimmy/DialoGPT-small-bob
1329
+ Laggrif/DialoGPT-medium-Luke
1330
+ Laggrif/DialoGPT-medium-3PO
1331
+ ZipperXYZ/DialoGPT-medium-TheWorldMachineExpressive2
1332
+ prprakash/DialoGPT-small-TonyStark
1333
+ sexomq/TeoBot-Romanian-medium
1334
+ Bman/DialoGPT-medium-dora
1335
+ Hermite/DialoGPT-large-hermite3
1336
+ Averium/FabioBot
1337
+ arem/DialoGPT-medium-rickandmorty
1338
+ soProf1998/DialoGPT-small-chattyrick
1339
+ soProf1998/DialoGPT-medium-chattyrick
1340
+ Dorin/DialoGPT-small-Rick
1341
+ OptimalHoiboy/DialoGPT-small-kasumai
1342
+ Hartmann/DialoGPT-small-koishikomeiji
1343
+ Konbai/DialoGPT-small-akagi
1344
+ Konbai/DialoGPT-small-akagi2
1345
+ JazzyLucas/DialoGPT-small-TonyStark
1346
+ mystery/DialoGPT-small-pinkiepie
1347
+ sexomq/TeoBot-Romanian-medium2
1348
+ erikycd/chatbot_hadita
1349
+ infinix/Sheldon-bot
1350
+ JamesonSpiff/chatBot_test_model
1351
+ Akito1961/DialoGPT-small-C3PO
1352
+ Naturealbe/DialoGPT-small-Technoblade
1353
+ zR0clu/DialoGPT-medium-Mr.Roboto
1354
+ reso/DialoGPT-medium-v3ga
1355
+ trimox/tryingnewstuff
1356
+ Nakul24/YC_Bot
1357
+ casperthegazer/DiabloGPT-medium-lukedot
1358
+ JamesStratford/PLord-bot-DialoGPT-medium
1359
+ CaptPyrite/DialoGPT-small-cat
1360
+ SafeTorpedo/DialoGPT-small-MichaelBot
1361
+ brianveebee/DialoGPT-medium-bender
1362
+ myynirew/DialoGPT-medium-shouko01
1363
+ myynirew/2-0OKUOHS
1364
+ smmzhu/DialoGPT-medium-sam
1365
+ myynirew/shouko0-3
1366
+ myynirew/dumbbot
1367
+ Lamia/DialoGPT-small-Sundrop
1368
+ ashtrindade/chatbot-stacey
1369
+ tinkoff-ai/ruDialoGPT-small
1370
+ tinkoff-ai/ruDialoGPT-medium
1371
+ 24adamaliv/DialoGPT-medium-Will
1372
+ cybertelx/DialoGPT-small-drunkic0n
1373
+ Rick-C137/DialoGPT-small-rick
1374
+ debyve/dumbbot
1375
+ Amir-UL/JimBot
1376
+ BoxCrab/DialoGPT-small-Strider
1377
+ AbdalK25/DialoGPT-small-TheWiseBot
1378
+ casperthegazer/DialoGT-gandalf-urdot
1379
+ pineappleSoup/DialoGPT-medium-707
1380
+ Nakul24/AD_ChatBot
1381
+ TeaTM/DialoGPT-small-bushcat
1382
+ ionite/DialoGPT-medium-NakaAI
1383
+ Creepton/DDLCYuri-DialoGPT-small
1384
+ TeaTM/DialoGPT-large-bushcat
1385
+ yazinga/DialoGPT-medium-scout
1386
+ throwaway112358112358/DialoGPT-medium-script
1387
+ Jingna/test_hpv_discord
1388
+ anonchickenlegs/sartoshi-bot
1389
+ xander-cross/DialoGPT-small-EvilMortyTheBot
1390
+ Bman/DialoGPT-medium-shrek
1391
+ Yank2901/DialoGPT-small-Rick
1392
+ akshatpandeyme/DialoGPT-small-manpreet
1393
+ Jenwvwmabskvwh/DialoGPT-small-josh444
1394
+ akshatpandeyme/DialoGPT-small-parthiv
1395
+ akshatpandeyme/DialoGPT-small-ParthivBot
1396
+ seeksery/DialoGPT-calig
1397
+ akshatpandeyme/DialoGPT-small-AnyaBot
1398
+ Jordine/shitter
1399
+ model-attribution-challenge/DialoGPT-large
1400
+ seeksery/DialoGPT-calig2
1401
+ obl1t/DialoGPT-medium-Jotaro
1402
+ trickstters/DialoGPT-small-evanbot
1403
+ trickstters/evanbot-gpt
1404
+ AriakimTaiyo/gpt2-chat
1405
+ Yank2901/DialoGPT-small-Harry
1406
+ lizz27/DialoGPT-small-baymax
1407
+ obl1t/DialoGPT-medium-Jolyne
1408
+ seeksery/DialoGPT-calig3
1409
+ Jenwvwmabskvwh/DialoGPT-small-josh445
1410
+ trickstters/evbot2
1411
+ Jenwvwmabskvwh/DialoGPT-small-josh450
1412
+ lizz27/DialoGPT-medium-BaymaxBot
1413
+ soop/DialoGPT-medium-BaymaxBot
1414
+ abelblue3/DialoGPT-medium-baymax
1415
+ priyankac/DialoGPT-medium-BaymaxBot
1416
+ Ironpanther1/Testing
1417
+ tosin/dialogpt_afriwoz_pidgin
1418
+ Anon25/DialoGPT-Medium-BaymaxBot
1419
+ GoldenRedstone/DialoGPT-medium-Phoenix-Wright
1420
+ Primobot/DialoGPT-small-harrypotter
1421
+ Lyem/LyemBotv1
1422
+ JamesSantosxx/DialoGPT-small-harrypotter
1423
+ Lyem/LyemBotv2
1424
+ Ironpanther1/ArtoriaBot
1425
+ Swervin7s/DialoGPT-medium-anakin
1426
+ DogH2O/DialoGPT-small-naruto
1427
+ NoPeanuts/DialoGPT-small-po
1428
+ Gravitygaming/homerai
1429
+ Lyem/LyemBotv3
1430
+ celine45688/LuTing
1431
+ antwortemir/shouko04
1432
+ SebastianS/MetalSebastian
1433
+ notaproblem00/DialoGPT-small-bakugou
1434
+ myodoctor/DIALOGPT-medium-HarryPotterBot
1435
+ aniketface/DialoGPT-medium-elon
1436
+ noiseBase/DialoGPT-small-HarryPotter
1437
+ karan21/DialoGPT-medium-rickandmorty
1438
+ karan21/DialoGPT-medium-guin
1439
+ Sophiejs/DialoGPT-small-BlaineBot
1440
+ skouras/DialoGPT-small-swda
1441
+ skouras/DialoGPT-small-maptask
1442
+ TheodoreAinsley/LindaGold
1443
+ AlbedoAI/DialoGPT-large-Albedo
1444
+ AlbedoAI/DialoGPT-large-Albedo2
1445
+ willmay/DialoGPT-medium-will
1446
+ AlbedoAI/DialoGPT-medium-Albedo
1447
+ chulainn/DialoGPT-medium-Zuko
1448
+ ctoner2653/DialoGPT-medium-RickBoty
1449
+ Number4/DialoGPT-medium-harrypotter
1450
+ yummyhat/DialoGPT-small-spike
1451
+ EllyPony/flutterbot
1452
+ Suryansh-23/DialoGPT-small-MichaelScottOffice
1453
+ Cirilaron/DialoGPT-medium-vergil
1454
+ Izuuk/izuuk
1455
+ shungyan/Diablo-small-harrypotter
1456
+ bhavyasharma/DialoGPT-small-harrypotter
1457
+ nintwentydo/rickbot
1458
+ tylersfoot/DialoGPT-medium-rick
1459
+ EJoftheVern/DialoGPT-medium-shaggy
1460
+ xtraXpert/DialoGPT-small-RickAndMorty2
1461
+ ANIKEThash/DialoGPT-medium-character
1462
+ Noonw/DialoGPT-small-hijackersexurmom
1463
+ fat32man/elon_answers
1464
+ MinhP/DialoGPT-small-themis
1465
+ Noonw/DialoGPT-small-osamaflyplane
1466
+ Noonw/DialoGPT-small-ladenflyplane
1467
+ Noonw/DialoGPT-small-ladenonjet
1468
+ MinhP/DialoGPT-small-franco
1469
+ Karan59/DialoGPT-small-evaModel
1470
+ marblyso/DialoGPT-medium-marblesbagel
1471
+ Jojo17/DialoGPT-small-RickAndMorty
1472
+ deseipel/medium-LucyClarke_
1473
+ DiscordBackup/model0000
1474
+ SirSpiffy/IvanModel
1475
+ woodmtaylor/DialoGPT-small-Heej
1476
+ woodmtaylor/DialoGPT-medium-Heej
1477
+ OctaviusI/marisaV0
1478
+ ChloeMJM/DialoGPT-small-rick
1479
+ JDesignEra/DialoGPT-small-Anya
1480
+ MrE/DialoGPT-medium-SARGER4
1481
+ aarya-c111/DialoGPT-small-Rogers
1482
+ bozlucas/DialoGPT-medium-HermioneBot
1483
+ LasseVKP/DialoGPT-Mogens
1484
+ metaloopa/DialoGPT-medium-Rintaro
1485
+ ingen51/DialoGPT-medium-GPT4
1486
+ Divyesh/DialoGPT-medium-harrypotter
1487
+ Natsuki-Chan/DialoGPT-medium-luz
1488
+ akira2001/DialoGPT-medium-harrypotter
1489
+ osueng02/DialoGPT-small-STAN_BOT
1490
+ osueng02/DialoGPT-medium-STAN_BOT
1491
+ wormed/DialoGPT-small-denai
1492
+ RehanP123/DialoGPT-medium-kermit.old
1493
+ Nakul24/SM_Bot
1494
+ chulainn/DialoGPT-medium-Ragnar
1495
+ aniketface/DialoGPT-product
1496
+ shohanursobuj/DialoGPT
1497
+ marblyso/DialoGPT-medium-hero
1498
+ marblyso/DialoGPT-medium-kel
1499
+ marblyso/DialoGPT-medium-aubrey
1500
+ akil191/small-test-harryakakakaka
1501
+ sanpellegrino/CoryBot
1502
+ Arqhero/DialoGPT-small-adventuretime
1503
+ chulainn/DialoGPT-medium-Tyrion
1504
+ VTG/MentalHealthChatbotv1
1505
+ luminolblue/HomunculusGPT-testbot
1506
+ Paulina354/DialoGPT-small-rickandmorty
1507
+ khuranagarvit019/MentalHealthChatbot
1508
+ VirtualizedTrash/Chatbot
1509
+ pedrocaribe/DialoGPT-medium-LL
1510
+ queenaccila/DialoGPT-small-kashiwagi
1511
+ GarfExit/DialogGPT-medium-707
1512
+ marblyso/DialoGPT-medium-shepherd
1513
+ Spectre29/DialoGPT-small-Kaisa
1514
+ Spectre29/Kaisa-converse-model
1515
+ ZedTheUndead/Rick_fragment
1516
+ marblyso/DialoGPT-medium-mari
1517
+ Delicious/DialoGPT-small-harrypotter
1518
+ BBHKR/DialoGPT-small-jacksparrow
1519
+ Guwon/DialoGPT-small-Quincy
1520
+ epeicher/DialoGPT-small-homer-2
1521
+ timmychanga/DialoGPT-small-ashley
1522
+ mywateriswet/ShuanBot
1523
+ epeicher/DialoGPT-small-flanders
1524
+ Super-McTea/DialoGPT-small-McTea
1525
+ Eronzin/meuBotzindoEron
1526
+ Techdra/DialoGPT-large-theboy
1527
+ Eronzin/DialoGPT-small-Frodo
1528
+ gtgillott/gib
1529
+ AwesomeDWNJ/EmiBot
1530
+ CJ3/DialoGPT-medium-amber3
1531
+ GamerMan02/DialoGPT-medium-gamerbot2
1532
+ GamerMan02/DialoGPT-medium-gamerbot1
1533
+ Insomnic/DialoGPT-small-harrypotter
1534
+ Super-McTea/DialoGPT-small-McTeaV2
1535
+ FelipeJoazeiro/chatbot-morty
1536
+ microsoft/GODEL-v1_1-base-seq2seq
1537
+ microsoft/GODEL-v1_1-large-seq2seq
1538
+ Rencist/DialoGPT-small-rick
1539
+ scorpiofrens/DialoGPT-medium-ergon
1540
+ somemusicnerdwoops/DialoGPT-small-shadow
1541
+ powchang/DialoGPT2-medium-CAiFE
1542
+ ratneshrt/DialoGPT-small-Artico
1543
+ somemusicnerdwoops/DialoGPT-distilgpt2-sonicfandub
1544
+ Tsec-Research/DialoGPT-chandler-penny
1545
+ neonon/DialoGPT-medium-cloy
1546
+ ddae208s/DialoGPT-small-dimitri
1547
+ mossfarmer/VRANAK
1548
+ Matax/Aristrathor3000
1549
+ brownanchovy/Harry
1550
+ Overlrd/DialoGPT-small-cartman
1551
+ epeicher/DialoGPT-large-homer
1552
+ comradesocrates/DialoGPT-medium-stranger
1553
+ Rakublu/DialoGPT-small-yasuo
1554
+ neonon/DialoGPT-medium-htccc
1555
+ Alt41r/gpt-simpson
1556
+ Nimit-Jjw/DialoGPT-chandler-penny
1557
+ Quoc123/DialoGPT-small-AQUA
1558
+ marblyso/DialoGPT-medium-pearl
1559
+ estus2/rick-superu-rick2
1560
+ marblyso/DialoGPT-medium-marina
1561
+ rovenmusic/DialoGPT-small-melodybot
1562
+ deseipel/small-LucyClarke_
1563
+ rovenmusic/DialoGPT-small-melodybotv2
1564
+ rovenmusic/DialoGPT-small-melodybotv3
1565
+ epeicher/DialoGPT-medium-homer
1566
+ andrewkroening/GalaxyFarAway-DialoGPT-HanSolo
1567
+ nams/nams-bot
1568
+ Nicktherat/DialoGPT-medium-endella
1569
+ alfirsaafauzulh/DialoGPT-small-KamuiBastion
1570
+ rovenmusic/DialoGPT-small-melodyv10
1571
+ somesh212/Harry_Potter-BOT
1572
+ somesh212/Harry_Potter_botDialoGPT_Som2
1573
+ jmagine/DialoGPT-small-metahead
1574
+ somesh212/Harry_Potter_botDialoGPT_Som3
1575
+ rovenmusic/DialoGPT-small-melodyvfinal
1576
+ jmagine/DialoGPT-small-jmagine
1577
+ jmagine/DialoGPT-small-funded
1578
+ jmagine/DialoGPT-small-jimj
1579
+ andrewkroening/GalaxyFarAway-DialoGPT-LukeSkywalker
1580
+ andrewkroening/GalaxyFarAway-DialoGPT-Threepio
1581
+ andrewkroening/GalaxyFarAway-DialoGPT-Vader
1582
+ andrewkroening/GalaxyFarAway-DialoGPT-LeiaOrgana
1583
+ andrewkroening/GalaxyFarAway-DialoGPT-Yoda
1584
+ Wizardd/DialoGPT-small-sheldon
1585
+ BenKJH/DialoGPT-small-lucybotasg
1586
+ Ananjas/AwooAI
1587
+ Ananjas/AwooV2
1588
+ kookyklavicle/gpt-sean-diaz
1589
+ kookyklavicle/SeanDiazBot2
1590
+ Ananjas/AwooV3
1591
+ Overlrd/DialoGPT-medium-cartman
1592
+ Ananjas/AwooV6
1593
+ mathecas/HarryPotterBotAI
1594
+ Karina256/DialoGPT-small-dory
1595
+ Tony8657/DialoGPT-small-TonyStarkBot
1596
+ SebastianS/my_mim
1597
+ TFS668/DialoGPT-small-Rick
1598
+ redhoff/DialoGPT-Medium-RedBot
1599
+ FeriVOQ/DialoGPT-small-joshua
1600
+ Triobloid/DialoGPT-small-lianaharrypotter
1601
+ quinnzie/DialoGPT-small-sinister
1602
+ FarziBuilder/DialoGPT-medium-harrypotter
1603
+ sohampatil/DialoGPT-small-mentalchatbot
1604
+ gtkarber/DialoGPT-medium-columbo
1605
+ PaddlePaddle/plato-mini
1606
+ Junkan/DialoGPT-medium-Bilbo
1607
+ ThatSkyFox/DialoGPT-medium-whatsapp
1608
+ Ar4ikov/DialogAgentGPT2
1609
+ reallygoodtechdeals/Bingocat-ai-Dialo-GPT-medium
1610
+ thmauler/crashed
1611
+ OptionaI/DialoGPT-small-beepboopy
1612
+ davebathhews/DialoGPT-OTIS
1613
+ GGOM/SipBotGGOM
1614
+ davebathhews/DialoGPT-OTISBOT
1615
+ GGOM/WillBotGGOM
1616
+ GGOM/ElyasBotGGOM
1617
+ reallygoodtechdeals/steve-ai-Dialo-GPT-medium
1618
+ Crushtoe/DialoGPT-small-vangluss
1619
+ apotempest/DialoGPT-medium-geralt
1620
+ DiogoSabec/DialoGPT-small-joshua
1621
+ WaleedArif/DialoGPT-small-Micheal
1622
+ Crushtoe/DialoGPT-medium-vangluss
1623
+ Crushtoe/GODEL-v1_1-base-seq2seq-vangluss
1624
+ DiogoSabec/BOT
1625
+ Le033/DialoGPT-small-rickmorty
1626
+ Filosofas/DialoGPT-medium-PALPATINE2
1627
+ JadansTower/jobot
1628
+ NTMNathan/DialoGPT-small-harrypotter
1629
+ Ashypaws/DialoGPT-medium-Ashybot
1630
+ wmdosborne/DialoGPT-medium-kyritebot
1631
+ worms3402/DialoGPT-small-automata2
1632
+ Pi3141/DialoGPT-small-elon
1633
+ Grendar/Dialo-GPT-medium-shiro
1634
+ Pi3141/DialoGPT-medium-elon
1635
+ Pi3141/DialoGPT-medium-elon-2
1636
+ JoshuaPawlik/DialoGPT-medium-joshua
1637
+ Pi3141/DialoGPT-medium-elon-3
1638
+ josephthen3320/DialoGPT-small-walter
1639
+ robbiegwald/Rick
1640
+ Gurtej/Drbot
1641
+ Hereward/DialoGPT_medium_ObiWan_Kenobi
1642
+ Giu888/DialoGPT-small-sao
1643
+ Grendar/blenderbot-400M-distill-Shiro
1644
+ keeg8/Book-0-1500
1645
+ keeg8/Book-1500-1700
1646
+ keeg8/Book-1850-1900
1647
+ keeg8/Book-1700-1850
1648
+ karlreimond/DialoGPT-small-harrypotter
1649
+ lenartlola/SpongeBob
1650
+ lenartlola/rick-bot
1651
+ Deedlit/DialoGPT-small-southpark
1652
+ babylasagne/DialoGPT-small-narryuto
1653
+ babylasagne/DialoGPT-small-harry
1654
+ babylasagne/DialoGPT-small-spider
1655
+ babylasagne/DialoGPT-small-batman
1656
+ BradHeffernan/rickNmortyModel
1657
+ UmUDev/DialoGPT-medium-AlexVN
1658
+ ukikunz/gas-kenji-medium
1659
+ ukikunz/gas-kenji
1660
+ Isokeel/DialoGPT-medium-KMbot
1661
+ KakoSi/AcciGPT-smol
1662
+ Spoofed/DiabloGPT-small-peter
1663
+ sophiadt/DialoGPT-medium-707
1664
+ UmUDev/DialoGPT-medium-Alex
1665
+ PygmalionAI/pygmalion-350m
1666
+ sophiadt/DialoGPT-medium-reigen
1667
+ rexfi/DialoGPT-small-peter
1668
+ rexfi/NafezBot-DialoGPT
1669
+ caps1994/chris-bot
1670
+ rexfi/RickyBot
1671
+ allenai/cosmo-xl
1672
+ woodmtaylor/DialoGPT-large-Dumpling
1673
+ rexfi/MikeScottBot
1674
+ apfallinus/RickBot
1675
+ apfallinus/HarryBot
1676
+ apfallinus/MedBot
1677
+ apfallinus/AeonaBot
1678
+ apfallinus/BatmanBot
1679
+ apfallinus/AiBot
1680
+ LostXOR/TotallyNotARobot
1681
+ gachaddict/DialoGPT-medium-ike
1682
+ OctaviusI/staging
1683
+ PygmalionAI/pygmalion-1.3b
1684
+ Terrymir/DialoGPT-medium-Soraka
1685
+ SantiPingui58/DialoGPT-small-hika
1686
+ ss1612/montana-chat
1687
+ MrEmpty/DialoGPT-small-rickandmorty
1688
+ shikiskhakis/DialoGPT-small-blackdoom
1689
+ alexandreteles/GPTChizuru
1690
+ Chae/scottbot_med
1691
+ AhmedMostafa/DialoGPT-small-Rick
1692
+ metkoon/30dollarceo
1693
+ Dinocroth/DialoGPT-medium-Trevor-PhilipsV2
1694
+ metkoon/MatBot
1695
+ SmallQ/DialoGPT-small-Anya
1696
+ bigbossa/aiko6
1697
+ GK123/DialoGPT-medium-hanbot
1698
+ TheHappyDrone/DialoGPT-medium-salesman
1699
+ Pcik/DialoGPT-medium-Jaiden
1700
+ TheHappyDrone/DialoGPT-medium-Nexus-Nova
1701
+ Pcik/DialoGPT-medium-Dante
1702
+ AlmightyDeathCheater/DialoGPT-medium-harrypotter
1703
+ Pcik/DialoGPT-medium-Kirby
1704
+ Starry/COUNTNARC
1705
+ TheHappyDrone/DialoGPT-medium-Nexus-Nova-turing-v2
1706
+ wetwoteraq/DialoGPT-medium-aqua
1707
+ wetwoteraq/DialoGPT-small-peter
1708
+ wetwoteraq/DialoGPT-medium-peter
1709
+ lilexo2/DialoGPT-medium-Monica
1710
+ momo10/DialoGPT-small-harryPotter
1711
+ Antale123/ConorBot
1712
+ shikiskhakis/DialoGPT-small-xemnas
1713
+ Ecook/DialoGPT-medium-Ecook
1714
+ PygmalionAI/pygmalion-2.7b
1715
+ FowlerF/DiscordChatBot
1716
+ JoeRoganfan-69420/DialoGPT-medium-HarryPotterbot
1717
+ dusty310/DialoGPT-medium-Misaki
1718
+ Gurtej/Drbot2
1719
+ Gurtej/Drbot3
1720
+ Gurtej/Drbot4
1721
+ Gurtej/Drbot5
1722
+ Gurtej/Drbot6
1723
+ Gurtej/Drbot7
1724
+ Gurtej/Drbot8
1725
+ Gurtej/Drbot9
1726
+ PygmalionAI/pygmalion-6b
1727
+ Gurtej/Drbot11
1728
+ navygup/Mood-Tracker
1729
+ Maraslumunnus/DialoGPT-small-ivern
1730
+ DAS9051/BatemanChatBot
1731
+ SmallQLALA/DialoGPT-small-Anya
1732
+ RinkaDev/GPT-Peppa-Pig
1733
+ thu-coai/blenderbot-1B-augesc
1734
+ siyaT/DialoGPT-harrypotter-small
1735
+ keircare/DialoGPT-small-RickSanchez
1736
+ shiiiroe/DialoGPT-medium-kirito
1737
+ jdakillah/Rick
1738
+ kielljoy/DialoGPT-small-stupidspecialkay
1739
+ Ashypaws/DialoGPT-medium-Kitaibot
1740
+ jdakillah/RICK-V2
1741
+ jdakillah/Bender
1742
+ jdakillah/Generalbot
1743
+ kielljoy/DialoGPT-medium-ryanbot
1744
+ emre/spanish-dialoGPT
1745
+ vuminhtue/DialoGPT-large-HarryPotter3
1746
+ ralphsorz/DialoGPT-small-samwise
1747
+ SumYin/DialoGPT-small-Homer
1748
+ JamesRoy/DGPT-DC
1749
+ Blizzchor/DialoGPT-medium-HarryBotter
1750
+ gjhghjk/rick
1751
+ gjhghjk/rick2
1752
+ SumYin/ZeroTwo-Medium-DialoGPT
1753
+ Blizzchor/DialoGPT-medium-gamora
1754
+ Mydia2/DialoGPT-small-Flonnealive
1755
+ AL-CT/DialoGPT-small-slayer
1756
+ DhruvShek/Webraft-Ai
1757
+ arno2077/DiabloGPT-small-harrypotter
1758
+ keyonecs/fourept-debique-gpt
1759
+ Blizzchor/DialoGPT-medium-QuillLord
1760
+ callmeclover/Stinger-CONVRS_MODL
1761
+ aminFelah/DialogueGPT-very-small-harryPotter
1762
+ Keijuro/aeris-dialogpt
1763
+ Abdelrahman853/DialoGPT-small-echo
1764
+ Bearfoot/DialoGPT-medium-shrek
1765
+ arthme2/jay
1766
+ arthme2/DialoGPT-medium-Jay
1767
+ 42meow/DialoGPT-medium-42meow
1768
+ Peeepy/Evie
1769
+ HorniFolks/Unicorn
1770
+ waifu-workshop/pygmalion-6b
1771
+ agenttylostudios/DialoGPT-small-Bocchi
1772
+ GregariousJamie/DialoGPT-small-jamie
1773
+ Fuwaguwa/DialoGPT-Medium-AzurLaneMusashi-v8
1774
+ s3nh/DialoGPT-large-Rick
1775
+ s3nh/DialoGPT-large-Morty
1776
+ s3nh/DialoGPT-small-morty
1777
+ Givinghawk/GPT-Morty
1778
+ DhruvShek/swearbot
1779
+ grart/DialoGPT-small-gillion
1780
+ interpixle/Sir_Caladan
1781
+ s3nh/DialoGPT-tony-montana
1782
+ s3nh/DialoGPT-small-harry-potter-goblet-of-fire
1783
+ s3nh/DialoGPT-small-hermione-granger-goblet-of-fire
1784
+ s3nh/DialoGPT-small-woody-toy-story
1785
+ s3nh/DialoGPT-small-buzz-toy-story
1786
+ puj0/DialoGPT-small-joshua
1787
+ julianvd49/DialoGPT-medium-EllieBot
1788
+ Sreyas/DialoGPT-small-elit
1789
+ DiscordRequestsAPI/DialoGPT-medium-NurDeeps
1790
+ MarinHinawa/DialoGPT-medium-Ene
1791
+ polandball/polanball
1792
+ whoami24142/DialoGPT-small-padilha
1793
+ DiscordRequestsAPI/NurDeeps-Bot
1794
+ Vaibhav-rm/GPT2-Shri-v1
1795
+ chrisrowles/DialoGPT-small-chrisrowles
1796
+ espeon98/DialoGPT-kenny-bot
1797
+ espeon98/DialoGPT-kenny-bot-2
1798
+ polandball/GPT-Polen
1799
+ chrisrowles/DialoGPT-medium-chrisrowles
1800
+ DiscordRequestsAPI/NurDeeps-Bot-2
1801
+ steerevo88/DialoGPT-small-baiken
1802
+ akiFQC/japanese-dialogpt-small-aozora
1803
+ Ngao/DialoGPT-small-ngao
1804
+ Mineroero/DialoGPT-medium-M4SOPMOD
1805
+ simple2312/DialoGPT-nayeon
1806
+ nemowet88/DialoGPT-small-ricktest
1807
+ Abraxas3d/house
1808
+ vampiregirl/DialoGPT-medium-lennoxram
1809
+ aisingapore/coherence-momentum
1810
+ simple2312/DialoGPT-Ellie
1811
+ simple2312/DialoGPT-Twice
1812
+ testaws/DialoGPT-small-joshua
1813
+ nemowet88/output-pythia-test
1814
+ Gurtej/Drbot12
1815
+ Gurtej/Drbot13
1816
+ Gurtej/Drbot14
1817
+ Gurtej/Drbot16
1818
+ EZSNoVa/DialogGPT-medium-NoVa
1819
+ mattallio/Archivist-medium-dialoGPT
1820
+ rlatt/DialoGPT-small-RickSanchez
1821
+ Lyforth/DialoGPT-Medium-Maribelle
1822
+ kittenwhiperer/Deadpool
1823
+ KumquatJoe/DialoGPT-medium-MaleToucherBot
1824
+ lmkhoa/GODEL_base_model
1825
+ JamesStratford/Pidrow-bot-DialoGPT-Large-Feb2023
1826
+ LrxLcs/DialogGPT2-SMAL
1827
+ Delcos/internal_chat_model_e2
1828
+ euvu/DialoGPT-small-harrypotter
1829
+ LrxLcs/GPT2-V2
1830
+ LrxLcs/GPT2-Test
1831
+ euvu/euvu-rickbot
1832
+ Weeeeeeeeeeeee00/DialoGPT-small-harrypotter
1833
+ slyslasher24/DialoGPT-Medium-Pondweed
1834
+ slyslasher24/DialoGPT-Small-Pondweed
1835
+ bradydawg/AI-Bot2
1836
+ aisingapore/rumour-detection-twitter
1837
+ RatInChat/Pilup7575
1838
+ rlatt/DialoGPT-large-RickSanchez
1839
+ Kira225784/Klarabot-test
1840
+ bigbossa/DialoGPT-small-aikogirl
1841
+ sckova/DialoGPT-small-joshua
1842
+ sckova/DialoGPT-medium-joshua
1843
+ sckova/DialoGPT-medium
1844
+ Beltenebros/DialoGPT-small-PerionOfGaul
1845
+ Byteno/DialoGPT-medium-glamrockfreddy
1846
+ audreycl/audreycl-testagain
1847
+ aisingapore/Lif3WayAp
1848
+ audreycl/DialoGPT-RoyalPurpleFish
1849
+ audreycl/DialoGPT-RPF
1850
+ Axelajs26/DialoGPT-small-alicetendou
1851
+ Noohance/DialoGPT-medium-noohbot
1852
+ Draptor/DialoGPT-small-coolco
1853
+ David042/DialoGPT-LucasBot
1854
+ Hobospider132/DialoGPT-Mahiru-Proto
1855
+ Draptor/DialoGPT-medium-moto
1856
+ aisingapore/SPANBert
1857
+ JYBX/DialoGPT-small-Penny
1858
+ JYBX/DialoGPT-small-Pennybot
1859
+ aisingapore/RoBERTa-base
1860
+ JYBX/DialoGPT-small-Amybot
1861
+ LuckyBor11/Figure
1862
+ FlyingGrayson0304/Gandalf-stupid-version
1863
+ BlinksFly/Harry_Potter-Ai
1864
+ PhilipN/DialoGPT-small-KeqingBot
1865
+ YTTD/DialoGPT-medium-sou
1866
+ PhilipN/DialoGPT-large-KeqingBot
1867
+ YTTD/DialoGPT-medium-souv2
1868
+ keonju/chat_bot
1869
+ MysteriousAmazon/DialoGPT-medium-alastor
1870
+ mICHPl/MINI_AI
1871
+ rlatt/DialoGPT-large-King-James-Bible-test
1872
+ v3nom1704/DialoGPT-small-potterbot
1873
+ Techcs002/DialoGPT-medium-AboTalkTest
1874
+ MysteriousAmazon/DialoGPT-medium-freddy
1875
+ ICAMPB204/DialoGPT-small-HarryPotter
1876
+ kelvinhang/DialoGPT-medium-badguy
1877
+ tatsumis6/MonikaAI
1878
+ kennethhendricks/DialoGPT-medium-PowPowGaming-Gen1
1879
+ rlatt/DialoGPT-large-King-James-Bible-test-accurate
1880
+ kennethhendricks/DialoGPT-medium-PowPowGaming
1881
+ kelvinhang/DialoGPT-medium-badguy2
1882
+ zami0011/qqpbksdj
1883
+ vladiyudi/Morty-data
1884
+ RazaK18/DialoGPT-small-harrypotter
1885
+ comradesocrates/DialoGPT-large-io
1886
+ kelvinhang/DialoGPT-medium-okakoro
1887
+ Monchic/chatwithkani
1888
+ zami0011/rickdick
1889
+ CallMeJeremy/DialoGPT-medium-THREEPIO
1890
+ Leomas/DialoGPT-medium-Leomas
1891
+ RehanP123/DialoGPT-large-kermit
1892
+ shahules786/Safetybot-T5-base
1893
+ huolongguo10/CDial-GPT2-LCCC-Base-copy
1894
+ yashR4J/TyrionBOT
1895
+ TakoIsATaco/DialoGPT-small-ShinAI
1896
+ MrLamBam/DialoGPT-medium-LUKEBot
1897
+ Zeda/DialoGPT-Medium-ZedaBot
1898
+ princedream/DialoGPT-small-harrypotter
1899
+ shahules786/Safetybot-mt5-base
1900
+ xiaomengdotcom/Chatgpt-harryP
1901
+ ProtonPLUS/Colab
1902
+ YTTD/DialoGPT-medium-saf
1903
+ jasondubon/HubermanGPT-small-v1
1904
+ YTTD/DialoGPT-medium-safv2
1905
+ YTTD/DialoGPT-medium-safv3
1906
+ kennethhendricks/DialoGPT-medium-jared-hendricks-gen1
1907
+ Cohee/pygmalion-6b-pyggyback-v6_40_v8p4_60
1908
+ DiogenesGois/DialoGPT-medium-Rick
1909
+ LordDanielDE/DialoGPT-medium-Hina
1910
+ ITG/DialoGPT-medium-spanish-chitchat
1911
+ kemsa51/DialoGPT-medium-cartman
1912
+ Mogwhy/DialoGPT-medium-Arrobot
1913
+ nRuaif/Pyg6B-V8P2
1914
+ Seer-luma/DialoGPT-small-SeerBot
1915
+ Dinoloverwii/DialoGPT-Sachibot
1916
+ flayeddie/Mike
1917
+ wooldover/krautbot
1918
+ kielljoy/DialoGPT-small-k
1919
+ WAHCLAN/DialoGPT-Medium-DAN
1920
+ ss1612/loki-chat
1921
+ IceBruhOne/mytestcharacter
1922
+ wooldover/pygbot
1923
+ IceBruhOne/DialoGPT-medium-subjectai
1924
+ YukioKoito/DialoGPT-small-ozua
1925
+ gaytrimoh/DialoGPT-small-harrypotter
1926
+ YukioKoito/DialoGPT-small-doog
1927
+ IceBruhOne/DialoGPT-medium-subjectai2
1928
+ custads23/DialoGPT-medium-aubrey
1929
+ HaHaMagpie/DialoGPT-small-phineas
1930
+ Carslo45/DialoGPT-medium-ddlc-monika
1931
+ zl111/ChatDoctor
1932
+ MarinHinawa/DialoGPT-medium-haruka
1933
+ custads23/DialoGPT-medium-basil
1934
+ IceBruhOne/DialoGPT-medium-complexai
1935
+ MarinHinawa/DialoGPT-medium-Shintaro
1936
+ jlsalty9999/DialoGPT-medium-Riddle
1937
+ custads23/DialoGPT-medium-mincy
1938
+ Wtfsquad/DialoGPT-small-pulpfictionVincent
1939
+ ss1612/erika-chatv4
1940
+ WAHCLAN/DialoGPT-Large-DAN
1941
+ Speedemon/jake-peralta-ai
1942
+ Speedemon/cobalt
1943
+ DeliveryBoy/DiabloGPT-medium-Kurisu
1944
+ AbbyRhea/DialoGPT-small-adrienbot
1945
+ monish162/kirthin-waifuu
1946
+ janna42/DialoGPT-small-phoenix
1947
+ AbbyRhea/DialoGPT-medium-AA
1948
+ FrozenSmoothie/DialoGPT-medium-star
1949
+ Fizi12341/astro_bot1234
1950
+ stiGGy/DialoGPT-medium-raymond
1951
+ patthebaker45/DialoGPT-small-Carlbot
1952
+ r4k4n1/DialoGPT-small-joshua
1953
+ Sukul/DialoGPT-small-Harsabot
1954
+ Sukul/DialoGPT-small-Harsabot1
1955
+ hihihotdog/DialoGPT-bot
1956
+ LarsJonasson/pythia-1.4b-deduped-sft-swedish
1957
+ mayaeary/pygmalion-6b-4bit-128g
1958
+ mayaeary/pygmalion-6b_dev-4bit-128g
1959
+ Inhaexpress/DialoGPT-medium-paimon
1960
+ sanyasna517/DialoGPT-medium-Zhongli
1961
+ StephenBrink/DialoGPT-small-will
1962
+ StanleyRoberts/Nix
1963
+ boudchicha/soluzione
1964
+ mayaeary/PPO_Pygway-V8p4_Dev-6b-4bit-128g
1965
+ ToborWinner/DialoGPT-medium-jolly
1966
+ mayaeary/PPO_Pygway-6b-Mix-4bit-128g
1967
+ ayushutkarsh/t3
1968
+ Inhaexpress/DialoGPT-medium-paimon2
1969
+ eepyblanky/DialoGPT-medium-malina
1970
+ eachadea/legacy-ggml-vicuna-13b-4bit
1971
+ eachadea/ggml-gpt4-x-alpaca-13b-native-4bit
1972
+ totallynotbrent/brotGPT
1973
+ Inhaexpress/DialoGPT-medium-harry_potter_ps
1974
+ robintan66/DialoGPT-small-harrypotter
1975
+ MajorCrayon7047/MadboneAssistantGPT-2
1976
+ VennuT/DialoGPT-medium-Alphinaud
1977
+ triple777/annicebot
1978
+ totallynotbrent/aaronGPTalpha
1979
+ Plaaasma/gerald-model
1980
+ yashugupta786/bart_large_xsum_samsum_conv_summarizer
1981
+ eachadea/legacy-ggml-vicuna-7b-4bit
1982
+ ColtonAi/Llmtrain
1983
+ ColtonAi/Chem4
1984
+ IchtacaKemeRaz/favabean
1985
+ Stromello/DialoGPT-medium-ZeroTwo
1986
+ totallynotbrent/brotGPTplus
1987
+ storminstakk/Stormin-Stakk
1988
+ ToddGoldfarb/Cadet-Tiny
1989
+ aghelan3/eggIncubationRepo
1990
+ hackathon-somos-nlp-2023/SalpiBloomZ_15949_input_1024-1b7
1991
+ JosephusCheung/Guanaco
1992
+ raymondho/DialoGPT-small-harry
1993
+ Capitalist/DialoGPT-small-rick
1994
+ gfgddfg/DialoGPT-small-qiu_chat
1995
+ eachadea/ggml-toolpaca-13b-4bit
1996
+ CNR223/DialoGPT-small-MasterO
1997
+ Abigaming75/Bot_wa
1998
+ pranitrai07/DialoGPT-medium-harrypotter
1999
+ IlyaGusev/saiga_7b_lora
2000
+ Ancestral/Dolly_Shygmalion-6b-4bit-128g
2001
+ Ancestral/PPO_Shygmalion-6b-4bit-128g
2002
+ wyskiski/winonabot
2003
+ hcpwr/DialoGPT-medium-samantha
2004
+ Roguwan/DialoGPT-medium-rogu
2005
+ totallynotbrent/aaronGPTplus
2006
+ Ancestral/Dolly_Malion-6b-4bit-128g
2007
+ vantozdad/DialoGPT-medium-Dumbledore
2008
+ Abyss-fyf/DialoGPT-small-discord
2009
+ CrystalzAura/DialoGPT-small-elysia
2010
+ eachadea/ggml-gpt4all-7b-4bit
2011
+ inu-ai/alpaca-guanaco-japanese-gpt-1b
2012
+ Husnul/pepper-bot-morty
2013
+ TheBloke/vicuna-13B-1.1-GPTQ
2014
+ CRD716/ggml-vicuna-1.1-quantized
2015
+ 4bit/pygmalion-6b-4bit-128g
2016
+ Reaver1092/DialoGPT-small-bones
2017
+ Ibnelaiq/Makise-Amadeus-Kurisu-small
2018
+ inu-ai/dolly-japanese-gpt-1b
2019
+ clawrex/DialoGPT-medium-walt
2020
+ IlyaGusev/saiga_13b_lora
2021
+ Zeda/DialoGPT-Large-ZedaBot
2022
+ Ibnelaiq/Makise-Amadeus-Kurisu
2023
+ Jaxon/DialoGPT-medium-kirito
2024
+ glitchie/bb
2025
+ Aqua002/DialoGPT-small-deadpool
2026
+ Aqua002/discord-chatbot
2027
+ lemoniada/Przembot
2028
+ Avitas8485/Dialogpt-small-v1
2029
+ Jprafol/DialoGPT-large-ARCHIBot
2030
+ Jprafol/DialoGPT-large-ARCHIBotV2
2031
+ spitfire4794/ben-ultra
2032
+ IlyaGusev/saiga_30b_lora
2033
+ NbAiLab/nb-gpt-j-6B-norpaca
2034
+ winglian/vicuna-self-reflect-13b
2035
+ 0x044/test-1
2036
+ 0x044/dgpt
2037
+ ss1612/erika-chatv6
2038
+ TestingCoder463632/DialoGPT-small-palpatine
2039
+ Blizzchor/DialoGPT-medium-BarryB
2040
+ sasha0552/pygmalion-6b-f16-ggml
2041
+ kavindu999/BetterEnglishGPT-v1
2042
+ kavindu999/BetterEnglishGPT-v2
2043
+ EnterNameBros/DialoGPT-small-FoxySan
2044
+ OrientalDude/DialoGPT-medium-GOKU
2045
+ Avitas8485/Dialogpt-medium-v1
2046
+ finex/pfe-mohamed-Harry
2047
+ Avitas8485/Dialogpt-medium-finetuned
2048
+ psyamk/DialoGPT-small-harrypotter
2049
+ Jamesonn/DialoGPT-small-jumin
2050
+ CNXT/CNXT
2051
+ Ilangraterol/Dataset_model
2052
+ IlyaGusev/saiga_30b_ggml
2053
+ Locutusque/gpt2-conversational-or-qa
2054
+ TrippingFollowing39/AMOGUS
2055
+ moomoomer/DialoGPT-medium-garfield
2056
+ PygmalionAI/pygmalion-7b
2057
+ Viperxyz/DialoGPT-small-Cartman
2058
+ Neko-Institute-of-Science/pygmalion-7b
2059
+ TehVenom/Pygmalion-7b-Merged-Safetensors
2060
+ BiaDd/DialoGPT-medium-Punko
2061
+ NewBreaker/chatglm-6b-int4
2062
+ TehVenom/Pygmalion-7b-4bit-GPTQ-Safetensors
2063
+ TehVenom/Pygmalion-7b-4bit-Q4_1-GGML
2064
+ userzyzz/piggySharded
2065
+ steinhaug/models-bck
2066
+ blueberrycheesecake/DialoGPT-small-misssophie
2067
+ Imablank/P1GM4L10N-7B-MERGED_WEIGHTS
2068
+ MrToast/idk
2069
+ SouroJ/DialoGPT-medium-Mordecai
2070
+ sasha0552/pygmalion-7b-bf16
2071
+ swajan/DialoGPT-small-Trail-1
2072
+ RobiKenobi/DialoGPT-medium-pete
2073
+ sasha0552/pygmalion-7b-f16-ggml
2074
+ sasha0552/pygmalion-7b-f16
2075
+ winglian/llama-adapter-13b
2076
+ MatLumber/Bisho
2077
+ iconical/MortyChatbotAI
2078
+ swajan/Trail-1
2079
+ swajan/Trail-2
2080
+ Misfit2/DialoGPT-large-Sonic
2081
+ ToddGoldfarb/Cadet-Medium
2082
+ ajpieroni/DiabloGPT-medium-medea
2083
+ AliiaR/DialoGPT-medium-empathetic-dialogues
2084
+ Chun121/ChocolaChat
2085
+ lemoniada/kicerobot
2086
+ Kazeyami-o7/DialoGPT-medium-beterbiffin
2087
+ Elucia/Diluc_Bot
2088
+ Elucia/Diluc_Bot_1.1
2089
+ Elucia/Diluc_Bot_1.2
2090
+ neurofumo/DialoGPT-small-joshua
2091
+ Elucia/Diluc_Bot_1.3
2092
+ GraphicStylz/Stylz
2093
+ naybiblu/ChizuruBot
2094
+ calvindoingstuff/DialoGPT-medium-luffy
2095
+ xZephy/DialoGPT-small-HelperBot
2096
+ crazywombat/DialoGPT-small-abandonware
2097
+ anshengli2/DialoGPT-small-counter-hate
2098
+ sephwalker3/piggy-7b
2099
+ apricxty/DialoGPT-small-chatbot
2100
+ leadmaister/langchain-prompt-master
2101
+ Covriar/DialoGPT-med-kiryu
2102
+ yesuns/DialoGPT-small-yesun
2103
+ davidviriato/DialoGPT-small-joshua
2104
+ VMware/open-llama-0.3T-7B-open-instruct-v1.1
2105
+ prabhguron/DialoGPT-small-harrypotter
2106
+ xHexyy/small-test
2107
+ malteos/bloom-6b4-clp-german-oasst-v0.1
2108
+ Pcik/DialoGPT-medium-Ruby
2109
+ sasha0552/pygmalion-7b-q4_0-ggml
2110
+ sasha0552/pygmalion-7b-q4_1-ggml
2111
+ sasha0552/pygmalion-7b-q5_0-ggml
2112
+ sasha0552/pygmalion-7b-q5_1-ggml
2113
+ sasha0552/pygmalion-7b-q8_0-ggml
2114
+ rjorg543/DialoGPT-small-ben
2115
+ eachadea/ggml-gpt4-x-vicuna-13b
2116
+ Tlethal/DialoGPT-small-harrypotter
2117
+ xHexyy/test2
2118
+ xHexyy/test3
2119
+ ldilov/stablelm-tuned-alpha-7b-4bit-128g-descact-sym-true-sequential
2120
+ AnimusOG/pygmalion-7b-4bit-128g-cuda-2048Token
2121
+ jun-ai/BeethovenBot
2122
+ channashi/DialoGPT-small-rocket
2123
+ biscuitbutb/biscuitbot-dialogpt-model
2124
+ ytrbqrkflbvbhy/DialoGPT-small-me-rus
2125
+ Pruz0/VescGPT
2126
+ IlyaGusev/saiga_7b_ggml
2127
+ IlyaGusev/saiga_13b_ggml
2128
+ TechTay/DialoGPT-small-Luciano
2129
+ BlackBull/yeet
2130
+ WAHCLAN/DialoGPT-Medium-SAM
2131
+ MistyIce/dialog-gpt-Heshan
2132
+ Pruz0/LennGPT
2133
+ Wanfq/MAKER-mwoz-full-kb-t5-base
2134
+ Wanfq/MAKER-mwoz-full-kb-t5-large
2135
+ Wanfq/MAKER-smd-condensed-kb-t5-base
2136
+ Wanfq/MAKER-smd-condensed-kb-t5-large
2137
+ Wanfq/MAKER-camrest-condensed-kb-t5-base
2138
+ Wanfq/MAKER-camrest-condensed-kb-t5-large
2139
+ Wanfq/MAKER-camrest-full-kb-t5-base
2140
+ Wanfq/MAKER-camrest-full-kb-t5-large
2141
+ Wanfq/MAKER-mwoz-condensed-kb-t5-base
2142
+ Wanfq/MAKER-mwoz-condensed-kb-t5-large
2143
+ raphaman/test
2144
+ Pruz0/HaLLGPT
2145
+ Binaryy/blender-bot-distill-finetuned
2146
+ alex297/DialoGPT-small-sparky
2147
+ Pruz0/GeoGPT
2148
+ Pruz0/PruzGPT
2149
+ dorkai/pygmalion-2.7b
2150
+ ikocx-to24/DialoGPT-medium-plankton
2151
+ th3d4nk/llamaModel1
2152
+ PygmalionAI/pygmalion-13b
2153
+ TehVenom/Pygmalion-13b-Merged
2154
+ ivaan01/TFG-Mauri
2155
+ alex297/DialoGPT-medium-fox
2156
+ Crataco/Pygmalion-1.3B-GGML
2157
+ SaintMcMuffins/DialoGPT-small-brain2.0
2158
+ dujade18/DialoGPT-medium-dwightoffice
2159
+ TehVenom/Pygmalion-13b-8bit-GPTQ
2160
+ helloerikaaa/chandlerGPT
2161
+ SaintMcMuffins/Brain2.1
2162
+ kb2c37g/DialoGPT-small-Rick
2163
+ alex297/DialoGPT-small-fox
2164
+ TeraSpace/dialofrednocontext
2165
+ EnterNameBros/DialoGPT-small-Senko
2166
+ EnterNameBros/DialoGPT-small-Senko-san
2167
+ 4bit/pyg-7b
2168
+ EnterNameBros/DialoGPT-small-Senko-san-ver
2169
+ Lumiras/rachbot
2170
+ kevintest1234/DialoGPT-small-harrypotter
2171
+ EnterNameBros/DialoGPT-small-Senko-san-ver-2
2172
+ EnterNameBros/DialoGPT-large-Senko-san-ver-2
2173
+ Delmarfish/Delmar
2174
+ diankymar/kitty
2175
+ TatonkaHF/ruDialoGpt3-medium-finetuned-russian-joke
2176
+ EggsInAJar/DialoGPT-small-MerrickBot
2177
+ DBoi/Mayreel2
2178
+ hosst/FridgeLLM
2179
+ loitran/DialoGPT-medium-peppapig
2180
+ Syamil/DialoGPT-small-pixal
2181
+ Avitas8485/Dialogpt-medium-v2
2182
+ Inhaexpress/DialoGPT-medium-harrypotter
2183
+ loitran/DialoGPT-medium-HarryPotter
2184
+ Syamil/DialoGPT-medium-pixal
2185
+ roykim/ko_chat
2186
+ Syamil/DialoGPT-medium-pixals
2187
+ minhcrafters/DialoGPT-small-Fukuya
2188
+ Warren00/DialoGPT-Med-peppa05a
2189
+ Syamil/DialoGPT-medium-pixalbot
2190
+ LelouchH/DiabloGPT-small-RaidenBot
2191
+ Inhaexpress/DialoGPT-medium-shrek124
2192
+ Inhaexpress/DialoGPT-medium-terra1
2193
+ nascar123/Discordtester000
2194
+ EnterNameBros/Offical-Senko-medium-update
2195
+ EnterNameBros/Offical-Senko-medium-update-2
2196
+ EnterNameBros/Offical-Senko-medium-update-3
2197
+ EnterNameBros/Senko-medium
2198
+ jiezhou1996/test
2199
+ ElMater06/SpaceCore
2200
+ EnterNameBros/Offical-Senko-medium
2201
+ EnterNameBros/Senko-san
2202
+ DBoi/Mayreel
2203
+ VMware/open-llama-0.7T-7B-open-instruct-v1.1
2204
+ Warren00/DialoGPT-Small-Peppa06_053123
2205
+ mpalacio/DialoGPT_ootwl
2206
+ protag07/DialoGPT-small-harrypotter
2207
+ h2oai/h2ogpt-gm-oasst1-en-2048-falcon-7b-v2
2208
+ cosimoiaia/Loquace-70m
2209
+ cosimoiaia/Loquace-410m
2210
+ MareNoceda/DialoGPT-medium-Luz
2211
+ GarrisonBot/DialoGPT-medium-herbertgarrison
2212
+ cosimoiaia/Loquace-12B
2213
+ cosimoiaia/Loquace-7B
2214
+ Deojoandco/ahGPT-small-v1
2215
+ PeachHeles/bmo
2216
+ Rays236/DialoGPT-small-harrypotter
2217
+ Deojoandco/ahGPT-small-v2
2218
+ Syamil/DialoGPT-medium-newpixal
2219
+ Coderhuynin/DialoGPT-large-TonyStark
2220
+ SotirisLegkas/final_socratic_dialoGPT
2221
+ ademfatnassi/bonjourGPT-small
2222
+ ikocx-to24/DialoGPT-small-planktongpt2
2223
+ EricYou/RickBot
2224
+ Ayaakaa/DialoGPT-small-Yoisaki-Kanade
2225
+ DoesNoPro/DialoGPT-small-RaidenG
2226
+ rajeshbot/DialoGPT-medium-Harry-to-Hari
2227
+ DoesNoPro/DialoGPT-small-RaidenG2
2228
+ SamsonP/pygmalion-6b-sft
2229
+ Deojoandco/ahDialoGPT-small-v4
2230
+ Syamil/GPTNeo-PIXAL-Model
2231
+ Syamil/GPTNeo-PIXAL-new
2232
+ Lattori/DiabloGPT-small-ConanBot
2233
+ Badzee/DialoGPT-medium-jackbot
2234
+ meowsynth/DialoGPT-small-sophie
2235
+ EnterNameBros/Senko-san-medium-baby
2236
+ Deojoandco/ah-GPT2-v4
2237
+ cosimoiaia/Loquace-20B
2238
+ EnterNameBros/Senko-san-medium-fox
2239
+ MarkyMarx/DialoGPT-medium-jimmybot2
2240
+ DhruvShek/DialoGPT
2241
+ Doge22/DialoGPT-medium-max
2242
+ lyogavin/Anima33B
2243
+ steerevo88/testThotBot
2244
+ steerevo88/workingthotBot
2245
+ YTTD/DialoGPT-medium-keiji
2246
+ MisguidedKerbal/DialoGPT-medium-kerbal
2247
+ Blueify/DialoGPT-small-model-lotr
2248
+ steerevo88/newthotBot
2249
+ paripi/Malishka
2250
+ finex/pfe-mohamed2023-RON
2251
+ DhruvShek/CMDGPT
2252
+ finex/pfe-mohamed2023-Hermione
2253
+ SkylerBlu9/DialoGPT-medium-CitrAI
2254
+ SkylerBlu9/DialoGPT-medium-autismobot
2255
+ MisguidedKerbal/DialoGPT-kerbalV2
2256
+ EnterNameBros/Senko-san-medium-a
2257
+ dderr/testmodel
2258
+ priyanshdahiya/DialoGPT-small-rick
2259
+ Goodnoway/DialoGPT-nerbalV2
2260
+ WompWomp1/DialoGPT-medium-Kirin
2261
+ lyogavin/Anima33B-merged
2262
+ peytonai/DialoGPT-small-wali-joshua
2263
+ MisguidedKerbal/DialoGPT-kerbalV3
2264
+ WompWomp1/DialoGPT-medium-Kaori
2265
+ OmarDiab/DialoGPT-small-Amogus
2266
+ servetier/DialoGPT-large-miguel
2267
+ OmarDiab/DialoGPT-small-Amogus-2
2268
+ steveglover/falcon-7b-instruct-telco-chat
2269
+ Lazycuber/Janemalion-6B
2270
+ Goodnoway/DialoGPT-nerbalV4
2271
+ gvij/gpt-j-6B-alpaca-gpt4
2272
+ papahawk/keya-560m
2273
+ JavRedstone/DialoGPT-small-tesseractist
2274
+ imuncomfortable/DiabloGPT-small-CocoAtarashi
2275
+ Amod/falcon7b-fine-tuned-therapy-merged
2276
+ Oshirigami1980/DialoGPT-medium-Steven
2277
+ Drevanil/DialoGPT-small-try
2278
+ Yaewe/1
2279
+ DataHammer/mozi_emotional_7b
2280
+ udxyz/HarryPotterBot
2281
+ Kasyapa/DialoGPT-medium-hagridbot
2282
+ lyogavin/Anima33B-DPO-Belle-1k
2283
+ JeanL-0/TestingModel-01
2284
+ TejasC2/DialoGPT-TejasBot
2285
+ lyogavin/Anima33B-DPO-Belle-1k-merged
2286
+ InterruptAI/Interrupt-350M
2287
+ Lucideds/Lucideds
2288
+ EnterNameBros/Senko-san-medium-sc
2289
+ EnterNameBros/Senko-san-medium-scl
2290
+ DaddySen/tighnari
2291
+ ettevyemerald/DialoGPT-medium-beomgyu
2292
+ minhcrafters/DialoGPT-small-mindwandering
2293
+ JNDankwah/DialoGPT-small-ThorCB
2294
+ minhcrafters/DialoGPT-medium-Zephirel
2295
+ papahawk/falcon-40b
2296
+ sonntt/DialoGPT-small-mindwandering
2297
+ pundapog/DialoGPT-medium-ethanbot
2298
+ TheBloke/Pygmalion-7B-SuperHOT-8K-GGML
2299
+ TheBloke/Pygmalion-7B-SuperHOT-8K-fp16
2300
+ pobierz69/model-6b-read-desc
2301
+ sidca/Cam
2302
+ EnterNameBros/Senko-san-medium-abc
2303
+ abhi-8/DialoGPT-medium-Michael
2304
+ abhi-8/DialoGPT-medium-Rick
2305
+ abhi-8/DialoGPT-medium-Joshua-twevy
2306
+ spitfire4794/dialogpt-small-rick
2307
+ abhi-8/Joshua-bot
2308
+ Justus-Jonas/Imaginary-Embeddings-Classic
2309
+ Justus-Jonas/Imaginary-Embeddings-SpeakerTokens
2310
+ Justus-Jonas/Imaginary-Embeddings-SpeakerTokens-STP
2311
+ spitfire4794/dialogpt-small-morty
2312
+ Kauru/DialoGPT-medium-Ranni
2313
+ crazydamns/DialoGPT-Johnny2
2314
+ jpandeinge/DialoGPT-medium-Oshiwambo-Bot
2315
+ custads23/pygmalion-1.3b
2316
+ HatCha01/DialoGPT-small-Batman
2317
+ crazydamns/DialoGPT-Johnny3
2318
+ assembleteams/curiouspi
2319
+ Kauru/DialoGPT-medium-Ranniv2
2320
+ SatwikShrivastava/narutoAI-chatbot
2321
+ digitalmax1/max
2322
+ adr2432/small-Joshua-Bot
2323
+ ObsessedCitrus/DialoGPT-small-PeterBot_ChatBot
2324
+ suarkadipa/HubermanGPT-small-v1
2325
+ suarkadipa/HarryPotterGPT-small-v1
2326
+ wevie1978/DialoGPT-medium-Kebb
2327
+ kopeqwerty/DialoGPT-medium-idotbot
2328
+ zelalt/Chatbot_T5-Prmtrs
2329
+ jarvissss/DialoGPT-medium-idotbot
2330
+ Magmadue/DiabloGPT-small-ei
2331
+ nicbull/DialoGPT-small-cryptonic
2332
+ nicbull/DialoGPT-small-cryptonic2
2333
+ chloe0x0/DialoGPT-small-Muty
2334
+ chloe0x0/mutyGPT
2335
+ alexwang05/DialoGPT-small-soph
2336
+ BHAndersonJr/DialoGPT-small-fry
2337
+ timothykim04/DialoGPT-medium-timothykim
2338
+ timothykim04/DialoGPT-medium-harrypotter
2339
+ Luca999/Limitlessai99
2340
+ Madgimmy/DiabloGPT-small-Madgimmy
2341
+ chloe0x0/mutyGPT-v2
2342
+ nuggster/DialoGPT-small-ianbot
2343
+ we1kkk/llama2-hf-qlora-oasst1
2344
+ IlyaGusev/saiga2_7b_lora
2345
+ IlyaGusev/gigasaiga_lora
2346
+ jliu03/JustinBot
2347
+ heliosbrahma/falcon-7b-finetuned-mental-health-conversational
2348
+ drunknmonk/GPT-Chandler
2349
+ jun-ai/llama2-qlora-finetunined-french
2350
+ WompWomp1/DialoGPT-large-Kirin
2351
+ WompWomp1/DialoGPT-large-Kirin-2
2352
+ WompWomp1/DialoGPT-large-Rin
2353
+ or4cl3ai/Aiden_t5
2354
+ jstawski/Llama-2-13b-hf-finetuned-SNG
2355
+ Gelmo/Halouf
2356
+ IlyaGusev/saiga2_13b_lora
2357
+ sophji/DialoGPT-small-GodlyLJ
2358
+ ATrapenard/Discord-Impersonation-Bot
2359
+ hiamitabha/llama2forbittlerobot
2360
+ IlyaGusev/saiga2_7b_gguf
2361
+ IlyaGusev/saiga2_13b_gguf
2362
+ TejasC2/DialoGPT-TejasBot2
2363
+ CNR223/DialoGPT-medium-MalcolmReynold
2364
+ minh-hahaha/DialoGPT-small-harrypotter
2365
+ phucnq1591999/SolanaChatBot
2366
+ marclove/llama-2-7b-chat-functions
2367
+ Sheerapi/test
2368
+ YukioKoito/DialoGPT-small-chibi
2369
+ YukioKoito/DialoGPT-small-twilight
2370
+ amzrana/lora
2371
+ ierhon/basic-chatbot
2372
+ Pula23/Hggjg
2373
+ Focs/DialoGPT-medium-tony-stark
2374
+ Kenobiwan/DialoGPT-small-AizakkuBot2
2375
+ drado/DialoGPT-small-joshua
2376
+ rah-1/Rahulio
2377
+ tanishqvashisht/DialoGPT-small-Joshua
2378
+ Kenobiwan/DialoGPT-small-AizakkuBot3
2379
+ Ridloo/DialogGPT-small-harrypotter
2380
+ dyuhong80/DialoGPT-large-ModerateEffortBombGPT
2381
+ ai-forever/paper_persi_chat
2382
+ paralleldynamix/paralleldynamix-model101
2383
+ kelSidenna/SoftwareRequirements-T5-Base
2384
+ renahime/DialoGPT-medium-umineko
2385
+ Shaun1204/RedGPT-Gormlee
2386
+ diwas7777/HarryBot
2387
+ heliosbrahma/falcon-7b-sharded-bf16-finetuned-mental-health-conversational
2388
+ kelSidenna/SoftwareReq-DialoGPT-medium
2389
+ shanover/medbot-conv
2390
+ J-Wiggler/DialoGPT-medium-Stanley
2391
+ gearski/DialoGPT-small-itskleb
2392
+ wozniakclub/llama-2-7b-medtext-llama2
2393
+ gearski/DialoGPT-medium-itskleb
2394
+ rebornrulz/Rulz-AI
2395
+ Quantsr/DialogGPT-small-Aeris
2396
+ ostorc/rick-sanchez-chatbot
2397
+ nicbull/DialoGPT-medium-nic
2398
+ nicbull/DialoGPT-medium-nic2
2399
+ gorkemgoknar/llama2-7f-moviechatbot-ggml-q4
2400
+ aka-nikko/ainz-ooal-gown
2401
+ llSourcell/medllama2_7b
2402
+ xtuner/Llama-2-7b-qlora-moss-003-sft
2403
+ xtuner/Llama-2-7b-qlora-arxiv-gentitle
2404
+ xtuner/internlm-7b-qlora-arxiv-gentitle
2405
+ xtuner/internlm-7b-qlora-alpaca-enzh
2406
+ xtuner/Baichuan-7B-qlora-arxiv-gentitle
2407
+ xtuner/Baichuan-7B-qlora-alpaca-enzh
2408
+ nicbull/DialoGPT-medium-leric
2409
+ Ian-14/llm13
2410
+ theastro/starkbot
2411
+ yupimrandy/DialoGPT-medium-butcher
2412
+ hclaim/clamgptattempt4
2413
+ yupimrandy/DialoGPT-medium-hughie
2414
+ nekohacker591/google1
2415
+ zhmx31/Mychatbot
2416
+ sk8ingcat/DialoGPT-small-TonyStark
2417
+ SanchoJR/meX
2418
+ xtuner/Qwen-7B-qlora-moss-003-sft
2419
+ xtuner/Qwen-7B-qlora-arxiv-gentitle
2420
+ xtuner/Qwen-7B-qlora-alpaca-enzh
2421
+ xtuner/Qwen-7B-qlora-oasst1
2422
+ xtuner/Baichuan-7B-qlora-oasst1
2423
+ xtuner/internlm-7b-qlora-oasst1
2424
+ 4bit/medllama2_7b
2425
+ JGKD/JangoGPTv1.0
2426
+ kwankwan1000/DialoGPT-small-peppa
2427
+ JGKD/JangoGPTv1.5
2428
+ SoniR/config
2429
+ mjyh/falcon-7b-qlora-sclue-20230601-04-merged
2430
+ sadzip/SiberianPersona-ruGPT-3.5-qlora
2431
+ Wolffire88/DialoGPT-medium-Android16
2432
+ nolly3317/DialoGPT-small-alice
2433
+ feelinrealcute/pym-6b
2434
+ nixsy/AvasLove
2435
+ feelinrealcute/pym-13b7
2436
+ AleksiDu/HarryPotterBot
2437
+ Belcebuzzz/DialoGPT-small-TomoGF
2438
+ xtuner/internlm-7b-qlora-lawyer
2439
+ xtuner/internlm-7b-qlora-colorist
2440
+ xtuner/internlm-7b-qlora-coder
2441
+ xtuner/internlm-7b-qlora-open-platypus
2442
+ xtuner/internlm-7b-qlora-sql
2443
+ inception-mbzuai/jais-13b-chat
2444
+ Fredithefish/Guanaco-3B-Uncensored
2445
+ garrachonr/LlamaDos
2446
+ literallywood/DialoGPT-small-ekansh
2447
+ IALABS/Arturosfastfood
2448
+ javieitor/DialoGPT-medium-Rick
2449
+ Kuduxaaa/ava-small
2450
+ Al-Hathboor-Bikal-ai-2023/SRTIP-GPT-F7B-base
2451
+ L-R/LLmRa-355M
2452
+ Fredithefish/Guanaco-3B-Uncensored-v2
2453
+ xtuner/Llama-2-7b-qlora-colorist
2454
+ KE-AI/basicchatbot-kel
2455
+ josepholiver/TEST_MODEL_1
2456
+ PlaceReporter99/Utility_Bot_Chat
2457
+ J-Wiggler2/Caesar
2458
+ J-Wiggler2/Caesar2
2459
+ matvalan/vittae-cot
2460
+ Dawnstarhunter/DialoGPT-medium-Eveline
2461
+ sahilxyd/DialoGPT-small-joshua
2462
+ EnterNameBros/Senko-san-medium-abcd
2463
+ 6adityaverma/DialoGPT-large-Walter
2464
+ 6adityaverma/DialoGPT-large-Rick
2465
+ IlyaGusev/saiga2_70b_lora
2466
+ AyushK0808/StarWarsBot
2467
+ EnterNameBros/Senko-ai-medium
2468
+ Fredithefish/Guanaco-7B-Uncensored
2469
+ IlyaGusev/saiga2_70b_gguf
2470
+ glassofwine/DialoGPT-medium-johanwine
2471
+ zattio770/120-Days-of-LORA-v2-13B
2472
+ cannice/blenderbot-400M-distill-empathetic
2473
+ Likelihood94/Jackoftrades
2474
+ Hapski/DialoGPT-small-nene
2475
+ Fredithefish/Guanaco-13B-Uncensored
2476
+ kitbear444/DialoGPT-medium-kit
2477
+ SonnyAu/DialoGPT-dumbledore
2478
+ TheBloke/Guanaco-7B-Uncensored-GGUF
2479
+ TheBloke/Guanaco-13B-Uncensored-GGUF
2480
+ TheBloke/Guanaco-7B-Uncensored-GPTQ
2481
+ TheBloke/Guanaco-13B-Uncensored-GPTQ
2482
+ TheBloke/Guanaco-3B-Uncensored-v2-GPTQ
2483
+ TheBloke/Guanaco-3B-Uncensored-v2-GGML
2484
+ Codexister/DialoGPT-medium-KafkaBotV1
2485
+ mfodwo/STUGPT-small-v1
2486
+ asas-ai/jais-13b-chat-8bit
2487
+ SoupChickn/Valeen-DialoGPT
2488
+ Codexister/DialoGPT-medium-KafkaBotV2
2489
+ KoalaAI/OPT-1.3b-Chat
2490
+ Nafaille/nafaille6b
2491
+ DiTy/dialogpt
2492
+ Severus27/BeingWell_llama2_7b
2493
+ rayho/DialoGPT-small-polysoft
2494
+ TuningAI/Llama2_13B_startup_Assistant
2495
+ dipxsy/testmodel
2496
+ dipxsy/Jarvis-small
2497
+ Lazycuber/L2-7b-Chat-Guanaco-Uncensored
2498
+ dipxsy/jarvis-blend
2499
+ TheBloke/Guanaco-13B-Uncensored-AWQ
2500
+ TheBloke/Guanaco-7B-Uncensored-AWQ
2501
+ wstock04/shiddeatorBotV1
2502
+ Boqianshen/llama-2-7b-miniguanaco
2503
+ sebastiantrbl/distilgpt2-finetuned-wikitext2
2504
+ herzlixh/DialoGPTs_HarryFromHogwarts
2505
+ poiccard/jais-13b-chat-adn
2506
+ sebastiantrbl/test-DialoGPT-finetune
2507
+ uffergist/DialoGPT-small-cummy
2508
+ wstock04/shiddeatorBotV3.0
2509
+ wstock04/shiddeatorBotDUMB
2510
+ Applekinz/John
2511
+ Or4cl3/1nsfw
2512
+ sebastiantrbl/DialoGPT-finetuned-daily-dialog
2513
+ LTC-AI-Labs/L2-7b-Base-WVG-Uncensored
2514
+ hussain2030/jais13bchat2
2515
+ subabi/DialoGPT-medium-subabicord
2516
+ marblyso/DialoGPT-medium-collin
2517
+ Crataco/Pygmalion-6B-GGML
2518
+ dipxsy/jl
2519
+ testerhubhai/krnedo
2520
+ IAteSpaghettiForLunch/DialoGPT-medium-GLADoS
2521
+ IAteSpaghettiForLunch/GLADoSBOT
2522
+ Nikolai5592/DialoGPT-Medium-RickBot
2523
+ KuroganeNiello/medium-NebBot
litellm/llms/huggingface_llms_metadata/hf_text_generation_models.txt ADDED
The diff for this file is too large to render. See raw diff
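Although this file's diff is not rendered, the loader code in litellm/llms/huggingface_restapi.py below (read_tgi_conv_models) treats it as plain text with one Hugging Face model id per line. A minimal sketch of that lookup, assuming the metadata files ship alongside the module; the model id used for the membership check is purely a placeholder:

import os

def load_model_ids(filename: str) -> set:
    # Resolve the metadata file relative to this module, mirroring read_tgi_conv_models below
    script_directory = os.path.dirname(os.path.abspath(__file__))
    file_path = os.path.join(script_directory, "huggingface_llms_metadata", filename)
    with open(file_path, "r") as f:
        return {line.strip() for line in f if line.strip()}

tgi_models = load_model_ids("hf_text_generation_models.txt")
# Membership in this set is what get_hf_task_for_model uses to pick "text-generation-inference"
print("HuggingFaceH4/zephyr-7b-beta" in tgi_models)  # placeholder model id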
 
litellm/llms/huggingface_restapi.py ADDED
@@ -0,0 +1,604 @@
1
+ ## Uses the huggingface text generation inference API
2
+ import os, copy, types
3
+ import json
4
+ from enum import Enum
5
+ import httpx, requests
6
+ from .base import BaseLLM
7
+ import time
8
+ import litellm
9
+ from typing import Callable, Dict, List, Any
10
+ from litellm.utils import ModelResponse, Choices, Message, CustomStreamWrapper, Usage
11
+ from typing import Optional
12
+ from .prompt_templates.factory import prompt_factory, custom_prompt
13
+
14
+ class HuggingfaceError(Exception):
15
+ def __init__(self, status_code, message, request: Optional[httpx.Request]=None, response: Optional[httpx.Response]=None):
16
+ self.status_code = status_code
17
+ self.message = message
18
+ if request is not None:
19
+ self.request = request
20
+ else:
21
+ self.request = httpx.Request(method="POST", url="https://api-inference.huggingface.co/models")
22
+ if response is not None:
23
+ self.response = response
24
+ else:
25
+ self.response = httpx.Response(status_code=status_code, request=self.request)
26
+ super().__init__(
27
+ self.message
28
+ ) # Call the base class constructor with the parameters it needs
29
+
30
+ class HuggingfaceConfig():
31
+ """
32
+ Reference: https://huggingface.github.io/text-generation-inference/#/Text%20Generation%20Inference/compat_generate
33
+ """
34
+ best_of: Optional[int] = None
35
+ decoder_input_details: Optional[bool] = None
36
+ details: Optional[bool] = True # enables returning logprobs + best of
37
+ max_new_tokens: Optional[int] = None
38
+ repetition_penalty: Optional[float] = None
39
+ return_full_text: Optional[bool] = False # by default don't return the input as part of the output
40
+ seed: Optional[int] = None
41
+ temperature: Optional[float] = None
42
+ top_k: Optional[int] = None
43
+ top_n_tokens: Optional[int] = None
44
+ top_p: Optional[int] = None
45
+ truncate: Optional[int] = None
46
+ typical_p: Optional[float] = None
47
+ watermark: Optional[bool] = None
48
+
49
+ def __init__(self,
50
+ best_of: Optional[int] = None,
51
+ decoder_input_details: Optional[bool] = None,
52
+ details: Optional[bool] = None,
53
+ max_new_tokens: Optional[int] = None,
54
+ repetition_penalty: Optional[float] = None,
55
+ return_full_text: Optional[bool] = None,
56
+ seed: Optional[int] = None,
57
+ temperature: Optional[float] = None,
58
+ top_k: Optional[int] = None,
59
+ top_n_tokens: Optional[int] = None,
60
+ top_p: Optional[int] = None,
61
+ truncate: Optional[int] = None,
62
+ typical_p: Optional[float] = None,
63
+ watermark: Optional[bool] = None
64
+ ) -> None:
65
+ locals_ = locals()
66
+ for key, value in locals_.items():
67
+ if key != 'self' and value is not None:
68
+ setattr(self.__class__, key, value)
69
+
70
+ @classmethod
71
+ def get_config(cls):
72
+ return {k: v for k, v in cls.__dict__.items()
73
+ if not k.startswith('__')
74
+ and not isinstance(v, (types.FunctionType, types.BuiltinFunctionType, classmethod, staticmethod))
75
+ and v is not None}
76
+
77
+ def output_parser(generated_text: str):
78
+ """
79
+ Parse the output text to remove any special characters. In our current approach we just check for ChatML tokens.
80
+
81
+ Initial issue that prompted this - https://github.com/BerriAI/litellm/issues/763
82
+ """
83
+ chat_template_tokens = ["<|assistant|>", "<|system|>", "<|user|>", "<s>", "</s>"]
84
+ for token in chat_template_tokens:
85
+ if generated_text.strip().startswith(token):
86
+ generated_text = generated_text.replace(token, "", 1)
87
+ if generated_text.endswith(token):
88
+ generated_text = generated_text[::-1].replace(token[::-1], "", 1)[::-1]
89
+ return generated_text
90
+
91
+ tgi_models_cache = None
92
+ conv_models_cache = None
93
+ def read_tgi_conv_models():
94
+ try:
95
+ global tgi_models_cache, conv_models_cache
96
+ # Check if the cache is already populated
97
+ # so we don't keep on reading txt file if there are 1k requests
98
+ if (tgi_models_cache is not None) and (conv_models_cache is not None):
99
+ return tgi_models_cache, conv_models_cache
100
+ # If not, read the file and populate the cache
101
+ tgi_models = set()
102
+ script_directory = os.path.dirname(os.path.abspath(__file__))
103
+ # Construct the file path relative to the script's directory
104
+ file_path = os.path.join(script_directory, "huggingface_llms_metadata", "hf_text_generation_models.txt")
105
+
106
+ with open(file_path, 'r') as file:
107
+ for line in file:
108
+ tgi_models.add(line.strip())
109
+
110
+ # Cache the set for future use
111
+ tgi_models_cache = tgi_models
112
+
113
+ # If not, read the file and populate the cache
114
+ file_path = os.path.join(script_directory, "huggingface_llms_metadata", "hf_conversational_models.txt")
115
+ conv_models = set()
116
+ with open(file_path, 'r') as file:
117
+ for line in file:
118
+ conv_models.add(line.strip())
119
+ # Cache the set for future use
120
+ conv_models_cache = conv_models
121
+ return tgi_models, conv_models
122
+ except:
123
+ return set(), set()
124
+
125
+
126
+ def get_hf_task_for_model(model):
127
+ # read text file, cast it to set
128
+ # read the file called "huggingface_llms_metadata/hf_text_generation_models.txt"
129
+ tgi_models, conversational_models = read_tgi_conv_models()
130
+ if model in tgi_models:
131
+ return "text-generation-inference"
132
+ elif model in conversational_models:
133
+ return "conversational"
134
+ elif "roneneldan/TinyStories" in model:
135
+ return None
136
+ else:
137
+ return "text-generation-inference" # default to tgi
138
+
139
+ class Huggingface(BaseLLM):
140
+ _client_session: Optional[httpx.Client] = None
141
+ _aclient_session: Optional[httpx.AsyncClient] = None
142
+
143
+ def __init__(self) -> None:
144
+ super().__init__()
145
+
146
+ def validate_environment(self, api_key, headers):
147
+ default_headers = {
148
+ "content-type": "application/json",
149
+ }
150
+ if api_key and headers is None:
151
+ default_headers["Authorization"] = f"Bearer {api_key}" # Huggingface Inference Endpoint default is to accept bearer tokens
152
+ headers = default_headers
153
+ elif headers:
154
+ headers=headers
155
+ else:
156
+ headers = default_headers
157
+ return headers
158
+
159
+ def convert_to_model_response_object(self,
160
+ completion_response,
161
+ model_response,
162
+ task,
163
+ optional_params,
164
+ encoding,
165
+ input_text,
166
+ model):
167
+ if task == "conversational":
168
+ if len(completion_response["generated_text"]) > 0: # type: ignore
169
+ model_response["choices"][0]["message"][
170
+ "content"
171
+ ] = completion_response["generated_text"] # type: ignore
172
+ elif task == "text-generation-inference":
173
+ if len(completion_response[0]["generated_text"]) > 0:
174
+ model_response["choices"][0]["message"][
175
+ "content"
176
+ ] = output_parser(completion_response[0]["generated_text"])
177
+ ## GETTING LOGPROBS + FINISH REASON
178
+ if "details" in completion_response[0] and "tokens" in completion_response[0]["details"]:
179
+ model_response.choices[0].finish_reason = completion_response[0]["details"]["finish_reason"]
180
+ sum_logprob = 0
181
+ for token in completion_response[0]["details"]["tokens"]:
182
+ if token["logprob"] != None:
183
+ sum_logprob += token["logprob"]
184
+ model_response["choices"][0]["message"]._logprob = sum_logprob
185
+ if "best_of" in optional_params and optional_params["best_of"] > 1:
186
+ if "details" in completion_response[0] and "best_of_sequences" in completion_response[0]["details"]:
187
+ choices_list = []
188
+ for idx, item in enumerate(completion_response[0]["details"]["best_of_sequences"]):
189
+ sum_logprob = 0
190
+ for token in item["tokens"]:
191
+ if token["logprob"] != None:
192
+ sum_logprob += token["logprob"]
193
+ if len(item["generated_text"]) > 0:
194
+ message_obj = Message(content=output_parser(item["generated_text"]), logprobs=sum_logprob)
195
+ else:
196
+ message_obj = Message(content=None)
197
+ choice_obj = Choices(finish_reason=item["finish_reason"], index=idx+1, message=message_obj)
198
+ choices_list.append(choice_obj)
199
+ model_response["choices"].extend(choices_list)
200
+ else:
201
+ if len(completion_response[0]["generated_text"]) > 0:
202
+ model_response["choices"][0]["message"][
203
+ "content"
204
+ ] = output_parser(completion_response[0]["generated_text"])
205
+ ## CALCULATING USAGE
206
+ prompt_tokens = 0
207
+ try:
208
+ prompt_tokens = len(
209
+ encoding.encode(input_text)
210
+ ) ##[TODO] use the llama2 tokenizer here
211
+ except:
212
+ # this should remain non blocking we should not block a response returning if calculating usage fails
213
+ pass
214
+ output_text = model_response["choices"][0]["message"].get("content", "")
215
+ if output_text is not None and len(output_text) > 0:
216
+ completion_tokens = 0
217
+ try:
218
+ completion_tokens = len(
219
+ encoding.encode(model_response["choices"][0]["message"].get("content", ""))
220
+ ) ##[TODO] use the llama2 tokenizer here
221
+ except:
222
+ # this should remain non blocking we should not block a response returning if calculating usage fails
223
+ pass
224
+ else:
225
+ completion_tokens = 0
226
+
227
+ model_response["created"] = int(time.time())
228
+ model_response["model"] = model
229
+ usage = Usage(
230
+ prompt_tokens=prompt_tokens,
231
+ completion_tokens=completion_tokens,
232
+ total_tokens=prompt_tokens + completion_tokens
233
+ )
234
+ model_response.usage = usage
235
+ model_response._hidden_params["original_response"] = completion_response
236
+ return model_response
237
+
238
+ def completion(self,
239
+ model: str,
240
+ messages: list,
241
+ api_base: Optional[str],
242
+ headers: Optional[dict],
243
+ model_response: ModelResponse,
244
+ print_verbose: Callable,
245
+ encoding,
246
+ api_key,
247
+ logging_obj,
248
+ custom_prompt_dict={},
249
+ acompletion: bool = False,
250
+ optional_params=None,
251
+ litellm_params=None,
252
+ logger_fn=None,
253
+ ):
254
+ super().completion()
255
+ exception_mapping_worked = False
256
+ try:
257
+ headers = self.validate_environment(api_key, headers)
258
+ task = get_hf_task_for_model(model)
259
+ print_verbose(f"{model}, {task}")
260
+ completion_url = ""
261
+ input_text = ""
262
+ if "https" in model:
263
+ completion_url = model
264
+ elif api_base:
265
+ completion_url = api_base
266
+ elif "HF_API_BASE" in os.environ:
267
+ completion_url = os.getenv("HF_API_BASE", "")
268
+ elif "HUGGINGFACE_API_BASE" in os.environ:
269
+ completion_url = os.getenv("HUGGINGFACE_API_BASE", "")
270
+ else:
271
+ completion_url = f"https://api-inference.huggingface.co/models/{model}"
272
+
273
+ ## Load Config
274
+ config=litellm.HuggingfaceConfig.get_config()
275
+ for k, v in config.items():
276
+ if k not in optional_params: # completion(top_k=3) > huggingfaceConfig(top_k=3) <- allows for dynamic variables to be passed in
277
+ optional_params[k] = v
278
+
279
+ ### MAP INPUT PARAMS
280
+ if task == "conversational":
281
+ inference_params = copy.deepcopy(optional_params)
282
+ inference_params.pop("details")
283
+ inference_params.pop("return_full_text")
284
+ past_user_inputs = []
285
+ generated_responses = []
286
+ text = ""
287
+ for message in messages:
288
+ if message["role"] == "user":
289
+ if text != "":
290
+ past_user_inputs.append(text)
291
+ text = message["content"]
292
+ elif message["role"] == "assistant" or message["role"] == "system":
293
+ generated_responses.append(message["content"])
294
+ data = {
295
+ "inputs": {
296
+ "text": text,
297
+ "past_user_inputs": past_user_inputs,
298
+ "generated_responses": generated_responses
299
+ },
300
+ "parameters": inference_params
301
+ }
302
+ input_text = "".join(message["content"] for message in messages)
303
+ elif task == "text-generation-inference":
304
+ # always send "details" and "return_full_text" as params
305
+ if model in custom_prompt_dict:
306
+ # check if the model has a registered custom prompt
307
+ model_prompt_details = custom_prompt_dict[model]
308
+ prompt = custom_prompt(
309
+ role_dict=model_prompt_details.get("roles", None),
310
+ initial_prompt_value=model_prompt_details.get("initial_prompt_value", ""),
311
+ final_prompt_value=model_prompt_details.get("final_prompt_value", ""),
312
+ messages=messages
313
+ )
314
+ else:
315
+ prompt = prompt_factory(model=model, messages=messages)
316
+ data = {
317
+ "inputs": prompt,
318
+ "parameters": optional_params,
319
+ "stream": True if "stream" in optional_params and optional_params["stream"] == True else False,
320
+ }
321
+ input_text = prompt
322
+ else:
323
+ # Non TGI and Conversational llms
324
+ # We need this branch, it removes 'details' and 'return_full_text' from params
325
+ if model in custom_prompt_dict:
326
+ # check if the model has a registered custom prompt
327
+ model_prompt_details = custom_prompt_dict[model]
328
+ prompt = custom_prompt(
329
+ role_dict=model_prompt_details.get("roles", {}),
330
+ initial_prompt_value=model_prompt_details.get("initial_prompt_value", ""),
331
+ final_prompt_value=model_prompt_details.get("final_prompt_value", ""),
332
+ bos_token=model_prompt_details.get("bos_token", ""),
333
+ eos_token=model_prompt_details.get("eos_token", ""),
334
+ messages=messages,
335
+ )
336
+ else:
337
+ prompt = prompt_factory(model=model, messages=messages)
338
+ inference_params = copy.deepcopy(optional_params)
339
+ inference_params.pop("details")
340
+ inference_params.pop("return_full_text")
341
+ data = {
342
+ "inputs": prompt,
343
+ "parameters": inference_params,
344
+ "stream": True if "stream" in optional_params and optional_params["stream"] == True else False,
345
+ }
346
+ input_text = prompt
347
+ ## LOGGING
348
+ logging_obj.pre_call(
349
+ input=input_text,
350
+ api_key=api_key,
351
+ additional_args={"complete_input_dict": data, "task": task, "headers": headers, "api_base": completion_url, "acompletion": acompletion},
352
+ )
353
+ ## COMPLETION CALL
354
+ if acompletion is True:
355
+ ### ASYNC STREAMING
356
+ if optional_params.get("stream", False):
357
+ return self.async_streaming(logging_obj=logging_obj, api_base=completion_url, data=data, headers=headers, model_response=model_response, model=model) # type: ignore
358
+ else:
359
+ ### ASYNC COMPLETION
360
+ return self.acompletion(api_base=completion_url, data=data, headers=headers, model_response=model_response, task=task, encoding=encoding, input_text=input_text, model=model, optional_params=optional_params) # type: ignore
361
+ ### SYNC STREAMING
362
+ if "stream" in optional_params and optional_params["stream"] == True:
363
+ response = requests.post(
364
+ completion_url,
365
+ headers=headers,
366
+ data=json.dumps(data),
367
+ stream=optional_params["stream"]
368
+ )
369
+ return response.iter_lines()
370
+ ### SYNC COMPLETION
371
+ else:
372
+ response = requests.post(
373
+ completion_url,
374
+ headers=headers,
375
+ data=json.dumps(data)
376
+ )
377
+
378
+ ## Some servers might return streaming responses even though stream was not set to true. (e.g. Baseten)
379
+ is_streamed = False
380
+ if response.__dict__['headers'].get("Content-Type", "") == "text/event-stream":
381
+ is_streamed = True
382
+
383
+ # iterate over the complete streamed response, and return the final answer
384
+ if is_streamed:
385
+ streamed_response = CustomStreamWrapper(completion_stream=response.iter_lines(), model=model, custom_llm_provider="huggingface", logging_obj=logging_obj)
386
+ content = ""
387
+ for chunk in streamed_response:
388
+ content += chunk["choices"][0]["delta"]["content"]
389
+ completion_response: List[Dict[str, Any]] = [{"generated_text": content}]
390
+ ## LOGGING
391
+ logging_obj.post_call(
392
+ input=input_text,
393
+ api_key=api_key,
394
+ original_response=completion_response,
395
+ additional_args={"complete_input_dict": data, "task": task},
396
+ )
397
+ else:
398
+ ## LOGGING
399
+ logging_obj.post_call(
400
+ input=input_text,
401
+ api_key=api_key,
402
+ original_response=response.text,
403
+ additional_args={"complete_input_dict": data, "task": task},
404
+ )
405
+ ## RESPONSE OBJECT
406
+ try:
407
+ completion_response = response.json()
408
+ if isinstance(completion_response, dict):
409
+ completion_response = [completion_response]
410
+ except:
411
+ import traceback
412
+ raise HuggingfaceError(
413
+ message=f"Original Response received: {response.text}; Stacktrace: {traceback.format_exc()}", status_code=response.status_code
414
+ )
415
+ print_verbose(f"response: {completion_response}")
416
+ if isinstance(completion_response, dict) and "error" in completion_response:
417
+ print_verbose(f"completion error: {completion_response['error']}")
418
+ print_verbose(f"response.status_code: {response.status_code}")
419
+ raise HuggingfaceError(
420
+ message=completion_response["error"],
421
+ status_code=response.status_code,
422
+ )
423
+ return self.convert_to_model_response_object(
424
+ completion_response=completion_response,
425
+ model_response=model_response,
426
+ task=task,
427
+ optional_params=optional_params,
428
+ encoding=encoding,
429
+ input_text=input_text,
430
+ model=model
431
+ )
432
+ except HuggingfaceError as e:
433
+ exception_mapping_worked = True
434
+ raise e
435
+ except Exception as e:
436
+ if exception_mapping_worked:
437
+ raise e
438
+ else:
439
+ import traceback
440
+ raise HuggingfaceError(status_code=500, message=traceback.format_exc())
441
+
442
+ async def acompletion(self,
443
+ api_base: str,
444
+ data: dict,
445
+ headers: dict,
446
+ model_response: ModelResponse,
447
+ task: str,
448
+ encoding: Any,
449
+ input_text: str,
450
+ model: str,
451
+ optional_params: dict):
452
+ response = None
453
+ try:
454
+ async with httpx.AsyncClient() as client:
455
+ response = await client.post(url=api_base, json=data, headers=headers, timeout=None)
456
+ response_json = response.json()
457
+ if response.status_code != 200:
458
+ raise HuggingfaceError(status_code=response.status_code, message=response.text, request=response.request, response=response)
459
+
460
+ ## RESPONSE OBJECT
461
+ return self.convert_to_model_response_object(completion_response=response_json,
462
+ model_response=model_response,
463
+ task=task,
464
+ encoding=encoding,
465
+ input_text=input_text,
466
+ model=model,
467
+ optional_params=optional_params)
468
+ except Exception as e:
469
+ if isinstance(e,httpx.TimeoutException):
470
+ raise HuggingfaceError(status_code=500, message="Request Timeout Error")
471
+ elif response is not None and hasattr(response, "text"):
472
+ raise HuggingfaceError(status_code=500, message=f"{str(e)}\n\nOriginal Response: {response.text}")
473
+ else:
474
+ raise HuggingfaceError(status_code=500, message=f"{str(e)}")
475
+
476
+ async def async_streaming(self,
477
+ logging_obj,
478
+ api_base: str,
479
+ data: dict,
480
+ headers: dict,
481
+ model_response: ModelResponse,
482
+ model: str):
483
+ async with httpx.AsyncClient() as client:
484
+ response = client.stream(
485
+ "POST",
486
+ url=f"{api_base}",
487
+ json=data,
488
+ headers=headers
489
+ )
490
+ async with response as r:
491
+ if r.status_code != 200:
492
+ raise HuggingfaceError(status_code=r.status_code, message="An error occurred while streaming")
493
+
494
+ streamwrapper = CustomStreamWrapper(completion_stream=r.aiter_lines(), model=model, custom_llm_provider="huggingface",logging_obj=logging_obj)
495
+ async for transformed_chunk in streamwrapper:
496
+ yield transformed_chunk
497
+
498
+ def embedding(self,
499
+ model: str,
500
+ input: list,
501
+ api_key: Optional[str] = None,
502
+ api_base: Optional[str] = None,
503
+ logging_obj=None,
504
+ model_response=None,
505
+ encoding=None,
506
+ ):
507
+ super().embedding()
508
+ headers = self.validate_environment(api_key, headers=None)
509
+ # print_verbose(f"{model}, {task}")
510
+ embed_url = ""
511
+ if "https" in model:
512
+ embed_url = model
513
+ elif api_base:
514
+ embed_url = api_base
515
+ elif "HF_API_BASE" in os.environ:
516
+ embed_url = os.getenv("HF_API_BASE", "")
517
+ elif "HUGGINGFACE_API_BASE" in os.environ:
518
+ embed_url = os.getenv("HUGGINGFACE_API_BASE", "")
519
+ else:
520
+ embed_url = f"https://api-inference.huggingface.co/models/{model}"
521
+
522
+ if "sentence-transformers" in model:
523
+ if len(input) == 0:
524
+ raise HuggingfaceError(status_code=400, message="sentence transformers requires 2+ sentences")
525
+ data = {
526
+ "inputs": {
527
+ "source_sentence": input[0],
528
+ "sentences": [ "That is a happy dog", "That is a very happy person", "Today is a sunny day" ]
529
+ }
530
+ }
531
+ else:
532
+ data = {
533
+ "inputs": input # type: ignore
534
+ }
535
+
536
+ ## LOGGING
537
+ logging_obj.pre_call(
538
+ input=input,
539
+ api_key=api_key,
540
+ additional_args={"complete_input_dict": data},
541
+ )
542
+ ## COMPLETION CALL
543
+ response = requests.post(
544
+ embed_url, headers=headers, data=json.dumps(data)
545
+ )
546
+
547
+
548
+ ## LOGGING
549
+ logging_obj.post_call(
550
+ input=input,
551
+ api_key=api_key,
552
+ additional_args={"complete_input_dict": data},
553
+ original_response=response,
554
+ )
555
+
556
+
557
+ embeddings = response.json()
558
+
559
+ if "error" in embeddings:
560
+ raise HuggingfaceError(status_code=500, message=embeddings['error'])
561
+
562
+ output_data = []
563
+ if "similarities" in embeddings:
564
+ for idx, embedding in embeddings["similarities"]:
565
+ output_data.append(
566
+ {
567
+ "object": "embedding",
568
+ "index": idx,
569
+ "embedding": embedding # flatten list returned from hf
570
+ }
571
+ )
572
+ else:
573
+ for idx, embedding in enumerate(embeddings):
574
+ if isinstance(embedding, float):
575
+ output_data.append(
576
+ {
577
+ "object": "embedding",
578
+ "index": idx,
579
+ "embedding": embedding # flatten list returned from hf
580
+ }
581
+ )
582
+ else:
583
+ output_data.append(
584
+ {
585
+ "object": "embedding",
586
+ "index": idx,
587
+ "embedding": embedding[0][0] # flatten list returned from hf
588
+ }
589
+ )
590
+ model_response["object"] = "list"
591
+ model_response["data"] = output_data
592
+ model_response["model"] = model
593
+ input_tokens = 0
594
+ for text in input:
595
+ input_tokens+=len(encoding.encode(text))
596
+
597
+ model_response["usage"] = {
598
+ "prompt_tokens": input_tokens,
599
+ "total_tokens": input_tokens,
600
+ }
601
+ return model_response
602
+
603
+
604
+
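A minimal usage sketch for the Huggingface handler above, assuming the usual litellm.completion entry point routes "huggingface/..." model names to this module; the repo id and token below are placeholders:

import litellm

# Class-level defaults set via HuggingfaceConfig are merged into optional_params by
# completion() when the caller does not pass them explicitly (see "## Load Config" above).
litellm.HuggingfaceConfig(max_new_tokens=200, temperature=0.2)

response = litellm.completion(
    model="huggingface/HuggingFaceH4/zephyr-7b-beta",  # placeholder repo id
    messages=[{"role": "user", "content": "Hello, how are you?"}],
    api_key="hf_...",  # placeholder; sent as a Bearer token per validate_environment
)
print(response["choices"][0]["message"]["content"])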
litellm/llms/maritalk.py ADDED
@@ -0,0 +1,164 @@
1
+ import os, types
2
+ import json
3
+ from enum import Enum
4
+ import requests
5
+ import time, traceback
6
+ from typing import Callable, Optional, List
7
+ from litellm.utils import ModelResponse, Choices, Message, Usage
8
+ import litellm
9
+
10
+ class MaritalkError(Exception):
11
+ def __init__(self, status_code, message):
12
+ self.status_code = status_code
13
+ self.message = message
14
+ super().__init__(
15
+ self.message
16
+ ) # Call the base class constructor with the parameters it needs
17
+
18
+ class MaritTalkConfig():
19
+ """
20
+ The class `MaritTalkConfig` provides configuration for the MaritTalk API interface. Here are the parameters:
21
+
22
+ - `max_tokens` (integer): Maximum number of tokens the model will generate as part of the response. Default is 1.
23
+
24
+ - `model` (string): The model used for conversation. Default is 'maritalk'.
25
+
26
+ - `do_sample` (boolean): If set to True, the API will generate a response using sampling. Default is True.
27
+
28
+ - `temperature` (number): A non-negative float controlling the randomness in generation. Lower temperatures result in less random generations. Default is 0.7.
29
+
30
+ - `top_p` (number): Selection threshold for token inclusion based on cumulative probability. Default is 0.95.
31
+
32
+ - `repetition_penalty` (number): Penalty for repetition in the generated conversation. Default is 1.
33
+
34
+ - `stopping_tokens` (list of string): List of tokens at which generation should stop.
35
+ """
36
+ max_tokens: Optional[int] = None
37
+ model: Optional[str] = None
38
+ do_sample: Optional[bool] = None
39
+ temperature: Optional[float] = None
40
+ top_p: Optional[float] = None
41
+ repetition_penalty: Optional[float] = None
42
+ stopping_tokens: Optional[List[str]] = None
43
+
44
+ def __init__(self,
45
+ max_tokens: Optional[int]=None,
46
+ model: Optional[str] = None,
47
+ do_sample: Optional[bool] = None,
48
+ temperature: Optional[float] = None,
49
+ top_p: Optional[float] = None,
50
+ repetition_penalty: Optional[float] = None,
51
+ stopping_tokens: Optional[List[str]] = None) -> None:
52
+
53
+ locals_ = locals()
54
+ for key, value in locals_.items():
55
+ if key != 'self' and value is not None:
56
+ setattr(self.__class__, key, value)
57
+
58
+ @classmethod
59
+ def get_config(cls):
60
+ return {k: v for k, v in cls.__dict__.items()
61
+ if not k.startswith('__')
62
+ and not isinstance(v, (types.FunctionType, types.BuiltinFunctionType, classmethod, staticmethod))
63
+ and v is not None}
64
+
65
+ def validate_environment(api_key):
66
+ headers = {
67
+ "accept": "application/json",
68
+ "content-type": "application/json",
69
+ }
70
+ if api_key:
71
+ headers["Authorization"] = f"Key {api_key}"
72
+ return headers
73
+
74
+ def completion(
75
+ model: str,
76
+ messages: list,
77
+ api_base: str,
78
+ model_response: ModelResponse,
79
+ print_verbose: Callable,
80
+ encoding,
81
+ api_key,
82
+ logging_obj,
83
+ optional_params=None,
84
+ litellm_params=None,
85
+ logger_fn=None,
86
+ ):
87
+ headers = validate_environment(api_key)
88
+ completion_url = api_base
89
+ model = model
90
+
91
+ ## Load Config
92
+ config=litellm.MaritTalkConfig.get_config()
93
+ for k, v in config.items():
94
+ if k not in optional_params: # completion(top_k=3) > maritalk_config(top_k=3) <- allows for dynamic variables to be passed in
95
+ optional_params[k] = v
96
+
97
+ data = {
98
+ "messages": messages,
99
+ **optional_params,
100
+ }
101
+
102
+ ## LOGGING
103
+ logging_obj.pre_call(
104
+ input=messages,
105
+ api_key=api_key,
106
+ additional_args={"complete_input_dict": data},
107
+ )
108
+ ## COMPLETION CALL
109
+ response = requests.post(
110
+ completion_url, headers=headers, data=json.dumps(data), stream=optional_params["stream"] if "stream" in optional_params else False
111
+ )
112
+ if "stream" in optional_params and optional_params["stream"] == True:
113
+ return response.iter_lines()
114
+ else:
115
+ ## LOGGING
116
+ logging_obj.post_call(
117
+ input=messages,
118
+ api_key=api_key,
119
+ original_response=response.text,
120
+ additional_args={"complete_input_dict": data},
121
+ )
122
+ print_verbose(f"raw model_response: {response.text}")
123
+ ## RESPONSE OBJECT
124
+ completion_response = response.json()
125
+ if "error" in completion_response:
126
+ raise MaritalkError(
127
+ message=completion_response["error"],
128
+ status_code=response.status_code,
129
+ )
130
+ else:
131
+ try:
132
+ if len(completion_response["answer"]) > 0:
133
+ model_response["choices"][0]["message"]["content"] = completion_response["answer"]
134
+ except Exception as e:
135
+ raise MaritalkError(message=response.text, status_code=response.status_code)
136
+
137
+ ## CALCULATING USAGE
138
+ prompt = "".join(m["content"] for m in messages)
139
+ prompt_tokens = len(
140
+ encoding.encode(prompt)
141
+ )
142
+ completion_tokens = len(
143
+ encoding.encode(model_response["choices"][0]["message"].get("content", ""))
144
+ )
145
+
146
+ model_response["created"] = int(time.time())
147
+ model_response["model"] = model
148
+ usage = Usage(
149
+ prompt_tokens=prompt_tokens,
150
+ completion_tokens=completion_tokens,
151
+ total_tokens=prompt_tokens + completion_tokens
152
+ )
153
+ model_response.usage = usage
154
+ return model_response
155
+
156
+ def embedding(
157
+ model: str,
158
+ input: list,
159
+ api_key: Optional[str] = None,
160
+ logging_obj=None,
161
+ model_response=None,
162
+ encoding=None,
163
+ ):
164
+ pass
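A usage sketch for the MariTalk handler above, assuming litellm routes the "maritalk" model name to this module; the key below is a placeholder and is sent as "Authorization: Key <api_key>" per validate_environment:

import litellm

# Defaults registered on MaritTalkConfig are merged into optional_params by completion().
litellm.MaritTalkConfig(max_tokens=512, temperature=0.7)

response = litellm.completion(
    model="maritalk",
    messages=[{"role": "user", "content": "Olá, tudo bem?"}],
    api_key="maritalk-api-key-placeholder",
)
print(response["choices"][0]["message"]["content"])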
litellm/llms/nlp_cloud.py ADDED
@@ -0,0 +1,212 @@
1
+ import os, types
2
+ import json
3
+ from enum import Enum
4
+ import requests
5
+ import time
6
+ from typing import Callable, Optional
7
+ import litellm
8
+ from litellm.utils import ModelResponse, Usage
9
+
10
+ class NLPCloudError(Exception):
11
+ def __init__(self, status_code, message):
12
+ self.status_code = status_code
13
+ self.message = message
14
+ super().__init__(
15
+ self.message
16
+ ) # Call the base class constructor with the parameters it needs
17
+
18
+ class NLPCloudConfig():
19
+ """
20
+ Reference: https://docs.nlpcloud.com/#generation
21
+
22
+ - `max_length` (int): Optional. The maximum number of tokens that the generated text should contain.
23
+
24
+ - `length_no_input` (boolean): Optional. Whether `min_length` and `max_length` should not include the length of the input text.
25
+
26
+ - `end_sequence` (string): Optional. A specific token that should be the end of the generated sequence.
27
+
28
+ - `remove_end_sequence` (boolean): Optional. Whether to remove the `end_sequence` string from the result.
29
+
30
+ - `remove_input` (boolean): Optional. Whether to remove the input text from the result.
31
+
32
+ - `bad_words` (list of strings): Optional. List of tokens that are not allowed to be generated.
33
+
34
+ - `temperature` (float): Optional. Temperature sampling. It modulates the next token probabilities.
35
+
36
+ - `top_p` (float): Optional. Top P sampling. Below 1, only the most probable tokens with probabilities that add up to top_p or higher are kept for generation.
37
+
38
+ - `top_k` (int): Optional. Top K sampling. The number of highest probability vocabulary tokens to keep for top k filtering.
39
+
40
+ - `repetition_penalty` (float): Optional. Prevents the same word from being repeated too many times.
41
+
42
+ - `num_beams` (int): Optional. Number of beams for beam search.
43
+
44
+ - `num_return_sequences` (int): Optional. The number of independently computed returned sequences.
45
+ """
46
+ max_length: Optional[int]=None
47
+ length_no_input: Optional[bool]=None
48
+ end_sequence: Optional[str]=None
49
+ remove_end_sequence: Optional[bool]=None
50
+ remove_input: Optional[bool]=None
51
+ bad_words: Optional[list]=None
52
+ temperature: Optional[float]=None
53
+ top_p: Optional[float]=None
54
+ top_k: Optional[int]=None
55
+ repetition_penalty: Optional[float]=None
56
+ num_beams: Optional[int]=None
57
+ num_return_sequences: Optional[int]=None
58
+
59
+
60
+ def __init__(self,
61
+ max_length: Optional[int]=None,
62
+ length_no_input: Optional[bool]=None,
63
+ end_sequence: Optional[str]=None,
64
+ remove_end_sequence: Optional[bool]=None,
65
+ remove_input: Optional[bool]=None,
66
+ bad_words: Optional[list]=None,
67
+ temperature: Optional[float]=None,
68
+ top_p: Optional[float]=None,
69
+ top_k: Optional[int]=None,
70
+ repetition_penalty: Optional[float]=None,
71
+ num_beams: Optional[int]=None,
72
+ num_return_sequences: Optional[int]=None) -> None:
73
+
74
+ locals_ = locals()
75
+ for key, value in locals_.items():
76
+ if key != 'self' and value is not None:
77
+ setattr(self.__class__, key, value)
78
+
79
+ @classmethod
80
+ def get_config(cls):
81
+ return {k: v for k, v in cls.__dict__.items()
82
+ if not k.startswith('__')
83
+ and not isinstance(v, (types.FunctionType, types.BuiltinFunctionType, classmethod, staticmethod))
84
+ and v is not None}
85
+
86
+
87
+ def validate_environment(api_key):
88
+ headers = {
89
+ "accept": "application/json",
90
+ "content-type": "application/json",
91
+ }
92
+ if api_key:
93
+ headers["Authorization"] = f"Token {api_key}"
94
+ return headers
95
+
96
+ def completion(
97
+ model: str,
98
+ messages: list,
99
+ api_base: str,
100
+ model_response: ModelResponse,
101
+ print_verbose: Callable,
102
+ encoding,
103
+ api_key,
104
+ logging_obj,
105
+ optional_params=None,
106
+ litellm_params=None,
107
+ logger_fn=None,
108
+ default_max_tokens_to_sample=None,
109
+ ):
110
+ headers = validate_environment(api_key)
111
+
112
+ ## Load Config
113
+ config = litellm.NLPCloudConfig.get_config()
114
+ for k, v in config.items():
115
+ if k not in optional_params: # completion(top_k=3) > togetherai_config(top_k=3) <- allows for dynamic variables to be passed in
116
+ optional_params[k] = v
117
+
118
+ completion_url_fragment_1 = api_base
119
+ completion_url_fragment_2 = "/generation"
120
+ model = model
121
+ text = " ".join(message["content"] for message in messages)
122
+
123
+ data = {
124
+ "text": text,
125
+ **optional_params,
126
+ }
127
+
128
+ completion_url = completion_url_fragment_1 + model + completion_url_fragment_2
129
+
130
+ ## LOGGING
131
+ logging_obj.pre_call(
132
+ input=text,
133
+ api_key=api_key,
134
+ additional_args={"complete_input_dict": data, "headers": headers, "api_base": completion_url},
135
+ )
136
+ ## COMPLETION CALL
137
+ response = requests.post(
138
+ completion_url, headers=headers, data=json.dumps(data), stream=optional_params["stream"] if "stream" in optional_params else False
139
+ )
140
+ if "stream" in optional_params and optional_params["stream"] == True:
141
+ return clean_and_iterate_chunks(response)
142
+ else:
143
+ ## LOGGING
144
+ logging_obj.post_call(
145
+ input=text,
146
+ api_key=api_key,
147
+ original_response=response.text,
148
+ additional_args={"complete_input_dict": data},
149
+ )
150
+ print_verbose(f"raw model_response: {response.text}")
151
+ ## RESPONSE OBJECT
152
+ try:
153
+ completion_response = response.json()
154
+ except:
155
+ raise NLPCloudError(message=response.text, status_code=response.status_code)
156
+ if "error" in completion_response:
157
+ raise NLPCloudError(
158
+ message=completion_response["error"],
159
+ status_code=response.status_code,
160
+ )
161
+ else:
162
+ try:
163
+ if len(completion_response["generated_text"]) > 0:
164
+ model_response["choices"][0]["message"]["content"] = completion_response["generated_text"]
165
+ except:
166
+ raise NLPCloudError(message=json.dumps(completion_response), status_code=response.status_code)
167
+
168
+ ## CALCULATING USAGE - NLP Cloud returns token counts directly in its response, so usage is read from nb_input_tokens / nb_generated_tokens.
169
+ prompt_tokens = completion_response["nb_input_tokens"]
170
+ completion_tokens = completion_response["nb_generated_tokens"]
171
+
172
+ model_response["created"] = int(time.time())
173
+ model_response["model"] = model
174
+ usage = Usage(
175
+ prompt_tokens=prompt_tokens,
176
+ completion_tokens=completion_tokens,
177
+ total_tokens=prompt_tokens + completion_tokens
178
+ )
179
+ model_response.usage = usage
180
+ return model_response
181
+
182
+
183
+ # def clean_and_iterate_chunks(response):
184
+ # def process_chunk(chunk):
185
+ # print(f"received chunk: {chunk}")
186
+ # cleaned_chunk = chunk.decode("utf-8")
187
+ # # Perform further processing based on your needs
188
+ # return cleaned_chunk
189
+
190
+ # for line in response.iter_lines():
191
+ # if line:
192
+ # yield process_chunk(line)
193
+ def clean_and_iterate_chunks(response):
194
+ buffer = b''
195
+
196
+ for chunk in response.iter_content(chunk_size=1024):
197
+ if not chunk:
198
+ break
199
+
200
+ buffer += chunk
201
+ while b'\x00' in buffer:
202
+ buffer = buffer.replace(b'\x00', b'')
203
+ yield buffer.decode('utf-8')
204
+ buffer = b''
205
+
206
+ # No more data expected, yield any remaining data in the buffer
207
+ if buffer:
208
+ yield buffer.decode('utf-8')
209
+
210
+ def embedding():
211
+ # logic for parsing in - calling - parsing out model embedding calls
212
+ pass
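A usage sketch for the NLP Cloud handler above, assuming litellm routes NLP Cloud model names (such as "dolphin") to this module; the key is a placeholder. The completion URL is built as api_base + model + "/generation" and the key is sent as a "Token" Authorization header:

import litellm, os

os.environ["NLP_CLOUD_API_KEY"] = "nlp-cloud-key-placeholder"  # placeholder key

response = litellm.completion(
    model="dolphin",  # NLP Cloud model name; provider routing happens outside this module
    messages=[{"role": "user", "content": "Summarize: LiteLLM wraps many LLM providers."}],
)
# Usage comes from nb_input_tokens / nb_generated_tokens in the provider response (see above).
print(response["choices"][0]["message"]["content"])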
litellm/llms/ollama.py ADDED
@@ -0,0 +1,231 @@
1
+ import requests, types
2
+ import json
3
+ import traceback
4
+ from typing import Optional
5
+ import litellm
6
+ import httpx
7
+
8
+ try:
9
+ from async_generator import async_generator, yield_ # optional dependency
10
+ async_generator_imported = True
11
+ except ImportError:
12
+ async_generator_imported = False # this should not throw an error, it will impact the 'import litellm' statement
13
+
14
+ class OllamaError(Exception):
15
+ def __init__(self, status_code, message):
16
+ self.status_code = status_code
17
+ self.message = message
18
+ self.request = httpx.Request(method="POST", url="http://localhost:11434")
19
+ self.response = httpx.Response(status_code=status_code, request=self.request)
20
+ super().__init__(
21
+ self.message
22
+ ) # Call the base class constructor with the parameters it needs
23
+
24
+ class OllamaConfig():
25
+ """
26
+ Reference: https://github.com/jmorganca/ollama/blob/main/docs/api.md#parameters
27
+
28
+ The class `OllamaConfig` provides the configuration for the Ollama API interface. Below are the parameters:
29
+
30
+ - `mirostat` (int): Enable Mirostat sampling for controlling perplexity. Default is 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0. Example usage: mirostat 0
31
+
32
+ - `mirostat_eta` (float): Influences how quickly the algorithm responds to feedback from the generated text. A lower learning rate will result in slower adjustments, while a higher learning rate will make the algorithm more responsive. Default: 0.1. Example usage: mirostat_eta 0.1
33
+
34
+ - `mirostat_tau` (float): Controls the balance between coherence and diversity of the output. A lower value will result in more focused and coherent text. Default: 5.0. Example usage: mirostat_tau 5.0
35
+
36
+ - `num_ctx` (int): Sets the size of the context window used to generate the next token. Default: 2048. Example usage: num_ctx 4096
37
+
38
+ - `num_gqa` (int): The number of GQA groups in the transformer layer. Required for some models, for example it is 8 for llama2:70b. Example usage: num_gqa 1
39
+
40
+ - `num_gpu` (int): The number of layers to send to the GPU(s). On macOS it defaults to 1 to enable metal support, 0 to disable. Example usage: num_gpu 0
41
+
42
+ - `num_thread` (int): Sets the number of threads to use during computation. By default, Ollama will detect this for optimal performance. It is recommended to set this value to the number of physical CPU cores your system has (as opposed to the logical number of cores). Example usage: num_thread 8
43
+
44
+ - `repeat_last_n` (int): Sets how far back for the model to look back to prevent repetition. Default: 64, 0 = disabled, -1 = num_ctx. Example usage: repeat_last_n 64
45
+
46
+ - `repeat_penalty` (float): Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. Default: 1.1. Example usage: repeat_penalty 1.1
47
+
48
+ - `temperature` (float): The temperature of the model. Increasing the temperature will make the model answer more creatively. Default: 0.8. Example usage: temperature 0.7
49
+
50
+ - `stop` (string[]): Sets the stop sequences to use. Example usage: stop "AI assistant:"
51
+
52
+ - `tfs_z` (float): Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting. Default: 1. Example usage: tfs_z 1
53
+
54
+ - `num_predict` (int): Maximum number of tokens to predict when generating text. Default: 128, -1 = infinite generation, -2 = fill context. Example usage: num_predict 42
55
+
56
+ - `top_k` (int): Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. Default: 40. Example usage: top_k 40
57
+
58
+ - `top_p` (float): Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. Default: 0.9. Example usage: top_p 0.9
59
+
60
+ - `system` (string): system prompt for model (overrides what is defined in the Modelfile)
61
+
62
+ - `template` (string): the full prompt or prompt template (overrides what is defined in the Modelfile)
63
+ """
64
+ mirostat: Optional[int]=None
65
+ mirostat_eta: Optional[float]=None
66
+ mirostat_tau: Optional[float]=None
67
+ num_ctx: Optional[int]=None
68
+ num_gqa: Optional[int]=None
69
+ num_thread: Optional[int]=None
70
+ repeat_last_n: Optional[int]=None
71
+ repeat_penalty: Optional[float]=None
72
+ temperature: Optional[float]=None
73
+ stop: Optional[list]=None # stop is a list based on this - https://github.com/jmorganca/ollama/pull/442
74
+ tfs_z: Optional[float]=None
75
+ num_predict: Optional[int]=None
76
+ top_k: Optional[int]=None
77
+ top_p: Optional[float]=None
78
+ system: Optional[str]=None
79
+ template: Optional[str]=None
80
+
81
+ def __init__(self,
82
+ mirostat: Optional[int]=None,
83
+ mirostat_eta: Optional[float]=None,
84
+ mirostat_tau: Optional[float]=None,
85
+ num_ctx: Optional[int]=None,
86
+ num_gqa: Optional[int]=None,
87
+ num_thread: Optional[int]=None,
88
+ repeat_last_n: Optional[int]=None,
89
+ repeat_penalty: Optional[float]=None,
90
+ temperature: Optional[float]=None,
91
+ stop: Optional[list]=None,
92
+ tfs_z: Optional[float]=None,
93
+ num_predict: Optional[int]=None,
94
+ top_k: Optional[int]=None,
95
+ top_p: Optional[float]=None,
96
+ system: Optional[str]=None,
97
+ template: Optional[str]=None) -> None:
98
+ locals_ = locals()
99
+ for key, value in locals_.items():
100
+ if key != 'self' and value is not None:
101
+ setattr(self.__class__, key, value)
102
+
103
+ @classmethod
104
+ def get_config(cls):
105
+ return {k: v for k, v in cls.__dict__.items()
106
+ if not k.startswith('__')
107
+ and not isinstance(v, (types.FunctionType, types.BuiltinFunctionType, classmethod, staticmethod))
108
+ and v is not None}
109
+
110
+
111
+ # ollama implementation
112
+ def get_ollama_response_stream(
113
+ api_base="http://localhost:11434",
114
+ model="llama2",
115
+ prompt="Why is the sky blue?",
116
+ optional_params=None,
117
+ logging_obj=None,
118
+ ):
119
+ if api_base.endswith("/api/generate"):
120
+ url = api_base
121
+ else:
122
+ url = f"{api_base}/api/generate"
123
+
124
+ ## Load Config
125
+ config=litellm.OllamaConfig.get_config()
126
+ for k, v in config.items():
127
+ if k not in optional_params: # completion(top_k=3) > cohere_config(top_k=3) <- allows for dynamic variables to be passed in
128
+ optional_params[k] = v
129
+
130
+ data = {
131
+ "model": model,
132
+ "prompt": prompt,
133
+ **optional_params
134
+ }
135
+ ## LOGGING
136
+ logging_obj.pre_call(
137
+ input=None,
138
+ api_key=None,
139
+ additional_args={"api_base": url, "complete_input_dict": data},
140
+ )
141
+ session = requests.Session()
142
+
143
+ with session.post(url, json=data, stream=True) as resp:
144
+ if resp.status_code != 200:
145
+ raise OllamaError(status_code=resp.status_code, message=resp.text)
146
+ for line in resp.iter_lines():
147
+ if line:
148
+ try:
149
+ json_chunk = line.decode("utf-8")
150
+ chunks = json_chunk.split("\n")
151
+ for chunk in chunks:
152
+ if chunk.strip() != "":
153
+ j = json.loads(chunk)
154
+ if "error" in j:
155
+ completion_obj = {
156
+ "role": "assistant",
157
+ "content": "",
158
+ "error": j
159
+ }
160
+ yield completion_obj
161
+ if "response" in j:
162
+ completion_obj = {
163
+ "role": "assistant",
164
+ "content": "",
165
+ }
166
+ completion_obj["content"] = j["response"]
167
+ yield completion_obj
168
+ except Exception as e:
169
+ traceback.print_exc()
170
+ session.close()
171
+
172
+ if async_generator_imported:
173
+ # ollama implementation
174
+ @async_generator
175
+ async def async_get_ollama_response_stream(
176
+ api_base="http://localhost:11434",
177
+ model="llama2",
178
+ prompt="Why is the sky blue?",
179
+ optional_params=None,
180
+ logging_obj=None,
181
+ ):
182
+ url = f"{api_base}/api/generate"
183
+
184
+ ## Load Config
185
+ config=litellm.OllamaConfig.get_config()
186
+ for k, v in config.items():
187
+ if k not in optional_params: # completion(top_k=3) > cohere_config(top_k=3) <- allows for dynamic variables to be passed in
188
+ optional_params[k] = v
189
+
190
+ data = {
191
+ "model": model,
192
+ "prompt": prompt,
193
+ **optional_params
194
+ }
195
+ ## LOGGING
196
+ logging_obj.pre_call(
197
+ input=None,
198
+ api_key=None,
199
+ additional_args={"api_base": url, "complete_input_dict": data},
200
+ )
201
+ session = requests.Session()
202
+
203
+ with session.post(url, json=data, stream=True) as resp:
204
+ if resp.status_code != 200:
205
+ raise OllamaError(status_code=resp.status_code, message=resp.text)
206
+ for line in resp.iter_lines():
207
+ if line:
208
+ try:
209
+ json_chunk = line.decode("utf-8")
210
+ chunks = json_chunk.split("\n")
211
+ for chunk in chunks:
212
+ if chunk.strip() != "":
213
+ j = json.loads(chunk)
214
+ if "error" in j:
215
+ completion_obj = {
216
+ "role": "assistant",
217
+ "content": "",
218
+ "error": j
219
+ }
220
+ await yield_({"choices": [{"delta": completion_obj}]})
221
+ if "response" in j:
222
+ completion_obj = {
223
+ "role": "assistant",
224
+ "content": "",
225
+ }
226
+ completion_obj["content"] = j["response"]
227
+ await yield_({"choices": [{"delta": completion_obj}]})
228
+ except Exception as e:
229
+ import logging
230
+ logging.debug(f"Error decoding JSON: {e}")
231
+ session.close()
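A streaming usage sketch for the Ollama handler above, assuming a local Ollama server on the default http://localhost:11434 used by this module; chunk access mirrors the bracket-style access used elsewhere in this commit, and the final chunk may carry no content:

import litellm

response = litellm.completion(
    model="ollama/llama2",
    messages=[{"role": "user", "content": "Why is the sky blue?"}],
    api_base="http://localhost:11434",  # get_ollama_response_stream appends /api/generate
    stream=True,
)
for chunk in response:
    # Mirrors chunk["choices"][0]["delta"]["content"] access used in huggingface_restapi.py above.
    piece = chunk["choices"][0]["delta"]["content"]
    if piece:
        print(piece, end="")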
litellm/llms/oobabooga.py ADDED
@@ -0,0 +1,124 @@
1
+ import os
2
+ import json
3
+ from enum import Enum
4
+ import requests
5
+ import time
6
+ from typing import Callable, Optional
7
+ from litellm.utils import ModelResponse, Usage
8
+ from .prompt_templates.factory import prompt_factory, custom_prompt
9
+
10
+ class OobaboogaError(Exception):
11
+ def __init__(self, status_code, message):
12
+ self.status_code = status_code
13
+ self.message = message
14
+ super().__init__(
15
+ self.message
16
+ ) # Call the base class constructor with the parameters it needs
17
+
18
+ def validate_environment(api_key):
19
+ headers = {
20
+ "accept": "application/json",
21
+ "content-type": "application/json",
22
+ }
23
+ if api_key:
24
+ headers["Authorization"] = f"Token {api_key}"
25
+ return headers
26
+
27
+ def completion(
28
+ model: str,
29
+ messages: list,
30
+ api_base: Optional[str],
31
+ model_response: ModelResponse,
32
+ print_verbose: Callable,
33
+ encoding,
34
+ api_key,
35
+ logging_obj,
36
+ custom_prompt_dict={},
37
+ optional_params=None,
38
+ litellm_params=None,
39
+ logger_fn=None,
40
+ default_max_tokens_to_sample=None,
41
+ ):
42
+ headers = validate_environment(api_key)
43
+ if "https" in model:
44
+ completion_url = model
45
+ elif api_base:
46
+ completion_url = api_base
47
+ else:
48
+ raise OobaboogaError(status_code=404, message="API Base not set. Set one via completion(..,api_base='your-api-url')")
49
+ model = model
50
+ if model in custom_prompt_dict:
51
+ # check if the model has a registered custom prompt
52
+ model_prompt_details = custom_prompt_dict[model]
53
+ prompt = custom_prompt(
54
+ role_dict=model_prompt_details["roles"],
55
+ initial_prompt_value=model_prompt_details["initial_prompt_value"],
56
+ final_prompt_value=model_prompt_details["final_prompt_value"],
57
+ messages=messages
58
+ )
59
+ else:
60
+ prompt = prompt_factory(model=model, messages=messages)
61
+
62
+ completion_url = completion_url + "/api/v1/generate"
63
+ data = {
64
+ "prompt": prompt,
65
+ **optional_params,
66
+ }
67
+ ## LOGGING
68
+ logging_obj.pre_call(
69
+ input=prompt,
70
+ api_key=api_key,
71
+ additional_args={"complete_input_dict": data},
72
+ )
73
+ ## COMPLETION CALL
74
+ response = requests.post(
75
+ completion_url, headers=headers, data=json.dumps(data), stream=optional_params["stream"] if "stream" in optional_params else False
76
+ )
77
+ if "stream" in optional_params and optional_params["stream"] == True:
78
+ return response.iter_lines()
79
+ else:
80
+ ## LOGGING
81
+ logging_obj.post_call(
82
+ input=prompt,
83
+ api_key=api_key,
84
+ original_response=response.text,
85
+ additional_args={"complete_input_dict": data},
86
+ )
87
+ print_verbose(f"raw model_response: {response.text}")
88
+ ## RESPONSE OBJECT
89
+ try:
90
+ completion_response = response.json()
91
+ except:
92
+ raise OobaboogaError(message=response.text, status_code=response.status_code)
93
+ if "error" in completion_response:
94
+ raise OobaboogaError(
95
+ message=completion_response["error"],
96
+ status_code=response.status_code,
97
+ )
98
+ else:
99
+ try:
100
+ model_response["choices"][0]["message"]["content"] = completion_response['results'][0]['text']
101
+ except:
102
+ raise OobaboogaError(message=json.dumps(completion_response), status_code=response.status_code)
103
+
104
+ ## CALCULATING USAGE
105
+ prompt_tokens = len(
106
+ encoding.encode(prompt)
107
+ )
108
+ completion_tokens = len(
109
+ encoding.encode(model_response["choices"][0]["message"]["content"])
110
+ )
111
+
112
+ model_response["created"] = int(time.time())
113
+ model_response["model"] = model
114
+ usage = Usage(
115
+ prompt_tokens=prompt_tokens,
116
+ completion_tokens=completion_tokens,
117
+ total_tokens=prompt_tokens + completion_tokens
118
+ )
119
+ model_response.usage = usage
120
+ return model_response
121
+
122
+ def embedding():
123
+ # logic for parsing in - calling - parsing out model embedding calls
124
+ pass
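The `custom_prompt_dict` lookup in `completion()` above expects entries keyed by model name, each carrying `roles`, `initial_prompt_value`, and `final_prompt_value`. A hedged sketch of such an entry (the model name and template strings are made up for illustration):

```python
# Hypothetical custom prompt registration for a model served behind Oobabooga.
# The nested keys mirror exactly what completion() reads before calling custom_prompt().
custom_prompt_dict = {
    "my-local-vicuna": {
        "roles": {
            "system":    {"pre_message": "SYSTEM: ",    "post_message": "\n"},
            "user":      {"pre_message": "USER: ",      "post_message": "\n"},
            "assistant": {"pre_message": "ASSISTANT: ", "post_message": "\n"},
        },
        "initial_prompt_value": "",
        "final_prompt_value": "ASSISTANT: ",
    }
}
```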
litellm/llms/openai.py ADDED
@@ -0,0 +1,590 @@
1
+ from typing import Optional, Union, Any
2
+ import types, time, json
3
+ import httpx
4
+ from .base import BaseLLM
5
+ from litellm.utils import ModelResponse, Choices, Message, CustomStreamWrapper, convert_to_model_response_object, Usage
6
+ from typing import Callable, Optional
7
+ import aiohttp, requests
8
+ import litellm
9
+ from .prompt_templates.factory import prompt_factory, custom_prompt
10
+ from openai import OpenAI, AsyncOpenAI
11
+
12
+ class OpenAIError(Exception):
13
+ def __init__(self, status_code, message, request: Optional[httpx.Request]=None, response: Optional[httpx.Response]=None):
14
+ self.status_code = status_code
15
+ self.message = message
16
+ if request:
17
+ self.request = request
18
+ else:
19
+ self.request = httpx.Request(method="POST", url="https://api.openai.com/v1")
20
+ if response:
21
+ self.response = response
22
+ else:
23
+ self.response = httpx.Response(status_code=status_code, request=self.request)
24
+ super().__init__(
25
+ self.message
26
+ ) # Call the base class constructor with the parameters it needs
27
+
28
+
29
+ class OpenAIConfig():
30
+ """
31
+ Reference: https://platform.openai.com/docs/api-reference/chat/create
32
+
33
+ The class `OpenAIConfig` provides configuration for OpenAI's Chat API interface. Below are the parameters:
34
+
35
+ - `frequency_penalty` (number or null): Defaults to 0. Allows a value between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, thereby minimizing repetition.
36
+
37
+ - `function_call` (string or object): This optional parameter controls how the model calls functions.
38
+
39
+ - `functions` (array): An optional parameter. It is a list of functions for which the model may generate JSON inputs.
40
+
41
+ - `logit_bias` (map): This optional parameter modifies the likelihood of specified tokens appearing in the completion.
42
+
43
+ - `max_tokens` (integer or null): This optional parameter helps to set the maximum number of tokens to generate in the chat completion.
44
+
45
+ - `n` (integer or null): This optional parameter helps to set how many chat completion choices to generate for each input message.
46
+
47
+ - `presence_penalty` (number or null): Defaults to 0. It penalizes new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
48
+
49
+ - `stop` (string / array / null): Specifies up to 4 sequences where the API will stop generating further tokens.
50
+
51
+ - `temperature` (number or null): Defines the sampling temperature to use, varying between 0 and 2.
52
+
53
+ - `top_p` (number or null): An alternative to sampling with temperature, used for nucleus sampling.
54
+ """
55
+ frequency_penalty: Optional[int]=None
56
+ function_call: Optional[Union[str, dict]]=None
57
+ functions: Optional[list]=None
58
+ logit_bias: Optional[dict]=None
59
+ max_tokens: Optional[int]=None
60
+ n: Optional[int]=None
61
+ presence_penalty: Optional[int]=None
62
+ stop: Optional[Union[str, list]]=None
63
+ temperature: Optional[int]=None
64
+ top_p: Optional[int]=None
65
+
66
+ def __init__(self,
67
+ frequency_penalty: Optional[int]=None,
68
+ function_call: Optional[Union[str, dict]]=None,
69
+ functions: Optional[list]=None,
70
+ logit_bias: Optional[dict]=None,
71
+ max_tokens: Optional[int]=None,
72
+ n: Optional[int]=None,
73
+ presence_penalty: Optional[int]=None,
74
+ stop: Optional[Union[str, list]]=None,
75
+ temperature: Optional[int]=None,
76
+ top_p: Optional[int]=None,) -> None:
77
+
78
+ locals_ = locals()
79
+ for key, value in locals_.items():
80
+ if key != 'self' and value is not None:
81
+ setattr(self.__class__, key, value)
82
+
83
+ @classmethod
84
+ def get_config(cls):
85
+ return {k: v for k, v in cls.__dict__.items()
86
+ if not k.startswith('__')
87
+ and not isinstance(v, (types.FunctionType, types.BuiltinFunctionType, classmethod, staticmethod))
88
+ and v is not None}
89
+
90
+ class OpenAITextCompletionConfig():
91
+ """
92
+ Reference: https://platform.openai.com/docs/api-reference/completions/create
93
+
94
+ The class `OpenAITextCompletionConfig` provides configuration for OpenAI's text completion API interface. Below are the parameters:
95
+
96
+ - `best_of` (integer or null): This optional parameter generates server-side completions and returns the one with the highest log probability per token.
97
+
98
+ - `echo` (boolean or null): This optional parameter will echo back the prompt in addition to the completion.
99
+
100
+ - `frequency_penalty` (number or null): Defaults to 0. It is a number from -2.0 to 2.0, where positive values decrease the model's likelihood to repeat the same line.
101
+
102
+ - `logit_bias` (map): This optional parameter modifies the likelihood of specified tokens appearing in the completion.
103
+
104
+ - `logprobs` (integer or null): This optional parameter includes the log probabilities on the most likely tokens as well as the chosen tokens.
105
+
106
+ - `max_tokens` (integer or null): This optional parameter sets the maximum number of tokens to generate in the completion.
107
+
108
+ - `n` (integer or null): This optional parameter sets how many completions to generate for each prompt.
109
+
110
+ - `presence_penalty` (number or null): Defaults to 0 and can be between -2.0 and 2.0. Positive values increase the model's likelihood to talk about new topics.
111
+
112
+ - `stop` (string / array / null): Specifies up to 4 sequences where the API will stop generating further tokens.
113
+
114
+ - `suffix` (string or null): Defines the suffix that comes after a completion of inserted text.
115
+
116
+ - `temperature` (number or null): This optional parameter defines the sampling temperature to use.
117
+
118
+ - `top_p` (number or null): An alternative to sampling with temperature, used for nucleus sampling.
119
+ """
120
+ best_of: Optional[int]=None
121
+ echo: Optional[bool]=None
122
+ frequency_penalty: Optional[int]=None
123
+ logit_bias: Optional[dict]=None
124
+ logprobs: Optional[int]=None
125
+ max_tokens: Optional[int]=None
126
+ n: Optional[int]=None
127
+ presence_penalty: Optional[int]=None
128
+ stop: Optional[Union[str, list]]=None
129
+ suffix: Optional[str]=None
130
+ temperature: Optional[float]=None
131
+ top_p: Optional[float]=None
132
+
133
+ def __init__(self,
134
+ best_of: Optional[int]=None,
135
+ echo: Optional[bool]=None,
136
+ frequency_penalty: Optional[int]=None,
137
+ logit_bias: Optional[dict]=None,
138
+ logprobs: Optional[int]=None,
139
+ max_tokens: Optional[int]=None,
140
+ n: Optional[int]=None,
141
+ presence_penalty: Optional[int]=None,
142
+ stop: Optional[Union[str, list]]=None,
143
+ suffix: Optional[str]=None,
144
+ temperature: Optional[float]=None,
145
+ top_p: Optional[float]=None) -> None:
146
+ locals_ = locals()
147
+ for key, value in locals_.items():
148
+ if key != 'self' and value is not None:
149
+ setattr(self.__class__, key, value)
150
+
151
+ @classmethod
152
+ def get_config(cls):
153
+ return {k: v for k, v in cls.__dict__.items()
154
+ if not k.startswith('__')
155
+ and not isinstance(v, (types.FunctionType, types.BuiltinFunctionType, classmethod, staticmethod))
156
+ and v is not None}
157
+
158
+ class OpenAIChatCompletion(BaseLLM):
159
+
160
+ def __init__(self) -> None:
161
+ super().__init__()
162
+
163
+ def completion(self,
164
+ model_response: ModelResponse,
165
+ timeout: float,
166
+ model: Optional[str]=None,
167
+ messages: Optional[list]=None,
168
+ print_verbose: Optional[Callable]=None,
169
+ api_key: Optional[str]=None,
170
+ api_base: Optional[str]=None,
171
+ acompletion: bool = False,
172
+ logging_obj=None,
173
+ optional_params=None,
174
+ litellm_params=None,
175
+ logger_fn=None,
176
+ headers: Optional[dict]=None,
177
+ custom_prompt_dict: dict={},
178
+ client=None
179
+ ):
180
+ super().completion()
181
+ exception_mapping_worked = False
182
+ try:
183
+ if headers:
184
+ optional_params["extra_headers"] = headers
185
+ if model is None or messages is None:
186
+ raise OpenAIError(status_code=422, message=f"Missing model or messages")
187
+
188
+ if not isinstance(timeout, float):
189
+ raise OpenAIError(status_code=422, message=f"Timeout needs to be a float")
190
+
191
+ for _ in range(2): # if call fails due to alternating messages, retry with reformatted message
192
+ data = {
193
+ "model": model,
194
+ "messages": messages,
195
+ **optional_params
196
+ }
197
+
198
+ ## LOGGING
199
+ logging_obj.pre_call(
200
+ input=messages,
201
+ api_key=api_key,
202
+ additional_args={"headers": headers, "api_base": api_base, "acompletion": acompletion, "complete_input_dict": data},
203
+ )
204
+
205
+ try:
206
+ max_retries = data.pop("max_retries", 2)
207
+ if acompletion is True:
208
+ if optional_params.get("stream", False):
209
+ return self.async_streaming(logging_obj=logging_obj, data=data, model=model, api_base=api_base, api_key=api_key, timeout=timeout, client=client, max_retries=max_retries)
210
+ else:
211
+ return self.acompletion(data=data, model_response=model_response, api_base=api_base, api_key=api_key, timeout=timeout, client=client, max_retries=max_retries)
212
+ elif optional_params.get("stream", False):
213
+ return self.streaming(logging_obj=logging_obj, data=data, model=model, api_base=api_base, api_key=api_key, timeout=timeout, client=client, max_retries=max_retries)
214
+ else:
215
+ if not isinstance(max_retries, int):
216
+ raise OpenAIError(status_code=422, message="max retries must be an int")
217
+ if client is None:
218
+ openai_client = OpenAI(api_key=api_key, base_url=api_base, http_client=litellm.client_session, timeout=timeout, max_retries=max_retries)
219
+ else:
220
+ openai_client = client
221
+ response = openai_client.chat.completions.create(**data) # type: ignore
222
+ logging_obj.post_call(
223
+ input=None,
224
+ api_key=api_key,
225
+ original_response=response,
226
+ additional_args={"complete_input_dict": data},
227
+ )
228
+ return convert_to_model_response_object(response_object=json.loads(response.model_dump_json()), model_response_object=model_response)
229
+ except Exception as e:
230
+ if "Conversation roles must alternate user/assistant" in str(e) or "user and assistant roles should be alternating" in str(e):
231
+ # reformat messages to ensure user/assistant are alternating, if there's either 2 consecutive 'user' messages or 2 consecutive 'assistant' message, add a blank 'user' or 'assistant' message to ensure compatibility
232
+ new_messages = []
233
+ for i in range(len(messages)-1):
234
+ new_messages.append(messages[i])
235
+ if messages[i]["role"] == messages[i+1]["role"]:
236
+ if messages[i]["role"] == "user":
237
+ new_messages.append({"role": "assistant", "content": ""})
238
+ else:
239
+ new_messages.append({"role": "user", "content": ""})
240
+ new_messages.append(messages[-1])
241
+ messages = new_messages
242
+ elif "Last message must have role `user`" in str(e):
243
+ new_messages = messages
244
+ new_messages.append({"role": "user", "content": ""})
245
+ messages = new_messages
246
+ else:
247
+ raise e
248
+ except OpenAIError as e:
249
+ exception_mapping_worked = True
250
+ raise e
251
+ except Exception as e:
252
+ raise e
253
+
254
+ async def acompletion(self,
255
+ data: dict,
256
+ model_response: ModelResponse,
257
+ timeout: float,
258
+ api_key: Optional[str]=None,
259
+ api_base: Optional[str]=None,
260
+ client=None,
261
+ max_retries=None,
262
+ ):
263
+ response = None
264
+ try:
265
+ if client is None:
266
+ openai_aclient = AsyncOpenAI(api_key=api_key, base_url=api_base, http_client=litellm.aclient_session, timeout=timeout, max_retries=max_retries)
267
+ else:
268
+ openai_aclient = client
269
+ response = await openai_aclient.chat.completions.create(**data)
270
+ return convert_to_model_response_object(response_object=json.loads(response.model_dump_json()), model_response_object=model_response)
271
+ except Exception as e:
272
+ if response and hasattr(response, "text"):
273
+ raise OpenAIError(status_code=500, message=f"{str(e)}\n\nOriginal Response: {response.text}")
274
+ else:
275
+ if type(e).__name__ == "ReadTimeout":
276
+ raise OpenAIError(status_code=408, message=f"{type(e).__name__}")
277
+ else:
278
+ raise OpenAIError(status_code=500, message=f"{str(e)}")
279
+
280
+ def streaming(self,
281
+ logging_obj,
282
+ timeout: float,
283
+ data: dict,
284
+ model: str,
285
+ api_key: Optional[str]=None,
286
+ api_base: Optional[str]=None,
287
+ client = None,
288
+ max_retries=None
289
+ ):
290
+ if client is None:
291
+ openai_client = OpenAI(api_key=api_key, base_url=api_base, http_client=litellm.client_session, timeout=timeout, max_retries=max_retries)
292
+ else:
293
+ openai_client = client
294
+ response = openai_client.chat.completions.create(**data)
295
+ streamwrapper = CustomStreamWrapper(completion_stream=response, model=model, custom_llm_provider="openai",logging_obj=logging_obj)
296
+ return streamwrapper
297
+
298
+ async def async_streaming(self,
299
+ logging_obj,
300
+ timeout: float,
301
+ data: dict,
302
+ model: str,
303
+ api_key: Optional[str]=None,
304
+ api_base: Optional[str]=None,
305
+ client=None,
306
+ max_retries=None,
307
+ ):
308
+ response = None
309
+ try:
310
+ if client is None:
311
+ openai_aclient = AsyncOpenAI(api_key=api_key, base_url=api_base, http_client=litellm.aclient_session, timeout=timeout, max_retries=max_retries)
312
+ else:
313
+ openai_aclient = client
314
+ response = await openai_aclient.chat.completions.create(**data)
315
+ streamwrapper = CustomStreamWrapper(completion_stream=response, model=model, custom_llm_provider="openai",logging_obj=logging_obj)
316
+ async for transformed_chunk in streamwrapper:
317
+ yield transformed_chunk
318
+ except Exception as e: # need to exception handle here. async exceptions don't get caught in sync functions.
319
+ if response is not None and hasattr(response, "text"):
320
+ raise OpenAIError(status_code=500, message=f"{str(e)}\n\nOriginal Response: {response.text}")
321
+ else:
322
+ if type(e).__name__ == "ReadTimeout":
323
+ raise OpenAIError(status_code=408, message=f"{type(e).__name__}")
324
+ else:
325
+ raise OpenAIError(status_code=500, message=f"{str(e)}")
326
+ async def aembedding(
327
+ self,
328
+ data: dict,
329
+ model_response: ModelResponse,
330
+ timeout: float,
331
+ api_key: Optional[str]=None,
332
+ api_base: Optional[str]=None,
333
+ client=None,
334
+ max_retries=None,
335
+ ):
336
+ response = None
337
+ try:
338
+ if client is None:
339
+ openai_aclient = AsyncOpenAI(api_key=api_key, base_url=api_base, http_client=litellm.aclient_session, timeout=timeout, max_retries=max_retries)
340
+ else:
341
+ openai_aclient = client
342
+ response = await openai_aclient.embeddings.create(**data) # type: ignore
343
+ return response
344
+ except Exception as e:
345
+ raise e
346
+ def embedding(self,
347
+ model: str,
348
+ input: list,
349
+ timeout: float,
350
+ api_key: Optional[str] = None,
351
+ api_base: Optional[str] = None,
352
+ model_response: Optional[litellm.utils.EmbeddingResponse] = None,
353
+ logging_obj=None,
354
+ optional_params=None,
355
+ client=None,
356
+ aembedding=None,
357
+ ):
358
+ super().embedding()
359
+ exception_mapping_worked = False
360
+ try:
361
+ model = model
362
+ data = {
363
+ "model": model,
364
+ "input": input,
365
+ **optional_params
366
+ }
367
+ max_retries = data.pop("max_retries", 2)
368
+ if not isinstance(max_retries, int):
369
+ raise OpenAIError(status_code=422, message="max retries must be an int")
370
+ if aembedding == True:
371
+ response = self.aembedding(data=data, model_response=model_response, api_base=api_base, api_key=api_key, timeout=timeout, client=client, max_retries=max_retries) # type: ignore
372
+ return response
373
+ if client is None:
374
+ openai_client = OpenAI(api_key=api_key, base_url=api_base, http_client=litellm.client_session, timeout=timeout, max_retries=max_retries)
375
+ else:
376
+ openai_client = client
377
+ ## LOGGING
378
+ logging_obj.pre_call(
379
+ input=input,
380
+ api_key=api_key,
381
+ additional_args={"complete_input_dict": data, "api_base": api_base},
382
+ )
383
+
384
+ ## COMPLETION CALL
385
+ response = openai_client.embeddings.create(**data) # type: ignore
386
+ ## LOGGING
387
+ logging_obj.post_call(
388
+ input=input,
389
+ api_key=api_key,
390
+ additional_args={"complete_input_dict": data},
391
+ original_response=response,
392
+ )
393
+
394
+ return convert_to_model_response_object(response_object=json.loads(response.model_dump_json()), model_response_object=model_response, response_type="embedding") # type: ignore
395
+ except OpenAIError as e:
396
+ exception_mapping_worked = True
397
+ raise e
398
+ except Exception as e:
399
+ if exception_mapping_worked:
400
+ raise e
401
+ else:
402
+ import traceback
403
+ raise OpenAIError(status_code=500, message=traceback.format_exc())
404
+
405
+
406
+ class OpenAITextCompletion(BaseLLM):
407
+ _client_session: httpx.Client
408
+
409
+ def __init__(self) -> None:
410
+ super().__init__()
411
+ self._client_session = self.create_client_session()
412
+
413
+ def validate_environment(self, api_key):
414
+ headers = {
415
+ "content-type": "application/json",
416
+ }
417
+ if api_key:
418
+ headers["Authorization"] = f"Bearer {api_key}"
419
+ return headers
420
+
421
+ def convert_to_model_response_object(self, response_object: Optional[dict]=None, model_response_object: Optional[ModelResponse]=None):
422
+ try:
423
+ ## RESPONSE OBJECT
424
+ if response_object is None or model_response_object is None:
425
+ raise ValueError("Error in response object format")
426
+ choice_list=[]
427
+ for idx, choice in enumerate(response_object["choices"]):
428
+ message = Message(content=choice["text"], role="assistant")
429
+ choice = Choices(finish_reason=choice["finish_reason"], index=idx, message=message)
430
+ choice_list.append(choice)
431
+ model_response_object.choices = choice_list
432
+
433
+ if "usage" in response_object:
434
+ model_response_object.usage = response_object["usage"]
435
+
436
+ if "id" in response_object:
437
+ model_response_object.id = response_object["id"]
438
+
439
+ if "model" in response_object:
440
+ model_response_object.model = response_object["model"]
441
+
442
+ model_response_object._hidden_params["original_response"] = response_object # track original response, if users make a litellm.text_completion() request, we can return the original response
443
+ return model_response_object
444
+ except Exception as e:
445
+ raise e
446
+
447
+ def completion(self,
448
+ model_response: ModelResponse,
449
+ api_key: str,
450
+ model: str,
451
+ messages: list,
452
+ print_verbose: Optional[Callable]=None,
453
+ api_base: Optional[str]=None,
454
+ logging_obj=None,
455
+ acompletion: bool = False,
456
+ optional_params=None,
457
+ litellm_params=None,
458
+ logger_fn=None,
459
+ headers: Optional[dict]=None):
460
+ super().completion()
461
+ exception_mapping_worked = False
462
+ try:
463
+ if headers is None:
464
+ headers = self.validate_environment(api_key=api_key)
465
+ if model is None or messages is None:
466
+ raise OpenAIError(status_code=422, message=f"Missing model or messages")
467
+
468
+ api_base = f"{api_base}/completions"
469
+
470
+ if len(messages)>0 and "content" in messages[0] and type(messages[0]["content"]) == list:
471
+ prompt = messages[0]["content"]
472
+ else:
473
+ prompt = " ".join([message["content"] for message in messages]) # type: ignore
474
+
475
+ data = {
476
+ "model": model,
477
+ "prompt": prompt,
478
+ **optional_params
479
+ }
480
+
481
+ ## LOGGING
482
+ logging_obj.pre_call(
483
+ input=messages,
484
+ api_key=api_key,
485
+ additional_args={"headers": headers, "api_base": api_base, "complete_input_dict": data},
486
+ )
487
+ if acompletion == True:
488
+ if optional_params.get("stream", False):
489
+ return self.async_streaming(logging_obj=logging_obj, api_base=api_base, data=data, headers=headers, model_response=model_response, model=model)
490
+ else:
491
+ return self.acompletion(api_base=api_base, data=data, headers=headers, model_response=model_response, prompt=prompt, api_key=api_key, logging_obj=logging_obj, model=model) # type: ignore
492
+ elif optional_params.get("stream", False):
493
+ return self.streaming(logging_obj=logging_obj, api_base=api_base, data=data, headers=headers, model_response=model_response, model=model)
494
+ else:
495
+ response = httpx.post(
496
+ url=f"{api_base}",
497
+ json=data,
498
+ headers=headers,
499
+ )
500
+ if response.status_code != 200:
501
+ raise OpenAIError(status_code=response.status_code, message=response.text)
502
+
503
+ ## LOGGING
504
+ logging_obj.post_call(
505
+ input=prompt,
506
+ api_key=api_key,
507
+ original_response=response,
508
+ additional_args={
509
+ "headers": headers,
510
+ "api_base": api_base,
511
+ },
512
+ )
513
+
514
+ ## RESPONSE OBJECT
515
+ return self.convert_to_model_response_object(response_object=response.json(), model_response_object=model_response)
516
+ except Exception as e:
517
+ raise e
518
+
519
+ async def acompletion(self,
520
+ logging_obj,
521
+ api_base: str,
522
+ data: dict,
523
+ headers: dict,
524
+ model_response: ModelResponse,
525
+ prompt: str,
526
+ api_key: str,
527
+ model: str):
528
+ async with httpx.AsyncClient() as client:
529
+ response = await client.post(api_base, json=data, headers=headers, timeout=litellm.request_timeout)
530
+ response_json = response.json()
531
+ if response.status_code != 200:
532
+ raise OpenAIError(status_code=response.status_code, message=response.text)
533
+
534
+ ## LOGGING
535
+ logging_obj.post_call(
536
+ input=prompt,
537
+ api_key=api_key,
538
+ original_response=response,
539
+ additional_args={
540
+ "headers": headers,
541
+ "api_base": api_base,
542
+ },
543
+ )
544
+
545
+ ## RESPONSE OBJECT
546
+ return self.convert_to_model_response_object(response_object=response_json, model_response_object=model_response)
547
+
548
+ def streaming(self,
549
+ logging_obj,
550
+ api_base: str,
551
+ data: dict,
552
+ headers: dict,
553
+ model_response: ModelResponse,
554
+ model: str
555
+ ):
556
+ with httpx.stream(
557
+ url=f"{api_base}",
558
+ json=data,
559
+ headers=headers,
560
+ method="POST",
561
+ timeout=litellm.request_timeout
562
+ ) as response:
563
+ if response.status_code != 200:
564
+ raise OpenAIError(status_code=response.status_code, message=response.text)
565
+
566
+ streamwrapper = CustomStreamWrapper(completion_stream=response.iter_lines(), model=model, custom_llm_provider="text-completion-openai",logging_obj=logging_obj)
567
+ for transformed_chunk in streamwrapper:
568
+ yield transformed_chunk
569
+
570
+ async def async_streaming(self,
571
+ logging_obj,
572
+ api_base: str,
573
+ data: dict,
574
+ headers: dict,
575
+ model_response: ModelResponse,
576
+ model: str):
577
+ client = httpx.AsyncClient()
578
+ async with client.stream(
579
+ url=f"{api_base}",
580
+ json=data,
581
+ headers=headers,
582
+ method="POST",
583
+ timeout=litellm.request_timeout
584
+ ) as response:
585
+ if response.status_code != 200:
586
+ raise OpenAIError(status_code=response.status_code, message=response.text)
587
+
588
+ streamwrapper = CustomStreamWrapper(completion_stream=response.aiter_lines(), model=model, custom_llm_provider="text-completion-openai",logging_obj=logging_obj)
589
+ async for transformed_chunk in streamwrapper:
590
+ yield transformed_chunk
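The retry path in `OpenAIChatCompletion.completion()` repairs conversations whose roles do not alternate by inserting blank messages of the opposite role. A self-contained sketch of that repair (the helper name is made up; litellm performs this inline):

```python
def repair_alternation(messages: list) -> list:
    """Insert a blank message of the opposite role wherever two consecutive
    messages share a role, so strict user/assistant providers accept them."""
    fixed = []
    for i in range(len(messages) - 1):
        fixed.append(messages[i])
        if messages[i]["role"] == messages[i + 1]["role"]:
            filler = "assistant" if messages[i]["role"] == "user" else "user"
            fixed.append({"role": filler, "content": ""})
    fixed.append(messages[-1])
    return fixed

print(repair_alternation([
    {"role": "user", "content": "hi"},
    {"role": "user", "content": "are you still there?"},
]))
# -> user, assistant(""), user
```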
litellm/llms/palm.py ADDED
@@ -0,0 +1,177 @@
1
+ import os, types, traceback, copy
2
+ import json
3
+ from enum import Enum
4
+ import time
5
+ from typing import Callable, Optional
6
+ from litellm.utils import ModelResponse, get_secret, Choices, Message, Usage
7
+ import litellm
8
+ import sys, httpx
9
+
10
+ class PalmError(Exception):
11
+ def __init__(self, status_code, message):
12
+ self.status_code = status_code
13
+ self.message = message
14
+ self.request = httpx.Request(method="POST", url="https://developers.generativeai.google/api/python/google/generativeai/chat")
15
+ self.response = httpx.Response(status_code=status_code, request=self.request)
16
+ super().__init__(
17
+ self.message
18
+ ) # Call the base class constructor with the parameters it needs
19
+
20
+ class PalmConfig():
21
+ """
22
+ Reference: https://developers.generativeai.google/api/python/google/generativeai/chat
23
+
24
+ The class `PalmConfig` provides configuration for the PaLM API interface. Here are the parameters:
25
+
26
+ - `context` (string): Text that should be provided to the model first, to ground the response. This could be a prompt to guide the model's responses.
27
+
28
+ - `examples` (list): Examples of what the model should generate. They are treated identically to conversation messages except that they take precedence over the history in messages if the total input size exceeds the model's input_token_limit.
29
+
30
+ - `temperature` (float): Controls the randomness of the output. Must be positive. Higher values produce a more random and varied response. A temperature of zero will be deterministic.
31
+
32
+ - `candidate_count` (int): Maximum number of generated response messages to return. This value must be between [1, 8], inclusive. Only unique candidates are returned.
33
+
34
+ - `top_k` (int): The API uses combined nucleus and top-k sampling. `top_k` sets the maximum number of tokens to sample from on each step.
35
+
36
+ - `top_p` (float): The API uses combined nucleus and top-k sampling. `top_p` configures the nucleus sampling. It sets the maximum cumulative probability of tokens to sample from.
37
+
38
+ - `max_output_tokens` (int): Sets the maximum number of tokens to be returned in the output
39
+ """
40
+ context: Optional[str]=None
41
+ examples: Optional[list]=None
42
+ temperature: Optional[float]=None
43
+ candidate_count: Optional[int]=None
44
+ top_k: Optional[int]=None
45
+ top_p: Optional[float]=None
46
+ max_output_tokens: Optional[int]=None
47
+
48
+ def __init__(self,
49
+ context: Optional[str]=None,
50
+ examples: Optional[list]=None,
51
+ temperature: Optional[float]=None,
52
+ candidate_count: Optional[int]=None,
53
+ top_k: Optional[int]=None,
54
+ top_p: Optional[float]=None,
55
+ max_output_tokens: Optional[int]=None) -> None:
56
+
57
+ locals_ = locals()
58
+ for key, value in locals_.items():
59
+ if key != 'self' and value is not None:
60
+ setattr(self.__class__, key, value)
61
+
62
+ @classmethod
63
+ def get_config(cls):
64
+ return {k: v for k, v in cls.__dict__.items()
65
+ if not k.startswith('__')
66
+ and not isinstance(v, (types.FunctionType, types.BuiltinFunctionType, classmethod, staticmethod))
67
+ and v is not None}
68
+
69
+
70
+
71
+ def completion(
72
+ model: str,
73
+ messages: list,
74
+ model_response: ModelResponse,
75
+ print_verbose: Callable,
76
+ api_key,
77
+ encoding,
78
+ logging_obj,
79
+ optional_params=None,
80
+ litellm_params=None,
81
+ logger_fn=None,
82
+ ):
83
+ try:
84
+ import google.generativeai as palm
85
+ except:
86
+ raise Exception("Importing google.generativeai failed, please run 'pip install -q google-generativeai'")
87
+ palm.configure(api_key=api_key)
88
+
89
+ model = model
90
+
91
+ ## Load Config
92
+ inference_params = copy.deepcopy(optional_params)
93
+ inference_params.pop("stream", None) # palm does not support streaming, so we handle this by fake streaming in main.py
94
+ config = litellm.PalmConfig.get_config()
95
+ for k, v in config.items():
96
+ if k not in inference_params: # completion(top_k=3) > palm_config(top_k=3) <- allows for dynamic variables to be passed in
97
+ inference_params[k] = v
98
+
99
+ prompt = ""
100
+ for message in messages:
101
+ if "role" in message:
102
+ if message["role"] == "user":
103
+ prompt += (
104
+ f"{message['content']}"
105
+ )
106
+ else:
107
+ prompt += (
108
+ f"{message['content']}"
109
+ )
110
+ else:
111
+ prompt += f"{message['content']}"
112
+
113
+ ## LOGGING
114
+ logging_obj.pre_call(
115
+ input=prompt,
116
+ api_key="",
117
+ additional_args={"complete_input_dict": {"inference_params": inference_params}},
118
+ )
119
+ ## COMPLETION CALL
120
+ try:
121
+ response = palm.generate_text(prompt=prompt, **inference_params)
122
+ except Exception as e:
123
+ raise PalmError(
124
+ message=str(e),
125
+ status_code=500,
126
+ )
127
+
128
+ ## LOGGING
129
+ logging_obj.post_call(
130
+ input=prompt,
131
+ api_key="",
132
+ original_response=response,
133
+ additional_args={"complete_input_dict": {}},
134
+ )
135
+ print_verbose(f"raw model_response: {response}")
136
+ ## RESPONSE OBJECT
137
+ completion_response = response
138
+ try:
139
+ choices_list = []
140
+ for idx, item in enumerate(completion_response.candidates):
141
+ if len(item["output"]) > 0:
142
+ message_obj = Message(content=item["output"])
143
+ else:
144
+ message_obj = Message(content=None)
145
+ choice_obj = Choices(index=idx+1, message=message_obj)
146
+ choices_list.append(choice_obj)
147
+ model_response["choices"] = choices_list
148
+ except Exception as e:
149
+ traceback.print_exc()
150
+ raise PalmError(message=traceback.format_exc(), status_code=500) # the palm response object carries no HTTP status code
151
+
152
+ try:
153
+ completion_response = model_response["choices"][0]["message"].get("content")
154
+ except:
155
+ raise PalmError(status_code=400, message=f"No response received. Original response - {response}")
156
+
157
+ ## CALCULATING USAGE - baseten charges on time, not tokens - have some mapping of cost here.
158
+ prompt_tokens = len(
159
+ encoding.encode(prompt)
160
+ )
161
+ completion_tokens = len(
162
+ encoding.encode(model_response["choices"][0]["message"].get("content", ""))
163
+ )
164
+
165
+ model_response["created"] = int(time.time())
166
+ model_response["model"] = "palm/" + model
167
+ usage = Usage(
168
+ prompt_tokens=prompt_tokens,
169
+ completion_tokens=completion_tokens,
170
+ total_tokens=prompt_tokens + completion_tokens
171
+ )
172
+ model_response.usage = usage
173
+ return model_response
174
+
175
+ def embedding():
176
+ # logic for parsing in - calling - parsing out model embedding calls
177
+ pass
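`PalmConfig` repeats the pattern shared by every provider config in this commit: `__init__` writes non-None arguments onto the class itself, and `get_config()` reads back only the values that were actually set. A minimal self-contained reimplementation of that pattern (the class and field names are illustrative, not litellm's):

```python
import types
from typing import Optional

class ExampleConfig:
    temperature: Optional[float] = None
    top_p: Optional[float] = None

    def __init__(self, temperature: Optional[float] = None,
                 top_p: Optional[float] = None) -> None:
        for key, value in locals().items():
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)   # stored on the class, so it persists

    @classmethod
    def get_config(cls):
        return {
            k: v for k, v in cls.__dict__.items()
            if not k.startswith("__")
            and not isinstance(v, (types.FunctionType, types.BuiltinFunctionType,
                                   classmethod, staticmethod))
            and v is not None
        }

ExampleConfig(temperature=0.5)                 # acts as a process-wide default setter
assert ExampleConfig.get_config() == {"temperature": 0.5}
```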
litellm/llms/petals.py ADDED
@@ -0,0 +1,189 @@
1
+ import os, types
2
+ import json
3
+ from enum import Enum
4
+ import requests
5
+ import time
6
+ from typing import Callable, Optional
7
+ import litellm
8
+ from litellm.utils import ModelResponse, Usage
9
+ from .prompt_templates.factory import prompt_factory, custom_prompt
10
+
11
+ class PetalsError(Exception):
12
+ def __init__(self, status_code, message):
13
+ self.status_code = status_code
14
+ self.message = message
15
+ super().__init__(
16
+ self.message
17
+ ) # Call the base class constructor with the parameters it needs
18
+
19
+ class PetalsConfig():
20
+ """
21
+ Reference: https://github.com/petals-infra/chat.petals.dev#post-apiv1generate
22
+ The `PetalsConfig` class encapsulates the configuration for the Petals API. The properties of this class are described below:
23
+
24
+ - `max_length` (integer): This represents the maximum length of the generated text (including the prefix) in tokens.
25
+
26
+ - `max_new_tokens` (integer): This represents the maximum number of newly generated tokens (excluding the prefix).
27
+
28
+ The generation parameters are compatible with `.generate()` from Hugging Face's Transformers library:
29
+
30
+ - `do_sample` (boolean, optional): If set to 0 (default), the API runs greedy generation. If set to 1, the API performs sampling using the parameters below:
31
+
32
+ - `temperature` (float, optional): This value sets the temperature for sampling.
33
+
34
+ - `top_k` (integer, optional): This value sets the limit for top-k sampling.
35
+
36
+ - `top_p` (float, optional): This value sets the limit for top-p (nucleus) sampling.
37
+
38
+ - `repetition_penalty` (float, optional): This helps apply the repetition penalty during text generation, as discussed in this paper.
39
+ """
40
+ max_length: Optional[int]=None
41
+ max_new_tokens: Optional[int]=litellm.max_tokens # petals requires max tokens to be set
42
+ do_sample: Optional[bool]=None
43
+ temperature: Optional[float]=None
44
+ top_k: Optional[int]=None
45
+ top_p: Optional[float]=None
46
+ repetition_penalty: Optional[float]=None
47
+
48
+ def __init__(self,
49
+ max_length: Optional[int]=None,
50
+ max_new_tokens: Optional[int]=litellm.max_tokens, # petals requires max tokens to be set
51
+ do_sample: Optional[bool]=None,
52
+ temperature: Optional[float]=None,
53
+ top_k: Optional[int]=None,
54
+ top_p: Optional[float]=None,
55
+ repetition_penalty: Optional[float]=None) -> None:
56
+ locals_ = locals()
57
+ for key, value in locals_.items():
58
+ if key != 'self' and value is not None:
59
+ setattr(self.__class__, key, value)
60
+
61
+ @classmethod
62
+ def get_config(cls):
63
+ return {k: v for k, v in cls.__dict__.items()
64
+ if not k.startswith('__')
65
+ and not isinstance(v, (types.FunctionType, types.BuiltinFunctionType, classmethod, staticmethod))
66
+ and v is not None}
67
+
68
+ def completion(
69
+ model: str,
70
+ messages: list,
71
+ api_base: Optional[str],
72
+ model_response: ModelResponse,
73
+ print_verbose: Callable,
74
+ encoding,
75
+ logging_obj,
76
+ optional_params=None,
77
+ stream=False,
78
+ litellm_params=None,
79
+ logger_fn=None,
80
+ ):
81
+ ## Load Config
82
+ config = litellm.PetalsConfig.get_config()
83
+ for k, v in config.items():
84
+ if k not in optional_params: # completion(top_k=3) > petals_config(top_k=3) <- allows for dynamic variables to be passed in
85
+ optional_params[k] = v
86
+
87
+ if model in litellm.custom_prompt_dict:
88
+ # check if the model has a registered custom prompt
89
+ model_prompt_details = litellm.custom_prompt_dict[model]
90
+ prompt = custom_prompt(
91
+ role_dict=model_prompt_details["roles"],
92
+ initial_prompt_value=model_prompt_details["initial_prompt_value"],
93
+ final_prompt_value=model_prompt_details["final_prompt_value"],
94
+ messages=messages
95
+ )
96
+ else:
97
+ prompt = prompt_factory(model=model, messages=messages)
98
+
99
+ if api_base:
100
+ ## LOGGING
101
+ logging_obj.pre_call(
102
+ input=prompt,
103
+ api_key="",
104
+ additional_args={"complete_input_dict": optional_params, "api_base": api_base},
105
+ )
106
+ data = {
107
+ "model": model,
108
+ "inputs": prompt,
109
+ **optional_params
110
+ }
111
+
112
+ ## COMPLETION CALL
113
+ response = requests.post(api_base, data=data)
114
+
115
+ ## LOGGING
116
+ logging_obj.post_call(
117
+ input=prompt,
118
+ api_key="",
119
+ original_response=response.text,
120
+ additional_args={"complete_input_dict": optional_params},
121
+ )
122
+
123
+ ## RESPONSE OBJECT
124
+ try:
125
+ output_text = response.json()["outputs"]
126
+ except Exception as e:
127
+ raise PetalsError(status_code=response.status_code, message=str(e))
128
+
129
+ else:
130
+ try:
131
+ import torch
132
+ from transformers import AutoTokenizer
133
+ from petals import AutoDistributedModelForCausalLM # type: ignore
134
+ except:
135
+ raise Exception(
136
+ "Importing torch, transformers, petals failed\nTry pip installing petals \npip install git+https://github.com/bigscience-workshop/petals"
137
+ )
138
+
139
+ model = model
140
+
141
+ tokenizer = AutoTokenizer.from_pretrained(model, use_fast=False, add_bos_token=False)
142
+ model_obj = AutoDistributedModelForCausalLM.from_pretrained(model)
143
+
144
+ ## LOGGING
145
+ logging_obj.pre_call(
146
+ input=prompt,
147
+ api_key="",
148
+ additional_args={"complete_input_dict": optional_params},
149
+ )
150
+
151
+ ## COMPLETION CALL
152
+ inputs = tokenizer(prompt, return_tensors="pt")["input_ids"]
153
+
154
+ # optional params: max_new_tokens=1,temperature=0.9, top_p=0.6
155
+ outputs = model_obj.generate(inputs, **optional_params)
156
+
157
+ ## LOGGING
158
+ logging_obj.post_call(
159
+ input=prompt,
160
+ api_key="",
161
+ original_response=outputs,
162
+ additional_args={"complete_input_dict": optional_params},
163
+ )
164
+ ## RESPONSE OBJECT
165
+ output_text = tokenizer.decode(outputs[0])
166
+
167
+ if len(output_text) > 0:
168
+ model_response["choices"][0]["message"]["content"] = output_text
169
+
170
+ prompt_tokens = len(
171
+ encoding.encode(prompt)
172
+ )
173
+ completion_tokens = len(
174
+ encoding.encode(model_response["choices"][0]["message"].get("content"))
175
+ )
176
+
177
+ model_response["created"] = int(time.time())
178
+ model_response["model"] = model
179
+ usage = Usage(
180
+ prompt_tokens=prompt_tokens,
181
+ completion_tokens=completion_tokens,
182
+ total_tokens=prompt_tokens + completion_tokens
183
+ )
184
+ model_response.usage = usage
185
+ return model_response
186
+
187
+ def embedding():
188
+ # logic for parsing in - calling - parsing out model embedding calls
189
+ pass
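Each provider here finishes by counting prompt and completion tokens with the `encoding` object litellm passes in, then attaching a `Usage` record. A rough standalone sketch of that accounting using tiktoken directly (the "cl100k_base" encoder is an assumption for illustration; litellm chooses the encoder per model):

```python
import tiktoken

encoding = tiktoken.get_encoding("cl100k_base")   # illustrative encoder choice

prompt = "Hello, how are you?"
completion_text = "I'm doing well, thank you!"

prompt_tokens = len(encoding.encode(prompt))
completion_tokens = len(encoding.encode(completion_text))

usage = {
    "prompt_tokens": prompt_tokens,
    "completion_tokens": completion_tokens,
    "total_tokens": prompt_tokens + completion_tokens,
}
print(usage)
```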
litellm/llms/prompt_templates/factory.py ADDED
@@ -0,0 +1,360 @@
1
+ from enum import Enum
2
+ import requests, traceback
3
+ import json
4
+ from jinja2 import Template, exceptions, Environment, meta
5
+ from typing import Optional
6
+
7
+ def default_pt(messages):
8
+ return " ".join(message["content"] for message in messages)
9
+
10
+ # alpaca prompt template - for models like mythomax, etc.
11
+ def alpaca_pt(messages):
12
+ prompt = custom_prompt(
13
+ role_dict={
14
+ "system": {
15
+ "pre_message": "### Instruction:\n",
16
+ "post_message": "\n\n",
17
+ },
18
+ "user": {
19
+ "pre_message": "### Instruction:\n",
20
+ "post_message": "\n\n",
21
+ },
22
+ "assistant": {
23
+ "pre_message": "### Response:\n",
24
+ "post_message": "\n\n"
25
+ }
26
+ },
27
+ bos_token="<s>",
28
+ eos_token="</s>",
29
+ messages=messages
30
+ )
31
+ return prompt
32
+
33
+ # Llama2 prompt template
34
+ def llama_2_chat_pt(messages):
35
+ prompt = custom_prompt(
36
+ role_dict={
37
+ "system": {
38
+ "pre_message": "[INST] <<SYS>>\n",
39
+ "post_message": "\n<</SYS>>\n [/INST]\n"
40
+ },
41
+ "user": { # follow this format https://github.com/facebookresearch/llama/blob/77062717054710e352a99add63d160274ce670c6/llama/generation.py#L348
42
+ "pre_message": "[INST] ",
43
+ "post_message": " [/INST]\n"
44
+ },
45
+ "assistant": {
46
+ "post_message": "\n" # follows this - https://replicate.com/blog/how-to-prompt-llama
47
+ }
48
+ },
49
+ messages=messages,
50
+ bos_token="<s>",
51
+ eos_token="</s>"
52
+ )
53
+ return prompt
54
+
55
+ def ollama_pt(model, messages): # https://github.com/jmorganca/ollama/blob/af4cf55884ac54b9e637cd71dadfe9b7a5685877/docs/modelfile.md#template
56
+
57
+ if "instruct" in model:
58
+ prompt = custom_prompt(
59
+ role_dict={
60
+ "system": {
61
+ "pre_message": "### System:\n",
62
+ "post_message": "\n"
63
+ },
64
+ "user": {
65
+ "pre_message": "### User:\n",
66
+ "post_message": "\n",
67
+ },
68
+ "assistant": {
69
+ "pre_message": "### Response:\n",
70
+ "post_message": "\n",
71
+ }
72
+ },
73
+ final_prompt_value="### Response:",
74
+ messages=messages
75
+ )
76
+ else:
77
+ prompt = "".join(m["content"] for m in messages)
78
+ return prompt
79
+
80
+ def mistral_instruct_pt(messages):
81
+ prompt = custom_prompt(
82
+ initial_prompt_value="<s>",
83
+ role_dict={
84
+ "system": {
85
+ "pre_message": "[INST]",
86
+ "post_message": "[/INST]"
87
+ },
88
+ "user": {
89
+ "pre_message": "[INST]",
90
+ "post_message": "[/INST]"
91
+ },
92
+ "assistant": {
93
+ "pre_message": "[INST]",
94
+ "post_message": "[/INST]"
95
+ }
96
+ },
97
+ final_prompt_value="</s>",
98
+ messages=messages
99
+ )
100
+ return prompt
101
+
102
+ # Falcon prompt template - from https://github.com/lm-sys/FastChat/blob/main/fastchat/conversation.py#L110
103
+ def falcon_instruct_pt(messages):
104
+ prompt = ""
105
+ for message in messages:
106
+ if message["role"] == "system":
107
+ prompt += message["content"]
108
+ else:
109
+ prompt += message['role']+":"+ message["content"].replace("\r\n", "\n").replace("\n\n", "\n")
110
+ prompt += "\n\n"
111
+
112
+ return prompt
113
+
114
+ def falcon_chat_pt(messages):
115
+ prompt = ""
116
+ for message in messages:
117
+ if message["role"] == "system":
118
+ prompt += "System: " + message["content"]
119
+ elif message["role"] == "assistant":
120
+ prompt += "Falcon: " + message["content"]
121
+ elif message["role"] == "user":
122
+ prompt += "User: " + message["content"]
123
+
124
+ return prompt
125
+
126
+ # MPT prompt template - from https://github.com/lm-sys/FastChat/blob/main/fastchat/conversation.py#L110
127
+ def mpt_chat_pt(messages):
128
+ prompt = ""
129
+ for message in messages:
130
+ if message["role"] == "system":
131
+ prompt += "<|im_start|>system" + message["content"] + "<|im_end|>" + "\n"
132
+ elif message["role"] == "assistant":
133
+ prompt += "<|im_start|>assistant" + message["content"] + "<|im_end|>" + "\n"
134
+ elif message["role"] == "user":
135
+ prompt += "<|im_start|>user" + message["content"] + "<|im_end|>" + "\n"
136
+ return prompt
137
+
138
+ # WizardCoder prompt template - https://huggingface.co/WizardLM/WizardCoder-Python-34B-V1.0#prompt-format
139
+ def wizardcoder_pt(messages):
140
+ prompt = ""
141
+ for message in messages:
142
+ if message["role"] == "system":
143
+ prompt += message["content"] + "\n\n"
144
+ elif message["role"] == "user": # map to 'Instruction'
145
+ prompt += "### Instruction:\n" + message["content"] + "\n\n"
146
+ elif message["role"] == "assistant": # map to 'Response'
147
+ prompt += "### Response:\n" + message["content"] + "\n\n"
148
+ return prompt
149
+
150
+ # Phind-CodeLlama prompt template - https://huggingface.co/Phind/Phind-CodeLlama-34B-v2#how-to-prompt-the-model
151
+ def phind_codellama_pt(messages):
152
+ prompt = ""
153
+ for message in messages:
154
+ if message["role"] == "system":
155
+ prompt += "### System Prompt\n" + message["content"] + "\n\n"
156
+ elif message["role"] == "user":
157
+ prompt += "### User Message\n" + message["content"] + "\n\n"
158
+ elif message["role"] == "assistant":
159
+ prompt += "### Assistant\n" + message["content"] + "\n\n"
160
+ return prompt
161
+
162
+ def hf_chat_template(model: str, messages: list):
163
+ ## get the tokenizer config from huggingface
164
+ def _get_tokenizer_config(hf_model_name):
165
+ url = f"https://huggingface.co/{hf_model_name}/raw/main/tokenizer_config.json"
166
+ # Make a GET request to fetch the JSON data
167
+ response = requests.get(url)
168
+ if response.status_code == 200:
169
+ # Parse the JSON data
170
+ tokenizer_config = json.loads(response.content)
171
+ return {"status": "success", "tokenizer": tokenizer_config}
172
+ else:
173
+ return {"status": "failure"}
174
+ tokenizer_config = _get_tokenizer_config(model)
175
+ if tokenizer_config["status"] == "failure" or "chat_template" not in tokenizer_config["tokenizer"]:
176
+ raise Exception("No chat template found")
177
+ ## read the bos token, eos token and chat template from the json
178
+ tokenizer_config = tokenizer_config["tokenizer"]
179
+ bos_token = tokenizer_config["bos_token"]
180
+ eos_token = tokenizer_config["eos_token"]
181
+ chat_template = tokenizer_config["chat_template"]
182
+
183
+ def raise_exception(message):
184
+ raise Exception(f"Error message - {message}")
185
+
186
+ # Create a template object from the template text
187
+ env = Environment()
188
+ env.globals['raise_exception'] = raise_exception
189
+ template = env.from_string(chat_template)
190
+
191
+ def _is_system_in_template():
192
+ try:
193
+ # Try rendering the template with a system message
194
+ response = template.render(messages=[{"role": "system", "content": "test"}], eos_token= "<eos>", bos_token= "<bos>")
195
+ return True
196
+
197
+ # This will be raised if Jinja attempts to render the system message and it can't
198
+ except:
199
+ return False
200
+
201
+ try:
202
+ # Render the template with the provided values
203
+ if _is_system_in_template():
204
+ rendered_text = template.render(bos_token=bos_token, eos_token=eos_token, messages=messages)
205
+ else:
206
+ # treat a system message as a user message, if system not in template
207
+ try:
208
+ reformatted_messages = []
209
+ for message in messages:
210
+ if message["role"] == "system":
211
+ reformatted_messages.append({"role": "user", "content": message["content"]})
212
+ else:
213
+ reformatted_messages.append(message)
214
+ rendered_text = template.render(bos_token=bos_token, eos_token=eos_token, messages=reformatted_messages)
215
+ except Exception as e:
216
+ if "Conversation roles must alternate user/assistant" in str(e):
217
+ # reformat messages to ensure user/assistant are alternating, if there's either 2 consecutive 'user' messages or 2 consecutive 'assistant' message, add a blank 'user' or 'assistant' message to ensure compatibility
218
+ new_messages = []
219
+ for i in range(len(reformatted_messages)-1):
220
+ new_messages.append(reformatted_messages[i])
221
+ if reformatted_messages[i]["role"] == reformatted_messages[i+1]["role"]:
222
+ if reformatted_messages[i]["role"] == "user":
223
+ new_messages.append({"role": "assistant", "content": ""})
224
+ else:
225
+ new_messages.append({"role": "user", "content": ""})
226
+ new_messages.append(reformatted_messages[-1])
227
+ rendered_text = template.render(bos_token=bos_token, eos_token=eos_token, messages=new_messages)
228
+ return rendered_text
229
+ except:
230
+ raise Exception("Error rendering template")
231
+
232
+ # Anthropic template
233
+ def claude_2_1_pt(messages: list): # format - https://docs.anthropic.com/claude/docs/how-to-use-system-prompts
234
+ class AnthropicConstants(Enum):
235
+ HUMAN_PROMPT = "\n\nHuman: "
236
+ AI_PROMPT = "\n\nAssistant: "
237
+
238
+ prompt = ""
239
+ for idx, message in enumerate(messages): # needs to start with `\n\nHuman: ` and end with `\n\nAssistant: `
240
+ if message["role"] == "user":
241
+ prompt += (
242
+ f"{AnthropicConstants.HUMAN_PROMPT.value}{message['content']}"
243
+ )
244
+ elif message["role"] == "system":
245
+ prompt += (
246
+ f"{message['content']}"
247
+ )
248
+ else:
249
+ prompt += (
250
+ f"{AnthropicConstants.AI_PROMPT.value}{message['content']}"
251
+ )
252
+ if idx == 0 and message["role"] == "assistant": # ensure the prompt always starts with `\n\nHuman: `
253
+ prompt = f"{AnthropicConstants.HUMAN_PROMPT.value}" + prompt
254
+ prompt += f"{AnthropicConstants.AI_PROMPT.value}"
255
+ return prompt
256
+
257
+ def anthropic_pt(messages: list): # format - https://docs.anthropic.com/claude/reference/complete_post
258
+ class AnthropicConstants(Enum):
259
+ HUMAN_PROMPT = "\n\nHuman: "
260
+ AI_PROMPT = "\n\nAssistant: "
261
+
262
+ prompt = ""
263
+ for idx, message in enumerate(messages): # needs to start with `\n\nHuman: ` and end with `\n\nAssistant: `
264
+ if message["role"] == "user":
265
+ prompt += (
266
+ f"{AnthropicConstants.HUMAN_PROMPT.value}{message['content']}"
267
+ )
268
+ elif message["role"] == "system":
269
+ prompt += (
270
+ f"{AnthropicConstants.HUMAN_PROMPT.value}<admin>{message['content']}</admin>"
271
+ )
272
+ else:
273
+ prompt += (
274
+ f"{AnthropicConstants.AI_PROMPT.value}{message['content']}"
275
+ )
276
+ if idx == 0 and message["role"] == "assistant": # ensure the prompt always starts with `\n\nHuman: `
277
+ prompt = f"{AnthropicConstants.HUMAN_PROMPT.value}" + prompt
278
+ prompt += f"{AnthropicConstants.AI_PROMPT.value}"
279
+ return prompt
280
+
281
+ # Function call template
282
+ def function_call_prompt(messages: list, functions: list):
283
+ function_prompt = "The following functions are available to you:"
284
+ for function in functions:
285
+ function_prompt += f"""\n{function}\n"""
286
+
287
+ function_added_to_prompt = False
288
+ for message in messages:
289
+ if "system" in message["role"]:
290
+ message['content'] += f"""{function_prompt}"""
291
+ function_added_to_prompt = True
292
+
293
+ if function_added_to_prompt == False:
294
+ messages.append({'role': 'system', 'content': f"""{function_prompt}"""})
295
+
296
+ return messages
297
+
298
+
299
+ # Custom prompt template
300
+ def custom_prompt(role_dict: dict, messages: list, initial_prompt_value: str="", final_prompt_value: str="", bos_token: str="", eos_token: str=""):
301
+ prompt = bos_token + initial_prompt_value
302
+ bos_open = True
303
+ ## a bos token is at the start of a system / human message
304
+ ## an eos token is at the end of the assistant response to the message
305
+ for message in messages:
306
+ role = message["role"]
307
+
308
+ if role in ["system", "human"] and not bos_open:
309
+ prompt += bos_token
310
+ bos_open = True
311
+
312
+ pre_message_str = role_dict[role]["pre_message"] if role in role_dict and "pre_message" in role_dict[role] else ""
313
+ post_message_str = role_dict[role]["post_message"] if role in role_dict and "post_message" in role_dict[role] else ""
314
+ prompt += pre_message_str + message["content"] + post_message_str
315
+
316
+ if role == "assistant":
317
+ prompt += eos_token
318
+ bos_open = False
319
+
320
+ prompt += final_prompt_value
321
+ return prompt
322
+
323
+ def prompt_factory(model: str, messages: list, custom_llm_provider: Optional[str]=None):
324
+ original_model_name = model
325
+ model = model.lower()
326
+ if custom_llm_provider == "ollama":
327
+ return ollama_pt(model=model, messages=messages)
328
+ elif custom_llm_provider == "anthropic":
329
+ if "claude-2.1" in model:
330
+ return claude_2_1_pt(messages=messages)
331
+ else:
332
+ return anthropic_pt(messages=messages)
333
+
334
+ try:
335
+ if "meta-llama/llama-2" in model and "chat" in model:
336
+ return llama_2_chat_pt(messages=messages)
337
+ elif "tiiuae/falcon" in model: # Note: for the instruct models, it's best to use a User: .., Assistant:.. approach in your prompt template.
338
+ if model == "tiiuae/falcon-180B-chat":
339
+ return falcon_chat_pt(messages=messages)
340
+ elif "instruct" in model:
341
+ return falcon_instruct_pt(messages=messages)
342
+ elif "mosaicml/mpt" in model:
343
+ if "chat" in model:
344
+ return mpt_chat_pt(messages=messages)
345
+ elif "codellama/codellama" in model:
346
+ if "instruct" in model:
347
+ return llama_2_chat_pt(messages=messages) # https://huggingface.co/blog/codellama#conversational-instructions
348
+ elif "wizardlm/wizardcoder" in model:
349
+ return wizardcoder_pt(messages=messages)
350
+ elif "phind/phind-codellama" in model:
351
+ return phind_codellama_pt(messages=messages)
352
+ elif "togethercomputer/llama-2" in model and ("instruct" in model or "chat" in model):
353
+ return llama_2_chat_pt(messages=messages)
354
+ elif model in ["gryphe/mythomax-l2-13b", "gryphe/mythomix-l2-13b", "gryphe/mythologic-l2-13b"]:
355
+ return alpaca_pt(messages=messages)
356
+ else:
357
+ return hf_chat_template(original_model_name, messages)
358
+ except:
359
+ return default_pt(messages=messages) # default that covers Bloom, T-5, any non-chat tuned model (e.g. base Llama2)
360
+
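A hedged usage sketch of `custom_prompt()` from the factory above, showing how a role dictionary is flattened into a single prompt string; the import path assumes the package layout added in this commit, and the template strings are illustrative:

```python
from litellm.llms.prompt_templates.factory import custom_prompt

prompt = custom_prompt(
    role_dict={
        "system":    {"pre_message": "<<SYS>>\n", "post_message": "\n<</SYS>>\n"},
        "user":      {"pre_message": "[INST] ",   "post_message": " [/INST]\n"},
        "assistant": {"post_message": "\n"},
    },
    messages=[
        {"role": "system", "content": "Be brief."},
        {"role": "user", "content": "Hi"},
    ],
    bos_token="<s>",
    eos_token="</s>",
)
print(prompt)
# <s><<SYS>>
# Be brief.
# <</SYS>>
# [INST] Hi [/INST]
```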
litellm/llms/replicate.py ADDED
@@ -0,0 +1,302 @@
1
+ import os, types
2
+ import json
3
+ import requests
4
+ import time
5
+ from typing import Callable, Optional
6
+ from litellm.utils import ModelResponse, Usage
7
+ import litellm
8
+ import httpx
9
+ from .prompt_templates.factory import prompt_factory, custom_prompt
10
+
11
+ class ReplicateError(Exception):
12
+ def __init__(self, status_code, message):
13
+ self.status_code = status_code
14
+ self.message = message
15
+ self.request = httpx.Request(method="POST", url="https://api.replicate.com/v1/deployments")
16
+ self.response = httpx.Response(status_code=status_code, request=self.request)
17
+ super().__init__(
18
+ self.message
19
+ ) # Call the base class constructor with the parameters it needs
20
+
21
+ class ReplicateConfig():
22
+ """
23
+ Reference: https://replicate.com/meta/llama-2-70b-chat/api
24
+ - `prompt` (string): The prompt to send to the model.
25
+
26
+ - `system_prompt` (string): The system prompt to send to the model. This is prepended to the prompt and helps guide system behavior. Default value: `You are a helpful assistant`.
27
+
28
+ - `max_new_tokens` (integer): Maximum number of tokens to generate. Typically, a word is made up of 2-3 tokens. Default value: `128`.
29
+
30
+ - `min_new_tokens` (integer): Minimum number of tokens to generate. To disable, set to `-1`. A word is usually 2-3 tokens. Default value: `-1`.
31
+
32
+ - `temperature` (number): Adjusts the randomness of outputs. Values greater than 1 increase randomness, 0 is deterministic, and 0.75 is a reasonable starting value. Default value: `0.75`.
33
+
34
+ - `top_p` (number): During text decoding, it samples from the top `p` percentage of most likely tokens. Reduce this to ignore less probable tokens. Default value: `0.9`.
35
+
36
+ - `top_k` (integer): During text decoding, samples from the top `k` most likely tokens. Reduce this to ignore less probable tokens. Default value: `50`.
37
+
38
+ - `stop_sequences` (string): A comma-separated list of sequences to stop generation at. For example, inputting '<end>,<stop>' will cease generation at the first occurrence of either '<end>' or '<stop>'.
39
+
40
+ - `seed` (integer): This is the seed for the random generator. Leave it blank to randomize the seed.
41
+
42
+ - `debug` (boolean): If set to `True`, it provides debugging output in logs.
43
+
44
+ Please note that Replicate's mapping of these parameters can be inconsistent across models, so not all of these parameters are available for every model.
45
+ """
46
+ system_prompt: Optional[str]=None
47
+ max_new_tokens: Optional[int]=None
48
+ min_new_tokens: Optional[int]=None
49
+ temperature: Optional[float]=None
50
+ top_p: Optional[float]=None
51
+ top_k: Optional[int]=None
52
+ stop_sequences: Optional[str]=None
53
+ seed: Optional[int]=None
54
+ debug: Optional[bool]=None
55
+
56
+ def __init__(self,
57
+ system_prompt: Optional[str]=None,
58
+ max_new_tokens: Optional[int]=None,
59
+ min_new_tokens: Optional[int]=None,
60
+ temperature: Optional[float]=None,
61
+ top_p: Optional[float]=None,
62
+ top_k: Optional[int]=None,
63
+ stop_sequences: Optional[str]=None,
64
+ seed: Optional[int]=None,
65
+ debug: Optional[bool]=None) -> None:
66
+ locals_ = locals()
67
+ for key, value in locals_.items():
68
+ if key != 'self' and value is not None:
69
+ setattr(self.__class__, key, value)
70
+
71
+ @classmethod
72
+ def get_config(cls):
73
+ return {k: v for k, v in cls.__dict__.items()
74
+ if not k.startswith('__')
75
+ and not isinstance(v, (types.FunctionType, types.BuiltinFunctionType, classmethod, staticmethod))
76
+ and v is not None}
77
+
78
+
79
+
80
+ # Function to start a prediction and get the prediction URL
81
+ def start_prediction(version_id, input_data, api_token, api_base, logging_obj, print_verbose):
82
+ base_url = api_base
83
+ if "deployments" in version_id:
84
+ print_verbose("\nLiteLLM: Request to custom replicate deployment")
85
+ version_id = version_id.replace("deployments/", "")
86
+ base_url = f"https://api.replicate.com/v1/deployments/{version_id}"
87
+ print_verbose(f"Deployment base URL: {base_url}\n")
88
+
89
+ headers = {
90
+ "Authorization": f"Token {api_token}",
91
+ "Content-Type": "application/json"
92
+ }
93
+
94
+ initial_prediction_data = {
95
+ "version": version_id,
96
+ "input": input_data,
97
+ }
98
+
99
+ ## LOGGING
100
+ logging_obj.pre_call(
101
+ input=input_data["prompt"],
102
+ api_key="",
103
+ additional_args={"complete_input_dict": initial_prediction_data, "headers": headers, "api_base": base_url},
104
+ )
105
+
106
+ response = requests.post(f"{base_url}/predictions", json=initial_prediction_data, headers=headers)
107
+ if response.status_code == 201:
108
+ response_data = response.json()
109
+ return response_data.get("urls", {}).get("get")
110
+ else:
111
+ raise ReplicateError(response.status_code, f"Failed to start prediction {response.text}")
112
+
113
+ # Function to handle prediction response (non-streaming)
114
+ def handle_prediction_response(prediction_url, api_token, print_verbose):
115
+ output_string = ""
116
+ headers = {
117
+ "Authorization": f"Token {api_token}",
118
+ "Content-Type": "application/json"
119
+ }
120
+
121
+ status = ""
122
+ logs = ""
123
+ while status not in ["succeeded", "failed", "canceled"]:
124
+ print_verbose(f"replicate: polling endpoint: {prediction_url}")
125
+ time.sleep(0.5)
126
+ response = requests.get(prediction_url, headers=headers)
127
+ if response.status_code == 200:
128
+ response_data = response.json()
129
+ if "output" in response_data:
130
+ output_string = "".join(response_data['output'])
131
+ print_verbose(f"Non-streamed output:{output_string}")
132
+ status = response_data.get('status', None)
133
+ logs = response_data.get("logs", "")
134
+ if status == "failed":
135
+ replicate_error = response_data.get("error", "")
136
+ raise ReplicateError(status_code=400, message=f"Error: {replicate_error}, \nReplicate logs:{logs}")
137
+ else:
138
+ # this status poll can fail temporarily; it does not mean the replicate request failed. The replicate request only fails when status == "failed"
139
+ print_verbose("Replicate: Failed to fetch prediction status and output.")
140
+ return output_string, logs
141
+
142
+ # Function to handle prediction response (streaming)
143
+ def handle_prediction_response_streaming(prediction_url, api_token, print_verbose):
144
+ previous_output = ""
145
+ output_string = ""
146
+
147
+ headers = {
148
+ "Authorization": f"Token {api_token}",
149
+ "Content-Type": "application/json"
150
+ }
151
+ status = ""
152
+ while status not in ["succeeded", "failed", "canceled"]:
153
+ time.sleep(0.5) # prevent being rate limited by replicate
154
+ print_verbose(f"replicate: polling endpoint: {prediction_url}")
155
+ response = requests.get(prediction_url, headers=headers)
156
+ if response.status_code == 200:
157
+ response_data = response.json()
158
+ status = response_data['status']
159
+ if "output" in response_data:
160
+ output_string = "".join(response_data['output'])
161
+ new_output = output_string[len(previous_output):]
162
+ print_verbose(f"New chunk: {new_output}")
163
+ yield {"output": new_output, "status": status}
164
+ previous_output = output_string
165
+ status = response_data['status']
166
+ if status == "failed":
167
+ replicate_error = response_data.get("error", "")
168
+ raise ReplicateError(status_code=400, message=f"Error: {replicate_error}")
169
+ else:
170
+ # this status poll can fail temporarily; it does not mean the replicate request failed. The replicate request only fails when status == "failed"
171
+ print_verbose(f"Replicate: Failed to fetch prediction status and output.{response.status_code}{response.text}")
172
+
173
+
174
+ # Function to extract version ID from model string
175
+ def model_to_version_id(model):
176
+ if ":" in model:
177
+ split_model = model.split(":")
178
+ return split_model[1]
179
+ return model
180
+
181
+ # Main function for prediction completion
182
+ def completion(
183
+ model: str,
184
+ messages: list,
185
+ api_base: str,
186
+ model_response: ModelResponse,
187
+ print_verbose: Callable,
188
+ logging_obj,
189
+ api_key,
190
+ encoding,
191
+ custom_prompt_dict={},
192
+ optional_params=None,
193
+ litellm_params=None,
194
+ logger_fn=None,
195
+ ):
196
+ # Start a prediction and get the prediction URL
197
+ version_id = model_to_version_id(model)
198
+ ## Load Config
199
+ config = litellm.ReplicateConfig.get_config()
200
+ for k, v in config.items():
201
+ if k not in optional_params: # completion(top_k=3) > replicate_config(top_k=3) <- allows for dynamic variables to be passed in
202
+ optional_params[k] = v
203
+
204
+ system_prompt = None
205
+ if optional_params is not None and "supports_system_prompt" in optional_params:
206
+ supports_sys_prompt = optional_params.pop("supports_system_prompt")
207
+ else:
208
+ supports_sys_prompt = False
209
+
210
+ if supports_sys_prompt:
211
+ for i in range(len(messages)):
212
+ if messages[i]["role"] == "system":
213
+ first_sys_message = messages.pop(i)
214
+ system_prompt = first_sys_message["content"]
215
+ break
216
+
217
+ if model in custom_prompt_dict:
218
+ # check if the model has a registered custom prompt
219
+ model_prompt_details = custom_prompt_dict[model]
220
+ prompt = custom_prompt(
221
+ role_dict=model_prompt_details.get("roles", {}),
222
+ initial_prompt_value=model_prompt_details.get("initial_prompt_value", ""),
223
+ final_prompt_value=model_prompt_details.get("final_prompt_value", ""),
224
+ bos_token=model_prompt_details.get("bos_token", ""),
225
+ eos_token=model_prompt_details.get("eos_token", ""),
226
+ messages=messages,
227
+ )
228
+ else:
229
+ prompt = prompt_factory(model=model, messages=messages)
230
+
231
+ # If system prompt is supported, and a system prompt is provided, use it
232
+ if system_prompt is not None:
233
+ input_data = {
234
+ "prompt": prompt,
235
+ "system_prompt": system_prompt
236
+ }
237
+ # Otherwise, use the prompt as is
238
+ else:
239
+ input_data = {
240
+ "prompt": prompt,
241
+ **optional_params
242
+ }
243
+
244
+
245
+ ## COMPLETION CALL
246
+ ## Replicate Completion calls have 2 steps
247
+ ## Step1: Start Prediction: gets a prediction url
248
+ ## Step2: Poll prediction url for response
249
+ ## Step2: is handled with and without streaming
250
+ model_response["created"] = int(time.time()) # for pricing this must remain right before calling api
251
+ prediction_url = start_prediction(version_id, input_data, api_key, api_base, logging_obj=logging_obj, print_verbose=print_verbose)
252
+ print_verbose(prediction_url)
253
+
254
+ # Handle the prediction response (streaming or non-streaming)
255
+ if "stream" in optional_params and optional_params["stream"] == True:
256
+ print_verbose("streaming request")
257
+ return handle_prediction_response_streaming(prediction_url, api_key, print_verbose)
258
+ else:
259
+ result, logs = handle_prediction_response(prediction_url, api_key, print_verbose)
260
+ model_response["ended"] = time.time() # for pricing this must remain right after calling api
261
+ ## LOGGING
262
+ logging_obj.post_call(
263
+ input=prompt,
264
+ api_key="",
265
+ original_response=result,
266
+ additional_args={"complete_input_dict": input_data,"logs": logs, "api_base": prediction_url, },
267
+ )
268
+
269
+ print_verbose(f"raw model_response: {result}")
270
+
271
+ if len(result) == 0: # edge case, where result from replicate is empty
272
+ result = " "
273
+
274
+ ## Building RESPONSE OBJECT
275
+ if len(result) > 1:
276
+ model_response["choices"][0]["message"]["content"] = result
277
+
278
+ # Calculate usage
279
+ prompt_tokens = len(encoding.encode(prompt))
280
+ completion_tokens = len(encoding.encode(model_response["choices"][0]["message"].get("content", "")))
281
+ model_response["model"] = "replicate/" + model
282
+ usage = Usage(
283
+ prompt_tokens=prompt_tokens,
284
+ completion_tokens=completion_tokens,
285
+ total_tokens=prompt_tokens + completion_tokens
286
+ )
287
+ model_response.usage = usage
288
+ return model_response
289
+
290
+
291
+ # # Example usage:
292
+ # response = completion(
293
+ # api_key="",
294
+ # messages=[{"content": "good morning"}],
295
+ # model="replicate/llama-2-70b-chat:2796ee9483c3fd7aa2e171d38f4ca12251a30609463dcfd4cd76703f22e96cdf",
296
+ # model_response=ModelResponse(),
297
+ # print_verbose=print,
298
+ # logging_obj=print, # stub logging_obj
299
+ # optional_params={"stream": False}
300
+ # )
301
+
302
+ # print(response)
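The `## Load Config` block in `completion` above is the pattern these provider modules share: values set on `ReplicateConfig` act as package-level defaults, and anything passed per-call wins. A minimal sketch of that merge, assuming `ReplicateConfig` is exposed as `litellm.ReplicateConfig` (which the code above expects) and using illustrative values:

import litellm

# Set defaults once; __init__ stores non-None values as class attributes.
litellm.ReplicateConfig(max_new_tokens=256, temperature=0.7)

# Per-call params take precedence; the config only fills in missing keys.
optional_params = {"temperature": 0.2}
for k, v in litellm.ReplicateConfig.get_config().items():
    if k not in optional_params:
        optional_params[k] = v

print(optional_params)  # {'temperature': 0.2, 'max_new_tokens': 256}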
litellm/llms/sagemaker.py ADDED
@@ -0,0 +1,190 @@
1
+ import os, types
2
+ from enum import Enum
3
+ import json
4
+ import requests
5
+ import time
6
+ from typing import Callable, Optional
7
+ import litellm
8
+ from litellm.utils import ModelResponse, get_secret, Usage
9
+ import sys
10
+ from copy import deepcopy
11
+ import httpx
12
+
13
+ class SagemakerError(Exception):
14
+ def __init__(self, status_code, message):
15
+ self.status_code = status_code
16
+ self.message = message
17
+ self.request = httpx.Request(method="POST", url="https://us-west-2.console.aws.amazon.com/sagemaker")
18
+ self.response = httpx.Response(status_code=status_code, request=self.request)
19
+ super().__init__(
20
+ self.message
21
+ ) # Call the base class constructor with the parameters it needs
22
+
23
+ class SagemakerConfig():
24
+ """
25
+ Reference: https://d-uuwbxj1u4cnu.studio.us-west-2.sagemaker.aws/jupyter/default/lab/workspaces/auto-q/tree/DemoNotebooks/meta-textgeneration-llama-2-7b-SDK_1.ipynb
26
+ """
27
+ max_new_tokens: Optional[int]=None
28
+ top_p: Optional[float]=None
29
+ temperature: Optional[float]=None
30
+ return_full_text: Optional[bool]=None
31
+
32
+ def __init__(self,
33
+ max_new_tokens: Optional[int]=None,
34
+ top_p: Optional[float]=None,
35
+ temperature: Optional[float]=None,
36
+ return_full_text: Optional[bool]=None) -> None:
37
+ locals_ = locals()
38
+ for key, value in locals_.items():
39
+ if key != 'self' and value is not None:
40
+ setattr(self.__class__, key, value)
41
+
42
+ @classmethod
43
+ def get_config(cls):
44
+ return {k: v for k, v in cls.__dict__.items()
45
+ if not k.startswith('__')
46
+ and not isinstance(v, (types.FunctionType, types.BuiltinFunctionType, classmethod, staticmethod))
47
+ and v is not None}
48
+
49
+ """
50
+ SAGEMAKER AUTH Keys/Vars
51
+ os.environ['AWS_ACCESS_KEY_ID'] = ""
52
+ os.environ['AWS_SECRET_ACCESS_KEY'] = ""
53
+ """
54
+
55
+ # set os.environ['AWS_REGION_NAME'] = <your-region_name>
56
+
57
+ def completion(
58
+ model: str,
59
+ messages: list,
60
+ model_response: ModelResponse,
61
+ print_verbose: Callable,
62
+ encoding,
63
+ logging_obj,
64
+ optional_params=None,
65
+ litellm_params=None,
66
+ logger_fn=None,
67
+ ):
68
+ import boto3
69
+
70
+ # pop aws_secret_access_key, aws_access_key_id, aws_region_name from kwargs, since completion calls fail with them
71
+ aws_secret_access_key = optional_params.pop("aws_secret_access_key", None)
72
+ aws_access_key_id = optional_params.pop("aws_access_key_id", None)
73
+ aws_region_name = optional_params.pop("aws_region_name", None)
74
+
75
+ if aws_access_key_id != None:
76
+ # uses auth params passed to completion
77
+ # aws_access_key_id is not None, assume user is trying to auth using litellm.completion
78
+ client = boto3.client(
79
+ service_name="sagemaker-runtime",
80
+ aws_access_key_id=aws_access_key_id,
81
+ aws_secret_access_key=aws_secret_access_key,
82
+ region_name=aws_region_name,
83
+ )
84
+ else:
85
+ # aws_access_key_id is None, assume user is trying to auth using env variables
86
+ # boto3 automaticaly reads env variables
87
+
88
+ # we need to read region name from env
89
+ # I assume majority of users use .env for auth
90
+ region_name = (
91
+ get_secret("AWS_REGION_NAME") or
92
+ "us-west-2" # default to us-west-2 if user not specified
93
+ )
94
+ client = boto3.client(
95
+ service_name="sagemaker-runtime",
96
+ region_name=region_name,
97
+ )
98
+
99
+ # pop streaming if it's in the optional params as 'stream' raises an error with sagemaker
100
+ inference_params = deepcopy(optional_params)
101
+ inference_params.pop("stream", None)
102
+
103
+ ## Load Config
104
+ config = litellm.SagemakerConfig.get_config()
105
+ for k, v in config.items():
106
+ if k not in inference_params: # completion(top_k=3) > sagemaker_config(top_k=3) <- allows for dynamic variables to be passed in
107
+ inference_params[k] = v
108
+
109
+ model = model
110
+ prompt = ""
111
+ for message in messages:
112
+ if "role" in message:
113
+ if message["role"] == "user":
114
+ prompt += (
115
+ f"{message['content']}"
116
+ )
117
+ else:
118
+ prompt += (
119
+ f"{message['content']}"
120
+ )
121
+ else:
122
+ prompt += f"{message['content']}"
123
+
124
+ data = json.dumps({
125
+ "inputs": prompt,
126
+ "parameters": inference_params
127
+ }).encode('utf-8')
128
+
129
+ ## LOGGING
130
+ request_str = f"""
131
+ response = client.invoke_endpoint(
132
+ EndpointName={model},
133
+ ContentType="application/json",
134
+ Body={data},
135
+ CustomAttributes="accept_eula=true",
136
+ )
137
+ """ # type: ignore
138
+ logging_obj.pre_call(
139
+ input=prompt,
140
+ api_key="",
141
+ additional_args={"complete_input_dict": data, "request_str": request_str},
142
+ )
143
+ ## COMPLETION CALL
144
+ response = client.invoke_endpoint(
145
+ EndpointName=model,
146
+ ContentType="application/json",
147
+ Body=data,
148
+ CustomAttributes="accept_eula=true",
149
+ )
150
+ response = response["Body"].read().decode("utf8")
151
+ ## LOGGING
152
+ logging_obj.post_call(
153
+ input=prompt,
154
+ api_key="",
155
+ original_response=response,
156
+ additional_args={"complete_input_dict": data},
157
+ )
158
+ print_verbose(f"raw model_response: {response}")
159
+ ## RESPONSE OBJECT
160
+ completion_response = json.loads(response)
161
+ try:
162
+ completion_response_choices = completion_response[0]
163
+ if "generation" in completion_response_choices:
164
+ model_response["choices"][0]["message"]["content"] = completion_response_choices["generation"]
165
+ elif "generated_text" in completion_response_choices:
166
+ model_response["choices"][0]["message"]["content"] = completion_response_choices["generated_text"]
167
+ except:
168
+ raise SagemakerError(message=f"LiteLLM Error: Unable to parse sagemaker RAW RESPONSE {json.dumps(completion_response)}", status_code=500)
169
+
170
+ ## CALCULATING USAGE - baseten charges on time, not tokens - have some mapping of cost here.
171
+ prompt_tokens = len(
172
+ encoding.encode(prompt)
173
+ )
174
+ completion_tokens = len(
175
+ encoding.encode(model_response["choices"][0]["message"].get("content", ""))
176
+ )
177
+
178
+ model_response["created"] = int(time.time())
179
+ model_response["model"] = model
180
+ usage = Usage(
181
+ prompt_tokens=prompt_tokens,
182
+ completion_tokens=completion_tokens,
183
+ total_tokens=prompt_tokens + completion_tokens
184
+ )
185
+ model_response.usage = usage
186
+ return model_response
187
+
188
+ def embedding():
189
+ # logic for parsing in - calling - parsing out model embedding calls
190
+ pass
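For orientation, a minimal sketch of the raw SageMaker runtime call that `completion` above wraps. The endpoint name is a placeholder, the region defaults to `us-west-2` as in the code above, and credentials are assumed to come from the environment (`AWS_ACCESS_KEY_ID` / `AWS_SECRET_ACCESS_KEY`).

import json
import boto3

# boto3 reads AWS credentials from the environment or shared config files.
client = boto3.client(service_name="sagemaker-runtime", region_name="us-west-2")

payload = json.dumps({
    "inputs": "good morning",
    "parameters": {"max_new_tokens": 64, "temperature": 0.7},
}).encode("utf-8")

response = client.invoke_endpoint(
    EndpointName="my-llama-2-7b-endpoint",  # placeholder endpoint name
    ContentType="application/json",
    Body=payload,
    CustomAttributes="accept_eula=true",
)
print(response["Body"].read().decode("utf8"))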
litellm/llms/together_ai.py ADDED
@@ -0,0 +1,198 @@
1
+ import os, types
2
+ import json
3
+ from enum import Enum
4
+ import requests
5
+ import time
6
+ from typing import Callable, Optional
7
+ import litellm
8
+ import httpx
9
+ from litellm.utils import ModelResponse, Usage
10
+ from .prompt_templates.factory import prompt_factory, custom_prompt
11
+
12
+ class TogetherAIError(Exception):
13
+ def __init__(self, status_code, message):
14
+ self.status_code = status_code
15
+ self.message = message
16
+ self.request = httpx.Request(method="POST", url="https://api.together.xyz/inference")
17
+ self.response = httpx.Response(status_code=status_code, request=self.request)
18
+ super().__init__(
19
+ self.message
20
+ ) # Call the base class constructor with the parameters it needs
21
+
22
+ class TogetherAIConfig():
23
+ """
24
+ Reference: https://docs.together.ai/reference/inference
25
+
26
+ The class `TogetherAIConfig` provides configuration for the TogetherAI's API interface. Here are the parameters:
27
+
28
+ - `max_tokens` (int32, required): The maximum number of tokens to generate.
29
+
30
+ - `stop` (string, optional): A string sequence that will truncate (stop) the inference text output. For example, "\n\n" will stop generation as soon as the model generates two newlines.
31
+
32
+ - `temperature` (float, optional): A decimal number that determines the degree of randomness in the response. A value of 1 will always yield the same output. A temperature less than 1 favors more correctness and is appropriate for question answering or summarization. A value greater than 1 introduces more randomness in the output.
33
+
34
+ - `top_p` (float, optional): The `top_p` (nucleus) parameter is used to dynamically adjust the number of choices for each predicted token based on the cumulative probabilities. It specifies a probability threshold, below which all less likely tokens are filtered out. This technique helps to maintain diversity and generate more fluent and natural-sounding text.
35
+
36
+ - `top_k` (int32, optional): The `top_k` parameter is used to limit the number of choices for the next predicted word or token. It specifies the maximum number of tokens to consider at each step, based on their probability of occurrence. This technique helps to speed up the generation process and can improve the quality of the generated text by focusing on the most likely options.
37
+
38
+ - `repetition_penalty` (float, optional): A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition.
39
+
40
+ - `logprobs` (int32, optional): This parameter is not documented in the API reference.
41
+ """
42
+ max_tokens: Optional[int]=None
43
+ stop: Optional[str]=None
44
+ temperature: Optional[float]=None
45
+ top_p: Optional[float]=None
46
+ top_k: Optional[int]=None
47
+ repetition_penalty: Optional[float]=None
48
+ logprobs: Optional[int]=None
49
+
50
+ def __init__(self,
51
+ max_tokens: Optional[int]=None,
52
+ stop: Optional[str]=None,
53
+ temperature: Optional[float]=None,
54
+ top_p: Optional[float]=None,
55
+ top_k: Optional[int]=None,
56
+ repetition_penalty: Optional[float]=None,
57
+ logprobs: Optional[int]=None) -> None:
58
+ locals_ = locals()
59
+ for key, value in locals_.items():
60
+ if key != 'self' and value is not None:
61
+ setattr(self.__class__, key, value)
62
+
63
+ @classmethod
64
+ def get_config(cls):
65
+ return {k: v for k, v in cls.__dict__.items()
66
+ if not k.startswith('__')
67
+ and not isinstance(v, (types.FunctionType, types.BuiltinFunctionType, classmethod, staticmethod))
68
+ and v is not None}
69
+
70
+
71
+ def validate_environment(api_key):
72
+ if api_key is None:
73
+ raise ValueError(
74
+ "Missing TogetherAI API Key - A call is being made to together_ai but no key is set either in the environment variables or via params"
75
+ )
76
+ headers = {
77
+ "accept": "application/json",
78
+ "content-type": "application/json",
79
+ "Authorization": "Bearer " + api_key,
80
+ }
81
+ return headers
82
+
83
+ def completion(
84
+ model: str,
85
+ messages: list,
86
+ api_base: str,
87
+ model_response: ModelResponse,
88
+ print_verbose: Callable,
89
+ encoding,
90
+ api_key,
91
+ logging_obj,
92
+ custom_prompt_dict={},
93
+ optional_params=None,
94
+ litellm_params=None,
95
+ logger_fn=None,
96
+ ):
97
+ headers = validate_environment(api_key)
98
+
99
+ ## Load Config
100
+ config = litellm.TogetherAIConfig.get_config()
101
+ for k, v in config.items():
102
+ if k not in optional_params: # completion(top_k=3) > togetherai_config(top_k=3) <- allows for dynamic variables to be passed in
103
+ optional_params[k] = v
104
+
105
+ print_verbose(f"CUSTOM PROMPT DICT: {custom_prompt_dict}; model: {model}")
106
+ if model in custom_prompt_dict:
107
+ # check if the model has a registered custom prompt
108
+ model_prompt_details = custom_prompt_dict[model]
109
+ prompt = custom_prompt(
110
+ role_dict=model_prompt_details.get("roles", {}),
111
+ initial_prompt_value=model_prompt_details.get("initial_prompt_value", ""),
112
+ final_prompt_value=model_prompt_details.get("final_prompt_value", ""),
113
+ bos_token=model_prompt_details.get("bos_token", ""),
114
+ eos_token=model_prompt_details.get("eos_token", ""),
115
+ messages=messages,
116
+ )
117
+ else:
118
+ prompt = prompt_factory(model=model, messages=messages)
119
+
120
+ data = {
121
+ "model": model,
122
+ "prompt": prompt,
123
+ "request_type": "language-model-inference",
124
+ **optional_params,
125
+ }
126
+
127
+ ## LOGGING
128
+ logging_obj.pre_call(
129
+ input=prompt,
130
+ api_key=api_key,
131
+ additional_args={"complete_input_dict": data, "headers": headers, "api_base": api_base},
132
+ )
133
+ ## COMPLETION CALL
134
+ if (
135
+ "stream_tokens" in optional_params
136
+ and optional_params["stream_tokens"] == True
137
+ ):
138
+ response = requests.post(
139
+ api_base,
140
+ headers=headers,
141
+ data=json.dumps(data),
142
+ stream=optional_params["stream_tokens"],
143
+ )
144
+ return response.iter_lines()
145
+ else:
146
+ response = requests.post(
147
+ api_base,
148
+ headers=headers,
149
+ data=json.dumps(data)
150
+ )
151
+ ## LOGGING
152
+ logging_obj.post_call(
153
+ input=prompt,
154
+ api_key=api_key,
155
+ original_response=response.text,
156
+ additional_args={"complete_input_dict": data},
157
+ )
158
+ print_verbose(f"raw model_response: {response.text}")
159
+ ## RESPONSE OBJECT
160
+ if response.status_code != 200:
161
+ raise TogetherAIError(
162
+ status_code=response.status_code, message=response.text
163
+ )
164
+ completion_response = response.json()
165
+
166
+ if "error" in completion_response:
167
+ raise TogetherAIError(
168
+ message=json.dumps(completion_response),
169
+ status_code=response.status_code,
170
+ )
171
+ elif "error" in completion_response["output"]:
172
+ raise TogetherAIError(
173
+ message=json.dumps(completion_response["output"]), status_code=response.status_code
174
+ )
175
+
176
+ if len(completion_response["output"]["choices"][0]["text"]) > 0:
177
+ model_response["choices"][0]["message"]["content"] = completion_response["output"]["choices"][0]["text"]
178
+
179
+ ## CALCULATING USAGE
180
+ prompt_tokens = len(encoding.encode(prompt))
181
+ completion_tokens = len(
182
+ encoding.encode(model_response["choices"][0]["message"].get("content", ""))
183
+ )
184
+ if "finish_reason" in completion_response["output"]["choices"][0]:
185
+ model_response.choices[0].finish_reason = completion_response["output"]["choices"][0]["finish_reason"]
186
+ model_response["created"] = int(time.time())
187
+ model_response["model"] = model
188
+ usage = Usage(
189
+ prompt_tokens=prompt_tokens,
190
+ completion_tokens=completion_tokens,
191
+ total_tokens=prompt_tokens + completion_tokens
192
+ )
193
+ model_response.usage = usage
194
+ return model_response
195
+
196
+ def embedding():
197
+ # logic for parsing in - calling - parsing out model embedding calls
198
+ pass
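A minimal sketch of the HTTP request the Together AI `completion` above issues. The API base comes from the error class above; the model id is illustrative, and the environment variable name used for the key is an assumption, not something this module defines.

import json
import os
import requests

api_base = "https://api.together.xyz/inference"
headers = {
    "accept": "application/json",
    "content-type": "application/json",
    "Authorization": "Bearer " + os.environ["TOGETHERAI_API_KEY"],  # assumed env var name
}

data = {
    "model": "togethercomputer/llama-2-7b-chat",  # illustrative model id
    "prompt": "[INST] good morning [/INST]",
    "request_type": "language-model-inference",
    "max_tokens": 64,
    "temperature": 0.7,
}

response = requests.post(api_base, headers=headers, data=json.dumps(data))
response.raise_for_status()
print(response.json()["output"]["choices"][0]["text"])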
litellm/llms/tokenizers/anthropic_tokenizer.json ADDED
The diff for this file is too large to render. See raw diff