nonhuman committed on
Commit
395201c
1 Parent(s): 17261b5

Upload 165 files

This view is limited to 50 files because it contains too many changes.
Files changed (50)
  1. litellm/__init__.py +408 -0
  2. litellm/_version.py +6 -0
  3. litellm/budget_manager.py +155 -0
  4. litellm/caching.py +275 -0
  5. litellm/cost.json +5 -0
  6. litellm/deprecated_litellm_server/.env.template +43 -0
  7. litellm/deprecated_litellm_server/Dockerfile +10 -0
  8. litellm/deprecated_litellm_server/README.md +3 -0
  9. litellm/deprecated_litellm_server/__init__.py +2 -0
  10. litellm/deprecated_litellm_server/main.py +193 -0
  11. litellm/deprecated_litellm_server/requirements.txt +7 -0
  12. litellm/deprecated_litellm_server/server_utils.py +86 -0
  13. litellm/exceptions.py +166 -0
  14. litellm/integrations/__init__.py +1 -0
  15. litellm/integrations/aispend.py +177 -0
  16. litellm/integrations/berrispend.py +184 -0
  17. litellm/integrations/custom_logger.py +83 -0
  18. litellm/integrations/helicone.py +114 -0
  19. litellm/integrations/langfuse.py +75 -0
  20. litellm/integrations/langsmith.py +76 -0
  21. litellm/integrations/litedebugger.py +231 -0
  22. litellm/integrations/llmonitor.py +127 -0
  23. litellm/integrations/prompt_layer.py +72 -0
  24. litellm/integrations/supabase.py +116 -0
  25. litellm/integrations/traceloop.py +78 -0
  26. litellm/integrations/weights_biases.py +219 -0
  27. litellm/llms/__init__.py +1 -0
  28. litellm/llms/ai21.py +194 -0
  29. litellm/llms/aleph_alpha.py +278 -0
  30. litellm/llms/anthropic.py +187 -0
  31. litellm/llms/azure.py +414 -0
  32. litellm/llms/base.py +47 -0
  33. litellm/llms/baseten.py +149 -0
  34. litellm/llms/bedrock.py +627 -0
  35. litellm/llms/cohere.py +273 -0
  36. litellm/llms/huggingface_llms_metadata/hf_conversational_models.txt +2523 -0
  37. litellm/llms/huggingface_llms_metadata/hf_text_generation_models.txt +0 -0
  38. litellm/llms/huggingface_restapi.py +604 -0
  39. litellm/llms/maritalk.py +164 -0
  40. litellm/llms/nlp_cloud.py +212 -0
  41. litellm/llms/ollama.py +231 -0
  42. litellm/llms/oobabooga.py +124 -0
  43. litellm/llms/openai.py +590 -0
  44. litellm/llms/palm.py +177 -0
  45. litellm/llms/petals.py +189 -0
  46. litellm/llms/prompt_templates/factory.py +360 -0
  47. litellm/llms/replicate.py +302 -0
  48. litellm/llms/sagemaker.py +190 -0
  49. litellm/llms/together_ai.py +198 -0
  50. litellm/llms/tokenizers/anthropic_tokenizer.json +0 -0
litellm/__init__.py ADDED
@@ -0,0 +1,408 @@
1
+ ### INIT VARIABLES ###
2
+ import threading, requests
3
+ from typing import Callable, List, Optional, Dict, Union, Any
4
+ from litellm.caching import Cache
5
+ import httpx
6
+
7
+ input_callback: List[Union[str, Callable]] = []
8
+ success_callback: List[Union[str, Callable]] = []
9
+ failure_callback: List[Union[str, Callable]] = []
10
+ callbacks: List[Callable] = []
11
+ _async_success_callback: List[Callable] = [] # internal variable - async custom callbacks are routed here.
12
+ pre_call_rules: List[Callable] = []
13
+ post_call_rules: List[Callable] = []
14
+ set_verbose = False
15
+ email: Optional[
16
+ str
17
+ ] = None # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
18
+ token: Optional[
19
+ str
20
+ ] = None # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
21
+ telemetry = True
22
+ max_tokens = 256 # OpenAI Defaults
23
+ drop_params = False
24
+ retry = True
25
+ api_key: Optional[str] = None
26
+ openai_key: Optional[str] = None
27
+ azure_key: Optional[str] = None
28
+ anthropic_key: Optional[str] = None
29
+ replicate_key: Optional[str] = None
30
+ cohere_key: Optional[str] = None
31
+ maritalk_key: Optional[str] = None
32
+ ai21_key: Optional[str] = None
33
+ openrouter_key: Optional[str] = None
34
+ huggingface_key: Optional[str] = None
35
+ vertex_project: Optional[str] = None
36
+ vertex_location: Optional[str] = None
37
+ togetherai_api_key: Optional[str] = None
38
+ baseten_key: Optional[str] = None
39
+ aleph_alpha_key: Optional[str] = None
40
+ nlp_cloud_key: Optional[str] = None
41
+ use_client: bool = False
42
+ logging: bool = True
43
+ caching: bool = False # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
44
 + caching_with_models: bool = False # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
45
+ cache: Optional[Cache] = None # cache object <- use this - https://docs.litellm.ai/docs/caching
46
+ model_alias_map: Dict[str, str] = {}
47
+ max_budget: float = 0.0 # set the max budget across all providers
48
+ _current_cost = 0 # private variable, used if max budget is set
49
+ error_logs: Dict = {}
50
+ add_function_to_prompt: bool = False # if function calling not supported by api, append function call details to system prompt
51
+ client_session: Optional[httpx.Client] = None
52
+ aclient_session: Optional[httpx.AsyncClient] = None
53
+ model_fallbacks: Optional[List] = None # Deprecated for 'litellm.fallbacks'
54
+ model_cost_map_url: str = "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"
55
+ suppress_debug_info = False
56
+ #### RELIABILITY ####
57
+ request_timeout: Optional[float] = 6000
58
+ num_retries: Optional[int] = None
59
+ fallbacks: Optional[List] = None
60
+ context_window_fallbacks: Optional[List] = None
61
+ allowed_fails: int = 0
62
+ ####### SECRET MANAGERS #####################
63
 + secret_manager_client: Optional[Any] = None # instantiated key management client - e.g. azure kv, infisical, etc.
64
+ #############################################
65
+
66
+ def get_model_cost_map(url: str):
67
+ try:
68
+ with requests.get(url, timeout=5) as response: # set a 5 second timeout for the get request
69
+ response.raise_for_status() # Raise an exception if the request is unsuccessful
70
+ content = response.json()
71
+ return content
72
+ except Exception as e:
73
+ import importlib.resources
74
+ import json
75
+ with importlib.resources.open_text("litellm", "model_prices_and_context_window_backup.json") as f:
76
+ content = json.load(f)
77
+ return content
78
+ model_cost = get_model_cost_map(url=model_cost_map_url)
79
+ custom_prompt_dict:Dict[str, dict] = {}
80
+ ####### THREAD-SPECIFIC DATA ###################
81
+ class MyLocal(threading.local):
82
+ def __init__(self):
83
+ self.user = "Hello World"
84
+
85
+
86
+ _thread_context = MyLocal()
87
+
88
+
89
+ def identify(event_details):
90
+ # Store user in thread local data
91
+ if "user" in event_details:
92
+ _thread_context.user = event_details["user"]
93
+
94
+
95
+ ####### ADDITIONAL PARAMS ################### configurable params if you use proxy models like Helicone, map spend to org id, etc.
96
+ api_base = None
97
+ headers = None
98
+ api_version = None
99
+ organization = None
100
+ config_path = None
101
+ ####### COMPLETION MODELS ###################
102
+ open_ai_chat_completion_models: List = []
103
+ open_ai_text_completion_models: List = []
104
+ cohere_models: List = []
105
+ anthropic_models: List = []
106
+ openrouter_models: List = []
107
+ vertex_chat_models: List = []
108
+ vertex_code_chat_models: List = []
109
+ vertex_text_models: List = []
110
+ vertex_code_text_models: List = []
111
+ ai21_models: List = []
112
+ nlp_cloud_models: List = []
113
+ aleph_alpha_models: List = []
114
+ bedrock_models: List = []
115
+ deepinfra_models: List = []
116
+ perplexity_models: List = []
117
+ for key, value in model_cost.items():
118
+ if value.get('litellm_provider') == 'openai':
119
+ open_ai_chat_completion_models.append(key)
120
+ elif value.get('litellm_provider') == 'text-completion-openai':
121
+ open_ai_text_completion_models.append(key)
122
+ elif value.get('litellm_provider') == 'cohere':
123
+ cohere_models.append(key)
124
+ elif value.get('litellm_provider') == 'anthropic':
125
+ anthropic_models.append(key)
126
+ elif value.get('litellm_provider') == 'openrouter':
127
+ split_string = key.split('/', 1)
128
+ openrouter_models.append(split_string[1])
129
+ elif value.get('litellm_provider') == 'vertex_ai-text-models':
130
+ vertex_text_models.append(key)
131
+ elif value.get('litellm_provider') == 'vertex_ai-code-text-models':
132
+ vertex_code_text_models.append(key)
133
+ elif value.get('litellm_provider') == 'vertex_ai-chat-models':
134
+ vertex_chat_models.append(key)
135
+ elif value.get('litellm_provider') == 'vertex_ai-code-chat-models':
136
+ vertex_code_chat_models.append(key)
137
+ elif value.get('litellm_provider') == 'ai21':
138
+ ai21_models.append(key)
139
+ elif value.get('litellm_provider') == 'nlp_cloud':
140
+ nlp_cloud_models.append(key)
141
+ elif value.get('litellm_provider') == 'aleph_alpha':
142
+ aleph_alpha_models.append(key)
143
+ elif value.get('litellm_provider') == 'bedrock':
144
+ bedrock_models.append(key)
145
+ elif value.get('litellm_provider') == 'deepinfra':
146
+ deepinfra_models.append(key)
147
+ elif value.get('litellm_provider') == 'perplexity':
148
+ perplexity_models.append(key)
149
+
150
+ # known openai compatible endpoints - we'll eventually move this list to the model_prices_and_context_window.json dictionary
151
+ openai_compatible_endpoints: List = [
152
+ "api.perplexity.ai",
153
+ "api.endpoints.anyscale.com/v1",
154
+ "api.deepinfra.com/v1/openai"
155
+ ]
156
+
157
+
158
+ # well supported replicate llms
159
+ replicate_models: List = [
160
+ # llama replicate supported LLMs
161
+ "replicate/llama-2-70b-chat:2796ee9483c3fd7aa2e171d38f4ca12251a30609463dcfd4cd76703f22e96cdf",
162
+ "a16z-infra/llama-2-13b-chat:2a7f981751ec7fdf87b5b91ad4db53683a98082e9ff7bfd12c8cd5ea85980a52",
163
+ "meta/codellama-13b:1c914d844307b0588599b8393480a3ba917b660c7e9dfae681542b5325f228db",
164
+ # Vicuna
165
+ "replicate/vicuna-13b:6282abe6a492de4145d7bb601023762212f9ddbbe78278bd6771c8b3b2f2a13b",
166
+ "joehoover/instructblip-vicuna13b:c4c54e3c8c97cd50c2d2fec9be3b6065563ccf7d43787fb99f84151b867178fe",
167
+ # Flan T-5
168
+ "daanelson/flan-t5-large:ce962b3f6792a57074a601d3979db5839697add2e4e02696b3ced4c022d4767f"
169
+ # Others
170
+ "replicate/dolly-v2-12b:ef0e1aefc61f8e096ebe4db6b2bacc297daf2ef6899f0f7e001ec445893500e5",
171
+ "replit/replit-code-v1-3b:b84f4c074b807211cd75e3e8b1589b6399052125b4c27106e43d47189e8415ad",
172
+ ]
173
+
174
+ huggingface_models: List = [
175
+ "meta-llama/Llama-2-7b-hf",
176
+ "meta-llama/Llama-2-7b-chat-hf",
177
+ "meta-llama/Llama-2-13b-hf",
178
+ "meta-llama/Llama-2-13b-chat-hf",
179
+ "meta-llama/Llama-2-70b-hf",
180
+ "meta-llama/Llama-2-70b-chat-hf",
181
+ "meta-llama/Llama-2-7b",
182
+ "meta-llama/Llama-2-7b-chat",
183
+ "meta-llama/Llama-2-13b",
184
+ "meta-llama/Llama-2-13b-chat",
185
+ "meta-llama/Llama-2-70b",
186
+ "meta-llama/Llama-2-70b-chat",
187
 + ] # these have been tested extensively. But by default all text2text-generation and text-generation models are supported by liteLLM. - https://docs.litellm.ai/docs/providers
188
+
189
+ together_ai_models: List = [
190
+ # llama llms - chat
191
+ "togethercomputer/llama-2-70b-chat",
192
+
193
+ # llama llms - language / instruct
194
+ "togethercomputer/llama-2-70b",
195
+ "togethercomputer/LLaMA-2-7B-32K",
196
+ "togethercomputer/Llama-2-7B-32K-Instruct",
197
+ "togethercomputer/llama-2-7b",
198
+
199
+ # falcon llms
200
+ "togethercomputer/falcon-40b-instruct",
201
+ "togethercomputer/falcon-7b-instruct",
202
+
203
+ # alpaca
204
+ "togethercomputer/alpaca-7b",
205
+
206
+ # chat llms
207
+ "HuggingFaceH4/starchat-alpha",
208
+
209
+ # code llms
210
+ "togethercomputer/CodeLlama-34b",
211
+ "togethercomputer/CodeLlama-34b-Instruct",
212
+ "togethercomputer/CodeLlama-34b-Python",
213
+ "defog/sqlcoder",
214
+ "NumbersStation/nsql-llama-2-7B",
215
+ "WizardLM/WizardCoder-15B-V1.0",
216
+ "WizardLM/WizardCoder-Python-34B-V1.0",
217
+
218
+ # language llms
219
+ "NousResearch/Nous-Hermes-Llama2-13b",
220
+ "Austism/chronos-hermes-13b",
221
+ "upstage/SOLAR-0-70b-16bit",
222
+ "WizardLM/WizardLM-70B-V1.0",
223
+
224
 + ] # supports all together ai models, just pass in the model id e.g. completion(model="togethercomputer/replit_code_3b",...)
225
+
226
+
227
+ baseten_models: List = ["qvv0xeq", "q841o8w", "31dxrj3"] # FALCON 7B # WizardLM # Mosaic ML
228
+
229
+ petals_models = [
230
+ "petals-team/StableBeluga2",
231
+ ]
232
+
233
+ ollama_models = [
234
+ "llama2"
235
+ ]
236
+
237
+ maritalk_models = [
238
+ "maritalk"
239
+ ]
240
+
241
+ model_list = (
242
+ open_ai_chat_completion_models
243
+ + open_ai_text_completion_models
244
+ + cohere_models
245
+ + anthropic_models
246
+ + replicate_models
247
+ + openrouter_models
248
+ + huggingface_models
249
+ + vertex_chat_models
250
+ + vertex_text_models
251
+ + ai21_models
252
+ + together_ai_models
253
+ + baseten_models
254
+ + aleph_alpha_models
255
+ + nlp_cloud_models
256
+ + ollama_models
257
+ + bedrock_models
258
+ + deepinfra_models
259
+ + perplexity_models
260
+ + maritalk_models
261
+ )
262
+
263
+ provider_list: List = [
264
+ "openai",
265
+ "custom_openai",
266
+ "cohere",
267
+ "anthropic",
268
+ "replicate",
269
+ "huggingface",
270
+ "together_ai",
271
+ "openrouter",
272
+ "vertex_ai",
273
+ "palm",
274
+ "ai21",
275
+ "baseten",
276
+ "azure",
277
+ "sagemaker",
278
+ "bedrock",
279
+ "vllm",
280
+ "nlp_cloud",
281
+ "petals",
282
+ "oobabooga",
283
+ "ollama",
284
+ "deepinfra",
285
+ "perplexity",
286
+ "anyscale",
287
+ "maritalk",
288
+ "custom", # custom apis
289
+ ]
290
+
291
+ models_by_provider: dict = {
292
+ "openai": open_ai_chat_completion_models + open_ai_text_completion_models,
293
+ "cohere": cohere_models,
294
+ "anthropic": anthropic_models,
295
+ "replicate": replicate_models,
296
+ "huggingface": huggingface_models,
297
+ "together_ai": together_ai_models,
298
+ "baseten": baseten_models,
299
+ "openrouter": openrouter_models,
300
+ "vertex_ai": vertex_chat_models + vertex_text_models,
301
+ "ai21": ai21_models,
302
+ "bedrock": bedrock_models,
303
+ "petals": petals_models,
304
+ "ollama": ollama_models,
305
+ "deepinfra": deepinfra_models,
306
+ "perplexity": perplexity_models,
307
+ "maritalk": maritalk_models
308
+ }
309
+
310
+ # mapping for those models which have larger equivalents
311
+ longer_context_model_fallback_dict: dict = {
312
+ # openai chat completion models
313
+ "gpt-3.5-turbo": "gpt-3.5-turbo-16k",
314
+ "gpt-3.5-turbo-0301": "gpt-3.5-turbo-16k-0301",
315
+ "gpt-3.5-turbo-0613": "gpt-3.5-turbo-16k-0613",
316
+ "gpt-4": "gpt-4-32k",
317
+ "gpt-4-0314": "gpt-4-32k-0314",
318
+ "gpt-4-0613": "gpt-4-32k-0613",
319
+ # anthropic
320
+ "claude-instant-1": "claude-2",
321
+ "claude-instant-1.2": "claude-2",
322
+ # vertexai
323
+ "chat-bison": "chat-bison-32k",
324
+ "chat-bison@001": "chat-bison-32k",
325
+ "codechat-bison": "codechat-bison-32k",
326
+ "codechat-bison@001": "codechat-bison-32k",
327
+ # openrouter
328
+ "openrouter/openai/gpt-3.5-turbo": "openrouter/openai/gpt-3.5-turbo-16k",
329
+ "openrouter/anthropic/claude-instant-v1": "openrouter/anthropic/claude-2",
330
+ }
331
+
332
+ ####### EMBEDDING MODELS ###################
333
+ open_ai_embedding_models: List = ["text-embedding-ada-002"]
334
+ cohere_embedding_models: List = [
335
+ "embed-english-v3.0",
336
+ "embed-english-light-v3.0",
337
+ "embed-multilingual-v3.0",
338
+ "embed-english-v2.0",
339
+ "embed-english-light-v2.0",
340
+ "embed-multilingual-v2.0",
341
+ ]
342
+ bedrock_embedding_models: List = ["amazon.titan-embed-text-v1"]
343
+
344
+ all_embedding_models = open_ai_embedding_models + cohere_embedding_models + bedrock_embedding_models
345
+
346
+ from .timeout import timeout
347
+ from .utils import (
348
+ client,
349
+ exception_type,
350
+ get_optional_params,
351
+ modify_integration,
352
+ token_counter,
353
+ cost_per_token,
354
+ completion_cost,
355
+ get_litellm_params,
356
+ Logging,
357
+ acreate,
358
+ get_model_list,
359
+ get_max_tokens,
360
+ get_model_info,
361
+ register_prompt_template,
362
+ validate_environment,
363
+ check_valid_key,
364
+ get_llm_provider,
365
+ completion_with_config,
366
+ register_model,
367
+ encode,
368
+ decode,
369
+ _calculate_retry_after,
370
+ _should_retry,
371
+ get_secret
372
+ )
373
+ from .llms.huggingface_restapi import HuggingfaceConfig
374
+ from .llms.anthropic import AnthropicConfig
375
+ from .llms.replicate import ReplicateConfig
376
+ from .llms.cohere import CohereConfig
377
+ from .llms.ai21 import AI21Config
378
+ from .llms.together_ai import TogetherAIConfig
379
+ from .llms.palm import PalmConfig
380
+ from .llms.nlp_cloud import NLPCloudConfig
381
+ from .llms.aleph_alpha import AlephAlphaConfig
382
+ from .llms.petals import PetalsConfig
383
+ from .llms.vertex_ai import VertexAIConfig
384
+ from .llms.sagemaker import SagemakerConfig
385
+ from .llms.ollama import OllamaConfig
386
+ from .llms.maritalk import MaritTalkConfig
387
+ from .llms.bedrock import AmazonTitanConfig, AmazonAI21Config, AmazonAnthropicConfig, AmazonCohereConfig, AmazonLlamaConfig
388
+ from .llms.openai import OpenAIConfig, OpenAITextCompletionConfig
389
+ from .llms.azure import AzureOpenAIConfig
390
+ from .main import * # type: ignore
391
+ from .integrations import *
392
+ from .exceptions import (
393
+ AuthenticationError,
394
+ InvalidRequestError,
395
+ BadRequestError,
396
+ RateLimitError,
397
+ ServiceUnavailableError,
398
+ OpenAIError,
399
+ ContextWindowExceededError,
400
+ BudgetExceededError,
401
+ APIError,
402
+ Timeout,
403
+ APIConnectionError,
404
+ APIResponseValidationError
405
+ )
406
+ from .budget_manager import BudgetManager
407
+ from .proxy.proxy_cli import run_server
408
+ from .router import Router
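
The module above is litellm's package-level configuration surface: API keys, callbacks, budgets, and the provider/model registries built from the downloaded cost map. A minimal usage sketch, assuming litellm is installed and a real key replaces the placeholder:

import litellm

litellm.set_verbose = False      # toggle debug logging
litellm.drop_params = True       # drop params a provider doesn't support instead of erroring
litellm.max_budget = 10.0        # completion calls should raise BudgetExceededError once spend passes $10
litellm.api_key = "sk-..."       # placeholder; provider-specific env vars are used if unset

# the registries assembled above drive routing and validation, e.g.:
print("openai chat models:", litellm.open_ai_chat_completion_models[:3])
print("known providers:", litellm.provider_list[:5])
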
litellm/_version.py ADDED
@@ -0,0 +1,6 @@
1
+ import importlib_metadata
2
+
3
+ try:
4
+ version = importlib_metadata.version("litellm")
5
+ except:
6
+ pass
litellm/budget_manager.py ADDED
@@ -0,0 +1,155 @@
1
+ import os, json, time
2
+ import litellm
3
+ from litellm.utils import ModelResponse
4
+ import requests, threading
5
+ from typing import Optional, Union, Literal
6
+
7
+ class BudgetManager:
8
+ def __init__(self, project_name: str, client_type: str = "local", api_base: Optional[str] = None):
9
+ self.client_type = client_type
10
+ self.project_name = project_name
11
+ self.api_base = api_base or "https://api.litellm.ai"
12
+ ## load the data or init the initial dictionaries
13
+ self.load_data()
14
+
15
+ def print_verbose(self, print_statement):
16
+ if litellm.set_verbose:
17
+ import logging
18
+ logging.info(print_statement)
19
+
20
+ def load_data(self):
21
+ if self.client_type == "local":
22
+ # Check if user dict file exists
23
+ if os.path.isfile("user_cost.json"):
24
+ # Load the user dict
25
+ with open("user_cost.json", 'r') as json_file:
26
+ self.user_dict = json.load(json_file)
27
+ else:
28
+ self.print_verbose("User Dictionary not found!")
29
+ self.user_dict = {}
30
+ self.print_verbose(f"user dict from local: {self.user_dict}")
31
+ elif self.client_type == "hosted":
32
+ # Load the user_dict from hosted db
33
+ url = self.api_base + "/get_budget"
34
+ headers = {'Content-Type': 'application/json'}
35
+ data = {
36
+ 'project_name' : self.project_name
37
+ }
38
+ response = requests.post(url, headers=headers, json=data)
39
+ response = response.json()
40
+ if response["status"] == "error":
41
+ self.user_dict = {} # assume this means the user dict hasn't been stored yet
42
+ else:
43
+ self.user_dict = response["data"]
44
+
45
+ def create_budget(self, total_budget: float, user: str, duration: Optional[Literal["daily", "weekly", "monthly", "yearly"]] = None, created_at: float = time.time()):
46
+ self.user_dict[user] = {"total_budget": total_budget}
47
+ if duration is None:
48
+ return self.user_dict[user]
49
+
50
+ if duration == 'daily':
51
+ duration_in_days = 1
52
+ elif duration == 'weekly':
53
+ duration_in_days = 7
54
+ elif duration == 'monthly':
55
+ duration_in_days = 28
56
+ elif duration == 'yearly':
57
+ duration_in_days = 365
58
+ else:
59
+ raise ValueError("""duration needs to be one of ["daily", "weekly", "monthly", "yearly"]""")
60
+ self.user_dict[user] = {"total_budget": total_budget, "duration": duration_in_days, "created_at": created_at, "last_updated_at": created_at}
61
+ self._save_data_thread() # [Non-Blocking] Update persistent storage without blocking execution
62
+ return self.user_dict[user]
63
+
64
+ def projected_cost(self, model: str, messages: list, user: str):
65
+ text = "".join(message["content"] for message in messages)
66
+ prompt_tokens = litellm.token_counter(model=model, text=text)
67
+ prompt_cost, _ = litellm.cost_per_token(model=model, prompt_tokens=prompt_tokens, completion_tokens=0)
68
+ current_cost = self.user_dict[user].get("current_cost", 0)
69
+ projected_cost = prompt_cost + current_cost
70
+ return projected_cost
71
+
72
+ def get_total_budget(self, user: str):
73
+ return self.user_dict[user]["total_budget"]
74
+
75
+ def update_cost(self, user: str, completion_obj: Optional[ModelResponse] = None, model: Optional[str] = None, input_text: Optional[str] = None, output_text: Optional[str] = None):
76
+ if model and input_text and output_text:
77
+ prompt_tokens = litellm.token_counter(model=model, messages=[{"role": "user", "content": input_text}])
78
+ completion_tokens = litellm.token_counter(model=model, messages=[{"role": "user", "content": output_text}])
79
+ prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar = litellm.cost_per_token(model=model, prompt_tokens=prompt_tokens, completion_tokens=completion_tokens)
80
+ cost = prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar
81
+ elif completion_obj:
82
+ cost = litellm.completion_cost(completion_response=completion_obj)
83
 + model = completion_obj['model'] # if dict access throws an error, try model = completion_obj.model
84
+ else:
85
+ raise ValueError("Either a chat completion object or the text response needs to be passed in. Learn more - https://docs.litellm.ai/docs/budget_manager")
86
+
87
+ self.user_dict[user]["current_cost"] = cost + self.user_dict[user].get("current_cost", 0)
88
+ if "model_cost" in self.user_dict[user]:
89
+ self.user_dict[user]["model_cost"][model] = cost + self.user_dict[user]["model_cost"].get(model, 0)
90
+ else:
91
+ self.user_dict[user]["model_cost"] = {model: cost}
92
+
93
+ self._save_data_thread() # [Non-Blocking] Update persistent storage without blocking execution
94
+ return {"user": self.user_dict[user]}
95
+
96
+
97
+ def get_current_cost(self, user):
98
+ return self.user_dict[user].get("current_cost", 0)
99
+
100
+ def get_model_cost(self, user):
101
+ return self.user_dict[user].get("model_cost", 0)
102
+
103
+ def is_valid_user(self, user: str) -> bool:
104
+ return user in self.user_dict
105
+
106
+ def get_users(self):
107
+ return list(self.user_dict.keys())
108
+
109
+ def reset_cost(self, user):
110
+ self.user_dict[user]["current_cost"] = 0
111
+ self.user_dict[user]["model_cost"] = {}
112
+ return {"user": self.user_dict[user]}
113
+
114
+ def reset_on_duration(self, user: str):
115
+ # Get current and creation time
116
+ last_updated_at = self.user_dict[user]["last_updated_at"]
117
+ current_time = time.time()
118
+
119
+ # Convert duration from days to seconds
120
+ duration_in_seconds = self.user_dict[user]["duration"] * 24 * 60 * 60
121
+
122
+ # Check if duration has elapsed
123
+ if current_time - last_updated_at >= duration_in_seconds:
124
+ # Reset cost if duration has elapsed and update the creation time
125
+ self.reset_cost(user)
126
+ self.user_dict[user]["last_updated_at"] = current_time
127
+ self._save_data_thread() # Save the data
128
+
129
+ def update_budget_all_users(self):
130
+ for user in self.get_users():
131
+ if "duration" in self.user_dict[user]:
132
+ self.reset_on_duration(user)
133
+
134
+ def _save_data_thread(self):
135
+ thread = threading.Thread(target=self.save_data) # [Non-Blocking]: saves data without blocking execution
136
+ thread.start()
137
+
138
+ def save_data(self):
139
+ if self.client_type == "local":
140
+ import json
141
+
142
+ # save the user dict
143
+ with open("user_cost.json", 'w') as json_file:
144
+ json.dump(self.user_dict, json_file, indent=4) # Indent for pretty formatting
145
+ return {"status": "success"}
146
+ elif self.client_type == "hosted":
147
+ url = self.api_base + "/set_budget"
148
+ headers = {'Content-Type': 'application/json'}
149
+ data = {
150
+ 'project_name' : self.project_name,
151
+ "user_dict": self.user_dict
152
+ }
153
+ response = requests.post(url, headers=headers, json=data)
154
+ response = response.json()
155
+ return response
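
BudgetManager above tracks per-user spend either in a local user_cost.json file (client_type="local") or via the hosted API (client_type="hosted"). A minimal local-mode sketch using the methods defined above; the model name, budget, and user id are illustrative:

from litellm import BudgetManager, completion

budget_manager = BudgetManager(project_name="demo_project")  # defaults to local, file-backed storage
user = "user_123"
if not budget_manager.is_valid_user(user):
    budget_manager.create_budget(total_budget=10.0, user=user, duration="monthly")

messages = [{"role": "user", "content": "Hey, how's it going?"}]
# check projected spend before making the call
if budget_manager.projected_cost(model="gpt-3.5-turbo", messages=messages, user=user) <= budget_manager.get_total_budget(user):
    response = completion(model="gpt-3.5-turbo", messages=messages)
    budget_manager.update_cost(user=user, completion_obj=response)  # persists asynchronously via _save_data_thread
else:
    print(f"projected spend exceeds budget for {user}")
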
litellm/caching.py ADDED
@@ -0,0 +1,275 @@
1
+ # +-----------------------------------------------+
2
+ # | |
3
+ # | Give Feedback / Get Help |
4
+ # | https://github.com/BerriAI/litellm/issues/new |
5
+ # | |
6
+ # +-----------------------------------------------+
7
+ #
8
+ # Thank you users! We ❤️ you! - Krrish & Ishaan
9
+
10
+ import litellm
11
+ import time, logging
12
+ import json, traceback, ast
13
+ from typing import Optional
14
+
15
+ def get_prompt(*args, **kwargs):
16
+ # make this safe checks, it should not throw any exceptions
17
+ if len(args) > 1:
18
+ messages = args[1]
19
+ prompt = " ".join(message["content"] for message in messages)
20
+ return prompt
21
+ if "messages" in kwargs:
22
+ messages = kwargs["messages"]
23
+ prompt = " ".join(message["content"] for message in messages)
24
+ return prompt
25
+ return None
26
+
27
+ def print_verbose(print_statement):
28
+ if litellm.set_verbose:
29
+ print(print_statement) # noqa
30
+
31
+ class BaseCache:
32
+ def set_cache(self, key, value, **kwargs):
33
+ raise NotImplementedError
34
+
35
+ def get_cache(self, key, **kwargs):
36
+ raise NotImplementedError
37
+
38
+
39
+ class InMemoryCache(BaseCache):
40
+ def __init__(self):
41
 + # if users don't provide one, use the default litellm cache
42
+ self.cache_dict = {}
43
+ self.ttl_dict = {}
44
+
45
+ def set_cache(self, key, value, **kwargs):
46
+ self.cache_dict[key] = value
47
+ if "ttl" in kwargs:
48
+ self.ttl_dict[key] = time.time() + kwargs["ttl"]
49
+
50
+ def get_cache(self, key, **kwargs):
51
+ if key in self.cache_dict:
52
+ if key in self.ttl_dict:
53
+ if time.time() > self.ttl_dict[key]:
54
+ self.cache_dict.pop(key, None)
55
+ return None
56
+ original_cached_response = self.cache_dict[key]
57
+ try:
58
+ cached_response = json.loads(original_cached_response)
59
+ except:
60
+ cached_response = original_cached_response
61
+ if isinstance(cached_response, dict):
62
+ cached_response['cache'] = True # set cache-hit flag to True
63
+ return cached_response
64
+ return None
65
+
66
+ def flush_cache(self):
67
+ self.cache_dict.clear()
68
+ self.ttl_dict.clear()
69
+
70
+
71
+ class RedisCache(BaseCache):
72
+ def __init__(self, host, port, password, **kwargs):
73
+ import redis
74
 + # if users don't provide one, use the default litellm cache
75
+ self.redis_client = redis.Redis(host=host, port=port, password=password, **kwargs)
76
+
77
+ def set_cache(self, key, value, **kwargs):
78
+ ttl = kwargs.get("ttl", None)
79
+ try:
80
+ self.redis_client.set(name=key, value=str(value), ex=ttl)
81
+ except Exception as e:
82
+ # NON blocking - notify users Redis is throwing an exception
83
+ logging.debug("LiteLLM Caching: set() - Got exception from REDIS : ", e)
84
+
85
+ def get_cache(self, key, **kwargs):
86
+ try:
87
+ # TODO convert this to a ModelResponse object
88
+ cached_response = self.redis_client.get(key)
89
+ if cached_response != None:
90
 + # cached_response comes back as bytes - decode it and convert it back to a dict
91
+ cached_response = cached_response.decode("utf-8") # Convert bytes to string
92
+ try:
93
+ cached_response = json.loads(cached_response) # Convert string to dictionary
94
+ except:
95
+ cached_response = ast.literal_eval(cached_response)
96
+ if isinstance(cached_response, dict):
97
+ cached_response['cache'] = True # set cache-hit flag to True
98
+ return cached_response
99
+ except Exception as e:
100
+ # NON blocking - notify users Redis is throwing an exception
101
+ traceback.print_exc()
102
+ logging.debug("LiteLLM Caching: get() - Got exception from REDIS: ", e)
103
+
104
+ def flush_cache(self):
105
+ self.redis_client.flushall()
106
+
107
+ class DualCache(BaseCache):
108
+ """
109
+ This updates both Redis and an in-memory cache simultaneously.
110
+ When data is updated or inserted, it is written to both the in-memory cache + Redis.
111
+ This ensures that even if Redis hasn't been updated yet, the in-memory cache reflects the most recent data.
112
+ """
113
+ def __init__(self, in_memory_cache: Optional[InMemoryCache] =None, redis_cache: Optional[RedisCache] =None) -> None:
114
+ super().__init__()
115
+ # If in_memory_cache is not provided, use the default InMemoryCache
116
+ self.in_memory_cache = in_memory_cache or InMemoryCache()
117
+ # If redis_cache is not provided, use the default RedisCache
118
+ self.redis_cache = redis_cache
119
+
120
+ def set_cache(self, key, value, **kwargs):
121
+ # Update both Redis and in-memory cache
122
+ try:
123
+ print_verbose(f"set cache: key: {key}; value: {value}")
124
+ if self.in_memory_cache is not None:
125
+ self.in_memory_cache.set_cache(key, value, **kwargs)
126
+
127
+ if self.redis_cache is not None:
128
+ self.redis_cache.set_cache(key, value, **kwargs)
129
+ except Exception as e:
130
+ print_verbose(e)
131
+
132
+ def get_cache(self, key, **kwargs):
133
+ # Try to fetch from in-memory cache first
134
+ try:
135
+ print_verbose(f"get cache: cache key: {key}")
136
+ result = None
137
+ if self.in_memory_cache is not None:
138
+ in_memory_result = self.in_memory_cache.get_cache(key, **kwargs)
139
+
140
+ if in_memory_result is not None:
141
+ result = in_memory_result
142
+
143
+ if self.redis_cache is not None:
144
+ # If not found in in-memory cache, try fetching from Redis
145
+ redis_result = self.redis_cache.get_cache(key, **kwargs)
146
+
147
+ if redis_result is not None:
148
+ # Update in-memory cache with the value from Redis
149
+ self.in_memory_cache.set_cache(key, redis_result, **kwargs)
150
+
151
+ result = redis_result
152
+
153
+ print_verbose(f"get cache: cache result: {result}")
154
+ return result
155
+ except Exception as e:
156
+ traceback.print_exc()
157
+
158
+ def flush_cache(self):
159
+ if self.in_memory_cache is not None:
160
+ self.in_memory_cache.flush_cache()
161
+ if self.redis_cache is not None:
162
+ self.redis_cache.flush_cache()
163
+
164
+ #### LiteLLM.Completion Cache ####
165
+ class Cache:
166
+ def __init__(
167
+ self,
168
+ type="local",
169
+ host=None,
170
+ port=None,
171
+ password=None,
172
+ **kwargs
173
+ ):
174
+ """
175
+ Initializes the cache based on the given type.
176
+
177
+ Args:
178
+ type (str, optional): The type of cache to initialize. Defaults to "local".
179
+ host (str, optional): The host address for the Redis cache. Required if type is "redis".
180
+ port (int, optional): The port number for the Redis cache. Required if type is "redis".
181
+ password (str, optional): The password for the Redis cache. Required if type is "redis".
182
+ **kwargs: Additional keyword arguments for redis.Redis() cache
183
+
184
+ Raises:
185
+ ValueError: If an invalid cache type is provided.
186
+
187
+ Returns:
188
+ None
189
+ """
190
+ if type == "redis":
191
+ self.cache = RedisCache(host, port, password, **kwargs)
192
+ if type == "local":
193
+ self.cache = InMemoryCache()
194
+ if "cache" not in litellm.input_callback:
195
+ litellm.input_callback.append("cache")
196
+ if "cache" not in litellm.success_callback:
197
+ litellm.success_callback.append("cache")
198
+
199
+ def get_cache_key(self, *args, **kwargs):
200
+ """
201
+ Get the cache key for the given arguments.
202
+
203
+ Args:
204
+ *args: args to litellm.completion() or embedding()
205
+ **kwargs: kwargs to litellm.completion() or embedding()
206
+
207
+ Returns:
208
+ str: The cache key generated from the arguments, or None if no cache key could be generated.
209
+ """
210
+ cache_key =""
211
+ for param in kwargs:
212
+ # ignore litellm params here
213
+ if param in set(["model", "messages", "temperature", "top_p", "n", "stop", "max_tokens", "presence_penalty", "frequency_penalty", "logit_bias", "user", "response_format", "seed", "tools", "tool_choice"]):
214
+ # check if param == model and model_group is passed in, then override model with model_group
215
+ if param == "model" and kwargs.get("metadata", None) is not None and kwargs["metadata"].get("model_group", None) is not None:
216
+ param_value = kwargs["metadata"].get("model_group", None) # for litellm.Router use model_group for caching over `model`
217
+ else:
218
+ param_value = kwargs[param]
219
 + cache_key += f"{str(param)}: {str(param_value)}"
220
+ return cache_key
221
+
222
+ def generate_streaming_content(self, content):
223
+ chunk_size = 5 # Adjust the chunk size as needed
224
+ for i in range(0, len(content), chunk_size):
225
+ yield {'choices': [{'delta': {'role': 'assistant', 'content': content[i:i + chunk_size]}}]}
226
+ time.sleep(0.02)
227
+
228
+ def get_cache(self, *args, **kwargs):
229
+ """
230
+ Retrieves the cached result for the given arguments.
231
+
232
+ Args:
233
+ *args: args to litellm.completion() or embedding()
234
+ **kwargs: kwargs to litellm.completion() or embedding()
235
+
236
+ Returns:
237
+ The cached result if it exists, otherwise None.
238
+ """
239
+ try: # never block execution
240
+ if "cache_key" in kwargs:
241
+ cache_key = kwargs["cache_key"]
242
+ else:
243
+ cache_key = self.get_cache_key(*args, **kwargs)
244
+ if cache_key is not None:
245
+ cached_result = self.cache.get_cache(cache_key)
246
+ if cached_result != None and 'stream' in kwargs and kwargs['stream'] == True:
247
+ # if streaming is true and we got a cache hit, return a generator
248
+ return self.generate_streaming_content(cached_result["choices"][0]['message']['content'])
249
+ return cached_result
250
+ except Exception as e:
251
+ logging.debug(f"An exception occurred: {traceback.format_exc()}")
252
+ return None
253
+
254
+ def add_cache(self, result, *args, **kwargs):
255
+ """
256
+ Adds a result to the cache.
257
+
258
+ Args:
259
+ *args: args to litellm.completion() or embedding()
260
+ **kwargs: kwargs to litellm.completion() or embedding()
261
+
262
+ Returns:
263
+ None
264
+ """
265
+ try:
266
+ if "cache_key" in kwargs:
267
+ cache_key = kwargs["cache_key"]
268
+ else:
269
+ cache_key = self.get_cache_key(*args, **kwargs)
270
+ if cache_key is not None:
271
+ if isinstance(result, litellm.ModelResponse):
272
+ result = result.model_dump_json()
273
+ self.cache.set_cache(cache_key, result, **kwargs)
274
+ except Exception as e:
275
+ pass
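
The Cache class above registers itself on litellm's input/success callbacks, so repeated completion() calls with the same hashed parameters can be answered from memory or Redis instead of hitting the provider. A small sketch of the local (in-memory) mode; the model and prompt are illustrative:

import litellm
from litellm.caching import Cache

litellm.cache = Cache()  # type="local" -> InMemoryCache; pass type="redis", host, port, password for RedisCache

messages = [{"role": "user", "content": "What is 2 + 2?"}]
first = litellm.completion(model="gpt-3.5-turbo", messages=messages)
second = litellm.completion(model="gpt-3.5-turbo", messages=messages)  # expected to be served from the cache
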
litellm/cost.json ADDED
@@ -0,0 +1,5 @@
1
+ {
2
+ "gpt-3.5-turbo-0613": 0.00015000000000000001,
3
+ "claude-2": 0.00016454,
4
+ "gpt-4-0613": 0.015408
5
+ }
litellm/deprecated_litellm_server/.env.template ADDED
@@ -0,0 +1,43 @@
1
+ # # set AUTH STRATEGY FOR LLM APIs - Defaults to using Environment Variables
2
+ # AUTH_STRATEGY = "ENV" # ENV or DYNAMIC, ENV always reads from environment variables, DYNAMIC reads request headers to set LLM api keys
3
+
4
+ # OPENAI_API_KEY = ""
5
+
6
+ # HUGGINGFACE_API_KEY=""
7
+
8
+ # TOGETHERAI_API_KEY=""
9
+
10
+ # REPLICATE_API_KEY=""
11
+
12
+ # ## bedrock / sagemaker
13
+ # AWS_ACCESS_KEY_ID = ""
14
+ # AWS_SECRET_ACCESS_KEY = ""
15
+
16
+ # AZURE_API_KEY = ""
17
+ # AZURE_API_BASE = ""
18
+ # AZURE_API_VERSION = ""
19
+
20
+ # ANTHROPIC_API_KEY = ""
21
+
22
+ # COHERE_API_KEY = ""
23
+
24
+ # ## CONFIG FILE ##
25
+ # # CONFIG_FILE_PATH = "" # uncomment to point to config file
26
+
27
+ # ## LOGGING ##
28
+
29
+ # SET_VERBOSE = "False" # set to 'True' to see detailed input/output logs
30
+
31
+ # ### LANGFUSE
32
+ # LANGFUSE_PUBLIC_KEY = ""
33
+ # LANGFUSE_SECRET_KEY = ""
34
+ # # Optional, defaults to https://cloud.langfuse.com
35
+ # LANGFUSE_HOST = "" # optional
36
+
37
+
38
+ # ## CACHING ##
39
+
40
+ # ### REDIS
41
+ # REDIS_HOST = ""
42
+ # REDIS_PORT = ""
43
+ # REDIS_PASSWORD = ""
litellm/deprecated_litellm_server/Dockerfile ADDED
@@ -0,0 +1,10 @@
1
+ # FROM python:3.10
2
+
3
+ # ENV LITELLM_CONFIG_PATH="/litellm.secrets.toml"
4
+ # COPY . /app
5
+ # WORKDIR /app
6
+ # RUN pip install -r requirements.txt
7
+
8
+ # EXPOSE $PORT
9
+
10
+ # CMD exec uvicorn main:app --host 0.0.0.0 --port $PORT --workers 10
litellm/deprecated_litellm_server/README.md ADDED
@@ -0,0 +1,3 @@
1
+ # litellm-server [experimental]
2
+
3
+ Deprecated. See litellm/proxy
litellm/deprecated_litellm_server/__init__.py ADDED
@@ -0,0 +1,2 @@
1
+ # from .main import *
2
+ # from .server_utils import *
litellm/deprecated_litellm_server/main.py ADDED
@@ -0,0 +1,193 @@
1
+ # import os, traceback
2
+ # from fastapi import FastAPI, Request, HTTPException
3
+ # from fastapi.routing import APIRouter
4
+ # from fastapi.responses import StreamingResponse, FileResponse
5
+ # from fastapi.middleware.cors import CORSMiddleware
6
+ # import json, sys
7
+ # from typing import Optional
8
+ # sys.path.insert(
9
+ # 0, os.path.abspath("../")
10
+ # ) # Adds the parent directory to the system path - for litellm local dev
11
+ # import litellm
12
+
13
+ # try:
14
+ # from litellm.deprecated_litellm_server.server_utils import set_callbacks, load_router_config, print_verbose
15
+ # except ImportError:
16
+ # from litellm.deprecated_litellm_server.server_utils import set_callbacks, load_router_config, print_verbose
17
+ # import dotenv
18
+ # dotenv.load_dotenv() # load env variables
19
+
20
+ # app = FastAPI(docs_url="/", title="LiteLLM API")
21
+ # router = APIRouter()
22
+ # origins = ["*"]
23
+
24
+ # app.add_middleware(
25
+ # CORSMiddleware,
26
+ # allow_origins=origins,
27
+ # allow_credentials=True,
28
+ # allow_methods=["*"],
29
+ # allow_headers=["*"],
30
+ # )
31
+ # #### GLOBAL VARIABLES ####
32
+ # llm_router: Optional[litellm.Router] = None
33
+ # llm_model_list: Optional[list] = None
34
+ # server_settings: Optional[dict] = None
35
+
36
+ # set_callbacks() # sets litellm callbacks for logging if they exist in the environment
37
+
38
+ # if "CONFIG_FILE_PATH" in os.environ:
39
+ # llm_router, llm_model_list, server_settings = load_router_config(router=llm_router, config_file_path=os.getenv("CONFIG_FILE_PATH"))
40
+ # else:
41
+ # llm_router, llm_model_list, server_settings = load_router_config(router=llm_router)
42
+ # #### API ENDPOINTS ####
43
+ # @router.get("/v1/models")
44
+ # @router.get("/models") # if project requires model list
45
+ # def model_list():
46
+ # all_models = litellm.utils.get_valid_models()
47
+ # if llm_model_list:
48
+ # all_models += llm_model_list
49
+ # return dict(
50
+ # data=[
51
+ # {
52
+ # "id": model,
53
+ # "object": "model",
54
+ # "created": 1677610602,
55
+ # "owned_by": "openai",
56
+ # }
57
+ # for model in all_models
58
+ # ],
59
+ # object="list",
60
+ # )
61
+ # # for streaming
62
+ # def data_generator(response):
63
+
64
+ # for chunk in response:
65
+
66
+ # yield f"data: {json.dumps(chunk)}\n\n"
67
+
68
+ # @router.post("/v1/completions")
69
+ # @router.post("/completions")
70
+ # async def completion(request: Request):
71
+ # data = await request.json()
72
+ # response = litellm.completion(
73
+ # **data
74
+ # )
75
+ # if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
76
+ # return StreamingResponse(data_generator(response), media_type='text/event-stream')
77
+ # return response
78
+
79
+ # @router.post("/v1/embeddings")
80
+ # @router.post("/embeddings")
81
+ # async def embedding(request: Request):
82
+ # try:
83
+ # data = await request.json()
84
+ # # default to always using the "ENV" variables, only if AUTH_STRATEGY==DYNAMIC then reads headers
85
+ # if os.getenv("AUTH_STRATEGY", None) == "DYNAMIC" and "authorization" in request.headers: # if users pass LLM api keys as part of header
86
+ # api_key = request.headers.get("authorization")
87
+ # api_key = api_key.replace("Bearer", "").strip() # type: ignore
88
+ # if len(api_key.strip()) > 0:
89
+ # api_key = api_key
90
+ # data["api_key"] = api_key
91
+ # response = litellm.embedding(
92
+ # **data
93
+ # )
94
+ # return response
95
+ # except Exception as e:
96
+ # error_traceback = traceback.format_exc()
97
+ # error_msg = f"{str(e)}\n\n{error_traceback}"
98
+ # return {"error": error_msg}
99
+
100
+ # @router.post("/v1/chat/completions")
101
+ # @router.post("/chat/completions")
102
+ # @router.post("/openai/deployments/{model:path}/chat/completions") # azure compatible endpoint
103
+ # async def chat_completion(request: Request, model: Optional[str] = None):
104
+ # global llm_model_list, server_settings
105
+ # try:
106
+ # data = await request.json()
107
+ # server_model = server_settings.get("completion_model", None) if server_settings else None
108
+ # data["model"] = server_model or model or data["model"]
109
+ # ## CHECK KEYS ##
110
+ # # default to always using the "ENV" variables, only if AUTH_STRATEGY==DYNAMIC then reads headers
111
+ # # env_validation = litellm.validate_environment(model=data["model"])
112
+ # # if (env_validation['keys_in_environment'] is False or os.getenv("AUTH_STRATEGY", None) == "DYNAMIC") and ("authorization" in request.headers or "api-key" in request.headers): # if users pass LLM api keys as part of header
113
+ # # if "authorization" in request.headers:
114
+ # # api_key = request.headers.get("authorization")
115
+ # # elif "api-key" in request.headers:
116
+ # # api_key = request.headers.get("api-key")
117
+ # # print(f"api_key in headers: {api_key}")
118
+ # # if " " in api_key:
119
+ # # api_key = api_key.split(" ")[1]
120
+ # # print(f"api_key split: {api_key}")
121
+ # # if len(api_key) > 0:
122
+ # # api_key = api_key
123
+ # # data["api_key"] = api_key
124
+ # # print(f"api_key in data: {api_key}")
125
+ # ## CHECK CONFIG ##
126
+ # if llm_model_list and data["model"] in [m["model_name"] for m in llm_model_list]:
127
+ # for m in llm_model_list:
128
+ # if data["model"] == m["model_name"]:
129
+ # for key, value in m["litellm_params"].items():
130
+ # data[key] = value
131
+ # break
132
+ # response = litellm.completion(
133
+ # **data
134
+ # )
135
+ # if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
136
+ # return StreamingResponse(data_generator(response), media_type='text/event-stream')
137
+ # return response
138
+ # except Exception as e:
139
+ # error_traceback = traceback.format_exc()
140
+
141
+ # error_msg = f"{str(e)}\n\n{error_traceback}"
142
+ # # return {"error": error_msg}
143
+ # raise HTTPException(status_code=500, detail=error_msg)
144
+
145
+ # @router.post("/router/completions")
146
+ # async def router_completion(request: Request):
147
+ # global llm_router
148
+ # try:
149
+ # data = await request.json()
150
+ # if "model_list" in data:
151
+ # llm_router = litellm.Router(model_list=data.pop("model_list"))
152
+ # if llm_router is None:
153
+ # raise Exception("Save model list via config.yaml. Eg.: ` docker build -t myapp --build-arg CONFIG_FILE=myconfig.yaml .` or pass it in as model_list=[..] as part of the request body")
154
+
155
+ # # openai.ChatCompletion.create replacement
156
+ # response = await llm_router.acompletion(model="gpt-3.5-turbo",
157
+ # messages=[{"role": "user", "content": "Hey, how's it going?"}])
158
+
159
+ # if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
160
+ # return StreamingResponse(data_generator(response), media_type='text/event-stream')
161
+ # return response
162
+ # except Exception as e:
163
+ # error_traceback = traceback.format_exc()
164
+ # error_msg = f"{str(e)}\n\n{error_traceback}"
165
+ # return {"error": error_msg}
166
+
167
+ # @router.post("/router/embedding")
168
+ # async def router_embedding(request: Request):
169
+ # global llm_router
170
+ # try:
171
+ # data = await request.json()
172
+ # if "model_list" in data:
173
+ # llm_router = litellm.Router(model_list=data.pop("model_list"))
174
+ # if llm_router is None:
175
+ # raise Exception("Save model list via config.yaml. Eg.: ` docker build -t myapp --build-arg CONFIG_FILE=myconfig.yaml .` or pass it in as model_list=[..] as part of the request body")
176
+
177
+ # response = await llm_router.aembedding(model="gpt-3.5-turbo", # type: ignore
178
+ # messages=[{"role": "user", "content": "Hey, how's it going?"}])
179
+
180
+ # if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
181
+ # return StreamingResponse(data_generator(response), media_type='text/event-stream')
182
+ # return response
183
+ # except Exception as e:
184
+ # error_traceback = traceback.format_exc()
185
+ # error_msg = f"{str(e)}\n\n{error_traceback}"
186
+ # return {"error": error_msg}
187
+
188
+ # @router.get("/")
189
+ # async def home(request: Request):
190
+ # return "LiteLLM: RUNNING"
191
+
192
+
193
+ # app.include_router(router)
litellm/deprecated_litellm_server/requirements.txt ADDED
@@ -0,0 +1,7 @@
1
+ # openai
2
+ # fastapi
3
+ # uvicorn
4
+ # boto3
5
+ # litellm
6
+ # python-dotenv
7
+ # redis
litellm/deprecated_litellm_server/server_utils.py ADDED
@@ -0,0 +1,86 @@
1
+ # import os, litellm
2
+ # import pkg_resources
3
+ # import dotenv
4
+ # dotenv.load_dotenv() # load env variables
5
+
6
+ # def print_verbose(print_statement):
7
+ # pass
8
+
9
+ # def get_package_version(package_name):
10
+ # try:
11
+ # package = pkg_resources.get_distribution(package_name)
12
+ # return package.version
13
+ # except pkg_resources.DistributionNotFound:
14
+ # return None
15
+
16
+ # # Usage example
17
+ # package_name = "litellm"
18
+ # version = get_package_version(package_name)
19
+ # if version:
20
+ # print_verbose(f"The version of {package_name} is {version}")
21
+ # else:
22
+ # print_verbose(f"{package_name} is not installed")
23
+ # import yaml
24
+ # import dotenv
25
+ # from typing import Optional
26
+ # dotenv.load_dotenv() # load env variables
27
+
28
+ # def set_callbacks():
29
+ # ## LOGGING
30
+ # if len(os.getenv("SET_VERBOSE", "")) > 0:
31
+ # if os.getenv("SET_VERBOSE") == "True":
32
+ # litellm.set_verbose = True
33
+ # print_verbose("\033[92mLiteLLM: Switched on verbose logging\033[0m")
34
+ # else:
35
+ # litellm.set_verbose = False
36
+
37
+ # ### LANGFUSE
38
+ # if (len(os.getenv("LANGFUSE_PUBLIC_KEY", "")) > 0 and len(os.getenv("LANGFUSE_SECRET_KEY", ""))) > 0 or len(os.getenv("LANGFUSE_HOST", "")) > 0:
39
+ # litellm.success_callback = ["langfuse"]
40
+ # print_verbose("\033[92mLiteLLM: Switched on Langfuse feature\033[0m")
41
+
42
+ # ## CACHING
43
+ # ### REDIS
44
+ # # if len(os.getenv("REDIS_HOST", "")) > 0 and len(os.getenv("REDIS_PORT", "")) > 0 and len(os.getenv("REDIS_PASSWORD", "")) > 0:
45
+ # # print(f"redis host: {os.getenv('REDIS_HOST')}; redis port: {os.getenv('REDIS_PORT')}; password: {os.getenv('REDIS_PASSWORD')}")
46
+ # # from litellm.caching import Cache
47
+ # # litellm.cache = Cache(type="redis", host=os.getenv("REDIS_HOST"), port=os.getenv("REDIS_PORT"), password=os.getenv("REDIS_PASSWORD"))
48
+ # # print("\033[92mLiteLLM: Switched on Redis caching\033[0m")
49
+
50
+
51
+
52
+ # def load_router_config(router: Optional[litellm.Router], config_file_path: Optional[str]='/app/config.yaml'):
53
+ # config = {}
54
+ # server_settings = {}
55
+ # try:
56
+ # if os.path.exists(config_file_path): # type: ignore
57
+ # with open(config_file_path, 'r') as file: # type: ignore
58
+ # config = yaml.safe_load(file)
59
+ # else:
60
+ # pass
61
+ # except:
62
+ # pass
63
+
64
+ # ## SERVER SETTINGS (e.g. default completion model = 'ollama/mistral')
65
+ # server_settings = config.get("server_settings", None)
66
+ # if server_settings:
67
+ # server_settings = server_settings
68
+
69
+ # ## LITELLM MODULE SETTINGS (e.g. litellm.drop_params=True,..)
70
+ # litellm_settings = config.get('litellm_settings', None)
71
+ # if litellm_settings:
72
+ # for key, value in litellm_settings.items():
73
+ # setattr(litellm, key, value)
74
+
75
+ # ## MODEL LIST
76
+ # model_list = config.get('model_list', None)
77
+ # if model_list:
78
+ # router = litellm.Router(model_list=model_list)
79
+
80
+ # ## ENVIRONMENT VARIABLES
81
+ # environment_variables = config.get('environment_variables', None)
82
+ # if environment_variables:
83
+ # for key, value in environment_variables.items():
84
+ # os.environ[key] = value
85
+
86
+ # return router, model_list, server_settings
litellm/exceptions.py ADDED
@@ -0,0 +1,166 @@
1
+ # +-----------------------------------------------+
2
+ # | |
3
+ # | Give Feedback / Get Help |
4
+ # | https://github.com/BerriAI/litellm/issues/new |
5
+ # | |
6
+ # +-----------------------------------------------+
7
+ #
8
+ # Thank you users! We ❤️ you! - Krrish & Ishaan
9
+
10
+ ## LiteLLM versions of the OpenAI Exception Types
11
+
12
+ from openai import (
13
+ AuthenticationError,
14
+ BadRequestError,
15
+ RateLimitError,
16
+ APIStatusError,
17
+ OpenAIError,
18
+ APIError,
19
+ APITimeoutError,
20
+ APIConnectionError,
21
+ APIResponseValidationError
22
+ )
23
+ import httpx
24
+
25
+ class AuthenticationError(AuthenticationError): # type: ignore
26
+ def __init__(self, message, llm_provider, model, response: httpx.Response):
27
+ self.status_code = 401
28
+ self.message = message
29
+ self.llm_provider = llm_provider
30
+ self.model = model
31
+ super().__init__(
32
+ self.message,
33
+ response=response,
34
+ body=None
35
+ ) # Call the base class constructor with the parameters it needs
36
+
37
+ class BadRequestError(BadRequestError): # type: ignore
38
+ def __init__(self, message, model, llm_provider, response: httpx.Response):
39
+ self.status_code = 400
40
+ self.message = message
41
+ self.model = model
42
+ self.llm_provider = llm_provider
43
+ super().__init__(
44
+ self.message,
45
+ response=response,
46
+ body=None
47
+ ) # Call the base class constructor with the parameters it needs
48
+
49
+ class Timeout(APITimeoutError): # type: ignore
50
+ def __init__(self, message, model, llm_provider):
51
+ self.status_code = 408
52
+ self.message = message
53
+ self.model = model
54
+ self.llm_provider = llm_provider
55
+ request = httpx.Request(method="POST", url="https://api.openai.com/v1")
56
+ super().__init__(
57
+ request=request
58
+ ) # Call the base class constructor with the parameters it needs
59
+
60
+ class RateLimitError(RateLimitError): # type: ignore
61
+ def __init__(self, message, llm_provider, model, response: httpx.Response):
62
+ self.status_code = 429
63
+ self.message = message
64
+ self.llm_provider = llm_provider
65
 + self.model = model
66
+ super().__init__(
67
+ self.message,
68
+ response=response,
69
+ body=None
70
+ ) # Call the base class constructor with the parameters it needs
71
+
72
+ # sub class of rate limit error - meant to give more granularity for error handling context window exceeded errors
73
+ class ContextWindowExceededError(BadRequestError): # type: ignore
74
+ def __init__(self, message, model, llm_provider, response: httpx.Response):
75
+ self.status_code = 400
76
+ self.message = message
77
+ self.model = model
78
+ self.llm_provider = llm_provider
79
+ super().__init__(
80
+ message=self.message,
81
+ model=self.model, # type: ignore
82
+ llm_provider=self.llm_provider, # type: ignore
83
+ response=response
84
+ ) # Call the base class constructor with the parameters it needs
85
+
86
+ class ServiceUnavailableError(APIStatusError): # type: ignore
87
+ def __init__(self, message, llm_provider, model, response: httpx.Response):
88
+ self.status_code = 503
89
+ self.message = message
90
+ self.llm_provider = llm_provider
91
+ self.model = model
92
+ super().__init__(
93
+ self.message,
94
+ response=response,
95
+ body=None
96
+ ) # Call the base class constructor with the parameters it needs
97
+
98
+
99
+ # raise this when the API returns an invalid response object - https://github.com/openai/openai-python/blob/1be14ee34a0f8e42d3f9aa5451aa4cb161f1781f/openai/api_requestor.py#L401
100
+ class APIError(APIError): # type: ignore
101
+ def __init__(self, status_code, message, llm_provider, model, request: httpx.Request):
102
+ self.status_code = status_code
103
+ self.message = message
104
+ self.llm_provider = llm_provider
105
+ self.model = model
106
+ super().__init__(
107
+ self.message,
108
+ request=request, # type: ignore
109
+ body=None
110
+ )
111
+
112
+ # raised if an invalid request (not get, delete, put, post) is made
113
+ class APIConnectionError(APIConnectionError): # type: ignore
114
+ def __init__(self, message, llm_provider, model, request: httpx.Request):
115
+ self.message = message
116
+ self.llm_provider = llm_provider
117
+ self.model = model
118
+ self.status_code = 500
119
+ super().__init__(
120
+ message=self.message,
121
+ request=request
122
+ )
123
+
124
+ # raised if an invalid request (not get, delete, put, post) is made
125
+ class APIResponseValidationError(APIResponseValidationError): # type: ignore
126
+ def __init__(self, message, llm_provider, model):
127
+ self.message = message
128
+ self.llm_provider = llm_provider
129
+ self.model = model
130
+ request = httpx.Request(method="POST", url="https://api.openai.com/v1")
131
+ response = httpx.Response(status_code=500, request=request)
132
+ super().__init__(
133
+ response=response,
134
+ body=None,
135
+ message=message
136
+ )
137
+
138
+ class OpenAIError(OpenAIError): # type: ignore
139
+ def __init__(self, original_exception):
140
+ self.status_code = original_exception.http_status
141
+ super().__init__(
142
+ http_body=original_exception.http_body,
143
+ http_status=original_exception.http_status,
144
+ json_body=original_exception.json_body,
145
+ headers=original_exception.headers,
146
+ code=original_exception.code,
147
+ )
148
+ self.llm_provider = "openai"
149
+
150
+ class BudgetExceededError(Exception):
151
+ def __init__(self, current_cost, max_budget):
152
+ self.current_cost = current_cost
153
+ self.max_budget = max_budget
154
+ message = f"Budget has been exceeded! Current cost: {current_cost}, Max budget: {max_budget}"
155
+ super().__init__(message)
156
+
157
+ ## DEPRECATED ##
158
+ class InvalidRequestError(BadRequestError): # type: ignore
159
+ def __init__(self, message, model, llm_provider):
160
+ self.status_code = 400
161
+ self.message = message
162
+ self.model = model
163
+ self.llm_provider = llm_provider
164
+ super().__init__(
165
+ self.message, f"{self.model}"
166
+ ) # Call the base class constructor with the parameters it needs
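A minimal usage sketch of the exception classes mapped above, assuming they are exposed to callers via litellm.exceptions; the import path and the safe_completion helper are illustrative assumptions, not part of this commit:

# hypothetical caller-side handling of the exception types defined above
import litellm
from litellm.exceptions import ContextWindowExceededError, RateLimitError, ServiceUnavailableError

def safe_completion(model, messages):
    try:
        return litellm.completion(model=model, messages=messages)
    except ContextWindowExceededError:
        # 400 - prompt too long for the model: trim the history and retry once
        return litellm.completion(model=model, messages=messages[-2:])
    except RateLimitError:
        # 429 - caller should back off before retrying
        raise
    except ServiceUnavailableError:
        # 503 - provider outage, surface to the caller
        raise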
litellm/integrations/__init__.py ADDED
@@ -0,0 +1 @@
1
+ from . import *
litellm/integrations/aispend.py ADDED
@@ -0,0 +1,177 @@
1
+ #### What this does ####
2
+ # On success + failure, log events to aispend.io
3
+ import dotenv, os
4
+ import requests
5
+
6
+ dotenv.load_dotenv() # Loading env variables using dotenv
7
+ import traceback
8
+ import datetime
9
+
10
+ model_cost = {
11
+ "gpt-3.5-turbo": {
12
+ "max_tokens": 4000,
13
+ "input_cost_per_token": 0.0000015,
14
+ "output_cost_per_token": 0.000002,
15
+ },
16
+ "gpt-35-turbo": {
17
+ "max_tokens": 4000,
18
+ "input_cost_per_token": 0.0000015,
19
+ "output_cost_per_token": 0.000002,
20
+ }, # azure model name
21
+ "gpt-3.5-turbo-0613": {
22
+ "max_tokens": 4000,
23
+ "input_cost_per_token": 0.0000015,
24
+ "output_cost_per_token": 0.000002,
25
+ },
26
+ "gpt-3.5-turbo-0301": {
27
+ "max_tokens": 4000,
28
+ "input_cost_per_token": 0.0000015,
29
+ "output_cost_per_token": 0.000002,
30
+ },
31
+ "gpt-3.5-turbo-16k": {
32
+ "max_tokens": 16000,
33
+ "input_cost_per_token": 0.000003,
34
+ "output_cost_per_token": 0.000004,
35
+ },
36
+ "gpt-35-turbo-16k": {
37
+ "max_tokens": 16000,
38
+ "input_cost_per_token": 0.000003,
39
+ "output_cost_per_token": 0.000004,
40
+ }, # azure model name
41
+ "gpt-3.5-turbo-16k-0613": {
42
+ "max_tokens": 16000,
43
+ "input_cost_per_token": 0.000003,
44
+ "output_cost_per_token": 0.000004,
45
+ },
46
+ "gpt-4": {
47
+ "max_tokens": 8000,
48
+ "input_cost_per_token": 0.000003,
49
+ "output_cost_per_token": 0.00006,
50
+ },
51
+ "gpt-4-0613": {
52
+ "max_tokens": 8000,
53
+ "input_cost_per_token": 0.000003,
54
+ "output_cost_per_token": 0.00006,
55
+ },
56
+ "gpt-4-32k": {
57
+ "max_tokens": 8000,
58
+ "input_cost_per_token": 0.00006,
59
+ "output_cost_per_token": 0.00012,
60
+ },
61
+ "claude-instant-1": {
62
+ "max_tokens": 100000,
63
+ "input_cost_per_token": 0.00000163,
64
+ "output_cost_per_token": 0.00000551,
65
+ },
66
+ "claude-2": {
67
+ "max_tokens": 100000,
68
+ "input_cost_per_token": 0.00001102,
69
+ "output_cost_per_token": 0.00003268,
70
+ },
71
+ "text-bison-001": {
72
+ "max_tokens": 8192,
73
+ "input_cost_per_token": 0.000004,
74
+ "output_cost_per_token": 0.000004,
75
+ },
76
+ "chat-bison-001": {
77
+ "max_tokens": 4096,
78
+ "input_cost_per_token": 0.000002,
79
+ "output_cost_per_token": 0.000002,
80
+ },
81
+ "command-nightly": {
82
+ "max_tokens": 4096,
83
+ "input_cost_per_token": 0.000015,
84
+ "output_cost_per_token": 0.000015,
85
+ },
86
+ }
87
+
88
+
89
+ class AISpendLogger:
90
+ # Class variables or attributes
91
+ def __init__(self):
92
+ # Instance variables
93
+ self.account_id = os.getenv("AISPEND_ACCOUNT_ID")
94
+ self.api_key = os.getenv("AISPEND_API_KEY")
95
+
96
+ def price_calculator(self, model, response_obj, start_time, end_time):
97
+ # try and find if the model is in the model_cost map
98
+ # else default to the average of the costs
99
+ prompt_tokens_cost_usd_dollar = 0
100
+ completion_tokens_cost_usd_dollar = 0
101
+ if model in model_cost:
102
+ prompt_tokens_cost_usd_dollar = (
103
+ model_cost[model]["input_cost_per_token"]
104
+ * response_obj["usage"]["prompt_tokens"]
105
+ )
106
+ completion_tokens_cost_usd_dollar = (
107
+ model_cost[model]["output_cost_per_token"]
108
+ * response_obj["usage"]["completion_tokens"]
109
+ )
110
+ elif "replicate" in model:
111
+ # replicate models are charged based on time
112
+ # llama 2 runs on an nvidia a100 which costs $0.0032 per second - https://replicate.com/replicate/llama-2-70b-chat
113
+ model_run_time = end_time - start_time # assuming time in seconds
114
+ cost_usd_dollar = model_run_time * 0.0032
115
+ prompt_tokens_cost_usd_dollar = cost_usd_dollar / 2
116
+ completion_tokens_cost_usd_dollar = cost_usd_dollar / 2
117
+ else:
118
+ # calculate average input cost
119
+ input_cost_sum = 0
120
+ output_cost_sum = 0
121
+ for model in model_cost:
122
+ input_cost_sum += model_cost[model]["input_cost_per_token"]
123
+ output_cost_sum += model_cost[model]["output_cost_per_token"]
124
+ avg_input_cost = input_cost_sum / len(model_cost.keys())
125
+ avg_output_cost = output_cost_sum / len(model_cost.keys())
126
+ prompt_tokens_cost_usd_dollar = (
127
+ avg_input_cost
128
+ * response_obj["usage"]["prompt_tokens"]
129
+ )
130
+ completion_tokens_cost_usd_dollar = (
131
+ avg_output_cost
132
+ * response_obj["usage"]["completion_tokens"]
133
+ )
134
+ return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
135
+
136
+ def log_event(self, model, response_obj, start_time, end_time, print_verbose):
137
+ # Method definition
138
+ try:
139
+ print_verbose(
140
+ f"AISpend Logging - Enters logging function for model {model}"
141
+ )
142
+
143
+ url = f"https://aispend.io/api/v1/accounts/{self.account_id}/data"
144
+ headers = {
145
+ "Authorization": f"Bearer {self.api_key}",
146
+ "Content-Type": "application/json",
147
+ }
148
+
149
+ response_timestamp = datetime.datetime.fromtimestamp(
150
+ int(response_obj["created"])
151
+ ).strftime("%Y-%m-%d")
152
+
153
+ (
154
+ prompt_tokens_cost_usd_dollar,
155
+ completion_tokens_cost_usd_dollar,
156
+ ) = self.price_calculator(model, response_obj, start_time, end_time)
157
+ prompt_tokens_cost_usd_cent = prompt_tokens_cost_usd_dollar * 100
158
+ completion_tokens_cost_usd_cent = completion_tokens_cost_usd_dollar * 100
159
+ data = [
160
+ {
161
+ "requests": 1,
162
+ "requests_context": 1,
163
+ "context_tokens": response_obj["usage"]["prompt_tokens"],
164
+ "requests_generated": 1,
165
+ "generated_tokens": response_obj["usage"]["completion_tokens"],
166
+ "recorded_date": response_timestamp,
167
+ "model_id": response_obj["model"],
168
+ "generated_tokens_cost_usd_cent": prompt_tokens_cost_usd_cent,
169
+ "context_tokens_cost_usd_cent": completion_tokens_cost_usd_cent,
170
+ }
171
+ ]
172
+
173
+ print_verbose(f"AISpend Logging - final data object: {data}")
174
+ except:
175
+ # traceback.print_exc()
176
+ print_verbose(f"AISpend Logging Error - {traceback.format_exc()}")
177
+ pass
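For orientation, the per-token pricing used by price_calculator works out like this for a gpt-3.5-turbo call; the token counts are illustrative, the rates are taken from the model_cost map above:

# cost of a response with 100 prompt tokens and 50 completion tokens
prompt_cost = 0.0000015 * 100              # $0.00015
completion_cost = 0.000002 * 50            # $0.00010
total_usd = prompt_cost + completion_cost  # $0.00025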
litellm/integrations/berrispend.py ADDED
@@ -0,0 +1,184 @@
1
+ #### What this does ####
2
+ # On success + failure, log events to BerriSpend
3
+ import dotenv, os
4
+ import requests
5
+
6
+ dotenv.load_dotenv() # Loading env variables using dotenv
7
+ import traceback
8
+ import datetime
9
+
10
+ model_cost = {
11
+ "gpt-3.5-turbo": {
12
+ "max_tokens": 4000,
13
+ "input_cost_per_token": 0.0000015,
14
+ "output_cost_per_token": 0.000002,
15
+ },
16
+ "gpt-35-turbo": {
17
+ "max_tokens": 4000,
18
+ "input_cost_per_token": 0.0000015,
19
+ "output_cost_per_token": 0.000002,
20
+ }, # azure model name
21
+ "gpt-3.5-turbo-0613": {
22
+ "max_tokens": 4000,
23
+ "input_cost_per_token": 0.0000015,
24
+ "output_cost_per_token": 0.000002,
25
+ },
26
+ "gpt-3.5-turbo-0301": {
27
+ "max_tokens": 4000,
28
+ "input_cost_per_token": 0.0000015,
29
+ "output_cost_per_token": 0.000002,
30
+ },
31
+ "gpt-3.5-turbo-16k": {
32
+ "max_tokens": 16000,
33
+ "input_cost_per_token": 0.000003,
34
+ "output_cost_per_token": 0.000004,
35
+ },
36
+ "gpt-35-turbo-16k": {
37
+ "max_tokens": 16000,
38
+ "input_cost_per_token": 0.000003,
39
+ "output_cost_per_token": 0.000004,
40
+ }, # azure model name
41
+ "gpt-3.5-turbo-16k-0613": {
42
+ "max_tokens": 16000,
43
+ "input_cost_per_token": 0.000003,
44
+ "output_cost_per_token": 0.000004,
45
+ },
46
+ "gpt-4": {
47
+ "max_tokens": 8000,
48
+ "input_cost_per_token": 0.000003,
49
+ "output_cost_per_token": 0.00006,
50
+ },
51
+ "gpt-4-0613": {
52
+ "max_tokens": 8000,
53
+ "input_cost_per_token": 0.000003,
54
+ "output_cost_per_token": 0.00006,
55
+ },
56
+ "gpt-4-32k": {
57
+ "max_tokens": 8000,
58
+ "input_cost_per_token": 0.00006,
59
+ "output_cost_per_token": 0.00012,
60
+ },
61
+ "claude-instant-1": {
62
+ "max_tokens": 100000,
63
+ "input_cost_per_token": 0.00000163,
64
+ "output_cost_per_token": 0.00000551,
65
+ },
66
+ "claude-2": {
67
+ "max_tokens": 100000,
68
+ "input_cost_per_token": 0.00001102,
69
+ "output_cost_per_token": 0.00003268,
70
+ },
71
+ "text-bison-001": {
72
+ "max_tokens": 8192,
73
+ "input_cost_per_token": 0.000004,
74
+ "output_cost_per_token": 0.000004,
75
+ },
76
+ "chat-bison-001": {
77
+ "max_tokens": 4096,
78
+ "input_cost_per_token": 0.000002,
79
+ "output_cost_per_token": 0.000002,
80
+ },
81
+ "command-nightly": {
82
+ "max_tokens": 4096,
83
+ "input_cost_per_token": 0.000015,
84
+ "output_cost_per_token": 0.000015,
85
+ },
86
+ }
87
+
88
+
89
+ class BerriSpendLogger:
90
+ # Class variables or attributes
91
+ def __init__(self):
92
+ # Instance variables
93
+ self.account_id = os.getenv("BERRISPEND_ACCOUNT_ID")
94
+
95
+ def price_calculator(self, model, response_obj, start_time, end_time):
96
+ # try and find if the model is in the model_cost map
97
+ # else default to the average of the costs
98
+ prompt_tokens_cost_usd_dollar = 0
99
+ completion_tokens_cost_usd_dollar = 0
100
+ if model in model_cost:
101
+ prompt_tokens_cost_usd_dollar = (
102
+ model_cost[model]["input_cost_per_token"]
103
+ * response_obj["usage"]["prompt_tokens"]
104
+ )
105
+ completion_tokens_cost_usd_dollar = (
106
+ model_cost[model]["output_cost_per_token"]
107
+ * response_obj["usage"]["completion_tokens"]
108
+ )
109
+ elif "replicate" in model:
110
+ # replicate models are charged based on time
111
+ # llama 2 runs on an nvidia a100 which costs $0.0032 per second - https://replicate.com/replicate/llama-2-70b-chat
112
+ model_run_time = end_time - start_time # assuming time in seconds
113
+ cost_usd_dollar = model_run_time * 0.0032
114
+ prompt_tokens_cost_usd_dollar = cost_usd_dollar / 2
115
+ completion_tokens_cost_usd_dollar = cost_usd_dollar / 2
116
+ else:
117
+ # calculate average input cost
118
+ input_cost_sum = 0
119
+ output_cost_sum = 0
120
+ for model in model_cost:
121
+ input_cost_sum += model_cost[model]["input_cost_per_token"]
122
+ output_cost_sum += model_cost[model]["output_cost_per_token"]
123
+ avg_input_cost = input_cost_sum / len(model_cost.keys())
124
+ avg_output_cost = output_cost_sum / len(model_cost.keys())
125
+ prompt_tokens_cost_usd_dollar = (
126
+ avg_input_cost
127
+ * response_obj["usage"]["prompt_tokens"]
128
+ )
129
+ completion_tokens_cost_usd_dollar = (
130
+ avg_output_cost
131
+ * response_obj["usage"]["completion_tokens"]
132
+ )
133
+ return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar
134
+
135
+ def log_event(
136
+ self, model, messages, response_obj, start_time, end_time, print_verbose
137
+ ):
138
+ # Method definition
139
+ try:
140
+ print_verbose(
141
+ f"BerriSpend Logging - Enters logging function for model {model}"
142
+ )
143
+
144
+ url = f"https://berrispend.berri.ai/spend"
145
+ headers = {"Content-Type": "application/json"}
146
+
147
+ (
148
+ prompt_tokens_cost_usd_dollar,
149
+ completion_tokens_cost_usd_dollar,
150
+ ) = self.price_calculator(model, response_obj, start_time, end_time)
151
+ total_cost = (
152
+ prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar
153
+ )
154
+
155
+ response_time = (end_time - start_time).total_seconds()
156
+ if "response" in response_obj:
157
+ data = [
158
+ {
159
+ "response_time": response_time,
160
+ "model_id": response_obj["model"],
161
+ "total_cost": total_cost,
162
+ "messages": messages,
163
+ "response": response_obj["choices"][0]["message"]["content"],
164
+ "account_id": self.account_id,
165
+ }
166
+ ]
167
+ elif "error" in response_obj:
168
+ data = [
169
+ {
170
+ "response_time": response_time,
171
+ "model_id": response_obj["model"],
172
+ "total_cost": total_cost,
173
+ "messages": messages,
174
+ "error": response_obj["error"],
175
+ "account_id": self.account_id,
176
+ }
177
+ ]
178
+
179
+ print_verbose(f"BerriSpend Logging - final data object: {data}")
180
+ response = requests.post(url, headers=headers, json=data)
181
+ except:
182
+ # traceback.print_exc()
183
+ print_verbose(f"BerriSpend Logging Error - {traceback.format_exc()}")
184
+ pass
litellm/integrations/custom_logger.py ADDED
@@ -0,0 +1,83 @@
1
+ #### What this does ####
2
+ # On success/failure, forwards events to a user-defined custom callback
3
+ import dotenv, os
4
+ import requests
5
+ import requests
6
+
7
+ dotenv.load_dotenv() # Loading env variables using dotenv
8
+ import traceback
9
+
10
+
11
+ class CustomLogger: # https://docs.litellm.ai/docs/observability/custom_callback#callback-class
12
+ # Class variables or attributes
13
+ def __init__(self):
14
+ pass
15
+
16
+ def log_pre_api_call(self, model, messages, kwargs):
17
+ pass
18
+
19
+ def log_post_api_call(self, kwargs, response_obj, start_time, end_time):
20
+ pass
21
+
22
+ def log_stream_event(self, kwargs, response_obj, start_time, end_time):
23
+ pass
24
+
25
+ def log_success_event(self, kwargs, response_obj, start_time, end_time):
26
+ pass
27
+
28
+ def log_failure_event(self, kwargs, response_obj, start_time, end_time):
29
+ pass
30
+
31
+
32
+ #### SINGLE-USE #### - https://docs.litellm.ai/docs/observability/custom_callback#using-your-custom-callback-function
33
+
34
+ def log_input_event(self, model, messages, kwargs, print_verbose, callback_func):
35
+ try:
36
+ kwargs["model"] = model
37
+ kwargs["messages"] = messages
38
+ kwargs["log_event_type"] = "pre_api_call"
39
+ callback_func(
40
+ kwargs,
41
+ )
42
+ print_verbose(
43
+ f"Custom Logger - model call details: {kwargs}"
44
+ )
45
+ except:
46
+ traceback.print_exc()
47
+ print_verbose(f"Custom Logger Error - {traceback.format_exc()}")
48
+
49
+ def log_event(self, kwargs, response_obj, start_time, end_time, print_verbose, callback_func):
50
+ # Method definition
51
+ try:
52
+ kwargs["log_event_type"] = "post_api_call"
53
+ callback_func(
54
+ kwargs, # kwargs to func
55
+ response_obj,
56
+ start_time,
57
+ end_time,
58
+ )
59
+ print_verbose(
60
+ f"Custom Logger - final response object: {response_obj}"
61
+ )
62
+ except:
63
+ # traceback.print_exc()
64
+ print_verbose(f"Custom Logger Error - {traceback.format_exc()}")
65
+ pass
66
+
67
+ async def async_log_event(self, kwargs, response_obj, start_time, end_time, print_verbose, callback_func):
68
+ # Method definition
69
+ try:
70
+ kwargs["log_event_type"] = "post_api_call"
71
+ await callback_func(
72
+ kwargs, # kwargs to func
73
+ response_obj,
74
+ start_time,
75
+ end_time,
76
+ )
77
+ print_verbose(
78
+ f"Custom Logger - final response object: {response_obj}"
79
+ )
80
+ except:
81
+ # traceback.print_exc()
82
+ print_verbose(f"Custom Logger Error - {traceback.format_exc()}")
83
+ pass
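A short sketch of the user-side callback these hooks forward to; the signature mirrors what log_event passes through, while the litellm.success_callback registration is an assumption based on the custom-callback docs linked above:

import litellm

def track_latency_callback(kwargs, response_obj, start_time, end_time):
    # kwargs carries the original call details plus "log_event_type"
    print(kwargs.get("log_event_type"), kwargs.get("model"))
    print("latency (s):", (end_time - start_time).total_seconds())

litellm.success_callback = [track_latency_callback]  # assumed registration hook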
litellm/integrations/helicone.py ADDED
@@ -0,0 +1,114 @@
1
+ #### What this does ####
2
+ # On success, logs events to Helicone
3
+ import dotenv, os
4
+ import requests
5
+
6
+ dotenv.load_dotenv() # Loading env variables using dotenv
7
+ import traceback
8
+
9
+
10
+ class HeliconeLogger:
11
+ # Class variables or attributes
12
+ helicone_model_list = ["gpt", "claude"]
13
+
14
+ def __init__(self):
15
+ # Instance variables
16
+ self.provider_url = "https://api.openai.com/v1"
17
+ self.key = os.getenv("HELICONE_API_KEY")
18
+
19
+ def claude_mapping(self, model, messages, response_obj):
20
+ from anthropic import HUMAN_PROMPT, AI_PROMPT
21
+
22
+ prompt = f"{HUMAN_PROMPT}"
23
+ for message in messages:
24
+ if "role" in message:
25
+ if message["role"] == "user":
26
+ prompt += f"{HUMAN_PROMPT}{message['content']}"
27
+ else:
28
+ prompt += f"{AI_PROMPT}{message['content']}"
29
+ else:
30
+ prompt += f"{HUMAN_PROMPT}{message['content']}"
31
+ prompt += f"{AI_PROMPT}"
32
+ claude_provider_request = {"model": model, "prompt": prompt}
33
+
34
+ claude_response_obj = {
35
+ "completion": response_obj["choices"][0]["message"]["content"],
36
+ "model": model,
37
+ "stop_reason": "stop_sequence",
38
+ }
39
+
40
+ return claude_provider_request, claude_response_obj
41
+
42
+ def log_success(
43
+ self, model, messages, response_obj, start_time, end_time, print_verbose
44
+ ):
45
+ # Method definition
46
+ try:
47
+ print_verbose(
48
+ f"Helicone Logging - Enters logging function for model {model}"
49
+ )
50
+ model = (
51
+ model
52
+ if any(
53
+ accepted_model in model
54
+ for accepted_model in self.helicone_model_list
55
+ )
56
+ else "gpt-3.5-turbo"
57
+ )
58
+ provider_request = {"model": model, "messages": messages}
59
+
60
+ if "claude" in model:
61
+ provider_request, response_obj = self.claude_mapping(
62
+ model=model, messages=messages, response_obj=response_obj
63
+ )
64
+
65
+ providerResponse = {
66
+ "json": response_obj,
67
+ "headers": {"openai-version": "2020-10-01"},
68
+ "status": 200,
69
+ }
70
+
71
+ # Code to be executed
72
+ url = "https://api.hconeai.com/oai/v1/log"
73
+ headers = {
74
+ "Authorization": f"Bearer {self.key}",
75
+ "Content-Type": "application/json",
76
+ }
77
+ start_time_seconds = int(start_time.timestamp())
78
+ start_time_milliseconds = int(
79
+ (start_time.timestamp() - start_time_seconds) * 1000
80
+ )
81
+ end_time_seconds = int(end_time.timestamp())
82
+ end_time_milliseconds = int(
83
+ (end_time.timestamp() - end_time_seconds) * 1000
84
+ )
85
+ data = {
86
+ "providerRequest": {
87
+ "url": self.provider_url,
88
+ "json": provider_request,
89
+ "meta": {"Helicone-Auth": f"Bearer {self.key}"},
90
+ },
91
+ "providerResponse": providerResponse,
92
+ "timing": {
93
+ "startTime": {
94
+ "seconds": start_time_seconds,
95
+ "milliseconds": start_time_milliseconds,
96
+ },
97
+ "endTime": {
98
+ "seconds": end_time_seconds,
99
+ "milliseconds": end_time_milliseconds,
100
+ },
101
+ }, # {"seconds": .., "milliseconds": ..}
102
+ }
103
+ response = requests.post(url, headers=headers, json=data)
104
+ if response.status_code == 200:
105
+ print_verbose("Helicone Logging - Success!")
106
+ else:
107
+ print_verbose(
108
+ f"Helicone Logging - Error Request was not successful. Status Code: {response.status_code}"
109
+ )
110
+ print_verbose(f"Helicone Logging - Error {response.text}")
111
+ except:
112
+ # traceback.print_exc()
113
+ print_verbose(f"Helicone Logging Error - {traceback.format_exc()}")
114
+ pass
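A hedged usage sketch for this logger; the "helicone" string callback name is an assumption about how litellm wires integrations up, and the key is a placeholder:

import os
import litellm

os.environ["HELICONE_API_KEY"] = "sk-helicone-..."  # placeholder, read by HeliconeLogger.__init__
litellm.success_callback = ["helicone"]              # assumed callback name

litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello"}],
)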
litellm/integrations/langfuse.py ADDED
@@ -0,0 +1,75 @@
1
+ #### What this does ####
2
+ # On success, logs events to Langfuse
3
+ import dotenv, os
4
+ import requests
5
+ import requests
6
+ from datetime import datetime
7
+
8
+ dotenv.load_dotenv() # Loading env variables using dotenv
9
+ import traceback
10
+
11
+ class LangFuseLogger:
12
+ # Class variables or attributes
13
+ def __init__(self):
14
+ try:
15
+ from langfuse import Langfuse
16
+ except Exception as e:
17
+ raise Exception(f"\033[91mLangfuse not installed, try running 'pip install langfuse' to fix this error: {e}\033[0m")
18
+ # Instance variables
19
+ self.secret_key = os.getenv("LANGFUSE_SECRET_KEY")
20
+ self.public_key = os.getenv("LANGFUSE_PUBLIC_KEY")
21
+ self.langfuse_host = os.getenv("LANGFUSE_HOST", "https://cloud.langfuse.com")
22
+ self.langfuse_release = os.getenv("LANGFUSE_RELEASE")
23
+ self.langfuse_debug = os.getenv("LANGFUSE_DEBUG")
24
+ self.Langfuse = Langfuse(
25
+ public_key=self.public_key,
26
+ secret_key=self.secret_key,
27
+ host=self.langfuse_host,
28
+ release=self.langfuse_release,
29
+ debug=self.langfuse_debug
30
+ )
31
+
32
+ def log_event(self, kwargs, response_obj, start_time, end_time, print_verbose):
33
+ # Method definition
34
+ from langfuse.model import InitialGeneration, Usage
35
+ try:
36
+ print_verbose(
37
+ f"Langfuse Logging - Enters logging function for model {kwargs}"
38
+ )
39
+ litellm_params = kwargs.get("litellm_params", {})
40
+ metadata = litellm_params.get("metadata", {})
41
+ prompt = [kwargs.get('messages')]
42
+ optional_params = kwargs.get("optional_params", {})
43
+
44
+ # langfuse only accepts str, int, bool, float for logging
45
+ for param, value in optional_params.items():
46
+ if not isinstance(value, (str, int, bool, float)):
47
+ try:
48
+ optional_params[param] = str(value)
49
+ except:
50
+ # if casting value to str fails don't block logging
51
+ pass
52
+
53
+ # end of processing langfuse ########################
54
+ self.Langfuse.generation(InitialGeneration(
55
+ name=metadata.get("generation_name", "litellm-completion"),
56
+ startTime=start_time,
57
+ endTime=end_time,
58
+ model=kwargs['model'],
59
+ modelParameters=optional_params,
60
+ prompt=prompt,
61
+ completion=response_obj['choices'][0]['message'],
62
+ usage=Usage(
63
+ prompt_tokens=response_obj['usage']['prompt_tokens'],
64
+ completion_tokens=response_obj['usage']['completion_tokens']
65
+ ),
66
+ metadata=metadata
67
+ ))
68
+ self.Langfuse.flush()
69
+ print_verbose(
70
+ f"Langfuse Layer Logging - final response object: {response_obj}"
71
+ )
72
+ except:
73
+ # traceback.print_exc()
74
+ print_verbose(f"Langfuse Layer Error - {traceback.format_exc()}")
75
+ pass
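A hedged setup sketch for the Langfuse logger above; the env vars match what __init__ reads, the keys are placeholders, and the "langfuse" callback name is an assumption:

import os
import litellm

os.environ["LANGFUSE_PUBLIC_KEY"] = "pk-lf-..."  # placeholder
os.environ["LANGFUSE_SECRET_KEY"] = "sk-lf-..."  # placeholder

litellm.success_callback = ["langfuse"]  # assumed callback name
litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hi"}],
    metadata={"generation_name": "my-generation"},  # picked up by log_event above
)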
litellm/integrations/langsmith.py ADDED
@@ -0,0 +1,76 @@
1
+ #### What this does ####
2
+ # On success, logs events to Langsmith
3
+ import dotenv, os
4
+ import requests
5
+ import requests
6
+ from datetime import datetime
7
+
8
+ dotenv.load_dotenv() # Loading env variables using dotenv
9
+ import traceback
10
+
11
+ class LangsmithLogger:
12
+ # Class variables or attributes
13
+ def __init__(self):
14
+ self.langsmith_api_key = os.getenv("LANGSMITH_API_KEY")
15
+
16
+
17
+ def log_event(self, kwargs, response_obj, start_time, end_time, print_verbose):
18
+ # Method definition
19
+ # inspired by Langsmith http api here: https://github.com/langchain-ai/langsmith-cookbook/blob/main/tracing-examples/rest/rest.ipynb
20
+ metadata = {}
21
+ if "litellm_params" in kwargs:
22
+ metadata = kwargs["litellm_params"].get("metadata", {})
23
+ # set project name and run_name for langsmith logging
24
+ # users can pass project_name and run name to litellm.completion()
25
+ # Example: litellm.completion(model, messages, metadata={"project_name": "my-litellm-project", "run_name": "my-langsmith-run"})
26
+ # if not set litellm will use default project_name = litellm-completion, run_name = LLMRun
27
+ project_name = metadata.get("project_name", "litellm-completion")
28
+ run_name = metadata.get("run_name", "LLMRun")
29
+ print_verbose(f"Langsmith Logging - project_name: {project_name}, run_name {run_name}")
30
+ try:
31
+ print_verbose(
32
+ f"Langsmith Logging - Enters logging function for model {kwargs}"
33
+ )
34
+ import requests
35
+ import datetime
36
+ from datetime import timezone
37
+ try:
38
+ start_time = kwargs["start_time"].astimezone(timezone.utc).isoformat()
39
+ end_time = kwargs["end_time"].astimezone(timezone.utc).isoformat()
40
+ except:
41
+ start_time = datetime.datetime.utcnow().isoformat()
42
+ end_time = datetime.datetime.utcnow().isoformat()
43
+
44
+ # filter kwargs to exclude any dicts - Langsmith throws an error when logging them
45
+ new_kwargs = {}
46
+ for key in kwargs:
47
+ value = kwargs[key]
48
+ if key == "start_time" or key =="end_time":
49
+ pass
50
+ elif type(value) != dict:
51
+ new_kwargs[key] = value
52
+
53
+ requests.post(
54
+ "https://api.smith.langchain.com/runs",
55
+ json={
56
+ "name": run_name,
57
+ "run_type": "llm", # this should always be llm, since litellm always logs llm calls. Langsmith allow us to log "chain"
58
+ "inputs": {
59
+ **new_kwargs
60
+ },
61
+ "outputs": response_obj,
62
+ "session_name": project_name,
63
+ "start_time": start_time,
64
+ "end_time": end_time,
65
+ },
66
+ headers={
67
+ "x-api-key": self.langsmith_api_key
68
+ }
69
+ )
70
+ print_verbose(
71
+ f"Langsmith Layer Logging - final response object: {response_obj}"
72
+ )
73
+ except:
74
+ # traceback.print_exc()
75
+ print_verbose(f"Langsmith Layer Error - {traceback.format_exc()}")
76
+ pass
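Following the comments in log_event, project and run names can be passed through metadata; a hedged sketch (the "langsmith" callback name is an assumption):

import litellm

litellm.success_callback = ["langsmith"]  # assumed callback name
litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hi"}],
    # read by log_event above via litellm_params["metadata"]
    metadata={"project_name": "my-litellm-project", "run_name": "my-langsmith-run"},
)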
litellm/integrations/litedebugger.py ADDED
@@ -0,0 +1,231 @@
1
+ import requests, traceback, json, os
2
+ import types
3
+
4
+ class LiteDebugger:
5
+ user_email = None
6
+ dashboard_url = None
7
+
8
+ def __init__(self, email=None):
9
+ self.api_url = "https://api.litellm.ai/debugger"
10
+ self.validate_environment(email)
11
+ pass
12
+
13
+ def validate_environment(self, email):
14
+ try:
15
+ self.user_email = (email or os.getenv("LITELLM_TOKEN") or os.getenv("LITELLM_EMAIL"))
16
+ if self.user_email == None: # if users are trying to use_client=True but token not set
17
+ raise ValueError("litellm.use_client = True but no token or email passed. Please set it in litellm.token")
18
+ self.dashboard_url = "https://admin.litellm.ai/" + self.user_email
19
+ try:
20
+ print(
21
+ f"\033[92mHere's your LiteLLM Dashboard 👉 \033[94m\033[4m{self.dashboard_url}\033[0m"
22
+ )
23
+ except:
24
+ print(f"Here's your LiteLLM Dashboard 👉 {self.dashboard_url}")
25
+ if self.user_email == None:
26
+ raise ValueError(
27
+ "[Non-Blocking Error] LiteLLMDebugger: Missing LITELLM_TOKEN. Set it in your environment. Eg.: os.environ['LITELLM_TOKEN']= <your_email>"
28
+ )
29
+ except Exception as e:
30
+ raise ValueError(
31
+ "[Non-Blocking Error] LiteLLMDebugger: Missing LITELLM_TOKEN. Set it in your environment. Eg.: os.environ['LITELLM_TOKEN']= <your_email>"
32
+ )
33
+
34
+ def input_log_event(
35
+ self,
36
+ model,
37
+ messages,
38
+ end_user,
39
+ litellm_call_id,
40
+ call_type,
41
+ print_verbose,
42
+ litellm_params,
43
+ optional_params,
44
+ ):
45
+ print_verbose(f"LiteDebugger: Pre-API Call Logging for call id {litellm_call_id}")
46
+ try:
47
+ print_verbose(
48
+ f"LiteLLMDebugger: Logging - Enters input logging function for model {model}"
49
+ )
50
+
51
+ def remove_key_value(dictionary, key):
52
+ new_dict = dictionary.copy() # Create a copy of the original dictionary
53
+ new_dict.pop(key) # Remove the specified key-value pair from the copy
54
+ return new_dict
55
+
56
+ updated_litellm_params = remove_key_value(litellm_params, "logger_fn")
57
+
58
+ if call_type == "embedding":
59
+ for message in messages: # assuming the input is a list as required by the embedding function
60
+ litellm_data_obj = {
61
+ "model": model,
62
+ "messages": [{"role": "user", "content": message}],
63
+ "end_user": end_user,
64
+ "status": "initiated",
65
+ "litellm_call_id": litellm_call_id,
66
+ "user_email": self.user_email,
67
+ "litellm_params": updated_litellm_params,
68
+ "optional_params": optional_params,
69
+ }
70
+ print_verbose(
71
+ f"LiteLLMDebugger: Logging - logged data obj {litellm_data_obj}"
72
+ )
73
+ response = requests.post(
74
+ url=self.api_url,
75
+ headers={"content-type": "application/json"},
76
+ data=json.dumps(litellm_data_obj),
77
+ )
78
+ print_verbose(f"LiteDebugger: embedding api response - {response.text}")
79
+ elif call_type == "completion":
80
+ litellm_data_obj = {
81
+ "model": model,
82
+ "messages": messages if isinstance(messages, list) else [{"role": "user", "content": messages}],
83
+ "end_user": end_user,
84
+ "status": "initiated",
85
+ "litellm_call_id": litellm_call_id,
86
+ "user_email": self.user_email,
87
+ "litellm_params": updated_litellm_params,
88
+ "optional_params": optional_params,
89
+ }
90
+ print_verbose(
91
+ f"LiteLLMDebugger: Logging - logged data obj {litellm_data_obj}"
92
+ )
93
+ response = requests.post(
94
+ url=self.api_url,
95
+ headers={"content-type": "application/json"},
96
+ data=json.dumps(litellm_data_obj),
97
+ )
98
+ print_verbose(f"LiteDebugger: completion api response - {response.text}")
99
+ except:
100
+ print_verbose(
101
+ f"[Non-Blocking Error] LiteDebugger: Logging Error - {traceback.format_exc()}"
102
+ )
103
+ pass
104
+
105
+ def post_call_log_event(self, original_response, litellm_call_id, print_verbose, call_type, stream):
106
+ print_verbose(f"LiteDebugger: Post-API Call Logging for call id {litellm_call_id}")
107
+ try:
108
+ if call_type == "embedding":
109
+ litellm_data_obj = {
110
+ "status": "received",
111
+ "additional_details": {"original_response": str(original_response["data"][0]["embedding"][:5])}, # don't store the entire vector
112
+ "litellm_call_id": litellm_call_id,
113
+ "user_email": self.user_email,
114
+ }
115
+ elif call_type == "completion" and not stream:
116
+ litellm_data_obj = {
117
+ "status": "received",
118
+ "additional_details": {"original_response": original_response},
119
+ "litellm_call_id": litellm_call_id,
120
+ "user_email": self.user_email,
121
+ }
122
+ elif call_type == "completion" and stream:
123
+ litellm_data_obj = {
124
+ "status": "received",
125
+ "additional_details": {"original_response": "Streamed response" if isinstance(original_response, types.GeneratorType) else original_response},
126
+ "litellm_call_id": litellm_call_id,
127
+ "user_email": self.user_email,
128
+ }
129
+ print_verbose(f"litedebugger post-call data object - {litellm_data_obj}")
130
+ response = requests.post(
131
+ url=self.api_url,
132
+ headers={"content-type": "application/json"},
133
+ data=json.dumps(litellm_data_obj),
134
+ )
135
+ print_verbose(f"LiteDebugger: api response - {response.text}")
136
+ except:
137
+ print_verbose(
138
+ f"[Non-Blocking Error] LiteDebugger: Logging Error - {traceback.format_exc()}"
139
+ )
140
+
141
+ def log_event(
142
+ self,
143
+ end_user,
144
+ response_obj,
145
+ start_time,
146
+ end_time,
147
+ litellm_call_id,
148
+ print_verbose,
149
+ call_type,
150
+ stream = False
151
+ ):
152
+ print_verbose(f"LiteDebugger: Success/Failure Call Logging for call id {litellm_call_id}")
153
+ try:
154
+ print_verbose(
155
+ f"LiteLLMDebugger: Success/Failure Logging - Enters handler logging function for function {call_type} and stream set to {stream} with response object {response_obj}"
156
+ )
157
+ total_cost = 0 # [TODO] implement cost tracking
158
+ response_time = (end_time - start_time).total_seconds()
159
+ if call_type == "completion" and stream == False:
160
+ litellm_data_obj = {
161
+ "response_time": response_time,
162
+ "total_cost": total_cost,
163
+ "response": response_obj["choices"][0]["message"]["content"],
164
+ "litellm_call_id": litellm_call_id,
165
+ "status": "success",
166
+ }
167
+ print_verbose(
168
+ f"LiteDebugger: Logging - final data object: {litellm_data_obj}"
169
+ )
170
+ response = requests.post(
171
+ url=self.api_url,
172
+ headers={"content-type": "application/json"},
173
+ data=json.dumps(litellm_data_obj),
174
+ )
175
+ elif call_type == "embedding":
176
+ litellm_data_obj = {
177
+ "response_time": response_time,
178
+ "total_cost": total_cost,
179
+ "response": str(response_obj["data"][0]["embedding"][:5]),
180
+ "litellm_call_id": litellm_call_id,
181
+ "status": "success",
182
+ }
183
+ response = requests.post(
184
+ url=self.api_url,
185
+ headers={"content-type": "application/json"},
186
+ data=json.dumps(litellm_data_obj),
187
+ )
188
+ elif call_type == "completion" and stream == True:
189
+ if len(response_obj["content"]) > 0: # don't log the empty strings
190
+ litellm_data_obj = {
191
+ "response_time": response_time,
192
+ "total_cost": total_cost,
193
+ "response": response_obj["content"],
194
+ "litellm_call_id": litellm_call_id,
195
+ "status": "success",
196
+ }
197
+ print_verbose(
198
+ f"LiteDebugger: Logging - final data object: {litellm_data_obj}"
199
+ )
200
+ response = requests.post(
201
+ url=self.api_url,
202
+ headers={"content-type": "application/json"},
203
+ data=json.dumps(litellm_data_obj),
204
+ )
205
+ elif "error" in response_obj:
206
+ if "Unable to map your input to a model." in response_obj["error"]:
207
+ total_cost = 0
208
+ litellm_data_obj = {
209
+ "response_time": response_time,
210
+ "model": response_obj["model"],
211
+ "total_cost": total_cost,
212
+ "error": response_obj["error"],
213
+ "end_user": end_user,
214
+ "litellm_call_id": litellm_call_id,
215
+ "status": "failure",
216
+ "user_email": self.user_email,
217
+ }
218
+ print_verbose(
219
+ f"LiteDebugger: Logging - final data object: {litellm_data_obj}"
220
+ )
221
+ response = requests.post(
222
+ url=self.api_url,
223
+ headers={"content-type": "application/json"},
224
+ data=json.dumps(litellm_data_obj),
225
+ )
226
+ print_verbose(f"LiteDebugger: api response - {response.text}")
227
+ except:
228
+ print_verbose(
229
+ f"[Non-Blocking Error] LiteDebugger: Logging Error - {traceback.format_exc()}"
230
+ )
231
+ pass
litellm/integrations/llmonitor.py ADDED
@@ -0,0 +1,127 @@
1
+ #### What this does ####
2
+ # On success + failure, log events to LLMonitor
3
+ import datetime
4
+ import traceback
5
+ import dotenv
6
+ import os
7
+ import requests
8
+
9
+ dotenv.load_dotenv() # Loading env variables using dotenv
10
+
11
+
12
+ # convert to {completion: xx, tokens: xx}
13
+ def parse_usage(usage):
14
+ return {
15
+ "completion": usage["completion_tokens"] if "completion_tokens" in usage else 0,
16
+ "prompt": usage["prompt_tokens"] if "prompt_tokens" in usage else 0,
17
+ }
18
+
19
+
20
+ def parse_messages(input):
21
+ if input is None:
22
+ return None
23
+
24
+ def clean_message(message):
25
+ # if it is a string, return it as is
26
+ if isinstance(message, str):
27
+ return message
28
+
29
+ if "message" in message:
30
+ return clean_message(message["message"])
31
+ text = message["content"]
32
+ if text == None:
33
+ text = message.get("function_call", None)
34
+
35
+ return {
36
+ "role": message["role"],
37
+ "text": text,
38
+ }
39
+
40
+ if isinstance(input, list):
41
+ if len(input) == 1:
42
+ return clean_message(input[0])
43
+ else:
44
+ return [clean_message(msg) for msg in input]
45
+ else:
46
+ return clean_message(input)
47
+
48
+
49
+ class LLMonitorLogger:
50
+ # Class variables or attributes
51
+ def __init__(self):
52
+ # Instance variables
53
+ self.api_url = os.getenv("LLMONITOR_API_URL") or "https://app.llmonitor.com"
54
+ self.app_id = os.getenv("LLMONITOR_APP_ID")
55
+
56
+ def log_event(
57
+ self,
58
+ type,
59
+ event,
60
+ run_id,
61
+ model,
62
+ print_verbose,
63
+ input=None,
64
+ user_id=None,
65
+ response_obj=None,
66
+ start_time=datetime.datetime.now(),
67
+ end_time=datetime.datetime.now(),
68
+ error=None,
69
+ ):
70
+ # Method definition
71
+ try:
72
+ print_verbose(f"LLMonitor Logging - Logging request for model {model}")
73
+
74
+ if response_obj:
75
+ usage = (
76
+ parse_usage(response_obj["usage"])
77
+ if "usage" in response_obj
78
+ else None
79
+ )
80
+ output = response_obj["choices"] if "choices" in response_obj else None
81
+ else:
82
+ usage = None
83
+ output = None
84
+
85
+ if error:
86
+ error_obj = {"stack": error}
87
+
88
+ else:
89
+ error_obj = None
90
+
91
+ data = [
92
+ {
93
+ "type": type,
94
+ "name": model,
95
+ "runId": run_id,
96
+ "app": self.app_id,
97
+ "event": "start",
98
+ "timestamp": start_time.isoformat(),
99
+ "userId": user_id,
100
+ "input": parse_messages(input),
101
+ },
102
+ {
103
+ "type": type,
104
+ "runId": run_id,
105
+ "app": self.app_id,
106
+ "event": event,
107
+ "error": error_obj,
108
+ "timestamp": end_time.isoformat(),
109
+ "userId": user_id,
110
+ "output": parse_messages(output),
111
+ "tokensUsage": usage,
112
+ },
113
+ ]
114
+
115
+ print_verbose(f"LLMonitor Logging - final data object: {data}")
116
+
117
+ response = requests.post(
118
+ self.api_url + "/api/report",
119
+ headers={"Content-Type": "application/json"},
120
+ json={"events": data},
121
+ )
122
+
123
+ print_verbose(f"LLMonitor Logging - response: {response}")
124
+ except:
125
+ # traceback.print_exc()
126
+ print_verbose(f"LLMonitor Logging Error - {traceback.format_exc()}")
127
+ pass
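For reference, the two helpers at the top of this file normalize payloads as follows (a small illustrative snippet using the functions defined above):

messages = [{"role": "user", "content": "Hi"}]
print(parse_messages(messages))                                    # {'role': 'user', 'text': 'Hi'}
print(parse_usage({"prompt_tokens": 10, "completion_tokens": 3}))  # {'completion': 3, 'prompt': 10}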
litellm/integrations/prompt_layer.py ADDED
@@ -0,0 +1,72 @@
1
+ #### What this does ####
2
+ # On success, logs events to Promptlayer
3
+ import dotenv, os
4
+ import requests
5
+ import requests
6
+
7
+ dotenv.load_dotenv() # Loading env variables using dotenv
8
+ import traceback
9
+
10
+
11
+ class PromptLayerLogger:
12
+ # Class variables or attributes
13
+ def __init__(self):
14
+ # Instance variables
15
+ self.key = os.getenv("PROMPTLAYER_API_KEY")
16
+
17
+ def log_event(self, kwargs, response_obj, start_time, end_time, print_verbose):
18
+ # Method definition
19
+ try:
20
+ new_kwargs = {}
21
+ new_kwargs['model'] = kwargs['model']
22
+ new_kwargs['messages'] = kwargs['messages']
23
+
24
+ # add kwargs["optional_params"] to new_kwargs
25
+ for optional_param in kwargs["optional_params"]:
26
+ new_kwargs[optional_param] = kwargs["optional_params"][optional_param]
27
+
28
+
29
+ print_verbose(
30
+ f"Prompt Layer Logging - Enters logging function for model kwargs: {new_kwargs}\n, response: {response_obj}"
31
+ )
32
+
33
+
34
+ request_response = requests.post(
35
+ "https://api.promptlayer.com/rest/track-request",
36
+ json={
37
+ "function_name": "openai.ChatCompletion.create",
38
+ "kwargs": new_kwargs,
39
+ "tags": ["hello", "world"],
40
+ "request_response": dict(response_obj),
41
+ "request_start_time": int(start_time.timestamp()),
42
+ "request_end_time": int(end_time.timestamp()),
43
+ "api_key": self.key,
44
+ # Optional params for PromptLayer
45
+ # "prompt_id": "<PROMPT ID>",
46
+ # "prompt_input_variables": "<Dictionary of variables for prompt>",
47
+ # "prompt_version":1,
48
+ },
49
+ )
50
+ print_verbose(
51
+ f"Prompt Layer Logging: success - final response object: {request_response.text}"
52
+ )
53
+ response_json = request_response.json()
54
+ if "success" not in request_response.json():
55
+ raise Exception("Promptlayer did not successfully log the response!")
56
+
57
+ if "request_id" in response_json:
58
+ print_verbose(kwargs["litellm_params"]["metadata"])
59
+ if kwargs["litellm_params"]["metadata"] is not None:
60
+ response = requests.post(
61
+ "https://api.promptlayer.com/rest/track-metadata",
62
+ json={
63
+ "request_id": response_json["request_id"],
64
+ "api_key": self.key,
65
+ "metadata": kwargs["litellm_params"]["metadata"]
66
+ },
67
+ )
68
+ print_verbose(f"Prompt Layer Logging: success - metadata post response object: {response.text}")
69
+
70
+ except:
71
+ print_verbose(f"error: Prompt Layer Error - {traceback.format_exc()}")
72
+ pass
litellm/integrations/supabase.py ADDED
@@ -0,0 +1,116 @@
1
+ #### What this does ####
2
+ # On success + failure, log events to Supabase
3
+
4
+ import dotenv, os
5
+ import requests
6
+
7
+ dotenv.load_dotenv() # Loading env variables using dotenv
8
+ import traceback
9
+ import datetime, subprocess, sys
10
+ import litellm
11
+
12
+ class Supabase:
13
+ # Class variables or attributes
14
+ supabase_table_name = "request_logs"
15
+
16
+ def __init__(self):
17
+ # Instance variables
18
+ self.supabase_url = os.getenv("SUPABASE_URL")
19
+ self.supabase_key = os.getenv("SUPABASE_KEY")
20
+ try:
21
+ import supabase
22
+ except ImportError:
23
+ subprocess.check_call([sys.executable, "-m", "pip", "install", "supabase"])
24
+ import supabase
25
+ self.supabase_client = supabase.create_client(
26
+ self.supabase_url, self.supabase_key
27
+ )
28
+
29
+ def input_log_event(
30
+ self, model, messages, end_user, litellm_call_id, print_verbose
31
+ ):
32
+ try:
33
+ print_verbose(
34
+ f"Supabase Logging - Enters input logging function for model {model}"
35
+ )
36
+ supabase_data_obj = {
37
+ "model": model,
38
+ "messages": messages,
39
+ "end_user": end_user,
40
+ "status": "initiated",
41
+ "litellm_call_id": litellm_call_id,
42
+ }
43
+ data, count = (
44
+ self.supabase_client.table(self.supabase_table_name)
45
+ .insert(supabase_data_obj)
46
+ .execute()
47
+ )
48
+ print_verbose(f"data: {data}")
49
+ except:
50
+ print_verbose(f"Supabase Logging Error - {traceback.format_exc()}")
51
+ pass
52
+
53
+ def log_event(
54
+ self,
55
+ model,
56
+ messages,
57
+ end_user,
58
+ response_obj,
59
+ start_time,
60
+ end_time,
61
+ litellm_call_id,
62
+ print_verbose,
63
+ ):
64
+ try:
65
+ print_verbose(
66
+ f"Supabase Logging - Enters logging function for model {model}, response_obj: {response_obj}"
67
+ )
68
+
69
+ total_cost = litellm.completion_cost(completion_response=response_obj)
70
+
71
+ response_time = (end_time - start_time).total_seconds()
72
+ if "choices" in response_obj:
73
+ supabase_data_obj = {
74
+ "response_time": response_time,
75
+ "model": response_obj["model"],
76
+ "total_cost": total_cost,
77
+ "messages": messages,
78
+ "response": response_obj["choices"][0]["message"]["content"],
79
+ "end_user": end_user,
80
+ "litellm_call_id": litellm_call_id,
81
+ "status": "success",
82
+ }
83
+ print_verbose(
84
+ f"Supabase Logging - final data object: {supabase_data_obj}"
85
+ )
86
+ data, count = (
87
+ self.supabase_client.table(self.supabase_table_name)
88
+ .upsert(supabase_data_obj, on_conflict="litellm_call_id")
89
+ .execute()
90
+ )
91
+ elif "error" in response_obj:
92
+ if "Unable to map your input to a model." in response_obj["error"]:
93
+ total_cost = 0
94
+ supabase_data_obj = {
95
+ "response_time": response_time,
96
+ "model": response_obj["model"],
97
+ "total_cost": total_cost,
98
+ "messages": messages,
99
+ "error": response_obj["error"],
100
+ "end_user": end_user,
101
+ "litellm_call_id": litellm_call_id,
102
+ "status": "failure",
103
+ }
104
+ print_verbose(
105
+ f"Supabase Logging - final data object: {supabase_data_obj}"
106
+ )
107
+ data, count = (
108
+ self.supabase_client.table(self.supabase_table_name)
109
+ .upsert(supabase_data_obj, on_conflict="litellm_call_id")
110
+ .execute()
111
+ )
112
+
113
+ except:
114
+ # traceback.print_exc()
115
+ print_verbose(f"Supabase Logging Error - {traceback.format_exc()}")
116
+ pass
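A hedged setup sketch for the Supabase logger; the env vars match what __init__ reads, the URL/key are placeholders, and the "supabase" callback name is an assumption:

import os
import litellm

os.environ["SUPABASE_URL"] = "https://xyzcompany.supabase.co"  # placeholder
os.environ["SUPABASE_KEY"] = "public-anon-key"                 # placeholder

litellm.success_callback = ["supabase"]  # assumed callback name
litellm.completion(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hi"}])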
litellm/integrations/traceloop.py ADDED
@@ -0,0 +1,78 @@
1
+ class TraceloopLogger:
2
+ def __init__(self):
3
+ from traceloop.sdk.tracing.tracing import TracerWrapper
4
+
5
+ self.tracer_wrapper = TracerWrapper()
6
+
7
+ def log_event(self, kwargs, response_obj, start_time, end_time, print_verbose):
8
+ from opentelemetry.trace import SpanKind
9
+ from opentelemetry.semconv.ai import SpanAttributes
10
+
11
+ try:
12
+ tracer = self.tracer_wrapper.get_tracer()
13
+
14
+ model = kwargs.get("model")
15
+
16
+ # LiteLLM uses the standard OpenAI library, so it's already handled by Traceloop SDK
17
+ if "gpt" in model:
18
+ return
19
+
20
+ with tracer.start_as_current_span(
21
+ "litellm.completion",
22
+ kind=SpanKind.CLIENT,
23
+ ) as span:
24
+ if span.is_recording():
25
+ span.set_attribute(
26
+ SpanAttributes.LLM_REQUEST_MODEL, kwargs.get("model")
27
+ )
28
+ span.set_attribute(
29
+ SpanAttributes.LLM_REQUEST_MAX_TOKENS, kwargs.get("max_tokens")
30
+ )
31
+ span.set_attribute(
32
+ SpanAttributes.LLM_TEMPERATURE, kwargs.get("temperature")
33
+ )
34
+
35
+ for idx, prompt in enumerate(kwargs.get("messages")):
36
+ span.set_attribute(
37
+ f"{SpanAttributes.LLM_PROMPTS}.{idx}.role",
38
+ prompt.get("role"),
39
+ )
40
+ span.set_attribute(
41
+ f"{SpanAttributes.LLM_PROMPTS}.{idx}.content",
42
+ prompt.get("content"),
43
+ )
44
+
45
+ span.set_attribute(
46
+ SpanAttributes.LLM_RESPONSE_MODEL, response_obj.get("model")
47
+ )
48
+ usage = response_obj.get("usage")
49
+ if usage:
50
+ span.set_attribute(
51
+ SpanAttributes.LLM_USAGE_TOTAL_TOKENS,
52
+ usage.get("total_tokens"),
53
+ )
54
+ span.set_attribute(
55
+ SpanAttributes.LLM_USAGE_COMPLETION_TOKENS,
56
+ usage.get("completion_tokens"),
57
+ )
58
+ span.set_attribute(
59
+ SpanAttributes.LLM_USAGE_PROMPT_TOKENS,
60
+ usage.get("prompt_tokens"),
61
+ )
62
+
63
+ for idx, choice in enumerate(response_obj.get("choices")):
64
+ span.set_attribute(
65
+ f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.finish_reason",
66
+ choice.get("finish_reason"),
67
+ )
68
+ span.set_attribute(
69
+ f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.role",
70
+ choice.get("message").get("role"),
71
+ )
72
+ span.set_attribute(
73
+ f"{SpanAttributes.LLM_COMPLETIONS}.{idx}.content",
74
+ choice.get("message").get("content"),
75
+ )
76
+
77
+ except Exception as e:
78
+ print_verbose(f"Traceloop Layer Error - {e}")
litellm/integrations/weights_biases.py ADDED
@@ -0,0 +1,219 @@
1
+ imported_openAIResponse=True
2
+ try:
3
+ import io
4
+ import logging
5
+ import sys
6
+ from typing import Any, Dict, List, Optional, TypeVar
7
+
8
+ from wandb.sdk.data_types import trace_tree
9
+
10
+ if sys.version_info >= (3, 8):
11
+ from typing import Literal, Protocol
12
+ else:
13
+ from typing_extensions import Literal, Protocol
14
+
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
+ K = TypeVar("K", bound=str)
20
+ V = TypeVar("V")
21
+
22
+
23
+ class OpenAIResponse(Protocol[K, V]): # type: ignore
24
+ # contains a (known) object attribute
25
+ object: Literal["chat.completion", "edit", "text_completion"]
26
+
27
+ def __getitem__(self, key: K) -> V:
28
+ ... # pragma: no cover
29
+
30
+ def get(self, key: K, default: Optional[V] = None) -> Optional[V]:
31
+ ... # pragma: no cover
32
+
33
+
34
+ class OpenAIRequestResponseResolver:
35
+ def __call__(
36
+ self,
37
+ request: Dict[str, Any],
38
+ response: OpenAIResponse,
39
+ time_elapsed: float,
40
+ ) -> Optional[trace_tree.WBTraceTree]:
41
+ try:
42
+ if response["object"] == "edit":
43
+ return self._resolve_edit(request, response, time_elapsed)
44
+ elif response["object"] == "text_completion":
45
+ return self._resolve_completion(request, response, time_elapsed)
46
+ elif response["object"] == "chat.completion":
47
+ return self._resolve_chat_completion(request, response, time_elapsed)
48
+ else:
49
+ logger.info(f"Unknown OpenAI response object: {response['object']}")
50
+ except Exception as e:
51
+ logger.warning(f"Failed to resolve request/response: {e}")
52
+ return None
53
+
54
+ @staticmethod
55
+ def results_to_trace_tree(
56
+ request: Dict[str, Any],
57
+ response: OpenAIResponse,
58
+ results: List[trace_tree.Result],
59
+ time_elapsed: float,
60
+ ) -> trace_tree.WBTraceTree:
61
+ """Converts the request, response, and results into a trace tree.
62
+
63
+ params:
64
+ request: The request dictionary
65
+ response: The response object
66
+ results: A list of results object
67
+ time_elapsed: The time elapsed in seconds
68
+ returns:
69
+ A wandb trace tree object.
70
+ """
71
+ start_time_ms = int(round(response["created"] * 1000))
72
+ end_time_ms = start_time_ms + int(round(time_elapsed * 1000))
73
+ span = trace_tree.Span(
74
+ name=f"{response.get('model', 'openai')}_{response['object']}_{response.get('created')}",
75
+ attributes=dict(response), # type: ignore
76
+ start_time_ms=start_time_ms,
77
+ end_time_ms=end_time_ms,
78
+ span_kind=trace_tree.SpanKind.LLM,
79
+ results=results,
80
+ )
81
+ model_obj = {"request": request, "response": response, "_kind": "openai"}
82
+ return trace_tree.WBTraceTree(root_span=span, model_dict=model_obj)
83
+
84
+ def _resolve_edit(
85
+ self,
86
+ request: Dict[str, Any],
87
+ response: OpenAIResponse,
88
+ time_elapsed: float,
89
+ ) -> trace_tree.WBTraceTree:
90
+ """Resolves the request and response objects for `openai.Edit`."""
91
+ request_str = (
92
+ f"\n\n**Instruction**: {request['instruction']}\n\n"
93
+ f"**Input**: {request['input']}\n"
94
+ )
95
+ choices = [
96
+ f"\n\n**Edited**: {choice['text']}\n" for choice in response["choices"]
97
+ ]
98
+
99
+ return self._request_response_result_to_trace(
100
+ request=request,
101
+ response=response,
102
+ request_str=request_str,
103
+ choices=choices,
104
+ time_elapsed=time_elapsed,
105
+ )
106
+
107
+ def _resolve_completion(
108
+ self,
109
+ request: Dict[str, Any],
110
+ response: OpenAIResponse,
111
+ time_elapsed: float,
112
+ ) -> trace_tree.WBTraceTree:
113
+ """Resolves the request and response objects for `openai.Completion`."""
114
+ request_str = f"\n\n**Prompt**: {request['prompt']}\n"
115
+ choices = [
116
+ f"\n\n**Completion**: {choice['text']}\n" for choice in response["choices"]
117
+ ]
118
+
119
+ return self._request_response_result_to_trace(
120
+ request=request,
121
+ response=response,
122
+ request_str=request_str,
123
+ choices=choices,
124
+ time_elapsed=time_elapsed,
125
+ )
126
+
127
+ def _resolve_chat_completion(
128
+ self,
129
+ request: Dict[str, Any],
130
+ response: OpenAIResponse,
131
+ time_elapsed: float,
132
+ ) -> trace_tree.WBTraceTree:
133
+ """Resolves the request and response objects for `openai.Completion`."""
134
+ prompt = io.StringIO()
135
+ for message in request["messages"]:
136
+ prompt.write(f"\n\n**{message['role']}**: {message['content']}\n")
137
+ request_str = prompt.getvalue()
138
+
139
+ choices = [
140
+ f"\n\n**{choice['message']['role']}**: {choice['message']['content']}\n"
141
+ for choice in response["choices"]
142
+ ]
143
+
144
+ return self._request_response_result_to_trace(
145
+ request=request,
146
+ response=response,
147
+ request_str=request_str,
148
+ choices=choices,
149
+ time_elapsed=time_elapsed,
150
+ )
151
+
152
+ def _request_response_result_to_trace(
153
+ self,
154
+ request: Dict[str, Any],
155
+ response: OpenAIResponse,
156
+ request_str: str,
157
+ choices: List[str],
158
+ time_elapsed: float,
159
+ ) -> trace_tree.WBTraceTree:
160
+ """Resolves the request and response objects for `openai.Completion`."""
161
+ results = [
162
+ trace_tree.Result(
163
+ inputs={"request": request_str},
164
+ outputs={"response": choice},
165
+ )
166
+ for choice in choices
167
+ ]
168
+ trace = self.results_to_trace_tree(request, response, results, time_elapsed)
169
+ return trace
170
+ except:
171
+ imported_openAIResponse=False
172
+
173
+
174
+
175
+ #### What this does ####
176
+ # On success, logs events to Weights & Biases
177
+ import dotenv, os
178
+ import requests
179
+ import requests
180
+ from datetime import datetime
181
+
182
+ dotenv.load_dotenv() # Loading env variables using dotenv
183
+ import traceback
184
+
185
+ class WeightsBiasesLogger:
186
+ # Class variables or attributes
187
+ def __init__(self):
188
+ try:
189
+ import wandb
190
+ except:
191
+ raise Exception("\033[91m wandb not installed, try running 'pip install wandb' to fix this error\033[0m")
192
+ if imported_openAIResponse==False:
193
+ raise Exception("\033[91m wandb is installed but its trace_tree utilities could not be imported, try running 'pip install -U wandb' to fix this error\033[0m")
194
+ self.resolver = OpenAIRequestResponseResolver()
195
+
196
+ def log_event(self, kwargs, response_obj, start_time, end_time, print_verbose):
197
+ # Method definition
198
+ import wandb
199
+
200
+ try:
201
+ print_verbose(
202
+ f"W&B Logging - Enters logging function for model {kwargs}"
203
+ )
204
+ run = wandb.init()
205
+ print_verbose(response_obj)
206
+
207
+ trace = self.resolver(kwargs, response_obj, (end_time-start_time).total_seconds())
208
+
209
+ if trace is not None:
210
+ run.log({"trace": trace})
211
+
212
+ run.finish()
213
+ print_verbose(
214
+ f"W&B Logging Logging - final response object: {response_obj}"
215
+ )
216
+ except:
217
+ # traceback.print_exc()
218
+ print_verbose(f"W&B Logging Layer Error - {traceback.format_exc()}")
219
+ pass
litellm/llms/__init__.py ADDED
@@ -0,0 +1 @@
1
+ from . import *
litellm/llms/ai21.py ADDED
@@ -0,0 +1,194 @@
1
+ import os, types, traceback
2
+ import json
3
+ from enum import Enum
4
+ import requests
5
+ import time, httpx
6
+ from typing import Callable, Optional
7
+ from litellm.utils import ModelResponse, Choices, Message
8
+ import litellm
9
+
10
+ class AI21Error(Exception):
11
+ def __init__(self, status_code, message):
12
+ self.status_code = status_code
13
+ self.message = message
14
+ self.request = httpx.Request(method="POST", url="https://api.ai21.com/studio/v1/")
15
+ self.response = httpx.Response(status_code=status_code, request=self.request)
16
+ super().__init__(
17
+ self.message
18
+ ) # Call the base class constructor with the parameters it needs
19
+
20
+ class AI21Config():
21
+ """
22
+ Reference: https://docs.ai21.com/reference/j2-complete-ref
23
+
24
+ The class `AI21Config` provides configuration for the AI21's API interface. Below are the parameters:
25
+
26
+ - `numResults` (int32): Number of completions to sample and return. Optional, default is 1. If the temperature is greater than 0 (non-greedy decoding), a value greater than 1 can be meaningful.
27
+
28
+ - `maxTokens` (int32): The maximum number of tokens to generate per result. Optional, default is 16. If no `stopSequences` are given, generation stops after producing `maxTokens`.
29
+
30
+ - `minTokens` (int32): The minimum number of tokens to generate per result. Optional, default is 0. If `stopSequences` are given, they are ignored until `minTokens` are generated.
31
+
32
+ - `temperature` (float): Modifies the distribution from which tokens are sampled. Optional, default is 0.7. A value of 0 essentially disables sampling and results in greedy decoding.
33
+
34
+ - `topP` (float): Used for sampling tokens from the corresponding top percentile of probability mass. Optional, default is 1. For instance, a value of 0.9 considers only tokens comprising the top 90% probability mass.
35
+
36
+ - `stopSequences` (array of strings): Stops decoding if any of the input strings is generated. Optional.
37
+
38
+ - `topKReturn` (int32): Range between 0 to 10, including both. Optional, default is 0. Specifies the top-K alternative tokens to return. A non-zero value includes the string representations and log-probabilities for each of the top-K alternatives at each position.
39
+
40
+ - `frequencyPenalty` (object): Placeholder for frequency penalty object.
41
+
42
+ - `presencePenalty` (object): Placeholder for presence penalty object.
43
+
44
+ - `countPenalty` (object): Placeholder for count penalty object.
45
+ """
46
+ numResults: Optional[int]=None
47
+ maxTokens: Optional[int]=None
48
+ minTokens: Optional[int]=None
49
+ temperature: Optional[float]=None
50
+ topP: Optional[float]=None
51
+ stopSequences: Optional[list]=None
52
+ topKReturn: Optional[int]=None
53
+ frequencePenalty: Optional[dict]=None
54
+ presencePenalty: Optional[dict]=None
55
+ countPenalty: Optional[dict]=None
56
+
57
+ def __init__(self,
58
+ numResults: Optional[int]=None,
59
+ maxTokens: Optional[int]=None,
60
+ minTokens: Optional[int]=None,
61
+ temperature: Optional[float]=None,
62
+ topP: Optional[float]=None,
63
+ stopSequences: Optional[list]=None,
64
+ topKReturn: Optional[int]=None,
65
+ frequencePenalty: Optional[dict]=None,
66
+ presencePenalty: Optional[dict]=None,
67
+ countPenalty: Optional[dict]=None) -> None:
68
+ locals_ = locals()
69
+ for key, value in locals_.items():
70
+ if key != 'self' and value is not None:
71
+ setattr(self.__class__, key, value)
72
+
73
+ @classmethod
74
+ def get_config(cls):
75
+ return {k: v for k, v in cls.__dict__.items()
76
+ if not k.startswith('__')
77
+ and not isinstance(v, (types.FunctionType, types.BuiltinFunctionType, classmethod, staticmethod))
78
+ and v is not None}
79
+
80
+
81
+
82
+ def validate_environment(api_key):
83
+ if api_key is None:
84
+ raise ValueError(
85
+ "Missing AI21 API Key - A call is being made to ai21 but no key is set either in the environment variables or via params"
86
+ )
87
+ headers = {
88
+ "accept": "application/json",
89
+ "content-type": "application/json",
90
+ "Authorization": "Bearer " + api_key,
91
+ }
92
+ return headers
93
+
94
+ def completion(
95
+ model: str,
96
+ messages: list,
97
+ api_base: str,
98
+ model_response: ModelResponse,
99
+ print_verbose: Callable,
100
+ encoding,
101
+ api_key,
102
+ logging_obj,
103
+ optional_params=None,
104
+ litellm_params=None,
105
+ logger_fn=None,
106
+ ):
107
+ headers = validate_environment(api_key)
108
+ model = model
109
+ prompt = ""
110
+ for message in messages:
111
+ if "role" in message:
112
+ if message["role"] == "user":
113
+ prompt += (
114
+ f"{message['content']}"
115
+ )
116
+ else:
117
+ prompt += (
118
+ f"{message['content']}"
119
+ )
120
+ else:
121
+ prompt += f"{message['content']}"
122
+
123
+ ## Load Config
124
+ config = litellm.AI21Config.get_config()
125
+ for k, v in config.items():
126
+ if k not in optional_params: # completion(top_k=3) > ai21_config(top_k=3) <- allows for dynamic variables to be passed in
127
+ optional_params[k] = v
128
+
129
+ data = {
130
+ "prompt": prompt,
131
+ # "instruction": prompt, # some baseten models require the prompt to be passed in via the 'instruction' kwarg
132
+ **optional_params,
133
+ }
134
+
135
+ ## LOGGING
136
+ logging_obj.pre_call(
137
+ input=prompt,
138
+ api_key=api_key,
139
+ additional_args={"complete_input_dict": data},
140
+ )
141
+ ## COMPLETION CALL
142
+ response = requests.post(
143
+ api_base + model + "/complete", headers=headers, data=json.dumps(data)
144
+ )
145
+ if response.status_code != 200:
146
+ raise AI21Error(
147
+ status_code=response.status_code,
148
+ message=response.text
149
+ )
150
+ if "stream" in optional_params and optional_params["stream"] == True:
151
+ return response.iter_lines()
152
+ else:
153
+ ## LOGGING
154
+ logging_obj.post_call(
155
+ input=prompt,
156
+ api_key=api_key,
157
+ original_response=response.text,
158
+ additional_args={"complete_input_dict": data},
159
+ )
160
+ ## RESPONSE OBJECT
161
+ completion_response = response.json()
162
+ try:
163
+ choices_list = []
164
+ for idx, item in enumerate(completion_response["completions"]):
165
+ if len(item["data"]["text"]) > 0:
166
+ message_obj = Message(content=item["data"]["text"])
167
+ else:
168
+ message_obj = Message(content=None)
169
+ choice_obj = Choices(finish_reason=item["finishReason"]["reason"], index=idx+1, message=message_obj)
170
+ choices_list.append(choice_obj)
171
+ model_response["choices"] = choices_list
172
+ except Exception as e:
173
+ raise AI21Error(message=traceback.format_exc(), status_code=response.status_code)
174
+
175
+ ## CALCULATING USAGE - baseten charges on time, not tokens - have some mapping of cost here.
176
+ prompt_tokens = len(
177
+ encoding.encode(prompt)
178
+ )
179
+ completion_tokens = len(
180
+ encoding.encode(model_response["choices"][0]["message"].get("content"))
181
+ )
182
+
183
+ model_response["created"] = int(time.time())
184
+ model_response["model"] = model
185
+ model_response["usage"] = {
186
+ "prompt_tokens": prompt_tokens,
187
+ "completion_tokens": completion_tokens,
188
+ "total_tokens": prompt_tokens + completion_tokens,
189
+ }
190
+ return model_response
191
+
192
+ def embedding():
193
+ # logic for parsing in - calling - parsing out model embedding calls
194
+ pass
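
A minimal sketch of how `AI21Config` interacts with the `completion()` handler above: values set on the config become defaults that are merged into `optional_params` only when the caller has not supplied them. The model name and key below are placeholders.

import os
import litellm

os.environ["AI21_API_KEY"] = "my-ai21-key"  # placeholder credential

litellm.AI21Config(maxTokens=64, temperature=0.3)  # becomes the default for later calls

response = litellm.completion(
    model="j2-mid",  # assumption: an AI21 Jurassic-2 model routed to this handler
    messages=[{"role": "user", "content": "Write one sentence about tokenizers."}],
    temperature=0.9,  # per-call kwargs still win over AI21Config defaults
)
print(response.choices[0].message.content)
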
litellm/llms/aleph_alpha.py ADDED
@@ -0,0 +1,278 @@
1
+ import os, types
2
+ import json
3
+ from enum import Enum
4
+ import requests
5
+ import time
6
+ from typing import Callable, Optional
7
+ import litellm
8
+ from litellm.utils import ModelResponse, Choices, Message, Usage
9
+ import httpx
10
+
11
+ class AlephAlphaError(Exception):
12
+ def __init__(self, status_code, message):
13
+ self.status_code = status_code
14
+ self.message = message
15
+ self.request = httpx.Request(method="POST", url="https://api.aleph-alpha.com/complete")
16
+ self.response = httpx.Response(status_code=status_code, request=self.request)
17
+ super().__init__(
18
+ self.message
19
+ ) # Call the base class constructor with the parameters it needs
20
+
21
+ class AlephAlphaConfig():
22
+ """
23
+ Reference: https://docs.aleph-alpha.com/api/complete/
24
+
25
+ The `AlephAlphaConfig` class represents the configuration for the Aleph Alpha API. Here are the properties:
26
+
27
+ - `maximum_tokens` (integer, required): The maximum number of tokens to be generated by the completion. The sum of input tokens and maximum tokens may not exceed 2048.
28
+
29
+ - `minimum_tokens` (integer, optional; default value: 0): Generate at least this number of tokens before an end-of-text token is generated.
30
+
31
+ - `echo` (boolean, optional; default value: false): Whether to echo the prompt in the completion.
32
+
33
+ - `temperature` (number, nullable; default value: 0): Adjusts how creatively the model generates outputs. Use combinations of temperature, top_k, and top_p sensibly.
34
+
35
+ - `top_k` (integer, nullable; default value: 0): Introduces randomness into token generation by considering the top k most likely options.
36
+
37
+ - `top_p` (number, nullable; default value: 0): Adds randomness by considering the smallest set of tokens whose cumulative probability exceeds top_p.
38
+
39
+ - `presence_penalty`, `frequency_penalty`, `sequence_penalty` (number, nullable; default value: 0): Various penalties that can reduce repetition.
40
+
41
+ - `sequence_penalty_min_length` (integer; default value: 2): Minimum number of tokens to be considered as a sequence.
42
+
43
+ - `repetition_penalties_include_prompt`, `repetition_penalties_include_completion`, `use_multiplicative_presence_penalty`,`use_multiplicative_frequency_penalty`,`use_multiplicative_sequence_penalty` (boolean, nullable; default value: false): Various settings that adjust how the repetition penalties are applied.
44
+
45
+ - `penalty_bias` (string, nullable): Text used in addition to the penalized tokens for repetition penalties.
46
+
47
+ - `penalty_exceptions` (string[], nullable): Strings that may be generated without penalty.
48
+
49
+ - `penalty_exceptions_include_stop_sequences` (boolean, nullable; default value: true): Include all stop_sequences in penalty_exceptions.
50
+
51
+ - `best_of` (integer, nullable; default value: 1): The number of completions that will be generated on the server side.
52
+
53
+ - `n` (integer, nullable; default value: 1): The number of completions to return.
54
+
55
+ - `logit_bias` (object, nullable): Adjust the logit scores before sampling.
56
+
57
+ - `log_probs` (integer, nullable): Number of top log probabilities for each token generated.
58
+
59
+ - `stop_sequences` (string[], nullable): List of strings that will stop generation if they're generated.
60
+
61
+ - `tokens` (boolean, nullable; default value: false): Flag indicating whether individual tokens of the completion should be returned or not.
62
+
63
+ - `raw_completion` (boolean; default value: false): if True, the raw completion of the model will be returned.
64
+
65
+ - `disable_optimizations` (boolean, nullable; default value: false): Disables any applied optimizations to both your prompt and completion.
66
+
67
+ - `completion_bias_inclusion`, `completion_bias_exclusion` (string[], default value: []): Set of strings to bias the generation of tokens.
68
+
69
+ - `completion_bias_inclusion_first_token_only`, `completion_bias_exclusion_first_token_only` (boolean; default value: false): Consider only the first token for the completion_bias_inclusion/exclusion.
70
+
71
+ - `contextual_control_threshold` (number, nullable): Control over how similar tokens are controlled.
72
+
73
+ - `control_log_additive` (boolean; default value: true): Method of applying control to attention scores.
74
+ """
75
+ maximum_tokens: Optional[int]=litellm.max_tokens # aleph alpha requires max tokens
76
+ minimum_tokens: Optional[int]=None
77
+ echo: Optional[bool]=None
78
+ temperature: Optional[int]=None
79
+ top_k: Optional[int]=None
80
+ top_p: Optional[int]=None
81
+ presence_penalty: Optional[int]=None
82
+ frequency_penalty: Optional[int]=None
83
+ sequence_penalty: Optional[int]=None
84
+ sequence_penalty_min_length: Optional[int]=None
85
+ repetition_penalties_include_prompt: Optional[bool]=None
86
+ repetition_penalties_include_completion: Optional[bool]=None
87
+ use_multiplicative_presence_penalty: Optional[bool]=None
88
+ use_multiplicative_frequency_penalty: Optional[bool]=None
89
+ use_multiplicative_sequence_penalty: Optional[bool]=None
90
+ penalty_bias: Optional[str]=None
91
+ penalty_exceptions_include_stop_sequences: Optional[bool]=None
92
+ best_of: Optional[int]=None
93
+ n: Optional[int]=None
94
+ logit_bias: Optional[dict]=None
95
+ log_probs: Optional[int]=None
96
+ stop_sequences: Optional[list]=None
97
+ tokens: Optional[bool]=None
98
+ raw_completion: Optional[bool]=None
99
+ disable_optimizations: Optional[bool]=None
100
+ completion_bias_inclusion: Optional[list]=None
101
+ completion_bias_exclusion: Optional[list]=None
102
+ completion_bias_inclusion_first_token_only: Optional[bool]=None
103
+ completion_bias_exclusion_first_token_only: Optional[bool]=None
104
+ contextual_control_threshold: Optional[int]=None
105
+ control_log_additive: Optional[bool]=None
106
+
107
+
108
+ def __init__(self,
109
+ maximum_tokens: Optional[int]=None,
110
+ minimum_tokens: Optional[int]=None,
111
+ echo: Optional[bool]=None,
112
+ temperature: Optional[int]=None,
113
+ top_k: Optional[int]=None,
114
+ top_p: Optional[int]=None,
115
+ presence_penalty: Optional[int]=None,
116
+ frequency_penalty: Optional[int]=None,
117
+ sequence_penalty: Optional[int]=None,
118
+ sequence_penalty_min_length: Optional[int]=None,
119
+ repetition_penalties_include_prompt: Optional[bool]=None,
120
+ repetition_penalties_include_completion: Optional[bool]=None,
121
+ use_multiplicative_presence_penalty: Optional[bool]=None,
122
+ use_multiplicative_frequency_penalty: Optional[bool]=None,
123
+ use_multiplicative_sequence_penalty: Optional[bool]=None,
124
+ penalty_bias: Optional[str]=None,
125
+ penalty_exceptions_include_stop_sequences: Optional[bool]=None,
126
+ best_of: Optional[int]=None,
127
+ n: Optional[int]=None,
128
+ logit_bias: Optional[dict]=None,
129
+ log_probs: Optional[int]=None,
130
+ stop_sequences: Optional[list]=None,
131
+ tokens: Optional[bool]=None,
132
+ raw_completion: Optional[bool]=None,
133
+ disable_optimizations: Optional[bool]=None,
134
+ completion_bias_inclusion: Optional[list]=None,
135
+ completion_bias_exclusion: Optional[list]=None,
136
+ completion_bias_inclusion_first_token_only: Optional[bool]=None,
137
+ completion_bias_exclusion_first_token_only: Optional[bool]=None,
138
+ contextual_control_threshold: Optional[int]=None,
139
+ control_log_additive: Optional[bool]=None) -> None:
140
+
141
+ locals_ = locals()
142
+ for key, value in locals_.items():
143
+ if key != 'self' and value is not None:
144
+ setattr(self.__class__, key, value)
145
+
146
+ @classmethod
147
+ def get_config(cls):
148
+ return {k: v for k, v in cls.__dict__.items()
149
+ if not k.startswith('__')
150
+ and not isinstance(v, (types.FunctionType, types.BuiltinFunctionType, classmethod, staticmethod))
151
+ and v is not None}
152
+
153
+
154
+ def validate_environment(api_key):
155
+ headers = {
156
+ "accept": "application/json",
157
+ "content-type": "application/json",
158
+ }
159
+ if api_key:
160
+ headers["Authorization"] = f"Bearer {api_key}"
161
+ return headers
162
+
163
+ def completion(
164
+ model: str,
165
+ messages: list,
166
+ api_base: str,
167
+ model_response: ModelResponse,
168
+ print_verbose: Callable,
169
+ encoding,
170
+ api_key,
171
+ logging_obj,
172
+ optional_params=None,
173
+ litellm_params=None,
174
+ logger_fn=None,
175
+ default_max_tokens_to_sample=None,
176
+ ):
177
+ headers = validate_environment(api_key)
178
+
179
+ ## Load Config
180
+ config = litellm.AlephAlphaConfig.get_config()
181
+ for k, v in config.items():
182
+ if k not in optional_params: # completion(top_k=3) > aleph_alpha_config(top_k=3) <- allows for dynamic variables to be passed in
183
+ optional_params[k] = v
184
+
185
+ completion_url = api_base
186
+ model = model
187
+ prompt = ""
188
+ if "control" in model: # follow the ###Instruction / ###Response format
189
+ for idx, message in enumerate(messages):
190
+ if "role" in message:
191
+ if idx == 0: # set first message as instruction (required), let later user messages be input
192
+ prompt += f"###Instruction: {message['content']}"
193
+ else:
194
+ if message["role"] == "system":
195
+ prompt += (
196
+ f"###Instruction: {message['content']}"
197
+ )
198
+ elif message["role"] == "user":
199
+ prompt += (
200
+ f"###Input: {message['content']}"
201
+ )
202
+ else:
203
+ prompt += (
204
+ f"###Response: {message['content']}"
205
+ )
206
+ else:
207
+ prompt += f"{message['content']}"
208
+ else:
209
+ prompt = " ".join(message["content"] for message in messages)
210
+ data = {
211
+ "model": model,
212
+ "prompt": prompt,
213
+ **optional_params,
214
+ }
215
+
216
+ ## LOGGING
217
+ logging_obj.pre_call(
218
+ input=prompt,
219
+ api_key=api_key,
220
+ additional_args={"complete_input_dict": data},
221
+ )
222
+ ## COMPLETION CALL
223
+ response = requests.post(
224
+ completion_url, headers=headers, data=json.dumps(data), stream=optional_params["stream"] if "stream" in optional_params else False
225
+ )
226
+ if "stream" in optional_params and optional_params["stream"] == True:
227
+ return response.iter_lines()
228
+ else:
229
+ ## LOGGING
230
+ logging_obj.post_call(
231
+ input=prompt,
232
+ api_key=api_key,
233
+ original_response=response.text,
234
+ additional_args={"complete_input_dict": data},
235
+ )
236
+ print_verbose(f"raw model_response: {response.text}")
237
+ ## RESPONSE OBJECT
238
+ completion_response = response.json()
239
+ if "error" in completion_response:
240
+ raise AlephAlphaError(
241
+ message=completion_response["error"],
242
+ status_code=response.status_code,
243
+ )
244
+ else:
245
+ try:
246
+ choices_list = []
247
+ for idx, item in enumerate(completion_response["completions"]):
248
+ if len(item["completion"]) > 0:
249
+ message_obj = Message(content=item["completion"])
250
+ else:
251
+ message_obj = Message(content=None)
252
+ choice_obj = Choices(finish_reason=item["finish_reason"], index=idx+1, message=message_obj)
253
+ choices_list.append(choice_obj)
254
+ model_response["choices"] = choices_list
255
+ except:
256
+ raise AlephAlphaError(message=json.dumps(completion_response), status_code=response.status_code)
257
+
258
+ ## CALCULATING USAGE - baseten charges on time, not tokens - have some mapping of cost here.
259
+ prompt_tokens = len(
260
+ encoding.encode(prompt)
261
+ )
262
+ completion_tokens = len(
263
+ encoding.encode(model_response["choices"][0]["message"]["content"])
264
+ )
265
+
266
+ model_response["created"] = int(time.time())
267
+ model_response["model"] = model
268
+ usage = Usage(
269
+ prompt_tokens=prompt_tokens,
270
+ completion_tokens=completion_tokens,
271
+ total_tokens=prompt_tokens + completion_tokens
272
+ )
273
+ model_response.usage = usage
274
+ return model_response
275
+
276
+ def embedding():
277
+ # logic for parsing in - calling - parsing out model embedding calls
278
+ pass
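
A standalone sketch of the prompt formatting used above for Aleph Alpha "control" models, reproducing the ###Instruction / ###Input / ###Response branches outside the handler (the messages are illustrative):

messages = [
    {"role": "system", "content": "You are a concise assistant."},
    {"role": "user", "content": "Summarize the completion endpoint."},
]

prompt = ""
for idx, message in enumerate(messages):
    if idx == 0:
        prompt += f"###Instruction: {message['content']}"  # first message is always the instruction
    elif message["role"] == "system":
        prompt += f"###Instruction: {message['content']}"
    elif message["role"] == "user":
        prompt += f"###Input: {message['content']}"
    else:
        prompt += f"###Response: {message['content']}"

print(prompt)
# ###Instruction: You are a concise assistant.###Input: Summarize the completion endpoint.
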
litellm/llms/anthropic.py ADDED
@@ -0,0 +1,187 @@
1
+ import os, types
2
+ import json
3
+ from enum import Enum
4
+ import requests
5
+ import time
6
+ from typing import Callable, Optional
7
+ from litellm.utils import ModelResponse, Usage
8
+ import litellm
9
+ from .prompt_templates.factory import prompt_factory, custom_prompt
10
+ import httpx
11
+
12
+ class AnthropicConstants(Enum):
13
+ HUMAN_PROMPT = "\n\nHuman: "
14
+ AI_PROMPT = "\n\nAssistant: "
15
+
16
+ class AnthropicError(Exception):
17
+ def __init__(self, status_code, message):
18
+ self.status_code = status_code
19
+ self.message = message
20
+ self.request = httpx.Request(method="POST", url="https://api.anthropic.com/v1/complete")
21
+ self.response = httpx.Response(status_code=status_code, request=self.request)
22
+ super().__init__(
23
+ self.message
24
+ ) # Call the base class constructor with the parameters it needs
25
+
26
+ class AnthropicConfig():
27
+ """
28
+ Reference: https://docs.anthropic.com/claude/reference/complete_post
29
+
30
+ to pass metadata to anthropic, it's {"user_id": "any-relevant-information"}
31
+ """
32
+ max_tokens_to_sample: Optional[int]=litellm.max_tokens # anthropic requires a default
33
+ stop_sequences: Optional[list]=None
34
+ temperature: Optional[int]=None
35
+ top_p: Optional[int]=None
36
+ top_k: Optional[int]=None
37
+ metadata: Optional[dict]=None
38
+
39
+ def __init__(self,
40
+ max_tokens_to_sample: Optional[int]=256, # anthropic requires a default
41
+ stop_sequences: Optional[list]=None,
42
+ temperature: Optional[int]=None,
43
+ top_p: Optional[int]=None,
44
+ top_k: Optional[int]=None,
45
+ metadata: Optional[dict]=None) -> None:
46
+
47
+ locals_ = locals()
48
+ for key, value in locals_.items():
49
+ if key != 'self' and value is not None:
50
+ setattr(self.__class__, key, value)
51
+
52
+ @classmethod
53
+ def get_config(cls):
54
+ return {k: v for k, v in cls.__dict__.items()
55
+ if not k.startswith('__')
56
+ and not isinstance(v, (types.FunctionType, types.BuiltinFunctionType, classmethod, staticmethod))
57
+ and v is not None}
58
+
59
+
60
+ # makes headers for API call
61
+ def validate_environment(api_key):
62
+ if api_key is None:
63
+ raise ValueError(
64
+ "Missing Anthropic API Key - A call is being made to anthropic but no key is set either in the environment variables or via params"
65
+ )
66
+ headers = {
67
+ "accept": "application/json",
68
+ "anthropic-version": "2023-06-01",
69
+ "content-type": "application/json",
70
+ "x-api-key": api_key,
71
+ }
72
+ return headers
73
+
74
+ def completion(
75
+ model: str,
76
+ messages: list,
77
+ api_base: str,
78
+ custom_prompt_dict: dict,
79
+ model_response: ModelResponse,
80
+ print_verbose: Callable,
81
+ encoding,
82
+ api_key,
83
+ logging_obj,
84
+ optional_params=None,
85
+ litellm_params=None,
86
+ logger_fn=None,
87
+ ):
88
+ headers = validate_environment(api_key)
89
+ if model in custom_prompt_dict:
90
+ # check if the model has a registered custom prompt
91
+ model_prompt_details = custom_prompt_dict[model]
92
+ prompt = custom_prompt(
93
+ role_dict=model_prompt_details["roles"],
94
+ initial_prompt_value=model_prompt_details["initial_prompt_value"],
95
+ final_prompt_value=model_prompt_details["final_prompt_value"],
96
+ messages=messages
97
+ )
98
+ else:
99
+ prompt = prompt_factory(model=model, messages=messages, custom_llm_provider="anthropic")
100
+
101
+ ## Load Config
102
+ config = litellm.AnthropicConfig.get_config()
103
+ for k, v in config.items():
104
+ if k not in optional_params: # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in
105
+ optional_params[k] = v
106
+
107
+ data = {
108
+ "model": model,
109
+ "prompt": prompt,
110
+ **optional_params,
111
+ }
112
+
113
+ ## LOGGING
114
+ logging_obj.pre_call(
115
+ input=prompt,
116
+ api_key=api_key,
117
+ additional_args={"complete_input_dict": data, "api_base": api_base},
118
+ )
119
+
120
+ ## COMPLETION CALL
121
+ if "stream" in optional_params and optional_params["stream"] == True:
122
+ response = requests.post(
123
+ api_base,
124
+ headers=headers,
125
+ data=json.dumps(data),
126
+ stream=optional_params["stream"],
127
+ )
128
+
129
+ if response.status_code != 200:
130
+ raise AnthropicError(status_code=response.status_code, message=response.text)
131
+
132
+ return response.iter_lines()
133
+ else:
134
+ response = requests.post(
135
+ api_base, headers=headers, data=json.dumps(data)
136
+ )
137
+ if response.status_code != 200:
138
+ raise AnthropicError(status_code=response.status_code, message=response.text)
139
+
140
+ ## LOGGING
141
+ logging_obj.post_call(
142
+ input=prompt,
143
+ api_key=api_key,
144
+ original_response=response.text,
145
+ additional_args={"complete_input_dict": data},
146
+ )
147
+ print_verbose(f"raw model_response: {response.text}")
148
+ ## RESPONSE OBJECT
149
+ try:
150
+ completion_response = response.json()
151
+ except:
152
+ raise AnthropicError(
153
+ message=response.text, status_code=response.status_code
154
+ )
155
+ if "error" in completion_response:
156
+ raise AnthropicError(
157
+ message=str(completion_response["error"]),
158
+ status_code=response.status_code,
159
+ )
160
+ else:
161
+ if len(completion_response["completion"]) > 0:
162
+ model_response["choices"][0]["message"]["content"] = completion_response[
163
+ "completion"
164
+ ]
165
+ model_response.choices[0].finish_reason = completion_response["stop_reason"]
166
+
167
+ ## CALCULATING USAGE
168
+ prompt_tokens = len(
169
+ encoding.encode(prompt)
170
+ ) ##[TODO] use the anthropic tokenizer here
171
+ completion_tokens = len(
172
+ encoding.encode(model_response["choices"][0]["message"].get("content", ""))
173
+ ) ##[TODO] use the anthropic tokenizer here
174
+
175
+ model_response["created"] = int(time.time())
176
+ model_response["model"] = model
177
+ usage = Usage(
178
+ prompt_tokens=prompt_tokens,
179
+ completion_tokens=completion_tokens,
180
+ total_tokens=prompt_tokens + completion_tokens
181
+ )
182
+ model_response.usage = usage
183
+ return model_response
184
+
185
+ def embedding():
186
+ # logic for parsing in - calling - parsing out model embedding calls
187
+ pass
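
A short sketch of the prompt shape the Anthropic completion endpoint expects, built from the HUMAN_PROMPT / AI_PROMPT constants defined above (the exact output of prompt_factory may differ; this only shows the general Human/Assistant pattern):

HUMAN_PROMPT = "\n\nHuman: "
AI_PROMPT = "\n\nAssistant: "

messages = [{"role": "user", "content": "What is the capital of France?"}]

prompt = ""
for message in messages:
    if message["role"] == "user":
        prompt += f"{HUMAN_PROMPT}{message['content']}"
    else:
        prompt += f"{AI_PROMPT}{message['content']}"
prompt += AI_PROMPT  # the prompt must end with an open Assistant turn

# "\n\nHuman: What is the capital of France?\n\nAssistant: "
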
litellm/llms/azure.py ADDED
@@ -0,0 +1,414 @@
1
+ from typing import Optional, Union, Any
2
+ import types, requests
3
+ from .base import BaseLLM
4
+ from litellm.utils import ModelResponse, Choices, Message, CustomStreamWrapper, convert_to_model_response_object
5
+ from typing import Callable, Optional
6
+ from litellm import OpenAIConfig
7
+ import litellm, json
8
+ import httpx
9
+ from openai import AzureOpenAI, AsyncAzureOpenAI
10
+
11
+ class AzureOpenAIError(Exception):
12
+ def __init__(self, status_code, message, request: Optional[httpx.Request]=None, response: Optional[httpx.Response]=None):
13
+ self.status_code = status_code
14
+ self.message = message
15
+ if request:
16
+ self.request = request
17
+ else:
18
+ self.request = httpx.Request(method="POST", url="https://api.openai.com/v1")
19
+ if response:
20
+ self.response = response
21
+ else:
22
+ self.response = httpx.Response(status_code=status_code, request=self.request)
23
+ super().__init__(
24
+ self.message
25
+ ) # Call the base class constructor with the parameters it needs
26
+
27
+ class AzureOpenAIConfig(OpenAIConfig):
28
+ """
29
+ Reference: https://platform.openai.com/docs/api-reference/chat/create
30
+
31
+ The class `AzureOpenAIConfig` provides configuration for OpenAI's Chat API interface when used with Azure. It inherits from `OpenAIConfig`. Below are the parameters:
32
+
33
+ - `frequency_penalty` (number or null): Defaults to 0. Allows a value between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, thereby minimizing repetition.
34
+
35
+ - `function_call` (string or object): This optional parameter controls how the model calls functions.
36
+
37
+ - `functions` (array): An optional parameter. It is a list of functions for which the model may generate JSON inputs.
38
+
39
+ - `logit_bias` (map): This optional parameter modifies the likelihood of specified tokens appearing in the completion.
40
+
41
+ - `max_tokens` (integer or null): This optional parameter helps to set the maximum number of tokens to generate in the chat completion.
42
+
43
+ - `n` (integer or null): This optional parameter helps to set how many chat completion choices to generate for each input message.
44
+
45
+ - `presence_penalty` (number or null): Defaults to 0. It penalizes new tokens based on if they appear in the text so far, hence increasing the model's likelihood to talk about new topics.
46
+
47
+ - `stop` (string / array / null): Specifies up to 4 sequences where the API will stop generating further tokens.
48
+
49
+ - `temperature` (number or null): Defines the sampling temperature to use, varying between 0 and 2.
50
+
51
+ - `top_p` (number or null): An alternative to sampling with temperature, used for nucleus sampling.
52
+ """
53
+
54
+ def __init__(self,
55
+ frequency_penalty: Optional[int] = None,
56
+ function_call: Optional[Union[str, dict]]= None,
57
+ functions: Optional[list]= None,
58
+ logit_bias: Optional[dict]= None,
59
+ max_tokens: Optional[int]= None,
60
+ n: Optional[int]= None,
61
+ presence_penalty: Optional[int]= None,
62
+ stop: Optional[Union[str,list]]=None,
63
+ temperature: Optional[int]= None,
64
+ top_p: Optional[int]= None) -> None:
65
+ super().__init__(frequency_penalty,
66
+ function_call,
67
+ functions,
68
+ logit_bias,
69
+ max_tokens,
70
+ n,
71
+ presence_penalty,
72
+ stop,
73
+ temperature,
74
+ top_p)
75
+
76
+ class AzureChatCompletion(BaseLLM):
77
+
78
+ def __init__(self) -> None:
79
+ super().__init__()
80
+
81
+ def validate_environment(self, api_key, azure_ad_token):
82
+ headers = {
83
+ "content-type": "application/json",
84
+ }
85
+ if api_key is not None:
86
+ headers["api-key"] = api_key
87
+ elif azure_ad_token is not None:
88
+ headers["Authorization"] = f"Bearer {azure_ad_token}"
89
+ return headers
90
+
91
+ def completion(self,
92
+ model: str,
93
+ messages: list,
94
+ model_response: ModelResponse,
95
+ api_key: str,
96
+ api_base: str,
97
+ api_version: str,
98
+ api_type: str,
99
+ azure_ad_token: str,
100
+ print_verbose: Callable,
101
+ timeout,
102
+ logging_obj,
103
+ optional_params,
104
+ litellm_params,
105
+ logger_fn,
106
+ acompletion: bool = False,
107
+ headers: Optional[dict]=None,
108
+ client = None,
109
+ ):
110
+ super().completion()
111
+ exception_mapping_worked = False
112
+ try:
113
+
114
+ if model is None or messages is None:
115
+ raise AzureOpenAIError(status_code=422, message=f"Missing model or messages")
116
+
117
+ max_retries = optional_params.pop("max_retries", 2)
118
+
119
+ ### CHECK IF CLOUDFLARE AI GATEWAY ###
120
+ ### if so - set the model as part of the base url
121
+ if "gateway.ai.cloudflare.com" in api_base:
122
+ ## build base url - assume api base includes resource name
123
+ if client is None:
124
+ if not api_base.endswith("/"):
125
+ api_base += "/"
126
+ api_base += f"{model}"
127
+
128
+ azure_client_params = {
129
+ "api_version": api_version,
130
+ "base_url": f"{api_base}",
131
+ "http_client": litellm.client_session,
132
+ "max_retries": max_retries,
133
+ "timeout": timeout
134
+ }
135
+ if api_key is not None:
136
+ azure_client_params["api_key"] = api_key
137
+ elif azure_ad_token is not None:
138
+ azure_client_params["azure_ad_token"] = azure_ad_token
139
+
140
+ if acompletion is True:
141
+ client = AsyncAzureOpenAI(**azure_client_params)
142
+ else:
143
+ client = AzureOpenAI(**azure_client_params)
144
+
145
+ data = {
146
+ "model": None,
147
+ "messages": messages,
148
+ **optional_params
149
+ }
150
+ else:
151
+ data = {
152
+ "model": model, # type: ignore
153
+ "messages": messages,
154
+ **optional_params
155
+ }
156
+ ## LOGGING
157
+ logging_obj.pre_call(
158
+ input=messages,
159
+ api_key=api_key,
160
+ additional_args={
161
+ "headers": {
162
+ "api_key": api_key,
163
+ "azure_ad_token": azure_ad_token
164
+ },
165
+ "api_version": api_version,
166
+ "api_base": api_base,
167
+ "complete_input_dict": data,
168
+ },
169
+ )
170
+
171
+ if acompletion is True:
172
+ if optional_params.get("stream", False):
173
+ return self.async_streaming(logging_obj=logging_obj, api_base=api_base, data=data, model=model, api_key=api_key, api_version=api_version, azure_ad_token=azure_ad_token, timeout=timeout, client=client)
174
+ else:
175
+ return self.acompletion(api_base=api_base, data=data, model_response=model_response, api_key=api_key, api_version=api_version, model=model, azure_ad_token=azure_ad_token, timeout=timeout, client=client)
176
+ elif "stream" in optional_params and optional_params["stream"] == True:
177
+ return self.streaming(logging_obj=logging_obj, api_base=api_base, data=data, model=model, api_key=api_key, api_version=api_version, azure_ad_token=azure_ad_token, timeout=timeout, client=client)
178
+ else:
179
+ if not isinstance(max_retries, int):
180
+ raise AzureOpenAIError(status_code=422, message="max retries must be an int")
181
+ # init AzureOpenAI Client
182
+ azure_client_params = {
183
+ "api_version": api_version,
184
+ "azure_endpoint": api_base,
185
+ "azure_deployment": model,
186
+ "http_client": litellm.client_session,
187
+ "max_retries": max_retries,
188
+ "timeout": timeout
189
+ }
190
+ if api_key is not None:
191
+ azure_client_params["api_key"] = api_key
192
+ elif azure_ad_token is not None:
193
+ azure_client_params["azure_ad_token"] = azure_ad_token
194
+ if client is None:
195
+ azure_client = AzureOpenAI(**azure_client_params)
196
+ else:
197
+ azure_client = client
198
+ response = azure_client.chat.completions.create(**data) # type: ignore
199
+ response.model = "azure/" + str(response.model)
200
+ return convert_to_model_response_object(response_object=json.loads(response.model_dump_json()), model_response_object=model_response)
201
+ except AzureOpenAIError as e:
202
+ exception_mapping_worked = True
203
+ raise e
204
+ except Exception as e:
205
+ raise e
206
+
207
+ async def acompletion(self,
208
+ api_key: str,
209
+ api_version: str,
210
+ model: str,
211
+ api_base: str,
212
+ data: dict,
213
+ timeout: Any,
214
+ model_response: ModelResponse,
215
+ azure_ad_token: Optional[str]=None,
216
+ client = None, # this is the AsyncAzureOpenAI
217
+ ):
218
+ response = None
219
+ try:
220
+ max_retries = data.pop("max_retries", 2)
221
+ if not isinstance(max_retries, int):
222
+ raise AzureOpenAIError(status_code=422, message="max retries must be an int")
223
+ # init AzureOpenAI Client
224
+ azure_client_params = {
225
+ "api_version": api_version,
226
+ "azure_endpoint": api_base,
227
+ "azure_deployment": model,
228
+ "http_client": litellm.client_session,
229
+ "max_retries": max_retries,
230
+ "timeout": timeout
231
+ }
232
+ if api_key is not None:
233
+ azure_client_params["api_key"] = api_key
234
+ elif azure_ad_token is not None:
235
+ azure_client_params["azure_ad_token"] = azure_ad_token
236
+ if client is None:
237
+ azure_client = AsyncAzureOpenAI(**azure_client_params)
238
+ else:
239
+ azure_client = client
240
+ response = await azure_client.chat.completions.create(**data)
241
+ return convert_to_model_response_object(response_object=json.loads(response.model_dump_json()), model_response_object=model_response)
242
+ except AzureOpenAIError as e:
243
+ exception_mapping_worked = True
244
+ raise e
245
+ except Exception as e:
246
+ raise e
247
+
248
+ def streaming(self,
249
+ logging_obj,
250
+ api_base: str,
251
+ api_key: str,
252
+ api_version: str,
253
+ data: dict,
254
+ model: str,
255
+ timeout: Any,
256
+ azure_ad_token: Optional[str]=None,
257
+ client=None,
258
+ ):
259
+ max_retries = data.pop("max_retries", 2)
260
+ if not isinstance(max_retries, int):
261
+ raise AzureOpenAIError(status_code=422, message="max retries must be an int")
262
+ # init AzureOpenAI Client
263
+ azure_client_params = {
264
+ "api_version": api_version,
265
+ "azure_endpoint": api_base,
266
+ "azure_deployment": model,
267
+ "http_client": litellm.client_session,
268
+ "max_retries": max_retries,
269
+ "timeout": timeout
270
+ }
271
+ if api_key is not None:
272
+ azure_client_params["api_key"] = api_key
273
+ elif azure_ad_token is not None:
274
+ azure_client_params["azure_ad_token"] = azure_ad_token
275
+ if client is None:
276
+ azure_client = AzureOpenAI(**azure_client_params)
277
+ else:
278
+ azure_client = client
279
+ response = azure_client.chat.completions.create(**data)
280
+ streamwrapper = CustomStreamWrapper(completion_stream=response, model=model, custom_llm_provider="azure",logging_obj=logging_obj)
281
+ return streamwrapper
282
+
283
+ async def async_streaming(self,
284
+ logging_obj,
285
+ api_base: str,
286
+ api_key: str,
287
+ api_version: str,
288
+ data: dict,
289
+ model: str,
290
+ timeout: Any,
291
+ azure_ad_token: Optional[str]=None,
292
+ client = None,
293
+ ):
294
+ # init AzureOpenAI Client
295
+ azure_client_params = {
296
+ "api_version": api_version,
297
+ "azure_endpoint": api_base,
298
+ "azure_deployment": model,
299
+ "http_client": litellm.client_session,
300
+ "max_retries": data.pop("max_retries", 2),
301
+ "timeout": timeout
302
+ }
303
+ if api_key is not None:
304
+ azure_client_params["api_key"] = api_key
305
+ elif azure_ad_token is not None:
306
+ azure_client_params["azure_ad_token"] = azure_ad_token
307
+ if client is None:
308
+ azure_client = AsyncAzureOpenAI(**azure_client_params)
309
+ else:
310
+ azure_client = client
311
+ response = await azure_client.chat.completions.create(**data)
312
+ streamwrapper = CustomStreamWrapper(completion_stream=response, model=model, custom_llm_provider="azure",logging_obj=logging_obj)
313
+ async for transformed_chunk in streamwrapper:
314
+ yield transformed_chunk
315
+
316
+ async def aembedding(
317
+ self,
318
+ data: dict,
319
+ model_response: ModelResponse,
320
+ azure_client_params: dict,
321
+ client=None,
322
+ ):
323
+ response = None
324
+ try:
325
+ if client is None:
326
+ openai_aclient = AsyncAzureOpenAI(**azure_client_params)
327
+ else:
328
+ openai_aclient = client
329
+ response = await openai_aclient.embeddings.create(**data)
330
+ return convert_to_model_response_object(response_object=json.loads(response.model_dump_json()), model_response_object=model_response, response_type="embedding")
331
+ except Exception as e:
332
+ raise e
333
+
334
+ def embedding(self,
335
+ model: str,
336
+ input: list,
337
+ api_key: str,
338
+ api_base: str,
339
+ api_version: str,
340
+ timeout: float,
341
+ logging_obj=None,
342
+ model_response=None,
343
+ optional_params=None,
344
+ azure_ad_token: Optional[str]=None,
345
+ client = None,
346
+ aembedding=None,
347
+ ):
348
+ super().embedding()
349
+ exception_mapping_worked = False
350
+ if self._client_session is None:
351
+ self._client_session = self.create_client_session()
352
+ try:
353
+ data = {
354
+ "model": model,
355
+ "input": input,
356
+ **optional_params
357
+ }
358
+ max_retries = data.pop("max_retries", 2)
359
+ if not isinstance(max_retries, int):
360
+ raise AzureOpenAIError(status_code=422, message="max retries must be an int")
361
+
362
+ # init AzureOpenAI Client
363
+ azure_client_params = {
364
+ "api_version": api_version,
365
+ "azure_endpoint": api_base,
366
+ "azure_deployment": model,
367
+ "http_client": litellm.client_session,
368
+ "max_retries": max_retries,
369
+ "timeout": timeout
370
+ }
371
+ if api_key is not None:
372
+ azure_client_params["api_key"] = api_key
373
+ elif azure_ad_token is not None:
374
+ azure_client_params["azure_ad_token"] = azure_ad_token
375
+ if aembedding == True:
376
+ response = self.aembedding(data=data, model_response=model_response, azure_client_params=azure_client_params)
377
+ return response
378
+ if client is None:
379
+ azure_client = AzureOpenAI(**azure_client_params) # type: ignore
380
+ else:
381
+ azure_client = client
382
+ ## LOGGING
383
+ logging_obj.pre_call(
384
+ input=input,
385
+ api_key=api_key,
386
+ additional_args={
387
+ "complete_input_dict": data,
388
+ "headers": {
389
+ "api_key": api_key,
390
+ "azure_ad_token": azure_ad_token
391
+ }
392
+ },
393
+ )
394
+ ## COMPLETION CALL
395
+ response = azure_client.embeddings.create(**data) # type: ignore
396
+ ## LOGGING
397
+ logging_obj.post_call(
398
+ input=input,
399
+ api_key=api_key,
400
+ additional_args={"complete_input_dict": data, "api_base": api_base},
401
+ original_response=response,
402
+ )
403
+
404
+
405
+ return convert_to_model_response_object(response_object=json.loads(response.model_dump_json()), model_response_object=model_response, response_type="embedding") # type: ignore
406
+ except AzureOpenAIError as e:
407
+ exception_mapping_worked = True
408
+ raise e
409
+ except Exception as e:
410
+ if exception_mapping_worked:
411
+ raise e
412
+ else:
413
+ import traceback
414
+ raise AzureOpenAIError(status_code=500, message=traceback.format_exc())
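
A minimal calling sketch for the Azure handler above, assuming the usual litellm convention of prefixing the deployment name with "azure/" and passing api_base / api_version / api_key through completion() (endpoint, deployment and key are placeholders):

import litellm

response = litellm.completion(
    model="azure/my-gpt-35-deployment",                # placeholder deployment name
    api_base="https://my-resource.openai.azure.com/",  # placeholder Azure endpoint
    api_version="2023-07-01-preview",                  # assumption: any supported API version
    api_key="my-azure-key",                            # placeholder key
    messages=[{"role": "user", "content": "ping"}],
)
print(response.model)  # the handler rewrites this to "azure/<model returned by Azure>"
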
litellm/llms/base.py ADDED
@@ -0,0 +1,47 @@
1
+ ## This is a template base class to be used for adding new LLM providers via API calls
2
+ import litellm
3
+ import httpx, certifi, ssl
4
+ from typing import Optional
5
+
6
+ class BaseLLM:
7
+ _client_session: Optional[httpx.Client] = None
8
+ def create_client_session(self):
9
+ if litellm.client_session:
10
+ _client_session = litellm.client_session
11
+ else:
12
+ _client_session = httpx.Client()
13
+
14
+ return _client_session
15
+
16
+ def create_aclient_session(self):
17
+ if litellm.aclient_session:
18
+ _aclient_session = litellm.aclient_session
19
+ else:
20
+ _aclient_session = httpx.AsyncClient()
21
+
22
+ return _aclient_session
23
+
24
+ def __exit__(self):
25
+ if hasattr(self, '_client_session'):
26
+ self._client_session.close()
27
+
28
+ async def __aexit__(self, exc_type, exc_val, exc_tb):
29
+ if hasattr(self, '_aclient_session'):
30
+ await self._aclient_session.aclose()
31
+
32
+ def validate_environment(self): # set up the environment required to run the model
33
+ pass
34
+
35
+ def completion(
36
+ self,
37
+ *args,
38
+ **kwargs
39
+ ): # logic for parsing in - calling - parsing out model completion calls
40
+ pass
41
+
42
+ def embedding(
43
+ self,
44
+ *args,
45
+ **kwargs
46
+ ): # logic for parsing in - calling - parsing out model embedding calls
47
+ pass
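
Since base.py is explicitly a template for new providers, here is a hedged sketch of what a subclass could look like; the provider name, endpoint and payload shape are made up for illustration:

from litellm.llms.base import BaseLLM

class MyProviderLLM(BaseLLM):
    """Hypothetical provider handler built on the BaseLLM template."""

    def validate_environment(self, api_key=None):
        if api_key is None:
            raise ValueError("missing MyProvider API key")
        return {"Authorization": f"Bearer {api_key}"}

    def completion(self, model, messages, api_key=None, **kwargs):
        headers = self.validate_environment(api_key=api_key)
        client = self.create_client_session()  # reuses litellm.client_session when set
        # placeholder endpoint + payload; a real handler would build the provider's schema
        resp = client.post(
            "https://api.example-provider.com/v1/complete",
            headers=headers,
            json={"model": model, "messages": messages},
        )
        return resp.json()
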
litellm/llms/baseten.py ADDED
@@ -0,0 +1,149 @@
1
+ import os
2
+ import json
3
+ from enum import Enum
4
+ import requests
5
+ import time
6
+ from typing import Callable
7
+ from litellm.utils import ModelResponse, Usage
8
+
9
+ class BasetenError(Exception):
10
+ def __init__(self, status_code, message):
11
+ self.status_code = status_code
12
+ self.message = message
13
+ super().__init__(
14
+ self.message
15
+ ) # Call the base class constructor with the parameters it needs
16
+
17
+ def validate_environment(api_key):
18
+ headers = {
19
+ "accept": "application/json",
20
+ "content-type": "application/json",
21
+ }
22
+ if api_key:
23
+ headers["Authorization"] = f"Api-Key {api_key}"
24
+ return headers
25
+
26
+ def completion(
27
+ model: str,
28
+ messages: list,
29
+ model_response: ModelResponse,
30
+ print_verbose: Callable,
31
+ encoding,
32
+ api_key,
33
+ logging_obj,
34
+ optional_params=None,
35
+ litellm_params=None,
36
+ logger_fn=None,
37
+ ):
38
+ headers = validate_environment(api_key)
39
+ completion_url_fragment_1 = "https://app.baseten.co/models/"
40
+ completion_url_fragment_2 = "/predict"
41
+ model = model
42
+ prompt = ""
43
+ for message in messages:
44
+ if "role" in message:
45
+ if message["role"] == "user":
46
+ prompt += f"{message['content']}"
47
+ else:
48
+ prompt += f"{message['content']}"
49
+ else:
50
+ prompt += f"{message['content']}"
51
+ data = {
52
+ "inputs": prompt,
53
+ "prompt": prompt,
54
+ "parameters": optional_params,
55
+ "stream": True if "stream" in optional_params and optional_params["stream"] == True else False
56
+ }
57
+
58
+ ## LOGGING
59
+ logging_obj.pre_call(
60
+ input=prompt,
61
+ api_key=api_key,
62
+ additional_args={"complete_input_dict": data},
63
+ )
64
+ ## COMPLETION CALL
65
+ response = requests.post(
66
+ completion_url_fragment_1 + model + completion_url_fragment_2,
67
+ headers=headers,
68
+ data=json.dumps(data),
69
+ stream=True if "stream" in optional_params and optional_params["stream"] == True else False
70
+ )
71
+ if 'text/event-stream' in response.headers['Content-Type'] or ("stream" in optional_params and optional_params["stream"] == True):
72
+ return response.iter_lines()
73
+ else:
74
+ ## LOGGING
75
+ logging_obj.post_call(
76
+ input=prompt,
77
+ api_key=api_key,
78
+ original_response=response.text,
79
+ additional_args={"complete_input_dict": data},
80
+ )
81
+ print_verbose(f"raw model_response: {response.text}")
82
+ ## RESPONSE OBJECT
83
+ completion_response = response.json()
84
+ if "error" in completion_response:
85
+ raise BasetenError(
86
+ message=completion_response["error"],
87
+ status_code=response.status_code,
88
+ )
89
+ else:
90
+ if "model_output" in completion_response:
91
+ if (
92
+ isinstance(completion_response["model_output"], dict)
93
+ and "data" in completion_response["model_output"]
94
+ and isinstance(
95
+ completion_response["model_output"]["data"], list
96
+ )
97
+ ):
98
+ model_response["choices"][0]["message"][
99
+ "content"
100
+ ] = completion_response["model_output"]["data"][0]
101
+ elif isinstance(completion_response["model_output"], str):
102
+ model_response["choices"][0]["message"][
103
+ "content"
104
+ ] = completion_response["model_output"]
105
+ elif "completion" in completion_response and isinstance(
106
+ completion_response["completion"], str
107
+ ):
108
+ model_response["choices"][0]["message"][
109
+ "content"
110
+ ] = completion_response["completion"]
111
+ elif isinstance(completion_response, list) and len(completion_response) > 0:
112
+ if "generated_text" not in completion_response:
113
+ raise BasetenError(
114
+ message=f"Unable to parse response. Original response: {response.text}",
115
+ status_code=response.status_code
116
+ )
117
+ model_response["choices"][0]["message"]["content"] = completion_response[0]["generated_text"]
118
+ ## GETTING LOGPROBS
119
+ if "details" in completion_response[0] and "tokens" in completion_response[0]["details"]:
120
+ model_response.choices[0].finish_reason = completion_response[0]["details"]["finish_reason"]
121
+ sum_logprob = 0
122
+ for token in completion_response[0]["details"]["tokens"]:
123
+ sum_logprob += token["logprob"]
124
+ model_response["choices"][0]["message"]._logprobs = sum_logprob
125
+ else:
126
+ raise BasetenError(
127
+ message=f"Unable to parse response. Original response: {response.text}",
128
+ status_code=response.status_code
129
+ )
130
+
131
+ ## CALCULATING USAGE - baseten charges on time, not tokens - have some mapping of cost here.
132
+ prompt_tokens = len(encoding.encode(prompt))
133
+ completion_tokens = len(
134
+ encoding.encode(model_response["choices"][0]["message"]["content"])
135
+ )
136
+
137
+ model_response["created"] = int(time.time())
138
+ model_response["model"] = model
139
+ usage = Usage(
140
+ prompt_tokens=prompt_tokens,
141
+ completion_tokens=completion_tokens,
142
+ total_tokens=prompt_tokens + completion_tokens
143
+ )
144
+ model_response.usage = usage
145
+ return model_response
146
+
147
+ def embedding():
148
+ # logic for parsing in - calling - parsing out model embedding calls
149
+ pass
litellm/llms/bedrock.py ADDED
@@ -0,0 +1,627 @@
1
+ import json, copy, types
2
+ import os
3
+ from enum import Enum
4
+ import time
5
+ from typing import Callable, Optional
6
+ import litellm
7
+ from litellm.utils import ModelResponse, get_secret, Usage
8
+ from .prompt_templates.factory import prompt_factory, custom_prompt
9
+ import httpx
10
+
11
+ class BedrockError(Exception):
12
+ def __init__(self, status_code, message):
13
+ self.status_code = status_code
14
+ self.message = message
15
+ self.request = httpx.Request(method="POST", url="https://us-west-2.console.aws.amazon.com/bedrock")
16
+ self.response = httpx.Response(status_code=status_code, request=self.request)
17
+ super().__init__(
18
+ self.message
19
+ ) # Call the base class constructor with the parameters it needs
20
+
21
+ class AmazonTitanConfig():
22
+ """
23
+ Reference: https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=titan-text-express-v1
24
+
25
+ Supported Params for the Amazon Titan models:
26
+
27
+ - `maxTokenCount` (integer) max tokens,
28
+ - `stopSequences` (string[]) list of stop sequence strings
29
+ - `temperature` (float) temperature for model,
30
+ - `topP` (int) top p for model
31
+ """
32
+ maxTokenCount: Optional[int]=None
33
+ stopSequences: Optional[list]=None
34
+ temperature: Optional[float]=None
35
+ topP: Optional[int]=None
36
+
37
+ def __init__(self,
38
+ maxTokenCount: Optional[int]=None,
39
+ stopSequences: Optional[list]=None,
40
+ temperature: Optional[float]=None,
41
+ topP: Optional[int]=None) -> None:
42
+ locals_ = locals()
43
+ for key, value in locals_.items():
44
+ if key != 'self' and value is not None:
45
+ setattr(self.__class__, key, value)
46
+
47
+ @classmethod
48
+ def get_config(cls):
49
+ return {k: v for k, v in cls.__dict__.items()
50
+ if not k.startswith('__')
51
+ and not isinstance(v, (types.FunctionType, types.BuiltinFunctionType, classmethod, staticmethod))
52
+ and v is not None}
53
+
54
+ class AmazonAnthropicConfig():
55
+ """
56
+ Reference: https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=claude
57
+
58
+ Supported Params for the Amazon / Anthropic models:
59
+
60
+ - `max_tokens_to_sample` (integer) max tokens,
61
+ - `temperature` (float) model temperature,
62
+ - `top_k` (integer) top k,
63
+ - `top_p` (integer) top p,
64
+ - `stop_sequences` (string[]) list of stop sequences - e.g. ["\\n\\nHuman:"],
65
+ - `anthropic_version` (string) version of anthropic for bedrock - e.g. "bedrock-2023-05-31"
66
+ """
67
+ max_tokens_to_sample: Optional[int]=litellm.max_tokens
68
+ stop_sequences: Optional[list]=None
69
+ temperature: Optional[float]=None
70
+ top_k: Optional[int]=None
71
+ top_p: Optional[int]=None
72
+ anthropic_version: Optional[str]=None
73
+
74
+ def __init__(self,
75
+ max_tokens_to_sample: Optional[int]=None,
76
+ stop_sequences: Optional[list]=None,
77
+ temperature: Optional[float]=None,
78
+ top_k: Optional[int]=None,
79
+ top_p: Optional[int]=None,
80
+ anthropic_version: Optional[str]=None) -> None:
81
+ locals_ = locals()
82
+ for key, value in locals_.items():
83
+ if key != 'self' and value is not None:
84
+ setattr(self.__class__, key, value)
85
+
86
+ @classmethod
87
+ def get_config(cls):
88
+ return {k: v for k, v in cls.__dict__.items()
89
+ if not k.startswith('__')
90
+ and not isinstance(v, (types.FunctionType, types.BuiltinFunctionType, classmethod, staticmethod))
91
+ and v is not None}
92
+
93
+ class AmazonCohereConfig():
94
+ """
95
+ Reference: https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=command
96
+
97
+ Supported Params for the Amazon / Cohere models:
98
+
99
+ - `max_tokens` (integer) max tokens,
100
+ - `temperature` (float) model temperature,
101
+ - `return_likelihood` (string) n/a
102
+ """
103
+ max_tokens: Optional[int]=None
104
+ temperature: Optional[float]=None
105
+ return_likelihood: Optional[str]=None
106
+
107
+ def __init__(self,
108
+ max_tokens: Optional[int]=None,
109
+ temperature: Optional[float]=None,
110
+ return_likelihood: Optional[str]=None) -> None:
111
+ locals_ = locals()
112
+ for key, value in locals_.items():
113
+ if key != 'self' and value is not None:
114
+ setattr(self.__class__, key, value)
115
+
116
+ @classmethod
117
+ def get_config(cls):
118
+ return {k: v for k, v in cls.__dict__.items()
119
+ if not k.startswith('__')
120
+ and not isinstance(v, (types.FunctionType, types.BuiltinFunctionType, classmethod, staticmethod))
121
+ and v is not None}
122
+
123
+ class AmazonAI21Config():
124
+ """
125
+ Reference: https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=j2-ultra
126
+
127
+ Supported Params for the Amazon / AI21 models:
128
+
129
+ - `maxTokens` (int32): The maximum number of tokens to generate per result. Optional, default is 16. If no `stopSequences` are given, generation stops after producing `maxTokens`.
130
+
131
+ - `temperature` (float): Modifies the distribution from which tokens are sampled. Optional, default is 0.7. A value of 0 essentially disables sampling and results in greedy decoding.
132
+
133
+ - `topP` (float): Used for sampling tokens from the corresponding top percentile of probability mass. Optional, default is 1. For instance, a value of 0.9 considers only tokens comprising the top 90% probability mass.
134
+
135
+ - `stopSequences` (array of strings): Stops decoding if any of the input strings is generated. Optional.
136
+
137
+ - `frequencyPenalty` (object): Placeholder for frequency penalty object.
138
+
139
+ - `presencePenalty` (object): Placeholder for presence penalty object.
140
+
141
+ - `countPenalty` (object): Placeholder for count penalty object.
142
+ """
143
+ maxTokens: Optional[int]=None
144
+ temperature: Optional[float]=None
145
+ topP: Optional[float]=None
146
+ stopSequences: Optional[list]=None
147
+ frequencePenalty: Optional[dict]=None
148
+ presencePenalty: Optional[dict]=None
149
+ countPenalty: Optional[dict]=None
150
+
151
+ def __init__(self,
152
+ maxTokens: Optional[int]=None,
153
+ temperature: Optional[float]=None,
154
+ topP: Optional[float]=None,
155
+ stopSequences: Optional[list]=None,
156
+ frequencePenalty: Optional[dict]=None,
157
+ presencePenalty: Optional[dict]=None,
158
+ countPenalty: Optional[dict]=None) -> None:
159
+ locals_ = locals()
160
+ for key, value in locals_.items():
161
+ if key != 'self' and value is not None:
162
+ setattr(self.__class__, key, value)
163
+
164
+ @classmethod
165
+ def get_config(cls):
166
+ return {k: v for k, v in cls.__dict__.items()
167
+ if not k.startswith('__')
168
+ and not isinstance(v, (types.FunctionType, types.BuiltinFunctionType, classmethod, staticmethod))
169
+ and v is not None}
170
+
171
+ class AnthropicConstants(Enum):
172
+ HUMAN_PROMPT = "\n\nHuman: "
173
+ AI_PROMPT = "\n\nAssistant: "
174
+
175
+ class AmazonLlamaConfig():
176
+ """
177
+ Reference: https://us-west-2.console.aws.amazon.com/bedrock/home?region=us-west-2#/providers?model=meta.llama2-13b-chat-v1
178
+
179
+ Supported Params for the Amazon / Meta Llama models:
180
+
181
+ - `max_gen_len` (integer) max tokens,
182
+ - `temperature` (float) temperature for model,
183
+ - `top_p` (float) top p for model
184
+ """
185
+ max_gen_len: Optional[int]=None
186
+ temperature: Optional[float]=None
187
+ topP: Optional[float]=None
188
+
189
+ def __init__(self,
190
+ maxTokenCount: Optional[int]=None,
191
+ temperature: Optional[float]=None,
192
+ topP: Optional[int]=None) -> None:
193
+ locals_ = locals()
194
+ for key, value in locals_.items():
195
+ if key != 'self' and value is not None:
196
+ setattr(self.__class__, key, value)
197
+
198
+ @classmethod
199
+ def get_config(cls):
200
+ return {k: v for k, v in cls.__dict__.items()
201
+ if not k.startswith('__')
202
+ and not isinstance(v, (types.FunctionType, types.BuiltinFunctionType, classmethod, staticmethod))
203
+ and v is not None}
204
+
205
+
206
+ def init_bedrock_client(
207
+ region_name = None,
208
+ aws_access_key_id = None,
209
+ aws_secret_access_key = None,
210
+ aws_region_name=None,
211
+ aws_bedrock_runtime_endpoint=None,
212
+ ):
213
+
214
+ # check for custom AWS_REGION_NAME and use it if not passed to init_bedrock_client
215
+ litellm_aws_region_name = get_secret("AWS_REGION_NAME")
216
+ standard_aws_region_name = get_secret("AWS_REGION")
217
+ if region_name:
218
+ pass
219
+ elif aws_region_name:
220
+ region_name = aws_region_name
221
+ elif litellm_aws_region_name:
222
+ region_name = litellm_aws_region_name
223
+ elif standard_aws_region_name:
224
+ region_name = standard_aws_region_name
225
+ else:
226
+ raise BedrockError(message="AWS region not set: set AWS_REGION_NAME or AWS_REGION env variable or in .env file", status_code=401)
227
+
228
+ # check for custom AWS_BEDROCK_RUNTIME_ENDPOINT and use it if not passed to init_bedrock_client
229
+ env_aws_bedrock_runtime_endpoint = get_secret("AWS_BEDROCK_RUNTIME_ENDPOINT")
230
+ if aws_bedrock_runtime_endpoint:
231
+ endpoint_url = aws_bedrock_runtime_endpoint
232
+ elif env_aws_bedrock_runtime_endpoint:
233
+ endpoint_url = env_aws_bedrock_runtime_endpoint
234
+ else:
235
+ endpoint_url = f'https://bedrock-runtime.{region_name}.amazonaws.com'
236
+
237
+ import boto3
238
+ if aws_access_key_id != None:
239
+ # uses auth params passed to completion
240
+ # aws_access_key_id is not None, assume user is trying to auth using litellm.completion
241
+
242
+ client = boto3.client(
243
+ service_name="bedrock-runtime",
244
+ aws_access_key_id=aws_access_key_id,
245
+ aws_secret_access_key=aws_secret_access_key,
246
+ region_name=region_name,
247
+ endpoint_url=endpoint_url,
248
+ )
249
+ else:
250
+ # aws_access_key_id is None, assume user is trying to auth using env variables
251
+ # boto3 automatically reads env variables
252
+
253
+ client = boto3.client(
254
+ service_name="bedrock-runtime",
255
+ region_name=region_name,
256
+ endpoint_url=endpoint_url,
257
+ )
258
+
259
+ return client
260
+
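A hedged sketch of how init_bedrock_client is typically driven (assumes boto3 is installed and the placeholder credentials below are replaced with real values). Region resolution follows the order in the code: explicit region_name, then aws_region_name, then the AWS_REGION_NAME env var, then AWS_REGION.

    import os

    # let boto3 pick up credentials from the environment / shared config
    os.environ["AWS_REGION_NAME"] = "us-west-2"
    client = init_bedrock_client()

    # or pass credentials and a custom runtime endpoint explicitly
    client = init_bedrock_client(
        aws_access_key_id="AKIA...",        # placeholder
        aws_secret_access_key="...",        # placeholder
        aws_bedrock_runtime_endpoint="https://bedrock-runtime.us-west-2.amazonaws.com",
    )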
261
+
262
+ def convert_messages_to_prompt(model, messages, provider, custom_prompt_dict):
263
+ # handle anthropic prompts using anthropic constants
264
+ if provider == "anthropic":
265
+ if model in custom_prompt_dict:
266
+ # check if the model has a registered custom prompt
267
+ model_prompt_details = custom_prompt_dict[model]
268
+ prompt = custom_prompt(
269
+ role_dict=model_prompt_details["roles"],
270
+ initial_prompt_value=model_prompt_details["initial_prompt_value"],
271
+ final_prompt_value=model_prompt_details["final_prompt_value"],
272
+ messages=messages
273
+ )
274
+ else:
275
+ prompt = prompt_factory(model=model, messages=messages, custom_llm_provider="anthropic")
276
+ else:
277
+ prompt = ""
278
+ for message in messages:
279
+ if "role" in message:
280
+ if message["role"] == "user":
281
+ prompt += (
282
+ f"{message['content']}"
283
+ )
284
+ else:
285
+ prompt += (
286
+ f"{message['content']}"
287
+ )
288
+ else:
289
+ prompt += f"{message['content']}"
290
+ return prompt
291
+
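An illustrative call with hypothetical messages (the model id only matters for its provider prefix): Anthropic models go through prompt_factory, while every other provider simply gets the message contents concatenated in order, with no separators added.

    messages = [
        {"role": "user", "content": "Hi. "},
        {"role": "assistant", "content": "Hello! "},
        {"role": "user", "content": "Tell me a joke."},
    ]
    prompt = convert_messages_to_prompt(
        "amazon.titan-text-express-v1", messages, "amazon", custom_prompt_dict={}
    )
    # prompt == "Hi. Hello! Tell me a joke."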
292
+
293
+ """
294
+ BEDROCK AUTH Keys/Vars
295
+ os.environ['AWS_ACCESS_KEY_ID'] = ""
296
+ os.environ['AWS_SECRET_ACCESS_KEY'] = ""
297
+ """
298
+
299
+
300
+ # set os.environ['AWS_REGION_NAME'] = <your-region_name>
301
+
302
+ def completion(
303
+ model: str,
304
+ messages: list,
305
+ custom_prompt_dict: dict,
306
+ model_response: ModelResponse,
307
+ print_verbose: Callable,
308
+ encoding,
309
+ logging_obj,
310
+ optional_params=None,
311
+ litellm_params=None,
312
+ logger_fn=None,
313
+ ):
314
+ exception_mapping_worked = False
315
+ try:
316
+ # pop aws_secret_access_key, aws_access_key_id, aws_region_name from kwargs, since completion calls fail with them
317
+ aws_secret_access_key = optional_params.pop("aws_secret_access_key", None)
318
+ aws_access_key_id = optional_params.pop("aws_access_key_id", None)
319
+ aws_region_name = optional_params.pop("aws_region_name", None)
320
+
321
+ # use passed in BedrockRuntime.Client if provided, otherwise create a new one
322
+ client = optional_params.pop(
323
+ "aws_bedrock_client",
324
+ # only pass variables that are not None
325
+ init_bedrock_client(
326
+ aws_access_key_id=aws_access_key_id,
327
+ aws_secret_access_key=aws_secret_access_key,
328
+ aws_region_name=aws_region_name,
329
+ ),
330
+ )
331
+
332
+ model = model
333
+ provider = model.split(".")[0]
334
+ prompt = convert_messages_to_prompt(model, messages, provider, custom_prompt_dict)
335
+ inference_params = copy.deepcopy(optional_params)
336
+ stream = inference_params.pop("stream", False)
337
+ if provider == "anthropic":
338
+ ## LOAD CONFIG
339
+ config = litellm.AmazonAnthropicConfig.get_config()
340
+ for k, v in config.items():
341
+ if k not in inference_params: # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in
342
+ inference_params[k] = v
343
+ data = json.dumps({
344
+ "prompt": prompt,
345
+ **inference_params
346
+ })
347
+ elif provider == "ai21":
348
+ ## LOAD CONFIG
349
+ config = litellm.AmazonAI21Config.get_config()
350
+ for k, v in config.items():
351
+ if k not in inference_params: # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in
352
+ inference_params[k] = v
353
+
354
+ data = json.dumps({
355
+ "prompt": prompt,
356
+ **inference_params
357
+ })
358
+ elif provider == "cohere":
359
+ ## LOAD CONFIG
360
+ config = litellm.AmazonCohereConfig.get_config()
361
+ for k, v in config.items():
362
+ if k not in inference_params: # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in
363
+ inference_params[k] = v
364
+ if optional_params.get("stream", False) == True:
365
+ inference_params["stream"] = True # cohere requires stream = True in inference params
366
+ data = json.dumps({
367
+ "prompt": prompt,
368
+ **inference_params
369
+ })
370
+ elif provider == "meta":
371
+ ## LOAD CONFIG
372
+ config = litellm.AmazonLlamaConfig.get_config()
373
+ for k, v in config.items():
374
+ if k not in inference_params: # completion(top_k=3) > anthropic_config(top_k=3) <- allows for dynamic variables to be passed in
375
+ inference_params[k] = v
376
+ data = json.dumps({
377
+ "prompt": prompt,
378
+ **inference_params
379
+ })
380
+ elif provider == "amazon": # amazon titan
381
+ ## LOAD CONFIG
382
+ config = litellm.AmazonTitanConfig.get_config()
383
+ for k, v in config.items():
384
+ if k not in inference_params: # completion(top_k=3) > amazon_config(top_k=3) <- allows for dynamic variables to be passed in
385
+ inference_params[k] = v
386
+
387
+ data = json.dumps({
388
+ "inputText": prompt,
389
+ "textGenerationConfig": inference_params,
390
+ })
391
+
392
+ ## COMPLETION CALL
393
+ accept = 'application/json'
394
+ contentType = 'application/json'
395
+ if stream == True:
396
+ if provider == "ai21":
397
+ ## LOGGING
398
+ request_str = f"""
399
+ response = client.invoke_model(
400
+ body={data},
401
+ modelId={model},
402
+ accept=accept,
403
+ contentType=contentType
404
+ )
405
+ """
406
+ logging_obj.pre_call(
407
+ input=prompt,
408
+ api_key="",
409
+ additional_args={"complete_input_dict": data, "request_str": request_str},
410
+ )
411
+
412
+ response = client.invoke_model(
413
+ body=data,
414
+ modelId=model,
415
+ accept=accept,
416
+ contentType=contentType
417
+ )
418
+
419
+ response = response.get('body').read()
420
+ return response
421
+ else:
422
+ ## LOGGING
423
+ request_str = f"""
424
+ response = client.invoke_model_with_response_stream(
425
+ body={data},
426
+ modelId={model},
427
+ accept=accept,
428
+ contentType=contentType
429
+ )
430
+ """
431
+ logging_obj.pre_call(
432
+ input=prompt,
433
+ api_key="",
434
+ additional_args={"complete_input_dict": data, "request_str": request_str},
435
+ )
436
+
437
+ response = client.invoke_model_with_response_stream(
438
+ body=data,
439
+ modelId=model,
440
+ accept=accept,
441
+ contentType=contentType
442
+ )
443
+ response = response.get('body')
444
+ return response
445
+ try:
446
+ ## LOGGING
447
+ request_str = f"""
448
+ response = client.invoke_model(
449
+ body={data},
450
+ modelId={model},
451
+ accept=accept,
452
+ contentType=contentType
453
+ )
454
+ """
455
+ logging_obj.pre_call(
456
+ input=prompt,
457
+ api_key="",
458
+ additional_args={"complete_input_dict": data, "request_str": request_str},
459
+ )
460
+ response = client.invoke_model(
461
+ body=data,
462
+ modelId=model,
463
+ accept=accept,
464
+ contentType=contentType
465
+ )
466
+ except Exception as e:
467
+ raise BedrockError(status_code=500, message=str(e))
468
+
469
+ response_body = json.loads(response.get('body').read())
470
+
471
+ ## LOGGING
472
+ logging_obj.post_call(
473
+ input=prompt,
474
+ api_key="",
475
+ original_response=response_body,
476
+ additional_args={"complete_input_dict": data},
477
+ )
478
+ print_verbose(f"raw model_response: {response}")
479
+ ## RESPONSE OBJECT
480
+ outputText = "default"
481
+ if provider == "ai21":
482
+ outputText = response_body.get('completions')[0].get('data').get('text')
483
+ elif provider == "anthropic":
484
+ outputText = response_body['completion']
485
+ model_response["finish_reason"] = response_body["stop_reason"]
486
+ elif provider == "cohere":
487
+ outputText = response_body["generations"][0]["text"]
488
+ elif provider == "meta":
489
+ outputText = response_body["generation"]
490
+ else: # amazon titan
491
+ outputText = response_body.get('results')[0].get('outputText')
492
+
493
+ response_metadata = response.get("ResponseMetadata", {})
494
+ if response_metadata.get("HTTPStatusCode", 500) >= 400:
495
+ raise BedrockError(
496
+ message=outputText,
497
+ status_code=response_metadata.get("HTTPStatusCode", 500),
498
+ )
499
+ else:
500
+ try:
501
+ if len(outputText) > 0:
502
+ model_response["choices"][0]["message"]["content"] = outputText
503
+ except:
504
+ raise BedrockError(message=json.dumps(outputText), status_code=response_metadata.get("HTTPStatusCode", 500))
505
+
506
+ ## CALCULATING USAGE - Bedrock does not return token counts here, so count tokens locally with the provided encoding
507
+ prompt_tokens = len(
508
+ encoding.encode(prompt)
509
+ )
510
+ completion_tokens = len(
511
+ encoding.encode(model_response["choices"][0]["message"].get("content", ""))
512
+ )
513
+
514
+ model_response["created"] = int(time.time())
515
+ model_response["model"] = model
516
+ usage = Usage(
517
+ prompt_tokens=prompt_tokens,
518
+ completion_tokens=completion_tokens,
519
+ total_tokens = prompt_tokens + completion_tokens
520
+ )
521
+ model_response.usage = usage
522
+ return model_response
523
+ except BedrockError as e:
524
+ exception_mapping_worked = True
525
+ raise e
526
+ except Exception as e:
527
+ if exception_mapping_worked:
528
+ raise e
529
+ else:
530
+ import traceback
531
+ raise BedrockError(status_code=500, message=traceback.format_exc())
532
+
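A hedged end-to-end sketch, assuming the litellm router maps "bedrock/<provider>.<model>" strings to this completion() and that valid AWS credentials are available in the environment; the provider prefix in the model id ("anthropic" here) selects the request-body format built above.

    import os
    import litellm

    os.environ["AWS_ACCESS_KEY_ID"] = "..."      # placeholder
    os.environ["AWS_SECRET_ACCESS_KEY"] = "..."  # placeholder
    os.environ["AWS_REGION_NAME"] = "us-west-2"

    response = litellm.completion(
        model="bedrock/anthropic.claude-instant-v1",
        messages=[{"role": "user", "content": "Hello from Bedrock"}],
    )
    print(response["choices"][0]["message"]["content"])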
533
+ def _embedding_func_single(
534
+ model: str,
535
+ input: str,
536
+ optional_params=None,
537
+ encoding=None,
538
+ ):
539
+ # logic for parsing in - calling - parsing out model embedding calls
540
+ # pop aws_secret_access_key, aws_access_key_id, aws_region_name from kwargs, since completion calls fail with them
541
+ aws_secret_access_key = optional_params.pop("aws_secret_access_key", None)
542
+ aws_access_key_id = optional_params.pop("aws_access_key_id", None)
543
+ aws_region_name = optional_params.pop("aws_region_name", None)
544
+
545
+ # use passed in BedrockRuntime.Client if provided, otherwise create a new one
546
+ client = optional_params.pop(
547
+ "aws_bedrock_client",
548
+ # only pass variables that are not None
549
+ init_bedrock_client(
550
+ aws_access_key_id=aws_access_key_id,
551
+ aws_secret_access_key=aws_secret_access_key,
552
+ aws_region_name=aws_region_name,
553
+ ),
554
+ )
555
+
556
+ input = input.replace(os.linesep, " ")
557
+ body = json.dumps({"inputText": input})
558
+ try:
559
+ response = client.invoke_model(
560
+ body=body,
561
+ modelId=model,
562
+ accept="application/json",
563
+ contentType="application/json",
564
+ )
565
+ response_body = json.loads(response.get("body").read())
566
+ return response_body.get("embedding")
567
+ except Exception as e:
568
+ raise BedrockError(message=f"Embedding Error with model {model}: {e}", status_code=500)
569
+
570
+ def embedding(
571
+ model: str,
572
+ input: list,
573
+ api_key: Optional[str] = None,
574
+ logging_obj=None,
575
+ model_response=None,
576
+ optional_params=None,
577
+ encoding=None,
578
+ ):
579
+
580
+ ## LOGGING
581
+ logging_obj.pre_call(
582
+ input=input,
583
+ api_key=api_key,
584
+ additional_args={"complete_input_dict": {"model": model,
585
+ "texts": input}},
586
+ )
587
+
588
+ ## Embedding Call
589
+ embeddings = [_embedding_func_single(model, i, optional_params) for i in input]
590
+
591
+
592
+ ## Populate OpenAI compliant dictionary
593
+ embedding_response = []
594
+ for idx, embedding in enumerate(embeddings):
595
+ embedding_response.append(
596
+ {
597
+ "object": "embedding",
598
+ "index": idx,
599
+ "embedding": embedding,
600
+ }
601
+ )
602
+ model_response["object"] = "list"
603
+ model_response["data"] = embedding_response
604
+ model_response["model"] = model
605
+ input_tokens = 0
606
+
607
+ input_str = "".join(input)
608
+
609
+ input_tokens+=len(encoding.encode(input_str))
610
+
611
+ usage = Usage(
612
+ prompt_tokens=input_tokens,
613
+ completion_tokens=0,
614
+ total_tokens=input_tokens + 0
615
+ )
616
+ model_response.usage = usage
617
+
618
+ ## LOGGING
619
+ logging_obj.post_call(
620
+ input=input,
621
+ api_key=api_key,
622
+ additional_args={"complete_input_dict": {"model": model,
623
+ "texts": input}},
624
+ original_response=embeddings,
625
+ )
626
+
627
+ return model_response
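A short usage sketch for the embedding path, assuming the router exposes it via litellm.embedding and a Titan embeddings model is enabled on the AWS account: each string in input triggers one invoke_model call through _embedding_func_single, and the results are repackaged in the OpenAI embedding-response shape.

    import litellm

    response = litellm.embedding(
        model="bedrock/amazon.titan-embed-text-v1",
        input=["good morning from litellm", "a second string to embed"],
    )
    print(len(response["data"]))  # 2 embedding objects
    print(response.usage)         # prompt tokens counted locally, completion tokens are 0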
litellm/llms/cohere.py ADDED
@@ -0,0 +1,273 @@
1
+ import os, types
2
+ import json
3
+ from enum import Enum
4
+ import requests
5
+ import time, traceback
6
+ from typing import Callable, Optional
7
+ from litellm.utils import ModelResponse, Choices, Message, Usage
8
+ import litellm
9
+ import httpx
10
+
11
+ class CohereError(Exception):
12
+ def __init__(self, status_code, message):
13
+ self.status_code = status_code
14
+ self.message = message
15
+ self.request = httpx.Request(method="POST", url="https://api.cohere.ai/v1/generate")
16
+ self.response = httpx.Response(status_code=status_code, request=self.request)
17
+ super().__init__(
18
+ self.message
19
+ ) # Call the base class constructor with the parameters it needs
20
+
21
+ class CohereConfig():
22
+ """
23
+ Reference: https://docs.cohere.com/reference/generate
24
+
25
+ The class `CohereConfig` provides configuration for the Cohere's API interface. Below are the parameters:
26
+
27
+ - `num_generations` (integer): Maximum number of generations returned. Default is 1, with a minimum value of 1 and a maximum value of 5.
28
+
29
+ - `max_tokens` (integer): Maximum number of tokens the model will generate as part of the response. Default value is 20.
30
+
31
+ - `truncate` (string): Specifies how the API handles inputs longer than maximum token length. Options include NONE, START, END. Default is END.
32
+
33
+ - `temperature` (number): A non-negative float controlling the randomness in generation. Lower temperatures result in less random generations. Default is 0.75.
34
+
35
+ - `preset` (string): Identifier of a custom preset, a combination of parameters such as prompt, temperature etc.
36
+
37
+ - `end_sequences` (array of strings): The generated text gets cut at the beginning of the earliest occurrence of an end sequence, which will be excluded from the text.
38
+
39
+ - `stop_sequences` (array of strings): The generated text gets cut at the end of the earliest occurrence of a stop sequence, which will be included in the text.
40
+
41
+ - `k` (integer): Limits generation at each step to top `k` most likely tokens. Default is 0.
42
+
43
+ - `p` (number): Limits generation at each step to most likely tokens with total probability mass of `p`. Default is 0.
44
+
45
+ - `frequency_penalty` (number): Reduces repetitiveness of generated tokens. Higher values apply stronger penalties to previously occurred tokens.
46
+
47
+ - `presence_penalty` (number): Reduces repetitiveness of generated tokens. Similar to frequency_penalty, but this penalty applies equally to all tokens that have already appeared.
48
+
49
+ - `return_likelihoods` (string): Specifies how and if token likelihoods are returned with the response. Options include GENERATION, ALL and NONE.
50
+
51
+ - `logit_bias` (object): Used to prevent the model from generating unwanted tokens or to incentivize it to include desired tokens. e.g. {"hello_world": 1233}
52
+ """
53
+ num_generations: Optional[int]=None
54
+ max_tokens: Optional[int]=None
55
+ truncate: Optional[str]=None
56
+ temperature: Optional[float]=None
57
+ preset: Optional[str]=None
58
+ end_sequences: Optional[list]=None
59
+ stop_sequences: Optional[list]=None
60
+ k: Optional[int]=None
61
+ p: Optional[float]=None
62
+ frequency_penalty: Optional[float]=None
63
+ presence_penalty: Optional[float]=None
64
+ return_likelihoods: Optional[str]=None
65
+ logit_bias: Optional[dict]=None
66
+
67
+ def __init__(self,
68
+ num_generations: Optional[int]=None,
69
+ max_tokens: Optional[int]=None,
70
+ truncate: Optional[str]=None,
71
+ temperature: Optional[float]=None,
72
+ preset: Optional[str]=None,
73
+ end_sequences: Optional[list]=None,
74
+ stop_sequences: Optional[list]=None,
75
+ k: Optional[int]=None,
76
+ p: Optional[float]=None,
77
+ frequency_penalty: Optional[float]=None,
78
+ presence_penalty: Optional[float]=None,
79
+ return_likelihoods: Optional[str]=None,
80
+ logit_bias: Optional[dict]=None) -> None:
81
+
82
+ locals_ = locals()
83
+ for key, value in locals_.items():
84
+ if key != 'self' and value is not None:
85
+ setattr(self.__class__, key, value)
86
+
87
+ @classmethod
88
+ def get_config(cls):
89
+ return {k: v for k, v in cls.__dict__.items()
90
+ if not k.startswith('__')
91
+ and not isinstance(v, (types.FunctionType, types.BuiltinFunctionType, classmethod, staticmethod))
92
+ and v is not None}
93
+
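A minimal sketch of driving CohereConfig through the public API, assuming litellm registers the class as litellm.CohereConfig and COHERE_API_KEY is set in the environment: defaults stored on the class are merged into every Cohere call, and per-call kwargs keep precedence.

    import litellm

    litellm.CohereConfig(max_tokens=200, truncate="END")

    response = litellm.completion(
        model="command-nightly",
        messages=[{"role": "user", "content": "Summarise LiteLLM in one line."}],
        temperature=0.3,  # per-call value, not taken from the config defaults
    )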
94
+ def validate_environment(api_key):
95
+ headers = {
96
+ "accept": "application/json",
97
+ "content-type": "application/json",
98
+ }
99
+ if api_key:
100
+ headers["Authorization"] = f"Bearer {api_key}"
101
+ return headers
102
+
103
+ def completion(
104
+ model: str,
105
+ messages: list,
106
+ api_base: str,
107
+ model_response: ModelResponse,
108
+ print_verbose: Callable,
109
+ encoding,
110
+ api_key,
111
+ logging_obj,
112
+ optional_params=None,
113
+ litellm_params=None,
114
+ logger_fn=None,
115
+ ):
116
+ headers = validate_environment(api_key)
117
+ completion_url = api_base
118
+ model = model
119
+ prompt = " ".join(message["content"] for message in messages)
120
+
121
+ ## Load Config
122
+ config=litellm.CohereConfig.get_config()
123
+ for k, v in config.items():
124
+ if k not in optional_params: # completion(top_k=3) > cohere_config(top_k=3) <- allows for dynamic variables to be passed in
125
+ optional_params[k] = v
126
+
127
+ data = {
128
+ "model": model,
129
+ "prompt": prompt,
130
+ **optional_params,
131
+ }
132
+
133
+ ## LOGGING
134
+ logging_obj.pre_call(
135
+ input=prompt,
136
+ api_key=api_key,
137
+ additional_args={"complete_input_dict": data, "headers": headers, "api_base": completion_url},
138
+ )
139
+ ## COMPLETION CALL
140
+ response = requests.post(
141
+ completion_url, headers=headers, data=json.dumps(data), stream=optional_params["stream"] if "stream" in optional_params else False
142
+ )
143
+ ## error handling for cohere calls
144
+ if response.status_code!=200:
145
+ raise CohereError(message=response.text, status_code=response.status_code)
146
+
147
+ if "stream" in optional_params and optional_params["stream"] == True:
148
+ return response.iter_lines()
149
+ else:
150
+ ## LOGGING
151
+ logging_obj.post_call(
152
+ input=prompt,
153
+ api_key=api_key,
154
+ original_response=response.text,
155
+ additional_args={"complete_input_dict": data},
156
+ )
157
+ print_verbose(f"raw model_response: {response.text}")
158
+ ## RESPONSE OBJECT
159
+ completion_response = response.json()
160
+ if "error" in completion_response:
161
+ raise CohereError(
162
+ message=completion_response["error"],
163
+ status_code=response.status_code,
164
+ )
165
+ else:
166
+ try:
167
+ choices_list = []
168
+ for idx, item in enumerate(completion_response["generations"]):
169
+ if len(item["text"]) > 0:
170
+ message_obj = Message(content=item["text"])
171
+ else:
172
+ message_obj = Message(content=None)
173
+ choice_obj = Choices(finish_reason=item["finish_reason"], index=idx, message=message_obj)
174
+ choices_list.append(choice_obj)
175
+ model_response["choices"] = choices_list
176
+ except Exception as e:
177
+ raise CohereError(message=response.text, status_code=response.status_code)
178
+
179
+ ## CALCULATING USAGE
180
+ prompt_tokens = len(
181
+ encoding.encode(prompt)
182
+ )
183
+ completion_tokens = len(
184
+ encoding.encode(model_response["choices"][0]["message"].get("content", ""))
185
+ )
186
+
187
+ model_response["created"] = int(time.time())
188
+ model_response["model"] = model
189
+ usage = Usage(
190
+ prompt_tokens=prompt_tokens,
191
+ completion_tokens=completion_tokens,
192
+ total_tokens=prompt_tokens + completion_tokens
193
+ )
194
+ model_response.usage = usage
195
+ return model_response
196
+
197
+ def embedding(
198
+ model: str,
199
+ input: list,
200
+ api_key: Optional[str] = None,
201
+ logging_obj=None,
202
+ model_response=None,
203
+ encoding=None,
204
+ optional_params=None,
205
+ ):
206
+ headers = validate_environment(api_key)
207
+ embed_url = "https://api.cohere.ai/v1/embed"
208
+ model = model
209
+ data = {
210
+ "model": model,
211
+ "texts": input,
212
+ **optional_params
213
+ }
214
+
215
+ if "3" in model and "input_type" not in data:
216
+ # cohere v3 embedding models require input_type, if no input_type is provided, default to "search_document"
217
+ data["input_type"] = "search_document"
218
+
219
+ ## LOGGING
220
+ logging_obj.pre_call(
221
+ input=input,
222
+ api_key=api_key,
223
+ additional_args={"complete_input_dict": data},
224
+ )
225
+ ## COMPLETION CALL
226
+ response = requests.post(
227
+ embed_url, headers=headers, data=json.dumps(data)
228
+ )
229
+ ## LOGGING
230
+ logging_obj.post_call(
231
+ input=input,
232
+ api_key=api_key,
233
+ additional_args={"complete_input_dict": data},
234
+ original_response=response,
235
+ )
236
+ """
237
+ response
238
+ {
239
+ 'object': "list",
240
+ 'data': [
241
+
242
+ ]
243
+ 'model',
244
+ 'usage'
245
+ }
246
+ """
247
+ if response.status_code!=200:
248
+ raise CohereError(message=response.text, status_code=response.status_code)
249
+ embeddings = response.json()['embeddings']
250
+ output_data = []
251
+ for idx, embedding in enumerate(embeddings):
252
+ output_data.append(
253
+ {
254
+ "object": "embedding",
255
+ "index": idx,
256
+ "embedding": embedding
257
+ }
258
+ )
259
+ model_response["object"] = "list"
260
+ model_response["data"] = output_data
261
+ model_response["model"] = model
262
+ input_tokens = 0
263
+ for text in input:
264
+ input_tokens+=len(encoding.encode(text))
265
+
266
+ model_response["usage"] = {
267
+ "prompt_tokens": input_tokens,
268
+ "total_tokens": input_tokens,
269
+ }
270
+ return model_response
271
+
272
+
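An illustrative embedding call, assuming the router resolves this model name to the Cohere provider and COHERE_API_KEY is set: for v3 models, the code above injects input_type="search_document" unless the caller supplies one.

    import litellm

    response = litellm.embedding(
        model="embed-english-v3.0",
        input=["hello from litellm"],
        # input_type="search_query",  # optional override of the injected default
    )
    print(response["usage"]["prompt_tokens"])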
273
+
litellm/llms/huggingface_llms_metadata/hf_conversational_models.txt ADDED
@@ -0,0 +1,2523 @@
1
+ 0xDEADBEA7/DialoGPT-small-rick
2
+ 1Basco/DialoGPT-small-jake
3
+ 2early4coffee/DialoGPT-medium-deadpool
4
+ 2early4coffee/DialoGPT-small-deadpool
5
+ 2gud/DialogGPT-small-Koopsbot
6
+ ABBHISHEK/DialoGPT-small-harrypotter
7
+ AIDynamics/DialoGPT-medium-MentorDealerGuy
8
+ AJ/DialoGPT-small-ricksanchez
9
+ AJ/rick-discord-bot
10
+ AJ/rick-sanchez-bot
11
+ AJ-Dude/DialoGPT-small-harrypotter
12
+ AK270802/DialoGPT-small-harrypotter
13
+ ATGdev/DialoGPT-small-harrypotter
14
+ AVeryRealHuman/DialoGPT-small-TonyStark
15
+ AbhinavSaiTheGreat/DialoGPT-small-harrypotter
16
+ AccurateIsaiah/DialoGPT-small-jefftastic
17
+ AccurateIsaiah/DialoGPT-small-mozark
18
+ AccurateIsaiah/DialoGPT-small-mozarkv2
19
+ AccurateIsaiah/DialoGPT-small-sinclair
20
+ AdharshJolly/HarryPotterBot-Model
21
+ AdrianGzz/DialoGPT-small-harrypotter
22
+ Aero/Tsubomi-Haruno
23
+ AetherIT/DialoGPT-small-Hal
24
+ AiPorter/DialoGPT-small-Back_to_the_future
25
+ Aibox/DialoGPT-small-rick
26
+ Akjder/DialoGPT-small-harrypotter
27
+ AllwynJ/HarryBoy
28
+ AnthonyNelson/DialoGPT-small-ricksanchez
29
+ Apisate/DialoGPT-small-jordan
30
+ ArJakusz/DialoGPT-small-stark
31
+ Aran/DialoGPT-medium-harrypotter
32
+ Aran/DialoGPT-small-harrypotter
33
+ Arcktosh/DialoGPT-small-rick
34
+ AriakimTaiyo/DialoGPT-cultured-Kumiko
35
+ AriakimTaiyo/DialoGPT-medium-Kumiko
36
+ AriakimTaiyo/DialoGPT-revised-Kumiko
37
+ AriakimTaiyo/DialoGPT-small-Kumiko
38
+ AriakimTaiyo/DialoGPT-small-Rikka
39
+ ArtemisZealot/DialoGTP-small-Qkarin
40
+ Aruden/DialoGPT-medium-harrypotterall
41
+ Aspect11/DialoGPT-Medium-LiSBot
42
+ Asuramaru/DialoGPT-small-rintohsaka
43
+ Atchuth/DialoGPT-small-MichaelBot
44
+ Augustvember/WOKKAWOKKA
45
+ Augustvember/WokkaBot3
46
+ Augustvember/test
47
+ Augustvember/wokka2
48
+ Augustvember/wokka4
49
+ Augustvember/wokka5
50
+ Augustvember/wokkabottest2
51
+ AvatarXD/DialoGPT-medium-Blitzo
52
+ Awsaf/DialoGPT-medium-eren
53
+ Awsaf/large-eren
54
+ Axcel/DialoGPT-small-rick
55
+ Ayjayo/DialoGPT-medium-AyjayoAI
56
+ Ayran/DialoGPT-medium-harry-potter-1-through-3
57
+ Ayran/DialoGPT-medium-harry-potter-1-through-4-plus-6-e18
58
+ Ayran/DialoGPT-medium-harry-potter-1-through-4-plus-6
59
+ Ayran/DialoGPT-small-gandalf
60
+ Ayran/DialoGPT-small-harry-potter-1-through-3
61
+ Azuris/DialoGPT-medium-envy
62
+ Azuris/DialoGPT-medium-senorita
63
+ Azuris/DialoGPT-small-envy
64
+ BW/TEST
65
+ Backedman/DialoGPT-small-Anika
66
+ BalajiSathesh/DialoGPT-small-harrypotter
67
+ Batsy24/DialoGPT-medium-Twilight_BellaBot
68
+ Batsy24/DialoGPT-small-Twilight_EdBot
69
+ Bee-Garbs/DialoGPT-real-cartman-small
70
+ Biasface/DDDC
71
+ Biasface/DDDC2
72
+ BigTooth/DialoGPT-Megumin
73
+ BigTooth/DialoGPT-small-tohru
74
+ BigTooth/Megumin-v0.2
75
+ BigeS/DialoGPT-small-Rick
76
+ Bimal/my_bot_model
77
+ BinksSachary/DialoGPT-small-shaxx
78
+ BinksSachary/ShaxxBot
79
+ BinksSachary/ShaxxBot2
80
+ BlightZz/DialoGPT-medium-Kurisu
81
+ BlightZz/MakiseKurisu
82
+ BlueGamerBeast/DialoGPT-small-Morgana
83
+ BotterHax/DialoGPT-small-harrypotter
84
+ Broadus20/DialoGPT-small-joshua
85
+ BrunoNogueira/DialoGPT-kungfupanda
86
+ Brykee/DialoGPT-medium-Morty
87
+ Bubb-les/DisloGPT-medium-HarryPotter
88
+ Camzure/MaamiBot-test
89
+ Canadiancaleb/DialoGPT-small-jesse
90
+ Canadiancaleb/DialoGPT-small-walter
91
+ CasualHomie/DialoGPT-small-harrypotter
92
+ Chae/botman
93
+ Chakita/Friends
94
+ Chalponkey/DialoGPT-small-Barry
95
+ ChaseBread/DialoGPT-small-harrypotter
96
+ Chiuchiyin/DialoGPT-small-Donald
97
+ ChrisVCB/DialoGPT-medium-cmjs
98
+ ChrisVCB/DialoGPT-medium-ej
99
+ Chuah/DialoGPT-small-harrypotter
100
+ ChukSamuels/DialoGPT-small-Dr.FauciBot
101
+ Ciruzzo/DialoGPT-small-harrypotter
102
+ ClaudeCOULOMBE/RickBot
103
+ Cloudy/DialoGPT-CJ-large
104
+ ClydeWasTaken/DialoGPT-small-joshua
105
+ CodeDanCode/CartmenBot
106
+ CodeDanCode/SP-KyleBot
107
+ CoderBoy432/DialoGPT-small-harrypotter
108
+ CoderEFE/DialoGPT-marxbot
109
+ Connor/DialoGPT-small-rick
110
+ Connorvr/BrightBot-small
111
+ CopymySkill/DialoGPT-medium-atakan
112
+ Corvus/DialoGPT-medium-CaptainPrice-Extended
113
+ Corvus/DialoGPT-medium-CaptainPrice
114
+ Coyotl/DialoGPT-test-last-arthurmorgan
115
+ Coyotl/DialoGPT-test2-arthurmorgan
116
+ Coyotl/DialoGPT-test3-arthurmorgan
117
+ CracklesCreeper/Piglin-Talks-Harry-Potter
118
+ Cryptikdw/DialoGPT-small-rick
119
+ Cthyllax/DialoGPT-medium-PaladinDanse
120
+ CurtisBowser/DialoGPT-medium-sora-two
121
+ CurtisBowser/DialoGPT-medium-sora
122
+ CurtisBowser/DialoGPT-small-sora
123
+ CyberMuffin/DialoGPT-small-ChandlerBot
124
+ DARKVIP3R/DialoGPT-medium-Anakin
125
+ Daivakai/DialoGPT-small-saitama
126
+ Dawit/DialogGPT-small-ironman
127
+ Daymarebait/Discord_BOT_RICK
128
+ DecafNosebleed/DialoGPT-small-ScaraBot
129
+ Denny29/DialoGPT-medium-asunayuuki
130
+ Devid/DialoGPT-small-Miku
131
+ Dilmk2/DialoGPT-small-harrypotter
132
+ Dimedrolza/DialoGPT-small-cyberpunk
133
+ DingleyMaillotUrgell/homer-bot
134
+ Doiman/DialoGPT-medium-harrypotter
135
+ DongHai/DialoGPT-small-rick
136
+ Doquey/DialoGPT-small-Luisbot1
137
+ Doquey/DialoGPT-small-Michaelbot
138
+ Doxophobia/DialoGPT-medium-celeste
139
+ Dragoniod1596/DialoGPT-small-Legacies
140
+ Dreyzin/DialoGPT-medium-avatar
141
+ DueLinx0402/DialoGPT-small-harrypotter
142
+ Duugu/jakebot3000
143
+ Dyzi/DialoGPT-small-landcheese
144
+ EEE/DialoGPT-medium-brooke
145
+ EEE/DialoGPT-small-aang
146
+ EEE/DialoGPT-small-yoda
147
+ ESPersonnel/DialoGPT-small-got
148
+ Eagle3ye/DialoGPT-small-PeppaPig
149
+ Elzen7/DialoGPT-medium-harrypotter
150
+ Emi2160/DialoGPT-small-Neku
151
+ EmileAjar/DialoGPT-small-harrypotter
152
+ EmileAjar/DialoGPT-small-peppapig
153
+ Erikaka/DialoGPT-small-loki
154
+ EstoyDePaso/DialoGPT-small-harrypotter
155
+ EuropeanTurtle/DialoGPT-small-mrcobb
156
+ ExEngineer/DialoGPT-medium-jdt
157
+ Exilon/DialoGPT-large-quirk
158
+ EzioDD/house
159
+ FFF000/dialogpt-FFF
160
+ FangLee/DialoGPT-small-Kirito
161
+ Filosofas/DialoGPT-medium-PALPATINE
162
+ Flampt/DialoGPT-medium-Sheldon
163
+ For/sheldonbot
164
+ FosterPatch/GoT-test
165
+ Fu10k/DialoGPT-medium-Rick
166
+ GabbyDaBUNBUN/DialoGPT-medium-PinkiePie
167
+ Galaxy/DialoGPT-small-hermoine
168
+ GamerMan02/DialoGPT-medium-gamerbot
169
+ Gappy/DialoGPT-small-Zhongli
170
+ Geezy/DialoGPT-small-guy
171
+ GenDelport/DialoGPT-small-harrypotter
172
+ Gowtham25/DialoGPT-small-jackie
173
+ Gregor-Davies/DialoGPT-small-rick
174
+ Greysan/DialoGPT-medium-TOH
175
+ Guard-SK/DialoGPT-medium-ricksanchez
176
+ Guard-SK/DialoGPT-small-ricksanchez
177
+ GunjanPantha/DialoGPT-small-gameofthrones
178
+ Guy0/DialoGPT-small-Batmanbotty
179
+ HAttORi/DialoGPT-Medium-zerotwo
180
+ HackyHackyMan/DialoGPT-small-harrypotter
181
+ Hadron/DialoGPT-medium-nino
182
+ Hallzy/Peterbot
183
+ Hamas/DialoGPT-large-jake
184
+ Hamas/DialoGPT-large-jake2
185
+ Hamas/DialoGPT-large-jake3
186
+ Hamas/DialoGPT-large-jake4
187
+ Hamhams/DialoGPT-small-rick
188
+ HansAnonymous/DialoGPT-medium-rick
189
+ HansAnonymous/DialoGPT-small-shrek
190
+ HarryPuttar/HarryPotterDC
191
+ Harshal6927/Jack_Sparrow_GPT
192
+ Harshal6927/Tony_Stark_GPT
193
+ Havokx/DialoGPT-small-Rick
194
+ Heldhy/DialoGPT-small-tony
195
+ Heldhy/testingAgain
196
+ MagnusChase7/DialoGPT-medium-harrypotter
197
+ Htenn/DialoGPT-small-spongebob
198
+ Htenn/DialoGPT-small-spongebobv2
199
+ HueJanus/DialoGPT-small-ricksanchez
200
+ HypNyx/DialoGPT-small-DwightBot
201
+ HypNyx/DialoGPT-small-Thanos
202
+ HypedKid/PeterBot
203
+ ILoveThatLady/DialoGPT-small-rickandmorty
204
+ ITNODove/DialoGPT-medium-cyberbones
205
+ Icemiser/chat-test
206
+ Ilyabarigou/Genesis-harrybotter
207
+ ImAPizza/DialoGPT-medium-albert
208
+ ImAPizza/DialoGPT-medium-alberttwo
209
+ Invincible/Chat_bot-Harrypotter-medium
210
+ Invincible/Chat_bot-Harrypotter-small
211
+ Invincible/DialoGPT-medium-harryPotter
212
+ Istiaque190515/Sherlock
213
+ Istiaque190515/harry_bot_discord
214
+ Istiaque190515/harry_potter
215
+ ItoYagura/DialoGPT-medium-tohru
216
+ ItzJorinoPlays/DialoGPT-small-PickleRick
217
+ J-Chiang/DialoGPT-small-thor
218
+ JDS22/DialoGPT-medium-HarryPotterBot
219
+ Jedi33/tonystarkAI
220
+ Jeffrey/DialoGPT-small-Jeffrey
221
+ JimmyHodl/DialoGPT-medium
222
+ Jllama/dialoGPT-small-Joshua-test
223
+ Jonesy/DialoGPT-medium_Barney
224
+ Jonesy/FG_OLD
225
+ Jonesy/DialoGPT-small_JT
226
+ Julianqll/DialoGPT-small-finalmorty
227
+ Julianqll/DialoGPT-small-ricksanchez
228
+ KAIHATSU/DialoGPT-small-rick
229
+ KENNETHFOO/DialoGPT-medium-harrypotter
230
+ KOSTAS/DialoGPT-small-Cleverbot
231
+ KP2500/KPBot
232
+ Kai0857/DialoGPT-small-harrypotter
233
+ Kail91/DialoGPT-small-PeraltaBot
234
+ Kairu/DialoGPT-small-Rick
235
+ Kairu/RICKBOT
236
+ KakoSi/Smolmm3
237
+ KakoSi/opaazzi
238
+ Kaledmgo/DialoGPT-small-donajulia
239
+ Kargan/DialoGPT-small-randombot
240
+ KaydenSou/Joshua
241
+ Keen/DialoGPT-small-potter
242
+ KekLord/DialoGPT-small-rick3
243
+ Keqing/Keqing-Siesta
244
+ Keqipig/DialoGPT-small-spamton
245
+ KhanAdeeb/model-tony-stark
246
+ KingCodeSquid/Octavian
247
+ KingCodeSquid/Octavian2
248
+ Kirili4ik/ruDialoGpt3-medium-finetuned-telegram
249
+ KnutZuidema/DialoGPT-small-morty
250
+ Konggate/DialoGPT-small-harrypotter
251
+ Koriyy/DialoGPT-medium-gf
252
+ Koro/DialoGPT-medium-rickandmorty
253
+ Koro/DialoGPT-small-rickandmorty
254
+ KringleClaus/Dialog-santa
255
+ KrispyIChris/DialoGPT-small-harrypotter
256
+ Kryptone/Burobot
257
+ Kryptone/RinAI
258
+ Kryptone/monikAI-Unstable
259
+ Kryptone/monikAI
260
+ Kshaunish/DialoGPT-small-rick
261
+ Kush/DialoGPT-small-harrypotter
262
+ LARACHNIDE/DialogGPT-small-sw
263
+ LactoseLegend/DialoGPT-small-Rick
264
+ Laezor/DialoGPT-small-witcher1
265
+ Laezor/DialoGPT-small-yakuza_0
266
+ LaiJY/DialoGPTChatbot
267
+ Laptop/DialoGPT-small-gandalf
268
+ Lenza/DialoGPT-medium-Kobayashi
269
+ Leonel/DialoGPT-small-chandler
270
+ Leostronkest/DialoGPT-small-michael
271
+ Leostronkest/DialoGPT
272
+ Leviii03/Dialogpt-small-Jake99
273
+ Lizardon/Peterbot
274
+ Lovery/Aqua
275
+ Lucdi90/DialoGPT-medium-XiaoBot
276
+ LuckyWill/DialoGPT-small-JakeBot
277
+ Lurka/DialoGPT-medium-isseibot
278
+ Lurka/DialoGPT-medium-kon
279
+ Luxiere/DialoGPT-medium-tyrion
280
+ MAUtastic/DialoGPT-medium-RickandMortyBot
281
+ MCUxDaredevil/DialoGPT-small-rick
282
+ MS366/DialoGPT-small-vision
283
+ MadhanKumar/DialoGPT-small-HarryPotter
284
+ MadhanKumar/HarryPotter-Bot
285
+ MagmaCubes1133/DialoGPT-large-rick
286
+ Mandy/DialoGPT-small-Mikasa
287
+ Manthan/DialoGPT-small-harrypotter
288
+ Mara/DialoGPT-medium-harrypotter
289
+ MathiasVS/DialoGPT-small-RickAndMorty
290
+ MaxW0748/DialoGPT-small-Rick
291
+ MayankGupta/DialoGPT-small-harrypotter
292
+ MichaelTheLearner/DialoGPT-medium-harry
293
+ Midhunkrishna/DialoGPT-small-bjk
294
+ Mierln/SmartHarry
295
+ MightyCoderX/DialoGPT-medium-EdwardElric
296
+ ModzabazeR/small-okaberintaro
297
+ Mohsin272/DialoGPT-medium-harrypotter
298
+ Mona/DialoGPT-small-harrypotter
299
+ MoonlitEtherna/DialoGPT-small-Nyivae
300
+ MrDuckerino/DialoGPT-medium-Rick
301
+ MrE/DialoGPT-medium-SARGE
302
+ MrE/DialoGPT-medium-SARGER1
303
+ MrE/DialoGPT-medium-SARGER3
304
+ MrGentle/DeltaModel-genius1
305
+ MrZ/DialoGPT-small-Rick
306
+ Mythiie/DialoGPT-small-Modeus
307
+ N8Daawg/chat_bot
308
+ NASABOI/MachineLearningAI
309
+ nabarun/DialoGPT-small-joshua
310
+ NamPE/DialoGPT-medium-Aqua-konosuba
311
+ NamPE/DialoGPT-medium-Takanashi-Rikka
312
+ NamPE/DialoGPT-small-satouhina
313
+ NanniKirby/DialoGPT-medium-bapi
314
+ NanniKirby/bapismall
315
+ Naturealbe/DialoGPT-small-harrypotter-2
316
+ Naturealbe/DialoGPT-small-harrypotter
317
+ Navigator/DialoGPT-medium-martymcfly
318
+ Navya2608/DialoGPT-medium-chandler
319
+ Navya2608/DialoGPT-medium-rachel
320
+ Navya2608/DialoGPT-small-tonystarkscript
321
+ Necrozma/harrypotterbot
322
+ Nekoism/Zhongli-Beta
323
+ NibrasShami/DialopGPT-small-HarryPotter
324
+ NickCavarretta/DialoGPT-small-laffy
325
+ Nihwy/DialoSqui
326
+ NikhilKrishna/DialoGPT-medium-harrypotter
327
+ Ninja5000/DialoGPT-medium-HarryPotter
328
+ Ninja5000/DialoGPT-medium-TWEWYJoshua
329
+ Niphredil/DialoGPT-small-lotr
330
+ Nisarg2701/DialoGPT-medium-Rick
331
+ NoLawz/DialoGPT-medium-hagrid
332
+ NoLawz/DialoGPT-medium-harrypotter
333
+ NoLawz/DialoGPT-medium-spongebob
334
+ Nova/DialoGPT-medium-Lelouch
335
+ NovaChrono/twervy
336
+ Obesitycart/ChatBot
337
+ Obscurity/DialoGPT-Medium-707
338
+ Oji/DialoGPT-small-Rick
339
+ Optimal/Harry
340
+ P4RZ1V4L/DialoGPT-Medium-Tony
341
+ PVAbhiram2003/DialoGPT-medium-RickandMorty
342
+ Paradocx/Dialogpt-mid-hpai
343
+ Pensador777critico/DialoGPT-small-RickandMorty
344
+ PhilipTheGreat/DiabloGPT-small-Traveller
345
+ PinoCorgi/DialoGPT-small-Shrek1
346
+ Piumi/DialogGPT-small-harrypotter
347
+ Plencers/DialoGPT-small-homer
348
+ Poly-Pixel/shrek-medium-full
349
+ Poly-Pixel/shrek-medium
350
+ Poly-Pixel/shrek-test-small
351
+ Pupihed/DialoGPT-small-shrek
352
+ PurpleJacketGuy/My_Jarvis
353
+ PurpleJacketGuy/My_Jarvis_2
354
+ RAhul03/DialoGPT-small-harrypotter
355
+ REAP3R/Chat-bot
356
+ REZERO/DialoGPT-medium-saitama
357
+ RTM/ChatBot
358
+ RTM/Lucky
359
+ RTurk/DialoGPT-small-TIMBOT
360
+ Radicalkiddo/DialoGPT-small-Radical
361
+ Rashid11/DialoGPT-small-rick
362
+ Rathod/DialoGPT-small-harrypotter
363
+ Redolid/DialoGPT-small-Rick
364
+ Rei/DialoGPT-medium-kurisu
365
+ RifsxD/DialoGPT-medium-raifu
366
+ RishabhRawatt/DialoGPT-small-Rickmorty
367
+ RishabhRawatt/DialoGPT-small-kela
368
+ Ritchie/DialoGPT-small-Rickandmorty
369
+ RizqFarIDN/DialoGPT-medium-harrypotter
370
+ RizqFarIDN/DialoGPT-small-harrypotter
371
+ RobinMari/DialoGPT-small-mikoto
372
+ Royce23/DialoGPT-small-almas
373
+ Rush11/DialoGPT-small-HarryPotter
374
+ Ryanar/DialoGPT-medium-Zelda
375
+ Ryukie/DialoGPT-small-Rick
376
+ S34NtheGuy/DialoGPT-medium-Glass_Of_Water
377
+ S34NtheGuy/DialoGPT-medium-Mona
378
+ S34NtheGuy/DialoGPT-small-Harry282
379
+ S34NtheGuy/DialoGPT-small-MJOLNIR_Soul
380
+ S34NtheGuy/DialoGPT-small-cursedryno
381
+ S34NtheGuy/DialoGPT-small-pikamew362
382
+ S34NtheGuy/DialoGPT-small-wetterlettuce
383
+ SJSui/RickBot
384
+ SPGT/LiveSafe-DialoGPT
385
+ SaffronIce/DialoGPT-medium-Jett
386
+ Salma-2/DialoGPT-small-harrypotter
387
+ Sammigooof/Peterbot
388
+ SarahhhUwU/DialoGPT-small-ally
389
+ Sarumomo/DialoGPT-small-test
390
+ Saviour/ChandlerBot
391
+ Saz/DialoGPT-small-paimon
392
+ Saz/DialoGPT-small-saz
393
+ Science-geek32/DialoGPT-small-doctor
394
+ Science-geek32/DialoGPT-small-doctor2.0
395
+ Scoops/SandalBot
396
+ ScottaStrong/DialogGPT-medium-Scott
397
+ ScottaStrong/DialogGPT-medium-joshua
398
+ ScottaStrong/DialogGPT-small-Scott
399
+ ScottaStrong/DialogGPT-small-joshua
400
+ Sebastianthecrab/DialoGPT-small-melchior
401
+ Sedge/DialoGPT-small-Sedge
402
+ Shakaw/DialoGPT-small-spongebot
403
+ ShayoGun/DialoGPT-small-shayo
404
+ Sheel/DialoGPT-small-harrypotter
405
+ Sheerwin02/DialoGPT-medium-mikasa
406
+ Sheerwin02/DialoGPT-small-isla
407
+ Sherman/DialoGPT-medium-joey
408
+ Shike/DialoGPT_medium_harrypotter
409
+ Shinx/DialoGPT-medium-myheroacademia
410
+ NaturesDisaster/DialoGPT-large-Neku
411
+ NaturesDisaster/DialoGPT-small-Neku
412
+ ShiroNeko/DialoGPT-small-rick
413
+ Shubham-Kumar-DTU/DialoGPT-small-goku
414
+ SilentMyuth/sarcastic-model
415
+ SilentMyuth/stableben
416
+ SirBastianXVII/DialoGPT-small-TVD
417
+ Sired/DialoGPT-small-trumpbot
418
+ Siyris/DialoGPT-medium-SIY
419
+ Siyris/SIY
420
+ Skywhy/DialoGPT-medium-Churchyy
421
+ Snaky/StupidEdwin
422
+ Soapsy/DialoGPT-mid-cartman
423
+ SonMooSans/DialoGPT-small-joshua
424
+ SonMooSans/test
425
+ Sora4762/DialoGPT-small-naruto
426
+ Sora4762/DialoGPT-small-naruto1.1
427
+ Soumyajit1008/DialoGPT-small-harryPotterssen
428
+ SpacyGalaxy/DialoGPT-medium-Gandalf
429
+ Spectrox/emmybot
430
+ Spirax/DialoGPT-medium-sheldon
431
+ Spoon/DialoGPT-small-engineer
432
+ Stabley/DialoGPT-small-evelynn
433
+ Stevo/DiagloGPT-medium-spamton
434
+ Stoned-Code/DioloGPT-large-Rick-SC-420
435
+ Sunnydx/BillCipherBot
436
+ TTYU/DialoGPT-small-trump
437
+ TVLG/DialoGPT-small-Iroh-Bot
438
+ Taramiko/DialoGPT-small-hoshiyo_kojima
439
+ Taramiko/Hoshiyo_Kojima
440
+ Tejasvb/DialoGPT-small-rick
441
+ Tejasvb/DialogGPT-small-rick
442
+ ThatSkyFox/DialoGPT-medium-joshua
443
+ ThatSkyFox/DialoGPT-small-joshua
444
+ The-Programmer-With-Cool-Pens/TifaBotAIPackage
445
+ TheCatsMoo/DialoGGPT-small-joshua
446
+ TheDiamondKing/DialoGPT-small-harrypotter
447
+ ThePeachOx/DialoGPT-small-harry
448
+ TheReverendWes/DialoGPT-small-rick
449
+ TheTUFGuy/HermioneChatBot
450
+ Thejas/DialoGPT-small-Stewei
451
+ Thejas/DialoGPT-small-elon
452
+ ThoracicCosine/DialoGPT-small-harrypotter
453
+ Tidum/DialoGPT-large-Michael
454
+ Toadally/DialoGPT-small-david_mast
455
+ Tofu05/DialoGPT-large-boon2
456
+ Tofu05/DialoGPT-med-boon3
457
+ TofuBoy/DialoGPT-medium-Yubin2
458
+ TofuBoy/DialoGPT-medium-boon
459
+ Tr1ex/DialoGPT-small-rick
460
+ TrebleJeff/DialoGPT-small-Michael
461
+ TrimPeachu/Deadpool
462
+ Trixzy/rickai-v1
463
+ Tropics/DialoGPT-small-peppa
464
+ UKJ5/DialoGPT-small-harrypotter
465
+ Username1/Mourinhio-medium
466
+ Username1/Mourinho
467
+ Username1/Wenger
468
+ VLRevolution/DialogGPT-small-GGODMODEL
469
+ VMET/DialoGPT-small-dumbassbot
470
+ VaguelyCynical/DialoGPT-small-RickSanchez
471
+ Vampiro/DialoGPT-small-dante_b
472
+ Vampiro/DialoGPT-small-dante_c
473
+ VariableZee/DialoGPT-small-ivylia03
474
+ Verge/Peterbot
475
+ VincentButterfield/DialoGPT-small-harrypotter
476
+ VishalArun/DialoGPT-medium-harrypotter
477
+ Vitafeu/DialoGPT-medium-ricksanchez
478
+ VulcanBin/DialoGPT-small-cortana
479
+ WarrenK-Design/DialoGPT-small-Rick
480
+ Wessel/DiabloGPT-medium-harrypotter
481
+ White/white-bot
482
+ Whitez/DialoGPT-small-twety
483
+ Wise/DialogGPT-small-JC
484
+ WoutN2001/james3
485
+ WurmWillem/DialoGPT-medium-RickandMorty3
486
+ Xeouz/Ultron-Small
487
+ XuguangAi/DialoGPT-small-Harry
488
+ XuguangAi/DialoGPT-small-Leslie
489
+ XuguangAi/DialoGPT-small-Rick
490
+ Yankee/test1234
491
+ Zane/Ricky
492
+ Zane/Ricky3
493
+ Zeer0/DialoGPT-small-ZerO
494
+ Zen1/Derekbot
495
+ Zen1/test1
496
+ Zeph/DialoGPT-small-rick
497
+ Zephaus/Chromrepo
498
+ Zixtrauce/BDBot
499
+ Zixtrauce/BDBot4Epoch
500
+ Zixtrauce/BaekBot
501
+ Zixtrauce/BrandonBot
502
+ Zixtrauce/BrandonBot2
503
+ Zixtrauce/JohnBot
504
+ Zixtrauce/SelfAwareness
505
+ Zuha/DialoGPT-small-gandalf
506
+ a01709042/DialoGPT-medium
507
+ aadilhassan/Chandlerbot
508
+ aashutosh2102/DialoGPT-smalll-harrypotter
509
+ abhiramtirumala/DialoGPT-sarcastic
510
+ abhisht/DialoGPT-medium-Emilybot
511
+ abjbpi/DS_small
512
+ abjbpi/Dwight_Schrute
513
+ aced/DialoGPT-medium-3PO
514
+ adviksinghania/DialoGPT-medium-rick
515
+ af1tang/personaGPT
516
+ aggb/DialogGPT-small-AGGB-B
517
+ aimiekhe/yummv1
518
+ aimiekhe/yummv2
519
+ aishanisingh/DiagloGPT-small-michaelscott
520
+ aishanisingh/DialoGPT-small-harrypotter
521
+ akaushik1/DialoGPT-small-kaiser
522
+ akhooli/personachat-arabic
523
+ alankar/DialoGPT-small-rick
524
+ alipsezzar/DialoGPT-medium-harrypotter
525
+ alistair7/bbt-diagpt2-model
526
+ aluserhuggingface/DialoGPT-small-harrypotter
527
+ alvinkobe/DialoGPT-medium-steve_biko
528
+ alvinkobe/DialoGPT-small-KST
529
+ andikarachman/DialoGPT-small-sheldon
530
+ anduush/DialoGPT-small-Rick
531
+ ange/DialoGPT-medium-Monke
532
+ ankimt01/DialoGPT-small-anch
533
+ ann101020/le2sbot-hp
534
+ anshengli2/DialogGPT-small-Bot
535
+ anweasha/DialoGPT-small-Chandler
536
+ anweasha/DialoGPT-small-Jake
537
+ aplnestrella/Aladdin-Bot
538
+ arampacha/DialoGPT-medium-simpsons
539
+ archmagos/HourAI
540
+ ardatasc/miniMe-version1
541
+ arifbhrn/DialogGPT-small-Rickk
542
+ arnav7633/DialoGPT-medium-tony_stark
543
+ aryanbhosale/DialoGPT-medium-harrypotter
544
+ asad/DialoGPT-small-harryporter_bot
545
+ ashwinchandran13/DialoGPT-small-harrypotter
546
+ astrobreazy/DialoGPT-small-harrypotter
547
+ atkh6673/DialoGPT-small-harrypotter
548
+ atkh6673/DialoGPT-small-trump
549
+ atomsspawn/DialoGPT-small-dumbledore
550
+ augustojaba/DialoGPT-small-harrypotter
551
+ avinashshrangee/DialoGPT-small-Ricky
552
+ awvik360/DialoGPT-medium-plemons
553
+ awvik360/DialoGPT-medium-plemons2
554
+ awvik360/DialoGPT-small-plemons
555
+ aydin/DialoGPT-medium-michael
556
+ ayush19/rick-sanchez
557
+ b0shakk/DialoGPT-small-Ragnar
558
+ balta/DialoGPT-small-TestBot
559
+ banden/DialoGPT-medium-RickBot
560
+ banden/DialoGPT-small-LokiBot
561
+ beatajackowska/DialoGPT-RickBot
562
+ benajtil/DialoGPT-small-Daddyben
563
+ benajtil/DialoGPT-small-RickAndMortyScripts
564
+ benjaminbeilharz/dialoGPT-small-empatheticdialogues-generation
565
+ benmrtnz27/DialoGPT-small-misato
566
+ bensuydam/CartmanBot
567
+ bestminerevah/DialoGPT-small-thetenthdoctor
568
+ bhaden94/LokiDiscordBot-medium
569
+ bhavya689/DialoGPT-large-chandler
570
+ bleachybrain/DialoGPT-med-ss
571
+ bmdonnell/DialoGPT-medium-harrypotter
572
+ bonebambi/DialoGPT-small-ThakirClone
573
+ bookemdan/DialoGPT-small-harrypotter
574
+ boran/berkbot
575
+ boydster/DialoGPT-small-gollum
576
+ brimeggi/testbot2
577
+ brokentx/newbrokiev2
578
+ bspans/DialoGPT-small-yoda
579
+ byeongal/Ko-DialoGPT
580
+ bypequeno/DialoGPT-small-michaelscott
581
+ caps1994/DialoGPT-small-chrisbot-caps1994
582
+ caps1994/DialoGPT-small-chrisbot
583
+ caps1994/DialoGPT-small-harrypotter-caps1994
584
+ cartyparty/DialoGPT-small-harrypotter
585
+ cartyparty/DialoGPT-small-iteration1
586
+ cartyparty/DialoGPT-small-nerdherd
587
+ cedpsam/chatbot_fr
588
+ centon21/DialoGPT-small-harrypotter
589
+ chaitrabhat/DialoGPT-small-rick
590
+ chamindu/DialoGPT-medium-hermione
591
+ chamodkarunasena/DialoGPT-medium-sokka
592
+ chan030609/DialoGPT-medium-JAB
593
+ chan030609/DialoGPT-small-JAB
594
+ chellver24/DialoGPT-medium-chizuru_ichinose
595
+ chip/DialoGPT-small-chizuru
596
+ thu-coai/blenderbot-400M-esconv
597
+ clairesb/kindness_bot
598
+ clairesb/kindness_bot_repo
599
+ clancystudios/DialoGPT-medium-Morty
600
+ clayfox/DialoGPT-medium-Hiccup
601
+ clayfox/DialoGPT-small-Hiccup
602
+ cocoaclef/DialoGPT-small-kohaku
603
+ codealtgeek/DiabloGPT-medium-rickmorty
604
+ colochoplay/DialoGTP-small-harrypotter
605
+ conniezyj/DialoGPT-small-snape
606
+ cookirei/DialoGPT-medium-Joreyar
607
+ cosmic/DialoGPT-Rick
608
+ cosmicray001/prod-harry
609
+ cosmicray001/small-harry
610
+ crystalgate/DialoGPT-small-rick
611
+ cumtowndiscord/DialoGPT-small-joshua
612
+ cutiebunny639/DialoGPT-small-harry
613
+ d4rk/harry
614
+ danildany/DialoGPT-small-MichaelScott
615
+ danny481/DialoGPT-small-datnguyenchatbot
616
+ danny481/DialoGPT-small-harrypotter
617
+ danny481/Final_ChatBot
618
+ darkzek/chickenbot-jon-snow
619
+ darthboii/DialoGPT-small-PickleRick
620
+ darthboii/DialoGPT-small-Rick
621
+ dats/DialoGPT-small-harrypotter
622
+ dattam/DialoGPT-medium-TonyStarkBot
623
+ dead69/GPT-small-yoda
624
+ deepparag/Aeona
625
+ deepparag/DumBot-Beta
626
+ deepparag/DumBot
627
+ delvan/DialoGPT-medium-DwightV1
628
+ df4rfrrf/DialoGPT-medium-Aerith
629
+ dhanushlnaik/amySan
630
+ disdamoe/DialoGPT-small-moe
631
+ disdamoe/TheGreatManipulator
632
+ disdamoe/TheManipulator
633
+ divi/Peterbot
634
+ dk16gaming/DialoGPT-small-HarryPotter
635
+ dkminer81/Tromm
636
+ dreamline2/DialoGPT-small-joshua-demo
637
+ dukeme/DialoGPT-small-RDBotv1
638
+ eclare/DialoGPT-small-SCHAEFER
639
+ educhav/Austin-DialoGPT-small
640
+ educhav/Elijah-DialoGPT-small
641
+ educhav/J-DialoGPT-small
642
+ educhav/Sam-DialoGPT-small
643
+ eklrivera/DialoGPT-small-harrypotter
644
+ eldritch-axolotl/Rick
645
+ ericklasco/DialoGPT-small-erickHarryPotter
646
+ ericzhou/DialoGPT-Medium-Rick
647
+ ericzhou/DialoGPT-Medium-Rick_v2
648
+ ericzhou/DialoGPT-medium-elon
649
+ ericzhou/tsundere_v1
650
+ estehpanas/pascalbot
651
+ ethzhou/jooby
652
+ ethzhou/joobyChat
653
+ ethzhou/newJooby
654
+ f00d4tehg0dz/Peppa
655
+ f00d4tehg0dz/Yoda
656
+ facebook/blenderbot-1B-distill
657
+ facebook/blenderbot-3B
658
+ facebook/blenderbot-400M-distill
659
+ facebook/blenderbot-90M
660
+ facebook/blenderbot_small-90M
661
+ faketermz/DialoGPT
662
+ fatemaMeem98/DialoGPT-medium-HermioneGrangerBot
663
+ felinecity/DioloGPT-small-KaeyaBot
664
+ felinecity/DioloGPT-small-KaeyaBot2
665
+ felinecity/DioloGPT-small-LisaBot
666
+ felinecity/ScaraBot
667
+ fibruh/DialoGPT-small-harrypotter
668
+ flakje/DialoGPT-small-Marty
669
+ flooptherocket/DialogGPT-small-rick
670
+ ftnvir/DialoGPT-medium-bullyMaguire
671
+ gabtan99/dialogpt-tagalog-medium-10
672
+ gabtan99/dialogpt-tagalog-medium-20
673
+ gabtan99/dialogpt-tagalog-medium-30
674
+ gabtan99/dialogpt-tagalog-medium
675
+ gfdream/dialogpt-small-familyguy
676
+ gfdream/dialogpt-small-harrypotter
677
+ ghhostboy/DialoGPT-medium-connorDBH3-1
678
+ ghhostboy/DialoGPT-medium-connorDBH3-21
679
+ gizmo-dev/DialoGPT-small-jake
680
+ gorkemgoknar/gpt2chatbotenglish
681
+ grayson124/chatbotwaifu
682
+ grounddominator/DialoGPT-lar-Rick
683
+ gusintheshell/DialoGPT-small-rickbot
684
+ gwima/ryan-sackmott
685
+ hama/Doctor_Bot
686
+ hama/Harry_Bot
687
+ hama/barney_bot
688
+ hama/me0.01
689
+ hama/rick_bot
690
+ heabeoun/DiabloGPT-small-nuon-conv
691
+ henryoce/DialoGPT-small-rick-and-morty
692
+ hervetusse/DialogGPT-small-harrypotter
693
+ hireddivas/DialoGPT-small-ray
694
+ hireddivas/DialoGPT-small-scully
695
+ hireddivas/dialoGPT-small-mulder
696
+ hireddivas/dialoGPT-small-phil
697
+ hireddivas/dialoGPT-small-sonic
698
+ honguyenminh/old-zhongli
699
+ houssaineamzil/DialoGPT-small-joey
700
+ hrv/DialoGPT-small-rick-morty
701
+ hyunwoongko/blenderbot-9B
702
+ hyunwoongko/reddit-3B
703
+ hyunwoongko/reddit-9B
704
+ iamalpharius/GPT-Small-BenderBot
705
+ ianc89/hagrid
706
+ ignkai/DialoGPT-medium-spider-man-updated
707
+ ilikeapple12/DialoGPT-small-Phos
708
+ imran2part/DialogGPT-small-Doctor
709
+ imrit1999/DialoGPT-small-MCU
710
+ myynirew/DialoGPT-medium-ettengiv
711
+ myynirew/DialoGPT-medium-leirbag
712
+ myynirew/DialoGPT-small-awazimuruk
713
+ ionite/DialoGPT-large-Sh0rtiAI-v2
714
+ ionite/DialoGPT-medium-IoniteAI
715
+ ionite/DialoGPT-medium-McKayAI-v2
716
+ ionite/DialoGPT-medium-McKayAI
717
+ ionite/DialoGPT-medium-Sh0rtiAI
718
+ ionite/DialoGPT-medium-mohnjilesAI
719
+ ionite/DialoGPT-medium-orangeAI
720
+ ironman123/DialoGPT-small-harrypotter
721
+ ishraaqparvez/DialoGPT-small-harrypotter
722
+ jackky46/DialoGPT-medium-got
723
+ jahz/DialoGPT-medium-FF8
724
+ jalensmh/DialoGPT-medium-jalenbot
725
+ jalensmh/DialoGPT-small-exophoria
726
+ jamestop00/DialoGPT-spike-medium
727
+ jasper/DialoGPT-large-homersimpson
728
+ jchen/DialoGPT-evan
729
+ jeanlks/DialogGPT-small-gayvid
730
+ jeanlks/DialogGPT-small-pato
731
+ jfhr1999/CharacterTest
732
+ jogp10/DialoGPT-medium-arya
733
+ jollmimmim/DialoGPT-small-monkeydluffy
734
+ jordanhagan/DialoGPT-medium-NegaNetizen
735
+ josephmagnayon/DialoGPT-medium-Alfred
736
+ josepjulia/RepoHumanChatBot
737
+ josh8/DialoGPT-medium-josh
738
+ josh8/DialoGPT-small-josh
739
+ jpsxlr8/DialoGPT-small-harrypotter
740
+ jth1903/DialoGPT-small-rick
741
+ julianolf/DialoGPT-small-harrypotter
742
+ kaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaot1k/DialoGPT-small-Wanda
743
+ kagennotsuki/DialoGPT-medium-radion
744
+ kche0138/DialoGPT-medium-DIO
745
+ kingabzpro/DialoGPT-small-Rick-Bot
746
+ kipiiler/Rickbot
747
+ knightbat/harry-potter
748
+ kripanshudixit/DialoGPT-small-phoenix
749
+ kris/DialoGPT-small-spock
750
+ kris/DialoGPT-small-spock3
751
+ kris/DialoGPT-small-spock4
752
+ kris/DialoGPT-small-spock5
753
+ kshitiz/testing-bot-repo
754
+ kunalbhargava/DialoGPT-small-housebot
755
+ kvothe28/DiabloGPT-small-Rick
756
+ l41n/c3rbs
757
+ lain2/Peterbot
758
+ lanejm/DialoGPT-small-hagrid
759
+ lapacc33/DialoGPT-medium-rick
760
+ life4free96/DialogGPT-med-TeiaMoranta
761
+ life4free96/DialogGPT-med-TeiaMoranta3
762
+ light/small-rickk
763
+ limivan/DialoGPT-small-c3po
764
+ cosmicroxks/DialoGPT-small-scott
765
+ logube/DialogGPT_small_harrypotter
766
+ lonewanderer27/DialoGPT-small-Joshua
767
+ lonewanderer27/KeitaroBot
768
+ lonewanderer27/YoshinoriBot
769
+ lonewanderer27/YuriBot
770
+ lovellyweather/DialoGPT-medium-johnny
771
+ luca-martial/DialoGPT-Elon
772
+ lucas-bo/DialogGPT-small-yoda
773
+ ludowoods/KujouSara
774
+ lulueve3/DialoGPT-medium-Kokkoro
775
+ lulueve3/DialoGPT-medium-Kokkoro2
776
+ madbuda/DialoGPT-got-skippy
777
+ madbuda/DialoGPT-medium-skippy
778
+ majonez57/JoeBot
779
+ manav/dialogpt-large-kanye-reddit
780
+ manav/dialogpt-medium-berkeley-reddit
781
+ maniacGhost24/MichaelScott-bot-push-small
782
+ manraf/DialoGPT-smmall-harrypotter
783
+ matprado/DialoGPT-small-rick-sanchez
784
+ maxxx2021/DialGPT-small-harrypotter
785
+ mdc1616/DialoGPT-large-sherlock
786
+ melon422/DialoGPT-medium-MelonBot
787
+ melon422/DialoGPT-medium-MelonBot2
788
+ mewmew/DialoGPT-small-rick
789
+ michelleshx/DialoGPT-small-michelle-discord-bot
790
+ microsoft/DialoGPT-large
791
+ microsoft/DialoGPT-medium
792
+ microsoft/DialoGPT-small
793
+ mikabeebee/Peterbot
794
+ milayue/neosh-bot1
795
+ minsiam/DialoGPT-medium-harrypotterbot
796
+ minsiam/DialoGPT-small-harrypotterbot
797
+ miogfd1234/ll
798
+ mittalnishit/DialoGPT-medium-rickman2
799
+ mittalnishit/DialoGPT-small-rickman
800
+ mjstamper/DialoGPT-small-samwise
801
+ mk3smo/dialogpt-med-ahiru
802
+ mk3smo/dialogpt-med-duck2
803
+ mk3smo/dialogpt-med-duck3
804
+ mk3smo/dialogpt-med-duck5
805
+ mk3smo/dialogpt-med-duckfinal
806
+ mk3smo/dialogpt-med-stt3
807
+ mklucifer/DialoGPT-medium-DEADPOOL
808
+ mklucifer/DialoGPT-small-DEADPOOL
809
+ mluengas/DialogGPT-small-michaelscott
810
+ model-mili/DailoGPT-Yukub-v3
811
+ model-mili/DialoGPT-small-Sapph-v1
812
+ model-mili/DialoGPT-small-Yukub-v2
813
+ model-mili/DialoGPT-small-Yukub
814
+ mohammedks713/DialoGPT-small-harrypotter
815
+ mohammedks713/DialoGPT-small-jonsnow
816
+ mra1ster/DialoGPT_scully_small
817
+ muhardianab/DialoGPT-small-theoffice
818
+ munezah/DialoGPT-small-aot
819
+ munezah/DialoGPT-small-sherlock
820
+ mutamuta/DialoGPT-small-rick
821
+ mutamuta/DialoGPT-spongebob-small
822
+ namanrana16/DialoGPT-small-TrumpBot
823
+ nanometeres/DialoGPT-medium-halbot
824
+ nanometeres/DialoGPT-small-halbot
825
+ ncoop57/DiGPTame-medium
826
+ niharikadeokar/DialoGPT-small-Jakebot
827
+ nikhilpatil2532000/DialoGPT-small-harrypotter
828
+ nimrazaheer/DialoGPT-small-harrypotter
829
+ nitishk/IronStarkBot
830
+ nlokam/DialoGPT-digibot3.0-new
831
+ nlokam/Digibot
832
+ nlokam/ada_V.3
833
+ nlokam/ada_V.6
834
+ nlokam/ada_V.7
835
+ nlokam/books_to_bots_v.00
836
+ noobed/DialoGPT-small-astley
837
+ norie4/DialoGPT-small-kyutebot
838
+ norie4/DialoGPT-small-memoji
839
+ not7even/DialoGPT-small-7evenpool
840
+ npc-engine/exported-bart-light-gail-chatbot
841
+ ntjrrvarma/DialoGPT-small-RickBot
842
+ nwl/DialoGPT-small-enhypen
843
+ nytestalkerq/DialoGPT-medium-joshua
844
+ oakkas/Dialge-small-harrypotter-oguz
845
+ odinmay/joebot
846
+ odinmay/zackbotmodel
847
+ ogpat123/DialoGPT-small-Michael
848
+ ogpat23/Jules-Chatbot
849
+ omkar1309/RickBot
850
+ omnimokha/DialoGPT-medium-jakeamal
851
+ omnimokha/DialoGPT-small-jakeamal
852
+ omnimokha/jakebot2
853
+ oododo/DialoGPT-small-elon
854
+ otto-camp/DialoGPT-small-RickBot
855
+ overgrowth/jokeboy
856
+ owencubes/DialoGPT-small-Josuke
857
+ paladinx00/rh-bender
858
+ parigaswetha/DialoGPT-small-jakeperalta
859
+ parthsinha/DialoGPT-small-rickandmorty
860
+ pashin/DialoGPT-small-ironman-2
861
+ pashin/DialoGPT-small-ironman-3
862
+ pashin/DialoGPT-small-ironman1
863
+ pastlecry/DialoGPT-small-harrypotter
864
+ peamjo/DialoGPT-small-morty
865
+ person123/DialoGPT-small-petergriffin
866
+ pewriebontal/DialoGPT-medium-Pewpewbon
867
+ phantom-deluxe/dialoGPT-RickBot
868
+ phantom-deluxe/dialoGPT-harry
869
+ phozon/harry-potter-medium
870
+ piyushdubey/DialoGPT-Mi
871
+ pompeiifreckles/DialoGPT-medium-Rick
872
+ ppn/DialoGPT-small-harrypotter
873
+ pranavtharoor/test
874
+ professional/DialoGPT-small-joshua
875
+ ps2102/DialoGPT-small-harrypotter
876
+ psblade/DialoGPT-medium-PotterBot
877
+ puugz/DialoGPT-small-spiderman
878
+ qwerty/DialoGPT-small-rick
879
+ r3cdhummingbird/DialoGPT-medium-joshua
880
+ r3dhummingbird/DialoGPT-medium-joshua
881
+ r3dhummingbird/DialoGPT-medium-neku
882
+ r3dhummingbird/DialoGPT-small-harrypotter
883
+ r3dhummingbird/DialoGPT-small-neku
884
+ rachelcorey/DialoGPT-medium-kramer
885
+ rachelcorey/DialoGPT-medium-niles
886
+ rafakat/Botsuana-rick
887
+ rahul26/DialoGPT-small-rickandmorty
888
+ rahulMishra05/discord-chat-bot
889
+ raj2002jain/DialoGPT-small-Light
890
+ ravephelps/DialoGPT-small-MichaelSbott
891
+ redbloodyknife/DialoGPT-medium-shayo
892
+ rhollings/DialoGPT_small_steverogers
893
+ richiellei/Childe
894
+ richiellei/Childe3
895
+ richiellei/DialoGPT-small-rick
896
+ richielleisart/Childe
897
+ ridwanpratama/DialoGPT-small-misaki
898
+ rinz/DialoGPT-small-Harry-Potterrr
899
+ rlagusrlagus123/XTC20000
900
+ rlagusrlagus123/XTC4096
901
+ rmicheal48/DialoGPT-small-steven_universe
902
+ rodrigodz/DialoGPT-medium-dxd
903
+ romuNoob/Mine
904
+ romuNoob/test
905
+ rovai/AI
906
+ rovai/CARRIE
907
+ rovai/Chat_pytorch1
908
+ rovai/chatbotmedium1
909
+ rovai/chatbotmedium2
910
+ rovai/chatbotmedium3
911
+ rovai/chatbotmedium4
912
+ rovai/chatbotone
913
+ rpeng35/DialoGPT-small-erenyeager
914
+ rrtong/DialoGPT-medium-shang-chi
915
+ rsd511/DialoGPT-small-house
916
+ rsedlr/RickBot
917
+ rsedlr/RickBotExample
918
+ ruriko/bacqua
919
+ ruriko/konoaqua
920
+ ruriko/konodio
921
+ sachdevkartik/DialoGPT-small-rick
922
+ saintseer121323/DialoGPT-small-kotonoha
923
+ sakai026/Chizuru
924
+ sakai026/Mizuhara
925
+ sam213/DialoGPT-small-harrypotter
926
+ sambotx4/scamantha
927
+ samuelssonm/DialoGPT-small-rick
928
+ sanjanareddy226/JakeBot
929
+ sankalpjha1/mr.bot_haary
930
+ satkinson/DialoGPT-medium-marvin
931
+ satkinson/DialoGPT-small-marvin
932
+ satvikag/chatbot
933
+ satvikag/chatbot2
934
+ sergunow/movie-chat
935
+ setiadia/DialogGPT-small-HPBot
936
+ shelb-doc/DialoGPT-medium-ash
937
+ shihab/HarryPotter
938
+ shonuff/DialoGPT-medium-konosuba
939
+ shreeshaaithal/DialoGPT-small-Michael-Scott
940
+ shreeshaaithal/Discord-AI-bot
941
+ shreeshaaithal/whatsapp-medium-bot-2
942
+ sidkhuntia/harrypotter
943
+ sifclairhelix/DialoGPT-small-harrypot
944
+ simrana5/RickBotExample
945
+ skynex/DialoGPT-small-batman
946
+ skynex/DialoGPT-small-finalbatman
947
+ sleekmike/DialoGPT-small-joshua
948
+ smilesandtea/DialoGPT-medium-Rick
949
+ smmzhu/DialoGPT-small-SZ
950
+ solfer/DialoGPT-small-ryuji
951
+ spockinese/DialoGPT-small-sherlock
952
+ sreyanghosh/DialoGPT-medium-joker
953
+ srirachasenpai/DialoGPT-medium-harrypotter
954
+ srv/DialoGPT-medium-Breaking_Bad
955
+ ssam/DialoGPT-small-RickmfSanchez
956
+ ssspider/DialoGPT-medium-harrypotter
957
+ stfuowned/nek
958
+ stfuowned/rick
959
+ sthom/DialoGPT-small-tin
960
+ sudip/bot1
961
+ sudoabrar/DialoGPT-small-dwight
962
+ suhasjain/DailoGPT-small-harrypotter
963
+ swapnil165/DialoGPT-small-Rick
964
+ terter/rick-bot-test-v2
965
+ thatoneguy267/DialoGPT-small-Oscar
966
+ thatoneguy267/bruhpleasehelpme
967
+ theChanChanMan/DialoGPT-small-chandler
968
+ thefryingpan/gpt-neo-125M-splishy
969
+ theiconik/hermione-granger
970
+ thesamuelpena/Dialog-medium-Sonic
971
+ thesamuelpena/Dialog-medium-masterchief
972
+ thetlwin/DialoGPT-small-ironman
973
+ thinhda/chatbot
974
+ thu-coai/CDial-GPT2_LCCC-base
975
+ thu-coai/CDial-GPT_LCCC-base
976
+ thu-coai/CDial-GPT_LCCC-large
977
+ ticet11/DialoGPT-small-BOBBY
978
+ timslams666/DialoGPT-small-rick
979
+ tinega/DialoGPT-small-harrypotter
980
+ tngo/DialoGPT-small-HankHill
981
+ toiletwater/DialoGPT-medium-ironman
982
+ tom1804/HP
983
+ tom1804/HP_last
984
+ tom1804/hp_new
985
+ tomascerejo12/DialoGPT-small-Rick
986
+ tosin/dialogpt_mwoz
987
+ tosin/dialogpt_sv
988
+ toyfreak/DialoGPT-small-addy
989
+ toyfreak/DialoGPT-small-shy
990
+ tpri/DialoGPT-small-pa
991
+ tprincessazula/Dialog-GPT-small-AANG
992
+ tprincessazula/Dialog-GPT-small-KATARA-AVATAR
993
+ tprincessazula/Dialog-GPT-small-SOKKA-AVATAR
994
+ tprincessazula/Dialog-GPT-small-harrypotter
995
+ transfaeries/DialoGPT-Discord
996
+ transfaeries/DialoGPT-medium-Discord-1.0
997
+ transfaeries/DialoGPT-small-Discord-1.0
998
+ transfaeries/Twilight-Sparkle-GPT
999
+ trig/DialoGPT-small-harrypotter
1000
+ trig/multiverse-second
1001
+ trig/multiverse
1002
+ trig/sokka-chatbot-test
1003
+ trig/tlok-test
1004
+ troythewar/DialogGPT-small-harrypotter
1005
+ truthisneverlinear/EleventhDoctor
1006
+ ttntran/DialoGPT-small-human
1007
+ tuantt/GroundNet
1008
+ ughvom/Ginger
1009
+ ughvom/britnayBOTMAIN
1010
+ umr55766/DialogGPT-small-peppa-pig
1011
+ usamazaheer/DialoGPT-small-harrypotter
1012
+ uutkras/Pandabot
1013
+ uyharold86/DialoGPT-small-RickAndMorty
1014
+ valarikv/DialoGPT-small-bateman
1015
+ vibranium19/DialoGPT-medium-jake
1016
+ victordata/DialoGPT-small-Rick
1017
+ victorswedspot/DialoGPT-small-gandalf
1018
+ vijayv500/DialoGPT-small-Big-Bang-Theory-Series-Transcripts
1019
+ vijote/DialoGPT-small-Morty
1020
+ vivek-g-2009/DialoGPT-medium-harrypotter
1021
+ vlco-o/NLboto_o-aki-dialogpt
1022
+ vlco-o/NLboto_o-small-dialogpt
1023
+ wadeed/DialogGPT-small-chandlerbingg
1024
+ wanderer/DialoGPT-small-Phoebe
1025
+ wjching/DialoGPT-small-ricksanchez
1026
+ won/DialoGPT-small-harrypotter
1027
+ worms3401/DialoGPT-small-Eleonora
1028
+ worsterman/DialoGPT-small-mulder
1029
+ wtrClover/DialoGPT-small-Flutterbot
1030
+ wtrClover/DialoGPT-small-TwilightBot
1031
+ xdmason/pretrainedCas
1032
+ xiaoheiqaq/DialoGPT-mediumJojo
1033
+ xiaoheiqaq/DialoGPT-smallharrypotter
1034
+ yahya1994/DialoGPT-small-AOT-Eren
1035
+ yahya1994/DialoGPT-small-DN-L
1036
+ yahya1994/DialoGPT-small-DN-Light
1037
+ yahya1994/DialoGPT-small-DN-Ryuk
1038
+ yahya1994/DialoGPT-small-Gintama-Gintoki
1039
+ yahya1994/DialoGPT-small-Parasyte-Migi
1040
+ yahya1994/DialoGPT-small-ReZero-Rem
1041
+ yahya1994/DialoGPT-small-ReZero-Subaru
1042
+ yahya1994/DialoGPT-small-Ryuk
1043
+ yusufmorsi/georgebot
1044
+ zaydzuhri/lelouch-medium
1045
+ zemi/jakebot
1046
+ zen-satvik/BotGPT-medium-HP
1047
+ zentos/DialoGPT-small-spongebob
1048
+ zinary/DialoGPT-small-rick-new
1049
+ zuto37/DialoGPT-small-sadao
1050
+ Maxwere/DiabloGPT-medium-maxbot
1051
+ Grungle/DialoGPT-medium-butters
1052
+ sadkat/technoai
1053
+ Grungle/DialoGPT-medium-butters2
1054
+ kookyklavicle/sean-diaz-bot
1055
+ kookyklavicle/sean-diaz
1056
+ Aquasp34/DialoGPT-small-aqua1
1057
+ zenham/khemx
1058
+ aryanbhosale/smartharrypotterbot
1059
+ Britain/DialoGPT-small-ZifBotTwoFixed
1060
+ Britain/DialoGPT-small-DanyBotThree
1061
+ infinitylyj/DialogGPT-small-rick
1062
+ infinitylyj/DialogGPT-small-general
1063
+ infinitylyj/DialogGPT-medium-general
1064
+ jackyv/DialoGPT-small-pinocchio
1065
+ Freak55/DialoGPT-small-Phoenix-Wright
1066
+ Britain/DialoGPT-small-DanyBotThreeFixed
1067
+ Britain/DialoGPT-small-DanyBotTwo
1068
+ P4RZ1V4L/DialoGPT-medium-tonystark
1069
+ Britain/DialoGPT-small-DanyBotTwoNew
1070
+ zenham/mskeen_m_e4_16h
1071
+ zenham/khemx_m_e4_16h
1072
+ zenham/wail_m_e4_16h_2k
1073
+ RTM/vilang
1074
+ BeanBoi50404/DialoGPT-small-PeppaPigButBetter
1075
+ nabin19677/small-cartman
1076
+ Prime2911/DialoGPT-small-handsomejack
1077
+ Starry/KARENTRIES
1078
+ dietconk/DialogGPT-small-Orange
1079
+ mafeu/DialoGPT-medium-willem
1080
+ Prime2911/DialoGPT-medium-handsomejack
1081
+ Meowren/DialoGPT-small-Rick-Bot
1082
+ DB13067/Peterbot
1083
+ Savitar/DialoGPT-medium-RickandMorty
1084
+ MolePatrol/Olbot
1085
+ erinchocolate/DialoGPT-small-harrypotter
1086
+ Valouzze/FairuvenIA
1087
+ MehSatho/Tai-medium-Hermione
1088
+ Valouzze/MegaIA
1089
+ Makinitas/DialoGPT-small-RickAndMortyScripts
1090
+ darthrussel/DialoGPT-small-rickandmorty
1091
+ vanilladucky/Friends_chatting_bot
1092
+ vanilladucky/Friends_chatting_bot_redefined
1093
+ chocoduck/Joey_bot
1094
+ duanxingjuan/DialoGPT-medium-DEMON_SLAYER
1095
+ pinkducky/Monica_Bot
1096
+ Starry/HELLORUKAS
1097
+ pinkducky/Rachel_Bot
1098
+ trig/multiverse-third
1099
+ pinkducky/Ross_Bot
1100
+ duanxingjuan/DialoGPT-large-DEMON_SLAYER_v1
1101
+ duanxingjuan/DialoGPT-large-DEMON
1102
+ duanxingjuan/DialoGPT-large-DEMON1
1103
+ issue89/DialoGPT-small-house
1104
+ LeonLi279/DialoGPT-small-harrypotter
1105
+ MolePatrol/DialoGPT-Medium-ConnerBot
1106
+ MolePatrol/DialoGPT-Medium-MoleBot
1107
+ TheDaydreamer/ricky
1108
+ BeamBee/DialoGPT-small-Lavenza
1109
+ Garsic/DialoGPT-medium-pecorine
1110
+ CallForEcho/DialoGPT-small-harrypotter
1111
+ BeamBee/DialoGPT-small-LavenzaNumTwo
1112
+ Meowren/MichaelScottBott
1113
+ shalpin87/dialoGPT-homer-simpson
1114
+ darthrussel/DialoGPT-small-homerbot-halfdata
1115
+ TheGoldenToaster/DialoGPT-medium-Woody
1116
+ bemich/DialoGPT-small-GeorgeCostanza
1117
+ AAAA-4/DialoGPT-small-player_03
1118
+ Teyronebigdick/DialoGPT-small-harrypotter
1119
+ Sammith/DialoGPT-small-miachael
1120
+ Nxtxn01/DialoGPT-small-harrypotter
1121
+ Teyronebigdick/DialoGPT-small-terrydavis
1122
+ mczolly/DialoGPT-small-the-doctor
1123
+ crazypegasus/GPT-JonSnow
1124
+ MrYiRen/DialoGPT-small-harrypotter
1125
+ TropicalJuice/Dialog-PeterGriffin
1126
+ TheGoldenToaster/DialoGPT-medium-Bot
1127
+ MrYiRen/DialoGPT-small-harrypotter2
1128
+ gulgulglut/DialoGPT-small-Rick
1129
+ trev/DialoGPT-small-MLP
1130
+ RAJESHNEMANI/Chatbot_AI
1131
+ lilapapazian/DialoGPT-small-harrypotter
1132
+ Alethea/GPT2-chitchat
1133
+ florentiino/DialoGPT-small-harrypotter
1134
+ NUTELEX/Eva
1135
+ jessicammow/DialoGPT-small-ronswanson
1136
+ MrYiRen/DialoGPT-small-ZC
1137
+ jessicammow/DialoGPT-medium-leslieknope
1138
+ AmbricJohnson5888/death
1139
+ AmbricJohnson5888/claura
1140
+ DarrellTimothy/DialoGPT-small-harrypotter
1141
+ RarePizzaDog/Apes_Bot
1142
+ iyedr8/DialoGPT-small-rick
1143
+ MEDT/ChatBot
1144
+ NonzeroCornet34/DialoGPT-small-hansolo
1145
+ NonzeroCornet34/DialoGPT-small-philbot
1146
+ atomsspawn/DialoGPT-medium-dumbledore
1147
+ florentiino/DialoGPT-small-rick
1148
+ ShibaDeveloper/DialoGPT-small-harrypotter
1149
+ sahilnare78/DialogGPT-medium-harrypotter
1150
+ Garsic/DialoGPT-medium-jill
1151
+ mdm/DialoGPT-small-Kanye
1152
+ ScyKindness/Hatsune_Miku
1153
+ aaaacash/DialoGPT-large-michaelscott
1154
+ AntoDono/DialoGPT-Harry
1155
+ BFMeriem/model
1156
+ BFMeriem/chatbot-model
1157
+ StringCheese/Dialog-small-bigbang
1158
+ jakewillms17/capcake-model
1159
+ Shivierra/DialoGPT-small-technoblade
1160
+ Scaprod/DialoGPT-small-arbiter
1161
+ Tlacaelel/DialoGPT-small-jarvis
1162
+ spuun/kekbot-beta-1
1163
+ Coma/Beter
1164
+ Wavepaw/DialoGPT-medium-WardenIngo
1165
+ Akarsh3053/potter-chat-bot
1166
+ MachineBabs/RickBot
1167
+ MachineBabs/DocBrown
1168
+ spuun/kekbot-beta-1-medium
1169
+ MEDT/Chatbot_Medium
1170
+ tosin/dialogpt_mwoz_idioms
1171
+ tosin/dialogpt_afriwoz_wolof
1172
+ aakhilv/tonystark
1173
+ spuun/kekbot-beta-2-medium
1174
+ xiaoGato/DialoGPT-small-villanelle
1175
+ Jonesy/DialoGPT-small_FG
1176
+ deathknight67/DialoGPT-medium-joshua
1177
+ kyriinx/DialoGPT-small-glyph
1178
+ Jonesy/DialoGPT-medium_FG
1179
+ spuun/kekbot-beta-3-medium
1180
+ Lisia/DialoGPT-small-connor
1181
+ awvik360/DialoGPT-medium-plemons-04262022
1182
+ Jonesy/LisaOnIce
1183
+ kvnaraya/DialoGPT-small-michael
1184
+ Hyperspace/DialoGPT-small-Hyperdrive
1185
+ Azuris/DialoGPT-medium-ekidona
1186
+ aditeyabaral/sonobois
1187
+ Jonesy/HomersNightOut
1188
+ Andrei0086/Chat-small-bot
1189
+ awvik360/UncleRuckus
1190
+ captainswiftfox/rickandmorty
1191
+ radicalrascal/DialoGPT-medium-jimmy
1192
+ dmoz47/DialoGPT-small-peterparker
1193
+ niprestige/GPT-small-DusabeBot
1194
+ Shakerlicious/DialoGPT-small-descentbot
1195
+ atomsspawn/DialoGPT-small-shelbot
1196
+ atomsspawn/DialoGPT-small-sheldon
1197
+ Willow/DialoGPT-medium-willow
1198
+ IsekaiMeta/dapprf
1199
+ farjvr/DialoGPT-small-Mortyfar
1200
+ InSaiyan/DialoGPT-small-harrypotter
1201
+ IsekaiMeta/dapprf3
1202
+ emolyscheisse/DialoGPT-small-mandybot
1203
+ IsekaiMeta/dapprf4
1204
+ qgdmonilla/DialoGPT-small-harrypotter
1205
+ NHStudios/DialoGPT-small-jake
1206
+ Shakerlicious/DialoGPT-small-raquelbot
1207
+ annasham/DialoGPT-small-myneighborTotoro
1208
+ CaptAdorable/RickBot
1209
+ Willow/DialoGPT-large-willow
1210
+ Kabutopusu/DialoGPT-medium-NITWMae
1211
+ HarmlessTarget/DialoGPT-medium-Bender
1212
+ soni69/DialoGPT-medium-holmes
1213
+ captainswiftfox/DialoGPT-small-rick
1214
+ kathywu/DialoGPT-small-kathy
1215
+ mybot/DialoGPT-medium-harrypotter
1216
+ Dedemg1988/DialoGPT-small-michaelscott
1217
+ pedrobaiainin/DialoGPT-small-harrypotter
1218
+ kathywu/DialoGPT-medium-kathy
1219
+ SNCannon/DialoGPT-medium-merc
1220
+ THE-DDLM/DialoGPT-sebastian
1221
+ fatirali/DialoGPT-medium-harrypotter
1222
+ TejasARathod/DialoGPT-medium-BatmanBot
1223
+ Varick/dialo-jarvis
1224
+ Robinsd/HarryBot
1225
+ dipstheman/DialoGPT-small-humanconversation
1226
+ dipstheman/DialoGPT-small-humanconversationpart
1227
+ LinkTheSinger/DialoGPT-small-Kanna
1228
+ LinkTheSinger/DialoGPT-small-Kannav4
1229
+ Robinsd/HarryBot4
1230
+ SomeRandomGuy/tony
1231
+ Meowren/HumanBot
1232
+ marcoperez/DialoGPT-small-rickandmorty
1233
+ LarsBell/DialoGPT-small-billyloomis
1234
+ okwach/mawaidhaChatbot
1235
+ LooksLikeIveLost/DialoGPT-medium-me
1236
+ okwach/mawaidhaChatbot2
1237
+ thebyy/DialoGPT-small-mortyisarick
1238
+ rongina/DialoGPT-small-cartman
1239
+ fransoa/arrombado-dms
1240
+ ionite/DialoGPT-medium-MarkAI
1241
+ ddrmaster1000/DialoGPT-medium-rick
1242
+ PeritusDux/DialoGPT-small-rick
1243
+ HomerChatbot/HomerSimpson
1244
+ t8oo/DialoGPT-small-zeni
1245
+ t8oo/DialoGPT-small-zenigata
1246
+ sexomq/DialoGPT-medium-TeoBot
1247
+ Char135/DialoGPT-medium-sebastian
1248
+ HomerChatbot/DialoGPT-small-HomerSimpson
1249
+ trev/Twilight-Sparkle
1250
+ gigikenneth/family-guy-bot
1251
+ ulises801/DialoGPT-medium-rick
1252
+ fujuta/DialoGPT-medium-HarryPotter
1253
+ fujuta/DialoGPT-medium-RonWeasley
1254
+ fujuta/DialoGPT-medium-HermioneGrander
1255
+ deepparag/Aeona-Beta
1256
+ HomerChatbot/DialoGPT-small-homersimpsonbot
1257
+ redcy/FrasierBotv1
1258
+ ElMuchoDingDong/DialoGPT-medium-AudreyHepburn
1259
+ natdon/DialoGPT_Michael_Scott
1260
+ ElMuchoDingDong/DialoGPT-medium-AudreyHepburn_v3
1261
+ deathmite/DiabloGPT-small-potaru
1262
+ ElMuchoDingDong/DialoGPT-medium-AudreyHepburn_v4
1263
+ DaBaap/Chat-Bot-Batman
1264
+ Iwa/bot
1265
+ badlawyer/DialoGPT-medium-sherlock-bot
1266
+ thanhchauns2/DialoGPT-medium-Luna
1267
+ jayklaws0606/DialoGPT-small-jayBot
1268
+ RUCAIBox/mvp
1269
+ Flem/DialoGPT-medium-alastor
1270
+ keans/DialoGPT-small-highjacker
1271
+ jayklaws0606/dgpt-small-jaybot
1272
+ CodeMaestro/DialoGPT-small-TChalla
1273
+ ElMuchoDingDong/AudreyBotBlenderBot
1274
+ stfuowned/rickfinal
1275
+ DuskSigma/DialogGPTHomerSimpson
1276
+ hireddivas/dialoGPT-small-sonic2
1277
+ N0NAne/DialoGPT-small-harrypotter
1278
+ tinkoff-ai/response-quality-classifier-tiny
1279
+ tinkoff-ai/response-quality-classifier-base
1280
+ tinkoff-ai/response-quality-classifier-large
1281
+ tinkoff-ai/response-toxicity-classifier-base
1282
+ RUCAIBox/mvp-open-dialog
1283
+ RUCAIBox/mtl-open-dialog
1284
+ RUCAIBox/mvp-multi-task
1285
+ Cirilaron/DialoGPT-medium-raiden
1286
+ BlackSamorez/rudialogpt3_medium_based_on_gpt2_2ch
1287
+ lucataco/DialogGPT-med-Rick
1288
+ lucataco/DialoGPT-medium-rafa
1289
+ gloomyworm/DialoGPT-small-ortho
1290
+ kozlovtsev/DialoGPT-medium-harrypotter
1291
+ Cirilaron/DialoGPT-medium-jetstreamsam
1292
+ lucataco/DialoGPT-medium-omar
1293
+ lucataco/DialoGPT-medium-milo
1294
+ daedalus2003/HouseBot
1295
+ SallyXue/DialoGPT-small-harrypotter
1296
+ Averium/DialoGPT-medium-TailsBot
1297
+ nlokam99/ada_sample
1298
+ nlokam99/ada_sample_2
1299
+ nlokam99/ada_sample_3
1300
+ nlokam/adanimals_V1
1301
+ spuun/kekbot-beta-4-medium
1302
+ quirkys/DialoGPT-small-harrypotter
1303
+ markofhope/DialoGPT-medium-HarringtonBot
1304
+ AntoDono/DialoGPT-Bopy-Alpha-1.01
1305
+ Hermite/DialoGPT-large-hermite
1306
+ robinhad/gpt2-uk-conversational
1307
+ Browbon/DialoGPT-small-LucaChangretta
1308
+ gloomyworm/DialoGPT-medium-ortho
1309
+ Browbon/DialoGPT-medium-LucaChangretta
1310
+ Fluffypillow/DialoGPT-small-Rem
1311
+ Hermite/DialoGPT-large-hermite2
1312
+ Bman/DialoGPT-medium-peppapig
1313
+ ZipperXYZ/DialoGPT-medium-TheWorldMachine
1314
+ AlyxTheKitten/DialoGPT-medium-AgedBlaine-2
1315
+ Averium/DialoGPT-medium-TailsBot1.1
1316
+ Elijah629/DialoGPT-mrsanai
1317
+ ZipperXYZ/DialoGPT-medium-TheWorldMachine2
1318
+ damianruel/DialoGPT-medium-MySon
1319
+ ZipperXYZ/DialoGPT-medium-TheWorldMachineExpressive
1320
+ Elijah629/DialoGPT-shrek
1321
+ AlyxTheKitten/DialoGPT-medium-Jimmis-2
1322
+ dennis-fast/DialoGPT-ElonMusk
1323
+ Sealgair/DialoGPT-medium-Eyden
1324
+ crystallyzing/DialoGPT-small-nishikiyama
1325
+ crystallyzing/DialoGPT-small-kiryu
1326
+ NikkiTiredAf/DialoGPT-small-billy2
1327
+ Evokus/DialoGPT-small-harrypotter
1328
+ mcimmy/DialoGPT-small-bob
1329
+ Laggrif/DialoGPT-medium-Luke
1330
+ Laggrif/DialoGPT-medium-3PO
1331
+ ZipperXYZ/DialoGPT-medium-TheWorldMachineExpressive2
1332
+ prprakash/DialoGPT-small-TonyStark
1333
+ sexomq/TeoBot-Romanian-medium
1334
+ Bman/DialoGPT-medium-dora
1335
+ Hermite/DialoGPT-large-hermite3
1336
+ Averium/FabioBot
1337
+ arem/DialoGPT-medium-rickandmorty
1338
+ soProf1998/DialoGPT-small-chattyrick
1339
+ soProf1998/DialoGPT-medium-chattyrick
1340
+ Dorin/DialoGPT-small-Rick
1341
+ OptimalHoiboy/DialoGPT-small-kasumai
1342
+ Hartmann/DialoGPT-small-koishikomeiji
1343
+ Konbai/DialoGPT-small-akagi
1344
+ Konbai/DialoGPT-small-akagi2
1345
+ JazzyLucas/DialoGPT-small-TonyStark
1346
+ mystery/DialoGPT-small-pinkiepie
1347
+ sexomq/TeoBot-Romanian-medium2
1348
+ erikycd/chatbot_hadita
1349
+ infinix/Sheldon-bot
1350
+ JamesonSpiff/chatBot_test_model
1351
+ Akito1961/DialoGPT-small-C3PO
1352
+ Naturealbe/DialoGPT-small-Technoblade
1353
+ zR0clu/DialoGPT-medium-Mr.Roboto
1354
+ reso/DialoGPT-medium-v3ga
1355
+ trimox/tryingnewstuff
1356
+ Nakul24/YC_Bot
1357
+ casperthegazer/DiabloGPT-medium-lukedot
1358
+ JamesStratford/PLord-bot-DialoGPT-medium
1359
+ CaptPyrite/DialoGPT-small-cat
1360
+ SafeTorpedo/DialoGPT-small-MichaelBot
1361
+ brianveebee/DialoGPT-medium-bender
1362
+ myynirew/DialoGPT-medium-shouko01
1363
+ myynirew/2-0OKUOHS
1364
+ smmzhu/DialoGPT-medium-sam
1365
+ myynirew/shouko0-3
1366
+ myynirew/dumbbot
1367
+ Lamia/DialoGPT-small-Sundrop
1368
+ ashtrindade/chatbot-stacey
1369
+ tinkoff-ai/ruDialoGPT-small
1370
+ tinkoff-ai/ruDialoGPT-medium
1371
+ 24adamaliv/DialoGPT-medium-Will
1372
+ cybertelx/DialoGPT-small-drunkic0n
1373
+ Rick-C137/DialoGPT-small-rick
1374
+ debyve/dumbbot
1375
+ Amir-UL/JimBot
1376
+ BoxCrab/DialoGPT-small-Strider
1377
+ AbdalK25/DialoGPT-small-TheWiseBot
1378
+ casperthegazer/DialoGT-gandalf-urdot
1379
+ pineappleSoup/DialoGPT-medium-707
1380
+ Nakul24/AD_ChatBot
1381
+ TeaTM/DialoGPT-small-bushcat
1382
+ ionite/DialoGPT-medium-NakaAI
1383
+ Creepton/DDLCYuri-DialoGPT-small
1384
+ TeaTM/DialoGPT-large-bushcat
1385
+ yazinga/DialoGPT-medium-scout
1386
+ throwaway112358112358/DialoGPT-medium-script
1387
+ Jingna/test_hpv_discord
1388
+ anonchickenlegs/sartoshi-bot
1389
+ xander-cross/DialoGPT-small-EvilMortyTheBot
1390
+ Bman/DialoGPT-medium-shrek
1391
+ Yank2901/DialoGPT-small-Rick
1392
+ akshatpandeyme/DialoGPT-small-manpreet
1393
+ Jenwvwmabskvwh/DialoGPT-small-josh444
1394
+ akshatpandeyme/DialoGPT-small-parthiv
1395
+ akshatpandeyme/DialoGPT-small-ParthivBot
1396
+ seeksery/DialoGPT-calig
1397
+ akshatpandeyme/DialoGPT-small-AnyaBot
1398
+ Jordine/shitter
1399
+ model-attribution-challenge/DialoGPT-large
1400
+ seeksery/DialoGPT-calig2
1401
+ obl1t/DialoGPT-medium-Jotaro
1402
+ trickstters/DialoGPT-small-evanbot
1403
+ trickstters/evanbot-gpt
1404
+ AriakimTaiyo/gpt2-chat
1405
+ Yank2901/DialoGPT-small-Harry
1406
+ lizz27/DialoGPT-small-baymax
1407
+ obl1t/DialoGPT-medium-Jolyne
1408
+ seeksery/DialoGPT-calig3
1409
+ Jenwvwmabskvwh/DialoGPT-small-josh445
1410
+ trickstters/evbot2
1411
+ Jenwvwmabskvwh/DialoGPT-small-josh450
1412
+ lizz27/DialoGPT-medium-BaymaxBot
1413
+ soop/DialoGPT-medium-BaymaxBot
1414
+ abelblue3/DialoGPT-medium-baymax
1415
+ priyankac/DialoGPT-medium-BaymaxBot
1416
+ Ironpanther1/Testing
1417
+ tosin/dialogpt_afriwoz_pidgin
1418
+ Anon25/DialoGPT-Medium-BaymaxBot
1419
+ GoldenRedstone/DialoGPT-medium-Phoenix-Wright
1420
+ Primobot/DialoGPT-small-harrypotter
1421
+ Lyem/LyemBotv1
1422
+ JamesSantosxx/DialoGPT-small-harrypotter
1423
+ Lyem/LyemBotv2
1424
+ Ironpanther1/ArtoriaBot
1425
+ Swervin7s/DialoGPT-medium-anakin
1426
+ DogH2O/DialoGPT-small-naruto
1427
+ NoPeanuts/DialoGPT-small-po
1428
+ Gravitygaming/homerai
1429
+ Lyem/LyemBotv3
1430
+ celine45688/LuTing
1431
+ antwortemir/shouko04
1432
+ SebastianS/MetalSebastian
1433
+ notaproblem00/DialoGPT-small-bakugou
1434
+ myodoctor/DIALOGPT-medium-HarryPotterBot
1435
+ aniketface/DialoGPT-medium-elon
1436
+ noiseBase/DialoGPT-small-HarryPotter
1437
+ karan21/DialoGPT-medium-rickandmorty
1438
+ karan21/DialoGPT-medium-guin
1439
+ Sophiejs/DialoGPT-small-BlaineBot
1440
+ skouras/DialoGPT-small-swda
1441
+ skouras/DialoGPT-small-maptask
1442
+ TheodoreAinsley/LindaGold
1443
+ AlbedoAI/DialoGPT-large-Albedo
1444
+ AlbedoAI/DialoGPT-large-Albedo2
1445
+ willmay/DialoGPT-medium-will
1446
+ AlbedoAI/DialoGPT-medium-Albedo
1447
+ chulainn/DialoGPT-medium-Zuko
1448
+ ctoner2653/DialoGPT-medium-RickBoty
1449
+ Number4/DialoGPT-medium-harrypotter
1450
+ yummyhat/DialoGPT-small-spike
1451
+ EllyPony/flutterbot
1452
+ Suryansh-23/DialoGPT-small-MichaelScottOffice
1453
+ Cirilaron/DialoGPT-medium-vergil
1454
+ Izuuk/izuuk
1455
+ shungyan/Diablo-small-harrypotter
1456
+ bhavyasharma/DialoGPT-small-harrypotter
1457
+ nintwentydo/rickbot
1458
+ tylersfoot/DialoGPT-medium-rick
1459
+ EJoftheVern/DialoGPT-medium-shaggy
1460
+ xtraXpert/DialoGPT-small-RickAndMorty2
1461
+ ANIKEThash/DialoGPT-medium-character
1462
+ Noonw/DialoGPT-small-hijackersexurmom
1463
+ fat32man/elon_answers
1464
+ MinhP/DialoGPT-small-themis
1465
+ Noonw/DialoGPT-small-osamaflyplane
1466
+ Noonw/DialoGPT-small-ladenflyplane
1467
+ Noonw/DialoGPT-small-ladenonjet
1468
+ MinhP/DialoGPT-small-franco
1469
+ Karan59/DialoGPT-small-evaModel
1470
+ marblyso/DialoGPT-medium-marblesbagel
1471
+ Jojo17/DialoGPT-small-RickAndMorty
1472
+ deseipel/medium-LucyClarke_
1473
+ DiscordBackup/model0000
1474
+ SirSpiffy/IvanModel
1475
+ woodmtaylor/DialoGPT-small-Heej
1476
+ woodmtaylor/DialoGPT-medium-Heej
1477
+ OctaviusI/marisaV0
1478
+ ChloeMJM/DialoGPT-small-rick
1479
+ JDesignEra/DialoGPT-small-Anya
1480
+ MrE/DialoGPT-medium-SARGER4
1481
+ aarya-c111/DialoGPT-small-Rogers
1482
+ bozlucas/DialoGPT-medium-HermioneBot
1483
+ LasseVKP/DialoGPT-Mogens
1484
+ metaloopa/DialoGPT-medium-Rintaro
1485
+ ingen51/DialoGPT-medium-GPT4
1486
+ Divyesh/DialoGPT-medium-harrypotter
1487
+ Natsuki-Chan/DialoGPT-medium-luz
1488
+ akira2001/DialoGPT-medium-harrypotter
1489
+ osueng02/DialoGPT-small-STAN_BOT
1490
+ osueng02/DialoGPT-medium-STAN_BOT
1491
+ wormed/DialoGPT-small-denai
1492
+ RehanP123/DialoGPT-medium-kermit.old
1493
+ Nakul24/SM_Bot
1494
+ chulainn/DialoGPT-medium-Ragnar
1495
+ aniketface/DialoGPT-product
1496
+ shohanursobuj/DialoGPT
1497
+ marblyso/DialoGPT-medium-hero
1498
+ marblyso/DialoGPT-medium-kel
1499
+ marblyso/DialoGPT-medium-aubrey
1500
+ akil191/small-test-harryakakakaka
1501
+ sanpellegrino/CoryBot
1502
+ Arqhero/DialoGPT-small-adventuretime
1503
+ chulainn/DialoGPT-medium-Tyrion
1504
+ VTG/MentalHealthChatbotv1
1505
+ luminolblue/HomunculusGPT-testbot
1506
+ Paulina354/DialoGPT-small-rickandmorty
1507
+ khuranagarvit019/MentalHealthChatbot
1508
+ VirtualizedTrash/Chatbot
1509
+ pedrocaribe/DialoGPT-medium-LL
1510
+ queenaccila/DialoGPT-small-kashiwagi
1511
+ GarfExit/DialogGPT-medium-707
1512
+ marblyso/DialoGPT-medium-shepherd
1513
+ Spectre29/DialoGPT-small-Kaisa
1514
+ Spectre29/Kaisa-converse-model
1515
+ ZedTheUndead/Rick_fragment
1516
+ marblyso/DialoGPT-medium-mari
1517
+ Delicious/DialoGPT-small-harrypotter
1518
+ BBHKR/DialoGPT-small-jacksparrow
1519
+ Guwon/DialoGPT-small-Quincy
1520
+ epeicher/DialoGPT-small-homer-2
1521
+ timmychanga/DialoGPT-small-ashley
1522
+ mywateriswet/ShuanBot
1523
+ epeicher/DialoGPT-small-flanders
1524
+ Super-McTea/DialoGPT-small-McTea
1525
+ Eronzin/meuBotzindoEron
1526
+ Techdra/DialoGPT-large-theboy
1527
+ Eronzin/DialoGPT-small-Frodo
1528
+ gtgillott/gib
1529
+ AwesomeDWNJ/EmiBot
1530
+ CJ3/DialoGPT-medium-amber3
1531
+ GamerMan02/DialoGPT-medium-gamerbot2
1532
+ GamerMan02/DialoGPT-medium-gamerbot1
1533
+ Insomnic/DialoGPT-small-harrypotter
1534
+ Super-McTea/DialoGPT-small-McTeaV2
1535
+ FelipeJoazeiro/chatbot-morty
1536
+ microsoft/GODEL-v1_1-base-seq2seq
1537
+ microsoft/GODEL-v1_1-large-seq2seq
1538
+ Rencist/DialoGPT-small-rick
1539
+ scorpiofrens/DialoGPT-medium-ergon
1540
+ somemusicnerdwoops/DialoGPT-small-shadow
1541
+ powchang/DialoGPT2-medium-CAiFE
1542
+ ratneshrt/DialoGPT-small-Artico
1543
+ somemusicnerdwoops/DialoGPT-distilgpt2-sonicfandub
1544
+ Tsec-Research/DialoGPT-chandler-penny
1545
+ neonon/DialoGPT-medium-cloy
1546
+ ddae208s/DialoGPT-small-dimitri
1547
+ mossfarmer/VRANAK
1548
+ Matax/Aristrathor3000
1549
+ brownanchovy/Harry
1550
+ Overlrd/DialoGPT-small-cartman
1551
+ epeicher/DialoGPT-large-homer
1552
+ comradesocrates/DialoGPT-medium-stranger
1553
+ Rakublu/DialoGPT-small-yasuo
1554
+ neonon/DialoGPT-medium-htccc
1555
+ Alt41r/gpt-simpson
1556
+ Nimit-Jjw/DialoGPT-chandler-penny
1557
+ Quoc123/DialoGPT-small-AQUA
1558
+ marblyso/DialoGPT-medium-pearl
1559
+ estus2/rick-superu-rick2
1560
+ marblyso/DialoGPT-medium-marina
1561
+ rovenmusic/DialoGPT-small-melodybot
1562
+ deseipel/small-LucyClarke_
1563
+ rovenmusic/DialoGPT-small-melodybotv2
1564
+ rovenmusic/DialoGPT-small-melodybotv3
1565
+ epeicher/DialoGPT-medium-homer
1566
+ andrewkroening/GalaxyFarAway-DialoGPT-HanSolo
1567
+ nams/nams-bot
1568
+ Nicktherat/DialoGPT-medium-endella
1569
+ alfirsaafauzulh/DialoGPT-small-KamuiBastion
1570
+ rovenmusic/DialoGPT-small-melodyv10
1571
+ somesh212/Harry_Potter-BOT
1572
+ somesh212/Harry_Potter_botDialoGPT_Som2
1573
+ jmagine/DialoGPT-small-metahead
1574
+ somesh212/Harry_Potter_botDialoGPT_Som3
1575
+ rovenmusic/DialoGPT-small-melodyvfinal
1576
+ jmagine/DialoGPT-small-jmagine
1577
+ jmagine/DialoGPT-small-funded
1578
+ jmagine/DialoGPT-small-jimj
1579
+ andrewkroening/GalaxyFarAway-DialoGPT-LukeSkywalker
1580
+ andrewkroening/GalaxyFarAway-DialoGPT-Threepio
1581
+ andrewkroening/GalaxyFarAway-DialoGPT-Vader
1582
+ andrewkroening/GalaxyFarAway-DialoGPT-LeiaOrgana
1583
+ andrewkroening/GalaxyFarAway-DialoGPT-Yoda
1584
+ Wizardd/DialoGPT-small-sheldon
1585
+ BenKJH/DialoGPT-small-lucybotasg
1586
+ Ananjas/AwooAI
1587
+ Ananjas/AwooV2
1588
+ kookyklavicle/gpt-sean-diaz
1589
+ kookyklavicle/SeanDiazBot2
1590
+ Ananjas/AwooV3
1591
+ Overlrd/DialoGPT-medium-cartman
1592
+ Ananjas/AwooV6
1593
+ mathecas/HarryPotterBotAI
1594
+ Karina256/DialoGPT-small-dory
1595
+ Tony8657/DialoGPT-small-TonyStarkBot
1596
+ SebastianS/my_mim
1597
+ TFS668/DialoGPT-small-Rick
1598
+ redhoff/DialoGPT-Medium-RedBot
1599
+ FeriVOQ/DialoGPT-small-joshua
1600
+ Triobloid/DialoGPT-small-lianaharrypotter
1601
+ quinnzie/DialoGPT-small-sinister
1602
+ FarziBuilder/DialoGPT-medium-harrypotter
1603
+ sohampatil/DialoGPT-small-mentalchatbot
1604
+ gtkarber/DialoGPT-medium-columbo
1605
+ PaddlePaddle/plato-mini
1606
+ Junkan/DialoGPT-medium-Bilbo
1607
+ ThatSkyFox/DialoGPT-medium-whatsapp
1608
+ Ar4ikov/DialogAgentGPT2
1609
+ reallygoodtechdeals/Bingocat-ai-Dialo-GPT-medium
1610
+ thmauler/crashed
1611
+ OptionaI/DialoGPT-small-beepboopy
1612
+ davebathhews/DialoGPT-OTIS
1613
+ GGOM/SipBotGGOM
1614
+ davebathhews/DialoGPT-OTISBOT
1615
+ GGOM/WillBotGGOM
1616
+ GGOM/ElyasBotGGOM
1617
+ reallygoodtechdeals/steve-ai-Dialo-GPT-medium
1618
+ Crushtoe/DialoGPT-small-vangluss
1619
+ apotempest/DialoGPT-medium-geralt
1620
+ DiogoSabec/DialoGPT-small-joshua
1621
+ WaleedArif/DialoGPT-small-Micheal
1622
+ Crushtoe/DialoGPT-medium-vangluss
1623
+ Crushtoe/GODEL-v1_1-base-seq2seq-vangluss
1624
+ DiogoSabec/BOT
1625
+ Le033/DialoGPT-small-rickmorty
1626
+ Filosofas/DialoGPT-medium-PALPATINE2
1627
+ JadansTower/jobot
1628
+ NTMNathan/DialoGPT-small-harrypotter
1629
+ Ashypaws/DialoGPT-medium-Ashybot
1630
+ wmdosborne/DialoGPT-medium-kyritebot
1631
+ worms3402/DialoGPT-small-automata2
1632
+ Pi3141/DialoGPT-small-elon
1633
+ Grendar/Dialo-GPT-medium-shiro
1634
+ Pi3141/DialoGPT-medium-elon
1635
+ Pi3141/DialoGPT-medium-elon-2
1636
+ JoshuaPawlik/DialoGPT-medium-joshua
1637
+ Pi3141/DialoGPT-medium-elon-3
1638
+ josephthen3320/DialoGPT-small-walter
1639
+ robbiegwald/Rick
1640
+ Gurtej/Drbot
1641
+ Hereward/DialoGPT_medium_ObiWan_Kenobi
1642
+ Giu888/DialoGPT-small-sao
1643
+ Grendar/blenderbot-400M-distill-Shiro
1644
+ keeg8/Book-0-1500
1645
+ keeg8/Book-1500-1700
1646
+ keeg8/Book-1850-1900
1647
+ keeg8/Book-1700-1850
1648
+ karlreimond/DialoGPT-small-harrypotter
1649
+ lenartlola/SpongeBob
1650
+ lenartlola/rick-bot
1651
+ Deedlit/DialoGPT-small-southpark
1652
+ babylasagne/DialoGPT-small-narryuto
1653
+ babylasagne/DialoGPT-small-harry
1654
+ babylasagne/DialoGPT-small-spider
1655
+ babylasagne/DialoGPT-small-batman
1656
+ BradHeffernan/rickNmortyModel
1657
+ UmUDev/DialoGPT-medium-AlexVN
1658
+ ukikunz/gas-kenji-medium
1659
+ ukikunz/gas-kenji
1660
+ Isokeel/DialoGPT-medium-KMbot
1661
+ KakoSi/AcciGPT-smol
1662
+ Spoofed/DiabloGPT-small-peter
1663
+ sophiadt/DialoGPT-medium-707
1664
+ UmUDev/DialoGPT-medium-Alex
1665
+ PygmalionAI/pygmalion-350m
1666
+ sophiadt/DialoGPT-medium-reigen
1667
+ rexfi/DialoGPT-small-peter
1668
+ rexfi/NafezBot-DialoGPT
1669
+ caps1994/chris-bot
1670
+ rexfi/RickyBot
1671
+ allenai/cosmo-xl
1672
+ woodmtaylor/DialoGPT-large-Dumpling
1673
+ rexfi/MikeScottBot
1674
+ apfallinus/RickBot
1675
+ apfallinus/HarryBot
1676
+ apfallinus/MedBot
1677
+ apfallinus/AeonaBot
1678
+ apfallinus/BatmanBot
1679
+ apfallinus/AiBot
1680
+ LostXOR/TotallyNotARobot
1681
+ gachaddict/DialoGPT-medium-ike
1682
+ OctaviusI/staging
1683
+ PygmalionAI/pygmalion-1.3b
1684
+ Terrymir/DialoGPT-medium-Soraka
1685
+ SantiPingui58/DialoGPT-small-hika
1686
+ ss1612/montana-chat
1687
+ MrEmpty/DialoGPT-small-rickandmorty
1688
+ shikiskhakis/DialoGPT-small-blackdoom
1689
+ alexandreteles/GPTChizuru
1690
+ Chae/scottbot_med
1691
+ AhmedMostafa/DialoGPT-small-Rick
1692
+ metkoon/30dollarceo
1693
+ Dinocroth/DialoGPT-medium-Trevor-PhilipsV2
1694
+ metkoon/MatBot
1695
+ SmallQ/DialoGPT-small-Anya
1696
+ bigbossa/aiko6
1697
+ GK123/DialoGPT-medium-hanbot
1698
+ TheHappyDrone/DialoGPT-medium-salesman
1699
+ Pcik/DialoGPT-medium-Jaiden
1700
+ TheHappyDrone/DialoGPT-medium-Nexus-Nova
1701
+ Pcik/DialoGPT-medium-Dante
1702
+ AlmightyDeathCheater/DialoGPT-medium-harrypotter
1703
+ Pcik/DialoGPT-medium-Kirby
1704
+ Starry/COUNTNARC
1705
+ TheHappyDrone/DialoGPT-medium-Nexus-Nova-turing-v2
1706
+ wetwoteraq/DialoGPT-medium-aqua
1707
+ wetwoteraq/DialoGPT-small-peter
1708
+ wetwoteraq/DialoGPT-medium-peter
1709
+ lilexo2/DialoGPT-medium-Monica
1710
+ momo10/DialoGPT-small-harryPotter
1711
+ Antale123/ConorBot
1712
+ shikiskhakis/DialoGPT-small-xemnas
1713
+ Ecook/DialoGPT-medium-Ecook
1714
+ PygmalionAI/pygmalion-2.7b
1715
+ FowlerF/DiscordChatBot
1716
+ JoeRoganfan-69420/DialoGPT-medium-HarryPotterbot
1717
+ dusty310/DialoGPT-medium-Misaki
1718
+ Gurtej/Drbot2
1719
+ Gurtej/Drbot3
1720
+ Gurtej/Drbot4
1721
+ Gurtej/Drbot5
1722
+ Gurtej/Drbot6
1723
+ Gurtej/Drbot7
1724
+ Gurtej/Drbot8
1725
+ Gurtej/Drbot9
1726
+ PygmalionAI/pygmalion-6b
1727
+ Gurtej/Drbot11
1728
+ navygup/Mood-Tracker
1729
+ Maraslumunnus/DialoGPT-small-ivern
1730
+ DAS9051/BatemanChatBot
1731
+ SmallQLALA/DialoGPT-small-Anya
1732
+ RinkaDev/GPT-Peppa-Pig
1733
+ thu-coai/blenderbot-1B-augesc
1734
+ siyaT/DialoGPT-harrypotter-small
1735
+ keircare/DialoGPT-small-RickSanchez
1736
+ shiiiroe/DialoGPT-medium-kirito
1737
+ jdakillah/Rick
1738
+ kielljoy/DialoGPT-small-stupidspecialkay
1739
+ Ashypaws/DialoGPT-medium-Kitaibot
1740
+ jdakillah/RICK-V2
1741
+ jdakillah/Bender
1742
+ jdakillah/Generalbot
1743
+ kielljoy/DialoGPT-medium-ryanbot
1744
+ emre/spanish-dialoGPT
1745
+ vuminhtue/DialoGPT-large-HarryPotter3
1746
+ ralphsorz/DialoGPT-small-samwise
1747
+ SumYin/DialoGPT-small-Homer
1748
+ JamesRoy/DGPT-DC
1749
+ Blizzchor/DialoGPT-medium-HarryBotter
1750
+ gjhghjk/rick
1751
+ gjhghjk/rick2
1752
+ SumYin/ZeroTwo-Medium-DialoGPT
1753
+ Blizzchor/DialoGPT-medium-gamora
1754
+ Mydia2/DialoGPT-small-Flonnealive
1755
+ AL-CT/DialoGPT-small-slayer
1756
+ DhruvShek/Webraft-Ai
1757
+ arno2077/DiabloGPT-small-harrypotter
1758
+ keyonecs/fourept-debique-gpt
1759
+ Blizzchor/DialoGPT-medium-QuillLord
1760
+ callmeclover/Stinger-CONVRS_MODL
1761
+ aminFelah/DialogueGPT-very-small-harryPotter
1762
+ Keijuro/aeris-dialogpt
1763
+ Abdelrahman853/DialoGPT-small-echo
1764
+ Bearfoot/DialoGPT-medium-shrek
1765
+ arthme2/jay
1766
+ arthme2/DialoGPT-medium-Jay
1767
+ 42meow/DialoGPT-medium-42meow
1768
+ Peeepy/Evie
1769
+ HorniFolks/Unicorn
1770
+ waifu-workshop/pygmalion-6b
1771
+ agenttylostudios/DialoGPT-small-Bocchi
1772
+ GregariousJamie/DialoGPT-small-jamie
1773
+ Fuwaguwa/DialoGPT-Medium-AzurLaneMusashi-v8
1774
+ s3nh/DialoGPT-large-Rick
1775
+ s3nh/DialoGPT-large-Morty
1776
+ s3nh/DialoGPT-small-morty
1777
+ Givinghawk/GPT-Morty
1778
+ DhruvShek/swearbot
1779
+ grart/DialoGPT-small-gillion
1780
+ interpixle/Sir_Caladan
1781
+ s3nh/DialoGPT-tony-montana
1782
+ s3nh/DialoGPT-small-harry-potter-goblet-of-fire
1783
+ s3nh/DialoGPT-small-hermione-granger-goblet-of-fire
1784
+ s3nh/DialoGPT-small-woody-toy-story
1785
+ s3nh/DialoGPT-small-buzz-toy-story
1786
+ puj0/DialoGPT-small-joshua
1787
+ julianvd49/DialoGPT-medium-EllieBot
1788
+ Sreyas/DialoGPT-small-elit
1789
+ DiscordRequestsAPI/DialoGPT-medium-NurDeeps
1790
+ MarinHinawa/DialoGPT-medium-Ene
1791
+ polandball/polanball
1792
+ whoami24142/DialoGPT-small-padilha
1793
+ DiscordRequestsAPI/NurDeeps-Bot
1794
+ Vaibhav-rm/GPT2-Shri-v1
1795
+ chrisrowles/DialoGPT-small-chrisrowles
1796
+ espeon98/DialoGPT-kenny-bot
1797
+ espeon98/DialoGPT-kenny-bot-2
1798
+ polandball/GPT-Polen
1799
+ chrisrowles/DialoGPT-medium-chrisrowles
1800
+ DiscordRequestsAPI/NurDeeps-Bot-2
1801
+ steerevo88/DialoGPT-small-baiken
1802
+ akiFQC/japanese-dialogpt-small-aozora
1803
+ Ngao/DialoGPT-small-ngao
1804
+ Mineroero/DialoGPT-medium-M4SOPMOD
1805
+ simple2312/DialoGPT-nayeon
1806
+ nemowet88/DialoGPT-small-ricktest
1807
+ Abraxas3d/house
1808
+ vampiregirl/DialoGPT-medium-lennoxram
1809
+ aisingapore/coherence-momentum
1810
+ simple2312/DialoGPT-Ellie
1811
+ simple2312/DialoGPT-Twice
1812
+ testaws/DialoGPT-small-joshua
1813
+ nemowet88/output-pythia-test
1814
+ Gurtej/Drbot12
1815
+ Gurtej/Drbot13
1816
+ Gurtej/Drbot14
1817
+ Gurtej/Drbot16
1818
+ EZSNoVa/DialogGPT-medium-NoVa
1819
+ mattallio/Archivist-medium-dialoGPT
1820
+ rlatt/DialoGPT-small-RickSanchez
1821
+ Lyforth/DialoGPT-Medium-Maribelle
1822
+ kittenwhiperer/Deadpool
1823
+ KumquatJoe/DialoGPT-medium-MaleToucherBot
1824
+ lmkhoa/GODEL_base_model
1825
+ JamesStratford/Pidrow-bot-DialoGPT-Large-Feb2023
1826
+ LrxLcs/DialogGPT2-SMAL
1827
+ Delcos/internal_chat_model_e2
1828
+ euvu/DialoGPT-small-harrypotter
1829
+ LrxLcs/GPT2-V2
1830
+ LrxLcs/GPT2-Test
1831
+ euvu/euvu-rickbot
1832
+ Weeeeeeeeeeeee00/DialoGPT-small-harrypotter
1833
+ slyslasher24/DialoGPT-Medium-Pondweed
1834
+ slyslasher24/DialoGPT-Small-Pondweed
1835
+ bradydawg/AI-Bot2
1836
+ aisingapore/rumour-detection-twitter
1837
+ RatInChat/Pilup7575
1838
+ rlatt/DialoGPT-large-RickSanchez
1839
+ Kira225784/Klarabot-test
1840
+ bigbossa/DialoGPT-small-aikogirl
1841
+ sckova/DialoGPT-small-joshua
1842
+ sckova/DialoGPT-medium-joshua
1843
+ sckova/DialoGPT-medium
1844
+ Beltenebros/DialoGPT-small-PerionOfGaul
1845
+ Byteno/DialoGPT-medium-glamrockfreddy
1846
+ audreycl/audreycl-testagain
1847
+ aisingapore/Lif3WayAp
1848
+ audreycl/DialoGPT-RoyalPurpleFish
1849
+ audreycl/DialoGPT-RPF
1850
+ Axelajs26/DialoGPT-small-alicetendou
1851
+ Noohance/DialoGPT-medium-noohbot
1852
+ Draptor/DialoGPT-small-coolco
1853
+ David042/DialoGPT-LucasBot
1854
+ Hobospider132/DialoGPT-Mahiru-Proto
1855
+ Draptor/DialoGPT-medium-moto
1856
+ aisingapore/SPANBert
1857
+ JYBX/DialoGPT-small-Penny
1858
+ JYBX/DialoGPT-small-Pennybot
1859
+ aisingapore/RoBERTa-base
1860
+ JYBX/DialoGPT-small-Amybot
1861
+ LuckyBor11/Figure
1862
+ FlyingGrayson0304/Gandalf-stupid-version
1863
+ BlinksFly/Harry_Potter-Ai
1864
+ PhilipN/DialoGPT-small-KeqingBot
1865
+ YTTD/DialoGPT-medium-sou
1866
+ PhilipN/DialoGPT-large-KeqingBot
1867
+ YTTD/DialoGPT-medium-souv2
1868
+ keonju/chat_bot
1869
+ MysteriousAmazon/DialoGPT-medium-alastor
1870
+ mICHPl/MINI_AI
1871
+ rlatt/DialoGPT-large-King-James-Bible-test
1872
+ v3nom1704/DialoGPT-small-potterbot
1873
+ Techcs002/DialoGPT-medium-AboTalkTest
1874
+ MysteriousAmazon/DialoGPT-medium-freddy
1875
+ ICAMPB204/DialoGPT-small-HarryPotter
1876
+ kelvinhang/DialoGPT-medium-badguy
1877
+ tatsumis6/MonikaAI
1878
+ kennethhendricks/DialoGPT-medium-PowPowGaming-Gen1
1879
+ rlatt/DialoGPT-large-King-James-Bible-test-accurate
1880
+ kennethhendricks/DialoGPT-medium-PowPowGaming
1881
+ kelvinhang/DialoGPT-medium-badguy2
1882
+ zami0011/qqpbksdj
1883
+ vladiyudi/Morty-data
1884
+ RazaK18/DialoGPT-small-harrypotter
1885
+ comradesocrates/DialoGPT-large-io
1886
+ kelvinhang/DialoGPT-medium-okakoro
1887
+ Monchic/chatwithkani
1888
+ zami0011/rickdick
1889
+ CallMeJeremy/DialoGPT-medium-THREEPIO
1890
+ Leomas/DialoGPT-medium-Leomas
1891
+ RehanP123/DialoGPT-large-kermit
1892
+ shahules786/Safetybot-T5-base
1893
+ huolongguo10/CDial-GPT2-LCCC-Base-copy
1894
+ yashR4J/TyrionBOT
1895
+ TakoIsATaco/DialoGPT-small-ShinAI
1896
+ MrLamBam/DialoGPT-medium-LUKEBot
1897
+ Zeda/DialoGPT-Medium-ZedaBot
1898
+ princedream/DialoGPT-small-harrypotter
1899
+ shahules786/Safetybot-mt5-base
1900
+ xiaomengdotcom/Chatgpt-harryP
1901
+ ProtonPLUS/Colab
1902
+ YTTD/DialoGPT-medium-saf
1903
+ jasondubon/HubermanGPT-small-v1
1904
+ YTTD/DialoGPT-medium-safv2
1905
+ YTTD/DialoGPT-medium-safv3
1906
+ kennethhendricks/DialoGPT-medium-jared-hendricks-gen1
1907
+ Cohee/pygmalion-6b-pyggyback-v6_40_v8p4_60
1908
+ DiogenesGois/DialoGPT-medium-Rick
1909
+ LordDanielDE/DialoGPT-medium-Hina
1910
+ ITG/DialoGPT-medium-spanish-chitchat
1911
+ kemsa51/DialoGPT-medium-cartman
1912
+ Mogwhy/DialoGPT-medium-Arrobot
1913
+ nRuaif/Pyg6B-V8P2
1914
+ Seer-luma/DialoGPT-small-SeerBot
1915
+ Dinoloverwii/DialoGPT-Sachibot
1916
+ flayeddie/Mike
1917
+ wooldover/krautbot
1918
+ kielljoy/DialoGPT-small-k
1919
+ WAHCLAN/DialoGPT-Medium-DAN
1920
+ ss1612/loki-chat
1921
+ IceBruhOne/mytestcharacter
1922
+ wooldover/pygbot
1923
+ IceBruhOne/DialoGPT-medium-subjectai
1924
+ YukioKoito/DialoGPT-small-ozua
1925
+ gaytrimoh/DialoGPT-small-harrypotter
1926
+ YukioKoito/DialoGPT-small-doog
1927
+ IceBruhOne/DialoGPT-medium-subjectai2
1928
+ custads23/DialoGPT-medium-aubrey
1929
+ HaHaMagpie/DialoGPT-small-phineas
1930
+ Carslo45/DialoGPT-medium-ddlc-monika
1931
+ zl111/ChatDoctor
1932
+ MarinHinawa/DialoGPT-medium-haruka
1933
+ custads23/DialoGPT-medium-basil
1934
+ IceBruhOne/DialoGPT-medium-complexai
1935
+ MarinHinawa/DialoGPT-medium-Shintaro
1936
+ jlsalty9999/DialoGPT-medium-Riddle
1937
+ custads23/DialoGPT-medium-mincy
1938
+ Wtfsquad/DialoGPT-small-pulpfictionVincent
1939
+ ss1612/erika-chatv4
1940
+ WAHCLAN/DialoGPT-Large-DAN
1941
+ Speedemon/jake-peralta-ai
1942
+ Speedemon/cobalt
1943
+ DeliveryBoy/DiabloGPT-medium-Kurisu
1944
+ AbbyRhea/DialoGPT-small-adrienbot
1945
+ monish162/kirthin-waifuu
1946
+ janna42/DialoGPT-small-phoenix
1947
+ AbbyRhea/DialoGPT-medium-AA
1948
+ FrozenSmoothie/DialoGPT-medium-star
1949
+ Fizi12341/astro_bot1234
1950
+ stiGGy/DialoGPT-medium-raymond
1951
+ patthebaker45/DialoGPT-small-Carlbot
1952
+ r4k4n1/DialoGPT-small-joshua
1953
+ Sukul/DialoGPT-small-Harsabot
1954
+ Sukul/DialoGPT-small-Harsabot1
1955
+ hihihotdog/DialoGPT-bot
1956
+ LarsJonasson/pythia-1.4b-deduped-sft-swedish
1957
+ mayaeary/pygmalion-6b-4bit-128g
1958
+ mayaeary/pygmalion-6b_dev-4bit-128g
1959
+ Inhaexpress/DialoGPT-medium-paimon
1960
+ sanyasna517/DialoGPT-medium-Zhongli
1961
+ StephenBrink/DialoGPT-small-will
1962
+ StanleyRoberts/Nix
1963
+ boudchicha/soluzione
1964
+ mayaeary/PPO_Pygway-V8p4_Dev-6b-4bit-128g
1965
+ ToborWinner/DialoGPT-medium-jolly
1966
+ mayaeary/PPO_Pygway-6b-Mix-4bit-128g
1967
+ ayushutkarsh/t3
1968
+ Inhaexpress/DialoGPT-medium-paimon2
1969
+ eepyblanky/DialoGPT-medium-malina
1970
+ eachadea/legacy-ggml-vicuna-13b-4bit
1971
+ eachadea/ggml-gpt4-x-alpaca-13b-native-4bit
1972
+ totallynotbrent/brotGPT
1973
+ Inhaexpress/DialoGPT-medium-harry_potter_ps
1974
+ robintan66/DialoGPT-small-harrypotter
1975
+ MajorCrayon7047/MadboneAssistantGPT-2
1976
+ VennuT/DialoGPT-medium-Alphinaud
1977
+ triple777/annicebot
1978
+ totallynotbrent/aaronGPTalpha
1979
+ Plaaasma/gerald-model
1980
+ yashugupta786/bart_large_xsum_samsum_conv_summarizer
1981
+ eachadea/legacy-ggml-vicuna-7b-4bit
1982
+ ColtonAi/Llmtrain
1983
+ ColtonAi/Chem4
1984
+ IchtacaKemeRaz/favabean
1985
+ Stromello/DialoGPT-medium-ZeroTwo
1986
+ totallynotbrent/brotGPTplus
1987
+ storminstakk/Stormin-Stakk
1988
+ ToddGoldfarb/Cadet-Tiny
1989
+ aghelan3/eggIncubationRepo
1990
+ hackathon-somos-nlp-2023/SalpiBloomZ_15949_input_1024-1b7
1991
+ JosephusCheung/Guanaco
1992
+ raymondho/DialoGPT-small-harry
1993
+ Capitalist/DialoGPT-small-rick
1994
+ gfgddfg/DialoGPT-small-qiu_chat
1995
+ eachadea/ggml-toolpaca-13b-4bit
1996
+ CNR223/DialoGPT-small-MasterO
1997
+ Abigaming75/Bot_wa
1998
+ pranitrai07/DialoGPT-medium-harrypotter
1999
+ IlyaGusev/saiga_7b_lora
2000
+ Ancestral/Dolly_Shygmalion-6b-4bit-128g
2001
+ Ancestral/PPO_Shygmalion-6b-4bit-128g
2002
+ wyskiski/winonabot
2003
+ hcpwr/DialoGPT-medium-samantha
2004
+ Roguwan/DialoGPT-medium-rogu
2005
+ totallynotbrent/aaronGPTplus
2006
+ Ancestral/Dolly_Malion-6b-4bit-128g
2007
+ vantozdad/DialoGPT-medium-Dumbledore
2008
+ Abyss-fyf/DialoGPT-small-discord
2009
+ CrystalzAura/DialoGPT-small-elysia
2010
+ eachadea/ggml-gpt4all-7b-4bit
2011
+ inu-ai/alpaca-guanaco-japanese-gpt-1b
2012
+ Husnul/pepper-bot-morty
2013
+ TheBloke/vicuna-13B-1.1-GPTQ
2014
+ CRD716/ggml-vicuna-1.1-quantized
2015
+ 4bit/pygmalion-6b-4bit-128g
2016
+ Reaver1092/DialoGPT-small-bones
2017
+ Ibnelaiq/Makise-Amadeus-Kurisu-small
2018
+ inu-ai/dolly-japanese-gpt-1b
2019
+ clawrex/DialoGPT-medium-walt
2020
+ IlyaGusev/saiga_13b_lora
2021
+ Zeda/DialoGPT-Large-ZedaBot
2022
+ Ibnelaiq/Makise-Amadeus-Kurisu
2023
+ Jaxon/DialoGPT-medium-kirito
2024
+ glitchie/bb
2025
+ Aqua002/DialoGPT-small-deadpool
2026
+ Aqua002/discord-chatbot
2027
+ lemoniada/Przembot
2028
+ Avitas8485/Dialogpt-small-v1
2029
+ Jprafol/DialoGPT-large-ARCHIBot
2030
+ Jprafol/DialoGPT-large-ARCHIBotV2
2031
+ spitfire4794/ben-ultra
2032
+ IlyaGusev/saiga_30b_lora
2033
+ NbAiLab/nb-gpt-j-6B-norpaca
2034
+ winglian/vicuna-self-reflect-13b
2035
+ 0x044/test-1
2036
+ 0x044/dgpt
2037
+ ss1612/erika-chatv6
2038
+ TestingCoder463632/DialoGPT-small-palpatine
2039
+ Blizzchor/DialoGPT-medium-BarryB
2040
+ sasha0552/pygmalion-6b-f16-ggml
2041
+ kavindu999/BetterEnglishGPT-v1
2042
+ kavindu999/BetterEnglishGPT-v2
2043
+ EnterNameBros/DialoGPT-small-FoxySan
2044
+ OrientalDude/DialoGPT-medium-GOKU
2045
+ Avitas8485/Dialogpt-medium-v1
2046
+ finex/pfe-mohamed-Harry
2047
+ Avitas8485/Dialogpt-medium-finetuned
2048
+ psyamk/DialoGPT-small-harrypotter
2049
+ Jamesonn/DialoGPT-small-jumin
2050
+ CNXT/CNXT
2051
+ Ilangraterol/Dataset_model
2052
+ IlyaGusev/saiga_30b_ggml
2053
+ Locutusque/gpt2-conversational-or-qa
2054
+ TrippingFollowing39/AMOGUS
2055
+ moomoomer/DialoGPT-medium-garfield
2056
+ PygmalionAI/pygmalion-7b
2057
+ Viperxyz/DialoGPT-small-Cartman
2058
+ Neko-Institute-of-Science/pygmalion-7b
2059
+ TehVenom/Pygmalion-7b-Merged-Safetensors
2060
+ BiaDd/DialoGPT-medium-Punko
2061
+ NewBreaker/chatglm-6b-int4
2062
+ TehVenom/Pygmalion-7b-4bit-GPTQ-Safetensors
2063
+ TehVenom/Pygmalion-7b-4bit-Q4_1-GGML
2064
+ userzyzz/piggySharded
2065
+ steinhaug/models-bck
2066
+ blueberrycheesecake/DialoGPT-small-misssophie
2067
+ Imablank/P1GM4L10N-7B-MERGED_WEIGHTS
2068
+ MrToast/idk
2069
+ SouroJ/DialoGPT-medium-Mordecai
2070
+ sasha0552/pygmalion-7b-bf16
2071
+ swajan/DialoGPT-small-Trail-1
2072
+ RobiKenobi/DialoGPT-medium-pete
2073
+ sasha0552/pygmalion-7b-f16-ggml
2074
+ sasha0552/pygmalion-7b-f16
2075
+ winglian/llama-adapter-13b
2076
+ MatLumber/Bisho
2077
+ iconical/MortyChatbotAI
2078
+ swajan/Trail-1
2079
+ swajan/Trail-2
2080
+ Misfit2/DialoGPT-large-Sonic
2081
+ ToddGoldfarb/Cadet-Medium
2082
+ ajpieroni/DiabloGPT-medium-medea
2083
+ AliiaR/DialoGPT-medium-empathetic-dialogues
2084
+ Chun121/ChocolaChat
2085
+ lemoniada/kicerobot
2086
+ Kazeyami-o7/DialoGPT-medium-beterbiffin
2087
+ Elucia/Diluc_Bot
2088
+ Elucia/Diluc_Bot_1.1
2089
+ Elucia/Diluc_Bot_1.2
2090
+ neurofumo/DialoGPT-small-joshua
2091
+ Elucia/Diluc_Bot_1.3
2092
+ GraphicStylz/Stylz
2093
+ naybiblu/ChizuruBot
2094
+ calvindoingstuff/DialoGPT-medium-luffy
2095
+ xZephy/DialoGPT-small-HelperBot
2096
+ crazywombat/DialoGPT-small-abandonware
2097
+ anshengli2/DialoGPT-small-counter-hate
2098
+ sephwalker3/piggy-7b
2099
+ apricxty/DialoGPT-small-chatbot
2100
+ leadmaister/langchain-prompt-master
2101
+ Covriar/DialoGPT-med-kiryu
2102
+ yesuns/DialoGPT-small-yesun
2103
+ davidviriato/DialoGPT-small-joshua
2104
+ VMware/open-llama-0.3T-7B-open-instruct-v1.1
2105
+ prabhguron/DialoGPT-small-harrypotter
2106
+ xHexyy/small-test
2107
+ malteos/bloom-6b4-clp-german-oasst-v0.1
2108
+ Pcik/DialoGPT-medium-Ruby
2109
+ sasha0552/pygmalion-7b-q4_0-ggml
2110
+ sasha0552/pygmalion-7b-q4_1-ggml
2111
+ sasha0552/pygmalion-7b-q5_0-ggml
2112
+ sasha0552/pygmalion-7b-q5_1-ggml
2113
+ sasha0552/pygmalion-7b-q8_0-ggml
2114
+ rjorg543/DialoGPT-small-ben
2115
+ eachadea/ggml-gpt4-x-vicuna-13b
2116
+ Tlethal/DialoGPT-small-harrypotter
2117
+ xHexyy/test2
2118
+ xHexyy/test3
2119
+ ldilov/stablelm-tuned-alpha-7b-4bit-128g-descact-sym-true-sequential
2120
+ AnimusOG/pygmalion-7b-4bit-128g-cuda-2048Token
2121
+ jun-ai/BeethovenBot
2122
+ channashi/DialoGPT-small-rocket
2123
+ biscuitbutb/biscuitbot-dialogpt-model
2124
+ ytrbqrkflbvbhy/DialoGPT-small-me-rus
2125
+ Pruz0/VescGPT
2126
+ IlyaGusev/saiga_7b_ggml
2127
+ IlyaGusev/saiga_13b_ggml
2128
+ TechTay/DialoGPT-small-Luciano
2129
+ BlackBull/yeet
2130
+ WAHCLAN/DialoGPT-Medium-SAM
2131
+ MistyIce/dialog-gpt-Heshan
2132
+ Pruz0/LennGPT
2133
+ Wanfq/MAKER-mwoz-full-kb-t5-base
2134
+ Wanfq/MAKER-mwoz-full-kb-t5-large
2135
+ Wanfq/MAKER-smd-condensed-kb-t5-base
2136
+ Wanfq/MAKER-smd-condensed-kb-t5-large
2137
+ Wanfq/MAKER-camrest-condensed-kb-t5-base
2138
+ Wanfq/MAKER-camrest-condensed-kb-t5-large
2139
+ Wanfq/MAKER-camrest-full-kb-t5-base
2140
+ Wanfq/MAKER-camrest-full-kb-t5-large
2141
+ Wanfq/MAKER-mwoz-condensed-kb-t5-base
2142
+ Wanfq/MAKER-mwoz-condensed-kb-t5-large
2143
+ raphaman/test
2144
+ Pruz0/HaLLGPT
2145
+ Binaryy/blender-bot-distill-finetuned
2146
+ alex297/DialoGPT-small-sparky
2147
+ Pruz0/GeoGPT
2148
+ Pruz0/PruzGPT
2149
+ dorkai/pygmalion-2.7b
2150
+ ikocx-to24/DialoGPT-medium-plankton
2151
+ th3d4nk/llamaModel1
2152
+ PygmalionAI/pygmalion-13b
2153
+ TehVenom/Pygmalion-13b-Merged
2154
+ ivaan01/TFG-Mauri
2155
+ alex297/DialoGPT-medium-fox
2156
+ Crataco/Pygmalion-1.3B-GGML
2157
+ SaintMcMuffins/DialoGPT-small-brain2.0
2158
+ dujade18/DialoGPT-medium-dwightoffice
2159
+ TehVenom/Pygmalion-13b-8bit-GPTQ
2160
+ helloerikaaa/chandlerGPT
2161
+ SaintMcMuffins/Brain2.1
2162
+ kb2c37g/DialoGPT-small-Rick
2163
+ alex297/DialoGPT-small-fox
2164
+ TeraSpace/dialofrednocontext
2165
+ EnterNameBros/DialoGPT-small-Senko
2166
+ EnterNameBros/DialoGPT-small-Senko-san
2167
+ 4bit/pyg-7b
2168
+ EnterNameBros/DialoGPT-small-Senko-san-ver
2169
+ Lumiras/rachbot
2170
+ kevintest1234/DialoGPT-small-harrypotter
2171
+ EnterNameBros/DialoGPT-small-Senko-san-ver-2
2172
+ EnterNameBros/DialoGPT-large-Senko-san-ver-2
2173
+ Delmarfish/Delmar
2174
+ diankymar/kitty
2175
+ TatonkaHF/ruDialoGpt3-medium-finetuned-russian-joke
2176
+ EggsInAJar/DialoGPT-small-MerrickBot
2177
+ DBoi/Mayreel2
2178
+ hosst/FridgeLLM
2179
+ loitran/DialoGPT-medium-peppapig
2180
+ Syamil/DialoGPT-small-pixal
2181
+ Avitas8485/Dialogpt-medium-v2
2182
+ Inhaexpress/DialoGPT-medium-harrypotter
2183
+ loitran/DialoGPT-medium-HarryPotter
2184
+ Syamil/DialoGPT-medium-pixal
2185
+ roykim/ko_chat
2186
+ Syamil/DialoGPT-medium-pixals
2187
+ minhcrafters/DialoGPT-small-Fukuya
2188
+ Warren00/DialoGPT-Med-peppa05a
2189
+ Syamil/DialoGPT-medium-pixalbot
2190
+ LelouchH/DiabloGPT-small-RaidenBot
2191
+ Inhaexpress/DialoGPT-medium-shrek124
2192
+ Inhaexpress/DialoGPT-medium-terra1
2193
+ nascar123/Discordtester000
2194
+ EnterNameBros/Offical-Senko-medium-update
2195
+ EnterNameBros/Offical-Senko-medium-update-2
2196
+ EnterNameBros/Offical-Senko-medium-update-3
2197
+ EnterNameBros/Senko-medium
2198
+ jiezhou1996/test
2199
+ ElMater06/SpaceCore
2200
+ EnterNameBros/Offical-Senko-medium
2201
+ EnterNameBros/Senko-san
2202
+ DBoi/Mayreel
2203
+ VMware/open-llama-0.7T-7B-open-instruct-v1.1
2204
+ Warren00/DialoGPT-Small-Peppa06_053123
2205
+ mpalacio/DialoGPT_ootwl
2206
+ protag07/DialoGPT-small-harrypotter
2207
+ h2oai/h2ogpt-gm-oasst1-en-2048-falcon-7b-v2
2208
+ cosimoiaia/Loquace-70m
2209
+ cosimoiaia/Loquace-410m
2210
+ MareNoceda/DialoGPT-medium-Luz
2211
+ GarrisonBot/DialoGPT-medium-herbertgarrison
2212
+ cosimoiaia/Loquace-12B
2213
+ cosimoiaia/Loquace-7B
2214
+ Deojoandco/ahGPT-small-v1
2215
+ PeachHeles/bmo
2216
+ Rays236/DialoGPT-small-harrypotter
2217
+ Deojoandco/ahGPT-small-v2
2218
+ Syamil/DialoGPT-medium-newpixal
2219
+ Coderhuynin/DialoGPT-large-TonyStark
2220
+ SotirisLegkas/final_socratic_dialoGPT
2221
+ ademfatnassi/bonjourGPT-small
2222
+ ikocx-to24/DialoGPT-small-planktongpt2
2223
+ EricYou/RickBot
2224
+ Ayaakaa/DialoGPT-small-Yoisaki-Kanade
2225
+ DoesNoPro/DialoGPT-small-RaidenG
2226
+ rajeshbot/DialoGPT-medium-Harry-to-Hari
2227
+ DoesNoPro/DialoGPT-small-RaidenG2
2228
+ SamsonP/pygmalion-6b-sft
2229
+ Deojoandco/ahDialoGPT-small-v4
2230
+ Syamil/GPTNeo-PIXAL-Model
2231
+ Syamil/GPTNeo-PIXAL-new
2232
+ Lattori/DiabloGPT-small-ConanBot
2233
+ Badzee/DialoGPT-medium-jackbot
2234
+ meowsynth/DialoGPT-small-sophie
2235
+ EnterNameBros/Senko-san-medium-baby
2236
+ Deojoandco/ah-GPT2-v4
2237
+ cosimoiaia/Loquace-20B
2238
+ EnterNameBros/Senko-san-medium-fox
2239
+ MarkyMarx/DialoGPT-medium-jimmybot2
2240
+ DhruvShek/DialoGPT
2241
+ Doge22/DialoGPT-medium-max
2242
+ lyogavin/Anima33B
2243
+ steerevo88/testThotBot
2244
+ steerevo88/workingthotBot
2245
+ YTTD/DialoGPT-medium-keiji
2246
+ MisguidedKerbal/DialoGPT-medium-kerbal
2247
+ Blueify/DialoGPT-small-model-lotr
2248
+ steerevo88/newthotBot
2249
+ paripi/Malishka
2250
+ finex/pfe-mohamed2023-RON
2251
+ DhruvShek/CMDGPT
2252
+ finex/pfe-mohamed2023-Hermione
2253
+ SkylerBlu9/DialoGPT-medium-CitrAI
2254
+ SkylerBlu9/DialoGPT-medium-autismobot
2255
+ MisguidedKerbal/DialoGPT-kerbalV2
2256
+ EnterNameBros/Senko-san-medium-a
2257
+ dderr/testmodel
2258
+ priyanshdahiya/DialoGPT-small-rick
2259
+ Goodnoway/DialoGPT-nerbalV2
2260
+ WompWomp1/DialoGPT-medium-Kirin
2261
+ lyogavin/Anima33B-merged
2262
+ peytonai/DialoGPT-small-wali-joshua
2263
+ MisguidedKerbal/DialoGPT-kerbalV3
2264
+ WompWomp1/DialoGPT-medium-Kaori
2265
+ OmarDiab/DialoGPT-small-Amogus
2266
+ servetier/DialoGPT-large-miguel
2267
+ OmarDiab/DialoGPT-small-Amogus-2
2268
+ steveglover/falcon-7b-instruct-telco-chat
2269
+ Lazycuber/Janemalion-6B
2270
+ Goodnoway/DialoGPT-nerbalV4
2271
+ gvij/gpt-j-6B-alpaca-gpt4
2272
+ papahawk/keya-560m
2273
+ JavRedstone/DialoGPT-small-tesseractist
2274
+ imuncomfortable/DiabloGPT-small-CocoAtarashi
2275
+ Amod/falcon7b-fine-tuned-therapy-merged
2276
+ Oshirigami1980/DialoGPT-medium-Steven
2277
+ Drevanil/DialoGPT-small-try
2278
+ Yaewe/1
2279
+ DataHammer/mozi_emotional_7b
2280
+ udxyz/HarryPotterBot
2281
+ Kasyapa/DialoGPT-medium-hagridbot
2282
+ lyogavin/Anima33B-DPO-Belle-1k
2283
+ JeanL-0/TestingModel-01
2284
+ TejasC2/DialoGPT-TejasBot
2285
+ lyogavin/Anima33B-DPO-Belle-1k-merged
2286
+ InterruptAI/Interrupt-350M
2287
+ Lucideds/Lucideds
2288
+ EnterNameBros/Senko-san-medium-sc
2289
+ EnterNameBros/Senko-san-medium-scl
2290
+ DaddySen/tighnari
2291
+ ettevyemerald/DialoGPT-medium-beomgyu
2292
+ minhcrafters/DialoGPT-small-mindwandering
2293
+ JNDankwah/DialoGPT-small-ThorCB
2294
+ minhcrafters/DialoGPT-medium-Zephirel
2295
+ papahawk/falcon-40b
2296
+ sonntt/DialoGPT-small-mindwandering
2297
+ pundapog/DialoGPT-medium-ethanbot
2298
+ TheBloke/Pygmalion-7B-SuperHOT-8K-GGML
2299
+ TheBloke/Pygmalion-7B-SuperHOT-8K-fp16
2300
+ pobierz69/model-6b-read-desc
2301
+ sidca/Cam
2302
+ EnterNameBros/Senko-san-medium-abc
2303
+ abhi-8/DialoGPT-medium-Michael
2304
+ abhi-8/DialoGPT-medium-Rick
2305
+ abhi-8/DialoGPT-medium-Joshua-twevy
2306
+ spitfire4794/dialogpt-small-rick
2307
+ abhi-8/Joshua-bot
2308
+ Justus-Jonas/Imaginary-Embeddings-Classic
2309
+ Justus-Jonas/Imaginary-Embeddings-SpeakerTokens
2310
+ Justus-Jonas/Imaginary-Embeddings-SpeakerTokens-STP
2311
+ spitfire4794/dialogpt-small-morty
2312
+ Kauru/DialoGPT-medium-Ranni
2313
+ crazydamns/DialoGPT-Johnny2
2314
+ jpandeinge/DialoGPT-medium-Oshiwambo-Bot
2315
+ custads23/pygmalion-1.3b
2316
+ HatCha01/DialoGPT-small-Batman
2317
+ crazydamns/DialoGPT-Johnny3
2318
+ assembleteams/curiouspi
2319
+ Kauru/DialoGPT-medium-Ranniv2
2320
+ SatwikShrivastava/narutoAI-chatbot
2321
+ digitalmax1/max
2322
+ adr2432/small-Joshua-Bot
2323
+ ObsessedCitrus/DialoGPT-small-PeterBot_ChatBot
2324
+ suarkadipa/HubermanGPT-small-v1
2325
+ suarkadipa/HarryPotterGPT-small-v1
2326
+ wevie1978/DialoGPT-medium-Kebb
2327
+ kopeqwerty/DialoGPT-medium-idotbot
2328
+ zelalt/Chatbot_T5-Prmtrs
2329
+ jarvissss/DialoGPT-medium-idotbot
2330
+ Magmadue/DiabloGPT-small-ei
2331
+ nicbull/DialoGPT-small-cryptonic
2332
+ nicbull/DialoGPT-small-cryptonic2
2333
+ chloe0x0/DialoGPT-small-Muty
2334
+ chloe0x0/mutyGPT
2335
+ alexwang05/DialoGPT-small-soph
2336
+ BHAndersonJr/DialoGPT-small-fry
2337
+ timothykim04/DialoGPT-medium-timothykim
2338
+ timothykim04/DialoGPT-medium-harrypotter
2339
+ Luca999/Limitlessai99
2340
+ Madgimmy/DiabloGPT-small-Madgimmy
2341
+ chloe0x0/mutyGPT-v2
2342
+ nuggster/DialoGPT-small-ianbot
2343
+ we1kkk/llama2-hf-qlora-oasst1
2344
+ IlyaGusev/saiga2_7b_lora
2345
+ IlyaGusev/gigasaiga_lora
2346
+ jliu03/JustinBot
2347
+ heliosbrahma/falcon-7b-finetuned-mental-health-conversational
2348
+ drunknmonk/GPT-Chandler
2349
+ jun-ai/llama2-qlora-finetunined-french
2350
+ WompWomp1/DialoGPT-large-Kirin
2351
+ WompWomp1/DialoGPT-large-Kirin-2
2352
+ WompWomp1/DialoGPT-large-Rin
2353
+ or4cl3ai/Aiden_t5
2354
+ jstawski/Llama-2-13b-hf-finetuned-SNG
2355
+ Gelmo/Halouf
2356
+ IlyaGusev/saiga2_13b_lora
2357
+ sophji/DialoGPT-small-GodlyLJ
2358
+ ATrapenard/Discord-Impersonation-Bot
2359
+ hiamitabha/llama2forbittlerobot
2360
+ IlyaGusev/saiga2_7b_gguf
2361
+ IlyaGusev/saiga2_13b_gguf
2362
+ TejasC2/DialoGPT-TejasBot2
2363
+ CNR223/DialoGPT-medium-MalcolmReynold
2364
+ minh-hahaha/DialoGPT-small-harrypotter
2365
+ phucnq1591999/SolanaChatBot
2366
+ marclove/llama-2-7b-chat-functions
2367
+ Sheerapi/test
2368
+ YukioKoito/DialoGPT-small-chibi
2369
+ YukioKoito/DialoGPT-small-twilight
2370
+ amzrana/lora
2371
+ ierhon/basic-chatbot
2372
+ Pula23/Hggjg
2373
+ Focs/DialoGPT-medium-tony-stark
2374
+ Kenobiwan/DialoGPT-small-AizakkuBot2
2375
+ drado/DialoGPT-small-joshua
2376
+ rah-1/Rahulio
2377
+ tanishqvashisht/DialoGPT-small-Joshua
2378
+ Kenobiwan/DialoGPT-small-AizakkuBot3
2379
+ Ridloo/DialogGPT-small-harrypotter
2380
+ dyuhong80/DialoGPT-large-ModerateEffortBombGPT
2381
+ ai-forever/paper_persi_chat
2382
+ paralleldynamix/paralleldynamix-model101
2383
+ kelSidenna/SoftwareRequirements-T5-Base
2384
+ renahime/DialoGPT-medium-umineko
2385
+ Shaun1204/RedGPT-Gormlee
2386
+ diwas7777/HarryBot
2387
+ heliosbrahma/falcon-7b-sharded-bf16-finetuned-mental-health-conversational
2388
+ kelSidenna/SoftwareReq-DialoGPT-medium
2389
+ shanover/medbot-conv
2390
+ J-Wiggler/DialoGPT-medium-Stanley
2391
+ gearski/DialoGPT-small-itskleb
2392
+ wozniakclub/llama-2-7b-medtext-llama2
2393
+ gearski/DialoGPT-medium-itskleb
2394
+ rebornrulz/Rulz-AI
2395
+ Quantsr/DialogGPT-small-Aeris
2396
+ ostorc/rick-sanchez-chatbot
2397
+ nicbull/DialoGPT-medium-nic
2398
+ nicbull/DialoGPT-medium-nic2
2399
+ gorkemgoknar/llama2-7f-moviechatbot-ggml-q4
2400
+ aka-nikko/ainz-ooal-gown
2401
+ llSourcell/medllama2_7b
2402
+ xtuner/Llama-2-7b-qlora-moss-003-sft
2403
+ xtuner/Llama-2-7b-qlora-arxiv-gentitle
2404
+ xtuner/internlm-7b-qlora-arxiv-gentitle
2405
+ xtuner/internlm-7b-qlora-alpaca-enzh
2406
+ xtuner/Baichuan-7B-qlora-arxiv-gentitle
2407
+ xtuner/Baichuan-7B-qlora-alpaca-enzh
2408
+ nicbull/DialoGPT-medium-leric
2409
+ Ian-14/llm13
2410
+ theastro/starkbot
2411
+ yupimrandy/DialoGPT-medium-butcher
2412
+ hclaim/clamgptattempt4
2413
+ yupimrandy/DialoGPT-medium-hughie
2414
+ nekohacker591/google1
2415
+ zhmx31/Mychatbot
2416
+ sk8ingcat/DialoGPT-small-TonyStark
2417
+ SanchoJR/meX
2418
+ xtuner/Qwen-7B-qlora-moss-003-sft
2419
+ xtuner/Qwen-7B-qlora-arxiv-gentitle
2420
+ xtuner/Qwen-7B-qlora-alpaca-enzh
2421
+ xtuner/Qwen-7B-qlora-oasst1
2422
+ xtuner/Baichuan-7B-qlora-oasst1
2423
+ xtuner/internlm-7b-qlora-oasst1
2424
+ 4bit/medllama2_7b
2425
+ JGKD/JangoGPTv1.0
2426
+ kwankwan1000/DialoGPT-small-peppa
2427
+ JGKD/JangoGPTv1.5
2428
+ SoniR/config
2429
+ mjyh/falcon-7b-qlora-sclue-20230601-04-merged
2430
+ sadzip/SiberianPersona-ruGPT-3.5-qlora
2431
+ Wolffire88/DialoGPT-medium-Android16
2432
+ nolly3317/DialoGPT-small-alice
2433
+ feelinrealcute/pym-6b
2434
+ nixsy/AvasLove
2435
+ feelinrealcute/pym-13b7
2436
+ AleksiDu/HarryPotterBot
2437
+ Belcebuzzz/DialoGPT-small-TomoGF
2438
+ xtuner/internlm-7b-qlora-lawyer
2439
+ xtuner/internlm-7b-qlora-colorist
2440
+ xtuner/internlm-7b-qlora-coder
2441
+ xtuner/internlm-7b-qlora-open-platypus
2442
+ xtuner/internlm-7b-qlora-sql
2443
+ inception-mbzuai/jais-13b-chat
2444
+ Fredithefish/Guanaco-3B-Uncensored
2445
+ garrachonr/LlamaDos
2446
+ literallywood/DialoGPT-small-ekansh
2447
+ IALABS/Arturosfastfood
2448
+ javieitor/DialoGPT-medium-Rick
2449
+ Kuduxaaa/ava-small
2450
+ Al-Hathboor-Bikal-ai-2023/SRTIP-GPT-F7B-base
2451
+ L-R/LLmRa-355M
2452
+ Fredithefish/Guanaco-3B-Uncensored-v2
2453
+ xtuner/Llama-2-7b-qlora-colorist
2454
+ KE-AI/basicchatbot-kel
2455
+ josepholiver/TEST_MODEL_1
2456
+ PlaceReporter99/Utility_Bot_Chat
2457
+ J-Wiggler2/Caesar
2458
+ J-Wiggler2/Caesar2
2459
+ matvalan/vittae-cot
2460
+ Dawnstarhunter/DialoGPT-medium-Eveline
2461
+ sahilxyd/DialoGPT-small-joshua
2462
+ EnterNameBros/Senko-san-medium-abcd
2463
+ 6adityaverma/DialoGPT-large-Walter
2464
+ 6adityaverma/DialoGPT-large-Rick
2465
+ IlyaGusev/saiga2_70b_lora
2466
+ AyushK0808/StarWarsBot
2467
+ EnterNameBros/Senko-ai-medium
2468
+ Fredithefish/Guanaco-7B-Uncensored
2469
+ IlyaGusev/saiga2_70b_gguf
2470
+ glassofwine/DialoGPT-medium-johanwine
2471
+ zattio770/120-Days-of-LORA-v2-13B
2472
+ cannice/blenderbot-400M-distill-empathetic
2473
+ Likelihood94/Jackoftrades
2474
+ Hapski/DialoGPT-small-nene
2475
+ Fredithefish/Guanaco-13B-Uncensored
2476
+ kitbear444/DialoGPT-medium-kit
2477
+ SonnyAu/DialoGPT-dumbledore
2478
+ TheBloke/Guanaco-7B-Uncensored-GGUF
2479
+ TheBloke/Guanaco-13B-Uncensored-GGUF
2480
+ TheBloke/Guanaco-7B-Uncensored-GPTQ
2481
+ TheBloke/Guanaco-13B-Uncensored-GPTQ
2482
+ TheBloke/Guanaco-3B-Uncensored-v2-GPTQ
2483
+ TheBloke/Guanaco-3B-Uncensored-v2-GGML
2484
+ Codexister/DialoGPT-medium-KafkaBotV1
2485
+ mfodwo/STUGPT-small-v1
2486
+ asas-ai/jais-13b-chat-8bit
2487
+ SoupChickn/Valeen-DialoGPT
2488
+ Codexister/DialoGPT-medium-KafkaBotV2
2489
+ KoalaAI/OPT-1.3b-Chat
2490
+ Nafaille/nafaille6b
2491
+ DiTy/dialogpt
2492
+ Severus27/BeingWell_llama2_7b
2493
+ rayho/DialoGPT-small-polysoft
2494
+ TuningAI/Llama2_13B_startup_Assistant
2495
+ dipxsy/testmodel
2496
+ dipxsy/Jarvis-small
2497
+ Lazycuber/L2-7b-Chat-Guanaco-Uncensored
2498
+ dipxsy/jarvis-blend
2499
+ TheBloke/Guanaco-13B-Uncensored-AWQ
2500
+ TheBloke/Guanaco-7B-Uncensored-AWQ
2501
+ wstock04/shiddeatorBotV1
2502
+ Boqianshen/llama-2-7b-miniguanaco
2503
+ sebastiantrbl/distilgpt2-finetuned-wikitext2
2504
+ herzlixh/DialoGPTs_HarryFromHogwarts
2505
+ poiccard/jais-13b-chat-adn
2506
+ sebastiantrbl/test-DialoGPT-finetune
2507
+ uffergist/DialoGPT-small-cummy
2508
+ wstock04/shiddeatorBotV3.0
2509
+ wstock04/shiddeatorBotDUMB
2510
+ Applekinz/John
2511
+ Or4cl3/1nsfw
2512
+ sebastiantrbl/DialoGPT-finetuned-daily-dialog
2513
+ LTC-AI-Labs/L2-7b-Base-WVG-Uncensored
2514
+ hussain2030/jais13bchat2
2515
+ subabi/DialoGPT-medium-subabicord
2516
+ marblyso/DialoGPT-medium-collin
2517
+ Crataco/Pygmalion-6B-GGML
2518
+ dipxsy/jl
2519
+ testerhubhai/krnedo
2520
+ IAteSpaghettiForLunch/DialoGPT-medium-GLADoS
2521
+ IAteSpaghettiForLunch/GLADoSBOT
2522
+ Nikolai5592/DialoGPT-Medium-RickBot
2523
+ KuroganeNiello/medium-NebBot
litellm/llms/huggingface_llms_metadata/hf_text_generation_models.txt ADDED
The diff for this file is too large to render. See raw diff
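Although this file's diff is not rendered, the loader code in litellm/llms/huggingface_restapi.py below (read_tgi_conv_models) treats it as plain text with one Hugging Face model id per line. A minimal sketch of that lookup, assuming the metadata files ship alongside the module; the model id used for the membership check is purely a placeholder:

import os

def load_model_ids(filename: str) -> set:
    # Resolve the metadata file relative to this module, mirroring read_tgi_conv_models below
    script_directory = os.path.dirname(os.path.abspath(__file__))
    file_path = os.path.join(script_directory, "huggingface_llms_metadata", filename)
    with open(file_path, "r") as f:
        return {line.strip() for line in f if line.strip()}

tgi_models = load_model_ids("hf_text_generation_models.txt")
# Membership in this set is what get_hf_task_for_model uses to pick "text-generation-inference"
print("HuggingFaceH4/zephyr-7b-beta" in tgi_models)  # placeholder model id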
 
litellm/llms/huggingface_restapi.py ADDED
@@ -0,0 +1,604 @@
1
+ ## Uses the huggingface text generation inference API
2
+ import os, copy, types
3
+ import json
4
+ from enum import Enum
5
+ import httpx, requests
6
+ from .base import BaseLLM
7
+ import time
8
+ import litellm
9
+ from typing import Callable, Dict, List, Any
10
+ from litellm.utils import ModelResponse, Choices, Message, CustomStreamWrapper, Usage
11
+ from typing import Optional
12
+ from .prompt_templates.factory import prompt_factory, custom_prompt
13
+
14
+ class HuggingfaceError(Exception):
15
+ def __init__(self, status_code, message, request: Optional[httpx.Request]=None, response: Optional[httpx.Response]=None):
16
+ self.status_code = status_code
17
+ self.message = message
18
+ if request is not None:
19
+ self.request = request
20
+ else:
21
+ self.request = httpx.Request(method="POST", url="https://api-inference.huggingface.co/models")
22
+ if response is not None:
23
+ self.response = response
24
+ else:
25
+ self.response = httpx.Response(status_code=status_code, request=self.request)
26
+ super().__init__(
27
+ self.message
28
+ ) # Call the base class constructor with the parameters it needs
29
+
30
+ class HuggingfaceConfig():
31
+ """
32
+ Reference: https://huggingface.github.io/text-generation-inference/#/Text%20Generation%20Inference/compat_generate
33
+ """
34
+ best_of: Optional[int] = None
35
+ decoder_input_details: Optional[bool] = None
36
+ details: Optional[bool] = True # enables returning logprobs + best of
37
+ max_new_tokens: Optional[int] = None
38
+ repetition_penalty: Optional[float] = None
39
+ return_full_text: Optional[bool] = False # by default don't return the input as part of the output
40
+ seed: Optional[int] = None
41
+ temperature: Optional[float] = None
42
+ top_k: Optional[int] = None
43
+ top_n_tokens: Optional[int] = None
44
+ top_p: Optional[int] = None
45
+ truncate: Optional[int] = None
46
+ typical_p: Optional[float] = None
47
+ watermark: Optional[bool] = None
48
+
49
+ def __init__(self,
50
+ best_of: Optional[int] = None,
51
+ decoder_input_details: Optional[bool] = None,
52
+ details: Optional[bool] = None,
53
+ max_new_tokens: Optional[int] = None,
54
+ repetition_penalty: Optional[float] = None,
55
+ return_full_text: Optional[bool] = None,
56
+ seed: Optional[int] = None,
57
+ temperature: Optional[float] = None,
58
+ top_k: Optional[int] = None,
59
+ top_n_tokens: Optional[int] = None,
60
+ top_p: Optional[int] = None,
61
+ truncate: Optional[int] = None,
62
+ typical_p: Optional[float] = None,
63
+ watermark: Optional[bool] = None
64
+ ) -> None:
65
+ locals_ = locals()
66
+ for key, value in locals_.items():
67
+ if key != 'self' and value is not None:
68
+ setattr(self.__class__, key, value)
69
+
70
+ @classmethod
71
+ def get_config(cls):
72
+ return {k: v for k, v in cls.__dict__.items()
73
+ if not k.startswith('__')
74
+ and not isinstance(v, (types.FunctionType, types.BuiltinFunctionType, classmethod, staticmethod))
75
+ and v is not None}
76
+
77
+ def output_parser(generated_text: str):
78
+ """
79
+ Parse the output text to remove any special characters. In our current approach we just check for ChatML tokens.
80
+
81
+ Initial issue that prompted this - https://github.com/BerriAI/litellm/issues/763
82
+ """
83
+ chat_template_tokens = ["<|assistant|>", "<|system|>", "<|user|>", "<s>", "</s>"]
84
+ for token in chat_template_tokens:
85
+ if generated_text.strip().startswith(token):
86
+ generated_text = generated_text.replace(token, "", 1)
87
+ if generated_text.endswith(token):
88
+ generated_text = generated_text[::-1].replace(token[::-1], "", 1)[::-1]
89
+ return generated_text
90
+
91
+ tgi_models_cache = None
92
+ conv_models_cache = None
93
+ def read_tgi_conv_models():
94
+ try:
95
+ global tgi_models_cache, conv_models_cache
96
+ # Check if the cache is already populated
97
+ # so we don't keep on reading txt file if there are 1k requests
98
+ if (tgi_models_cache is not None) and (conv_models_cache is not None):
99
+ return tgi_models_cache, conv_models_cache
100
+ # If not, read the file and populate the cache
101
+ tgi_models = set()
102
+ script_directory = os.path.dirname(os.path.abspath(__file__))
103
+ # Construct the file path relative to the script's directory
104
+ file_path = os.path.join(script_directory, "huggingface_llms_metadata", "hf_text_generation_models.txt")
105
+
106
+ with open(file_path, 'r') as file:
107
+ for line in file:
108
+ tgi_models.add(line.strip())
109
+
110
+ # Cache the set for future use
111
+ tgi_models_cache = tgi_models
112
+
113
+ # If not, read the file and populate the cache
114
+ file_path = os.path.join(script_directory, "huggingface_llms_metadata", "hf_conversational_models.txt")
115
+ conv_models = set()
116
+ with open(file_path, 'r') as file:
117
+ for line in file:
118
+ conv_models.add(line.strip())
119
+ # Cache the set for future use
120
+ conv_models_cache = conv_models
121
+ return tgi_models, conv_models
122
+ except:
123
+ return set(), set()
124
+
125
+
126
+ def get_hf_task_for_model(model):
127
+ # read text file, cast it to set
128
+ # read the file called "huggingface_llms_metadata/hf_text_generation_models.txt"
129
+ tgi_models, conversational_models = read_tgi_conv_models()
130
+ if model in tgi_models:
131
+ return "text-generation-inference"
132
+ elif model in conversational_models:
133
+ return "conversational"
134
+ elif "roneneldan/TinyStories" in model:
135
+ return None
136
+ else:
137
+ return "text-generation-inference" # default to tgi
138
+
139
+ class Huggingface(BaseLLM):
140
+ _client_session: Optional[httpx.Client] = None
141
+ _aclient_session: Optional[httpx.AsyncClient] = None
142
+
143
+ def __init__(self) -> None:
144
+ super().__init__()
145
+
146
+ def validate_environment(self, api_key, headers):
147
+ default_headers = {
148
+ "content-type": "application/json",
149
+ }
150
+ if api_key and headers is None:
151
+ default_headers["Authorization"] = f"Bearer {api_key}" # Huggingface Inference Endpoint default is to accept bearer tokens
152
+ headers = default_headers
153
+ elif headers:
154
+ headers=headers
155
+ else:
156
+ headers = default_headers
157
+ return headers
158
+
159
+ def convert_to_model_response_object(self,
160
+ completion_response,
161
+ model_response,
162
+ task,
163
+ optional_params,
164
+ encoding,
165
+ input_text,
166
+ model):
167
+ if task == "conversational":
168
+ if len(completion_response["generated_text"]) > 0: # type: ignore
169
+ model_response["choices"][0]["message"][
170
+ "content"
171
+ ] = completion_response["generated_text"] # type: ignore
172
+ elif task == "text-generation-inference":
173
+ if len(completion_response[0]["generated_text"]) > 0:
174
+ model_response["choices"][0]["message"][
175
+ "content"
176
+ ] = output_parser(completion_response[0]["generated_text"])
177
+ ## GETTING LOGPROBS + FINISH REASON
178
+ if "details" in completion_response[0] and "tokens" in completion_response[0]["details"]:
179
+ model_response.choices[0].finish_reason = completion_response[0]["details"]["finish_reason"]
180
+ sum_logprob = 0
181
+ for token in completion_response[0]["details"]["tokens"]:
182
+ if token["logprob"] != None:
183
+ sum_logprob += token["logprob"]
184
+ model_response["choices"][0]["message"]._logprob = sum_logprob
185
+ if "best_of" in optional_params and optional_params["best_of"] > 1:
186
+ if "details" in completion_response[0] and "best_of_sequences" in completion_response[0]["details"]:
187
+ choices_list = []
188
+ for idx, item in enumerate(completion_response[0]["details"]["best_of_sequences"]):
189
+ sum_logprob = 0
190
+ for token in item["tokens"]:
191
+ if token["logprob"] != None:
192
+ sum_logprob += token["logprob"]
193
+ if len(item["generated_text"]) > 0:
194
+ message_obj = Message(content=output_parser(item["generated_text"]), logprobs=sum_logprob)
195
+ else:
196
+ message_obj = Message(content=None)
197
+ choice_obj = Choices(finish_reason=item["finish_reason"], index=idx+1, message=message_obj)
198
+ choices_list.append(choice_obj)
199
+ model_response["choices"].extend(choices_list)
200
+ else:
201
+ if len(completion_response[0]["generated_text"]) > 0:
202
+ model_response["choices"][0]["message"][
203
+ "content"
204
+ ] = output_parser(completion_response[0]["generated_text"])
205
+ ## CALCULATING USAGE
206
+ prompt_tokens = 0
207
+ try:
208
+ prompt_tokens = len(
209
+ encoding.encode(input_text)
210
+ ) ##[TODO] use the llama2 tokenizer here
211
+ except:
212
+ # this should remain non blocking we should not block a response returning if calculating usage fails
213
+ pass
214
+ output_text = model_response["choices"][0]["message"].get("content", "")
215
+ if output_text is not None and len(output_text) > 0:
216
+ completion_tokens = 0
217
+ try:
218
+ completion_tokens = len(
219
+ encoding.encode(model_response["choices"][0]["message"].get("content", ""))
220
+ ) ##[TODO] use the llama2 tokenizer here
221
+ except:
222
+ # this should remain non blocking we should not block a response returning if calculating usage fails
223
+ pass
224
+ else:
225
+ completion_tokens = 0
226
+
227
+ model_response["created"] = int(time.time())
228
+ model_response["model"] = model
229
+ usage = Usage(
230
+ prompt_tokens=prompt_tokens,
231
+ completion_tokens=completion_tokens,
232
+ total_tokens=prompt_tokens + completion_tokens
233
+ )
234
+ model_response.usage = usage
235
+ model_response._hidden_params["original_response"] = completion_response
236
+ return model_response
237
+
238
+ def completion(self,
239
+ model: str,
240
+ messages: list,
241
+ api_base: Optional[str],
242
+ headers: Optional[dict],
243
+ model_response: ModelResponse,
244
+ print_verbose: Callable,
245
+ encoding,
246
+ api_key,
247
+ logging_obj,
248
+ custom_prompt_dict={},
249
+ acompletion: bool = False,
250
+ optional_params=None,
251
+ litellm_params=None,
252
+ logger_fn=None,
253
+ ):
254
+ super().completion()
255
+ exception_mapping_worked = False
256
+ try:
257
+ headers = self.validate_environment(api_key, headers)
258
+ task = get_hf_task_for_model(model)
259
+ print_verbose(f"{model}, {task}")
260
+ completion_url = ""
261
+ input_text = ""
262
+ if "https" in model:
263
+ completion_url = model
264
+ elif api_base:
265
+ completion_url = api_base
266
+ elif "HF_API_BASE" in os.environ:
267
+ completion_url = os.getenv("HF_API_BASE", "")
268
+ elif "HUGGINGFACE_API_BASE" in os.environ:
269
+ completion_url = os.getenv("HUGGINGFACE_API_BASE", "")
270
+ else:
271
+ completion_url = f"https://api-inference.huggingface.co/models/{model}"
272
+
273
+ ## Load Config
274
+ config=litellm.HuggingfaceConfig.get_config()
275
+ for k, v in config.items():
276
+ if k not in optional_params: # completion(top_k=3) > huggingfaceConfig(top_k=3) <- allows for dynamic variables to be passed in
277
+ optional_params[k] = v
278
+
279
+ ### MAP INPUT PARAMS
280
+ if task == "conversational":
281
+ inference_params = copy.deepcopy(optional_params)
282
+ inference_params.pop("details")
283
+ inference_params.pop("return_full_text")
284
+ past_user_inputs = []
285
+ generated_responses = []
286
+ text = ""
287
+ for message in messages:
288
+ if message["role"] == "user":
289
+ if text != "":
290
+ past_user_inputs.append(text)
291
+ text = message["content"]
292
+ elif message["role"] == "assistant" or message["role"] == "system":
293
+ generated_responses.append(message["content"])
294
+ data = {
295
+ "inputs": {
296
+ "text": text,
297
+ "past_user_inputs": past_user_inputs,
298
+ "generated_responses": generated_responses
299
+ },
300
+ "parameters": inference_params
301
+ }
302
+ input_text = "".join(message["content"] for message in messages)
303
+ elif task == "text-generation-inference":
304
+ # always send "details" and "return_full_text" as params
305
+ if model in custom_prompt_dict:
306
+ # check if the model has a registered custom prompt
307
+ model_prompt_details = custom_prompt_dict[model]
308
+ prompt = custom_prompt(
309
+ role_dict=model_prompt_details.get("roles", None),
310
+ initial_prompt_value=model_prompt_details.get("initial_prompt_value", ""),
311
+ final_prompt_value=model_prompt_details.get("final_prompt_value", ""),
312
+ messages=messages
313
+ )
314
+ else:
315
+ prompt = prompt_factory(model=model, messages=messages)
316
+ data = {
317
+ "inputs": prompt,
318
+ "parameters": optional_params,
319
+ "stream": True if "stream" in optional_params and optional_params["stream"] == True else False,
320
+ }
321
+ input_text = prompt
322
+ else:
323
+ # Non TGI and Conversational llms
324
+ # We need this branch, it removes 'details' and 'return_full_text' from params
325
+ if model in custom_prompt_dict:
326
+ # check if the model has a registered custom prompt
327
+ model_prompt_details = custom_prompt_dict[model]
328
+ prompt = custom_prompt(
329
+ role_dict=model_prompt_details.get("roles", {}),
330
+ initial_prompt_value=model_prompt_details.get("initial_prompt_value", ""),
331
+ final_prompt_value=model_prompt_details.get("final_prompt_value", ""),
332
+ bos_token=model_prompt_details.get("bos_token", ""),
333
+ eos_token=model_prompt_details.get("eos_token", ""),
334
+ messages=messages,
335
+ )
336
+ else:
337
+ prompt = prompt_factory(model=model, messages=messages)
338
+ inference_params = copy.deepcopy(optional_params)
339
+ inference_params.pop("details")
340
+ inference_params.pop("return_full_text")
341
+ data = {
342
+ "inputs": prompt,
343
+ "parameters": inference_params,
344
+ "stream": True if "stream" in optional_params and optional_params["stream"] == True else False,
345
+ }
346
+ input_text = prompt
347
+ ## LOGGING
348
+ logging_obj.pre_call(
349
+ input=input_text,
350
+ api_key=api_key,
351
+ additional_args={"complete_input_dict": data, "task": task, "headers": headers, "api_base": completion_url, "acompletion": acompletion},
352
+ )
353
+ ## COMPLETION CALL
354
+ if acompletion is True:
355
+ ### ASYNC STREAMING
356
+ if optional_params.get("stream", False):
357
+ return self.async_streaming(logging_obj=logging_obj, api_base=completion_url, data=data, headers=headers, model_response=model_response, model=model) # type: ignore
358
+ else:
359
+ ### ASYNC COMPLETION
360
+ return self.acompletion(api_base=completion_url, data=data, headers=headers, model_response=model_response, task=task, encoding=encoding, input_text=input_text, model=model, optional_params=optional_params) # type: ignore
361
+ ### SYNC STREAMING
362
+ if "stream" in optional_params and optional_params["stream"] == True:
363
+ response = requests.post(
364
+ completion_url,
365
+ headers=headers,
366
+ data=json.dumps(data),
367
+ stream=optional_params["stream"]
368
+ )
369
+ return response.iter_lines()
370
+ ### SYNC COMPLETION
371
+ else:
372
+ response = requests.post(
373
+ completion_url,
374
+ headers=headers,
375
+ data=json.dumps(data)
376
+ )
377
+
378
+ ## Some servers might return streaming responses even though stream was not set to true. (e.g. Baseten)
379
+ is_streamed = False
380
+ if response.__dict__['headers'].get("Content-Type", "") == "text/event-stream":
381
+ is_streamed = True
382
+
383
+ # iterate over the complete streamed response, and return the final answer
384
+ if is_streamed:
385
+ streamed_response = CustomStreamWrapper(completion_stream=response.iter_lines(), model=model, custom_llm_provider="huggingface", logging_obj=logging_obj)
386
+ content = ""
387
+ for chunk in streamed_response:
388
+ content += chunk["choices"][0]["delta"]["content"]
389
+ completion_response: List[Dict[str, Any]] = [{"generated_text": content}]
390
+ ## LOGGING
391
+ logging_obj.post_call(
392
+ input=input_text,
393
+ api_key=api_key,
394
+ original_response=completion_response,
395
+ additional_args={"complete_input_dict": data, "task": task},
396
+ )
397
+ else:
398
+ ## LOGGING
399
+ logging_obj.post_call(
400
+ input=input_text,
401
+ api_key=api_key,
402
+ original_response=response.text,
403
+ additional_args={"complete_input_dict": data, "task": task},
404
+ )
405
+ ## RESPONSE OBJECT
406
+ try:
407
+ completion_response = response.json()
408
+ if isinstance(completion_response, dict):
409
+ completion_response = [completion_response]
410
+ except:
411
+ import traceback
412
+ raise HuggingfaceError(
413
+ message=f"Original Response received: {response.text}; Stacktrace: {traceback.format_exc()}", status_code=response.status_code
414
+ )
415
+ print_verbose(f"response: {completion_response}")
416
+ if isinstance(completion_response, dict) and "error" in completion_response:
417
+ print_verbose(f"completion error: {completion_response['error']}")
418
+ print_verbose(f"response.status_code: {response.status_code}")
419
+ raise HuggingfaceError(
420
+ message=completion_response["error"],
421
+ status_code=response.status_code,
422
+ )
423
+ return self.convert_to_model_response_object(
424
+ completion_response=completion_response,
425
+ model_response=model_response,
426
+ task=task,
427
+ optional_params=optional_params,
428
+ encoding=encoding,
429
+ input_text=input_text,
430
+ model=model
431
+ )
432
+ except HuggingfaceError as e:
433
+ exception_mapping_worked = True
434
+ raise e
435
+ except Exception as e:
436
+ if exception_mapping_worked:
437
+ raise e
438
+ else:
439
+ import traceback
440
+ raise HuggingfaceError(status_code=500, message=traceback.format_exc())
441
+
442
+ async def acompletion(self,
443
+ api_base: str,
444
+ data: dict,
445
+ headers: dict,
446
+ model_response: ModelResponse,
447
+ task: str,
448
+ encoding: Any,
449
+ input_text: str,
450
+ model: str,
451
+ optional_params: dict):
452
+ response = None
453
+ try:
454
+ async with httpx.AsyncClient() as client:
455
+ response = await client.post(url=api_base, json=data, headers=headers, timeout=None)
456
+ response_json = response.json()
457
+ if response.status_code != 200:
458
+ raise HuggingfaceError(status_code=response.status_code, message=response.text, request=response.request, response=response)
459
+
460
+ ## RESPONSE OBJECT
461
+ return self.convert_to_model_response_object(completion_response=response_json,
462
+ model_response=model_response,
463
+ task=task,
464
+ encoding=encoding,
465
+ input_text=input_text,
466
+ model=model,
467
+ optional_params=optional_params)
468
+ except Exception as e:
469
+ if isinstance(e,httpx.TimeoutException):
470
+ raise HuggingfaceError(status_code=500, message="Request Timeout Error")
471
+ elif response is not None and hasattr(response, "text"):
472
+ raise HuggingfaceError(status_code=500, message=f"{str(e)}\n\nOriginal Response: {response.text}")
473
+ else:
474
+ raise HuggingfaceError(status_code=500, message=f"{str(e)}")
475
+
476
+ async def async_streaming(self,
477
+ logging_obj,
478
+ api_base: str,
479
+ data: dict,
480
+ headers: dict,
481
+ model_response: ModelResponse,
482
+ model: str):
483
+ async with httpx.AsyncClient() as client:
484
+ response = client.stream(
485
+ "POST",
486
+ url=f"{api_base}",
487
+ json=data,
488
+ headers=headers
489
+ )
490
+ async with response as r:
491
+ if r.status_code != 200:
492
+ raise HuggingfaceError(status_code=r.status_code, message="An error occurred while streaming")
493
+
494
+ streamwrapper = CustomStreamWrapper(completion_stream=r.aiter_lines(), model=model, custom_llm_provider="huggingface",logging_obj=logging_obj)
495
+ async for transformed_chunk in streamwrapper:
496
+ yield transformed_chunk
497
+
498
+ def embedding(self,
499
+ model: str,
500
+ input: list,
501
+ api_key: Optional[str] = None,
502
+ api_base: Optional[str] = None,
503
+ logging_obj=None,
504
+ model_response=None,
505
+ encoding=None,
506
+ ):
507
+ super().embedding()
508
+ headers = self.validate_environment(api_key, headers=None)
509
+ # print_verbose(f"{model}, {task}")
510
+ embed_url = ""
511
+ if "https" in model:
512
+ embed_url = model
513
+ elif api_base:
514
+ embed_url = api_base
515
+ elif "HF_API_BASE" in os.environ:
516
+ embed_url = os.getenv("HF_API_BASE", "")
517
+ elif "HUGGINGFACE_API_BASE" in os.environ:
518
+ embed_url = os.getenv("HUGGINGFACE_API_BASE", "")
519
+ else:
520
+ embed_url = f"https://api-inference.huggingface.co/models/{model}"
521
+
522
+ if "sentence-transformers" in model:
523
+ if len(input) == 0:
524
+ raise HuggingfaceError(status_code=400, message="sentence transformers requires 2+ sentences")
525
+ data = {
526
+ "inputs": {
527
+ "source_sentence": input[0],
528
+ "sentences": [ "That is a happy dog", "That is a very happy person", "Today is a sunny day" ]
529
+ }
530
+ }
531
+ else:
532
+ data = {
533
+ "inputs": input # type: ignore
534
+ }
535
+
536
+ ## LOGGING
537
+ logging_obj.pre_call(
538
+ input=input,
539
+ api_key=api_key,
540
+ additional_args={"complete_input_dict": data},
541
+ )
542
+ ## COMPLETION CALL
543
+ response = requests.post(
544
+ embed_url, headers=headers, data=json.dumps(data)
545
+ )
546
+
547
+
548
+ ## LOGGING
549
+ logging_obj.post_call(
550
+ input=input,
551
+ api_key=api_key,
552
+ additional_args={"complete_input_dict": data},
553
+ original_response=response,
554
+ )
555
+
556
+
557
+ embeddings = response.json()
558
+
559
+ if "error" in embeddings:
560
+ raise HuggingfaceError(status_code=500, message=embeddings['error'])
561
+
562
+ output_data = []
563
+ if "similarities" in embeddings:
564
+ for idx, embedding in embeddings["similarities"]:
565
+ output_data.append(
566
+ {
567
+ "object": "embedding",
568
+ "index": idx,
569
+ "embedding": embedding # flatten list returned from hf
570
+ }
571
+ )
572
+ else:
573
+ for idx, embedding in enumerate(embeddings):
574
+ if isinstance(embedding, float):
575
+ output_data.append(
576
+ {
577
+ "object": "embedding",
578
+ "index": idx,
579
+ "embedding": embedding # flatten list returned from hf
580
+ }
581
+ )
582
+ else:
583
+ output_data.append(
584
+ {
585
+ "object": "embedding",
586
+ "index": idx,
587
+ "embedding": embedding[0][0] # flatten list returned from hf
588
+ }
589
+ )
590
+ model_response["object"] = "list"
591
+ model_response["data"] = output_data
592
+ model_response["model"] = model
593
+ input_tokens = 0
594
+ for text in input:
595
+ input_tokens+=len(encoding.encode(text))
596
+
597
+ model_response["usage"] = {
598
+ "prompt_tokens": input_tokens,
599
+ "total_tokens": input_tokens,
600
+ }
601
+ return model_response
602
+
603
+
604
+
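A minimal usage sketch for the Huggingface handler above, assuming the usual litellm.completion entry point routes "huggingface/..." model names to this module; the repo id and token below are placeholders:

import litellm

# Class-level defaults set via HuggingfaceConfig are merged into optional_params by
# completion() when the caller does not pass them explicitly (see "## Load Config" above).
litellm.HuggingfaceConfig(max_new_tokens=200, temperature=0.2)

response = litellm.completion(
    model="huggingface/HuggingFaceH4/zephyr-7b-beta",  # placeholder repo id
    messages=[{"role": "user", "content": "Hello, how are you?"}],
    api_key="hf_...",  # placeholder; sent as a Bearer token per validate_environment
)
print(response["choices"][0]["message"]["content"])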
litellm/llms/maritalk.py ADDED
@@ -0,0 +1,164 @@
1
+ import os, types
2
+ import json
3
+ from enum import Enum
4
+ import requests
5
+ import time, traceback
6
+ from typing import Callable, Optional, List
7
+ from litellm.utils import ModelResponse, Choices, Message, Usage
8
+ import litellm
9
+
10
+ class MaritalkError(Exception):
11
+ def __init__(self, status_code, message):
12
+ self.status_code = status_code
13
+ self.message = message
14
+ super().__init__(
15
+ self.message
16
+ ) # Call the base class constructor with the parameters it needs
17
+
18
+ class MaritTalkConfig():
19
+ """
20
+ The class `MaritTalkConfig` provides configuration for the MaritTalk API interface. Here are the parameters:
21
+
22
+ - `max_tokens` (integer): Maximum number of tokens the model will generate as part of the response. Default is 1.
23
+
24
+ - `model` (string): The model used for conversation. Default is 'maritalk'.
25
+
26
+ - `do_sample` (boolean): If set to True, the API will generate a response using sampling. Default is True.
27
+
28
+ - `temperature` (number): A non-negative float controlling the randomness in generation. Lower temperatures result in less random generations. Default is 0.7.
29
+
30
+ - `top_p` (number): Selection threshold for token inclusion based on cumulative probability. Default is 0.95.
31
+
32
+ - `repetition_penalty` (number): Penalty for repetition in the generated conversation. Default is 1.
33
+
34
+ - `stopping_tokens` (list of string): List of tokens at which generation should stop.
35
+ """
36
+ max_tokens: Optional[int] = None
37
+ model: Optional[str] = None
38
+ do_sample: Optional[bool] = None
39
+ temperature: Optional[float] = None
40
+ top_p: Optional[float] = None
41
+ repetition_penalty: Optional[float] = None
42
+ stopping_tokens: Optional[List[str]] = None
43
+
44
+ def __init__(self,
45
+ max_tokens: Optional[int]=None,
46
+ model: Optional[str] = None,
47
+ do_sample: Optional[bool] = None,
48
+ temperature: Optional[float] = None,
49
+ top_p: Optional[float] = None,
50
+ repetition_penalty: Optional[float] = None,
51
+ stopping_tokens: Optional[List[str]] = None) -> None:
52
+
53
+ locals_ = locals()
54
+ for key, value in locals_.items():
55
+ if key != 'self' and value is not None:
56
+ setattr(self.__class__, key, value)
57
+
58
+ @classmethod
59
+ def get_config(cls):
60
+ return {k: v for k, v in cls.__dict__.items()
61
+ if not k.startswith('__')
62
+ and not isinstance(v, (types.FunctionType, types.BuiltinFunctionType, classmethod, staticmethod))
63
+ and v is not None}
64
+
65
+ def validate_environment(api_key):
66
+ headers = {
67
+ "accept": "application/json",
68
+ "content-type": "application/json",
69
+ }
70
+ if api_key:
71
+ headers["Authorization"] = f"Key {api_key}"
72
+ return headers
73
+
74
+ def completion(
75
+ model: str,
76
+ messages: list,
77
+ api_base: str,
78
+ model_response: ModelResponse,
79
+ print_verbose: Callable,
80
+ encoding,
81
+ api_key,
82
+ logging_obj,
83
+ optional_params=None,
84
+ litellm_params=None,
85
+ logger_fn=None,
86
+ ):
87
+ headers = validate_environment(api_key)
88
+ completion_url = api_base
89
+ model = model
90
+
91
+ ## Load Config
92
+ config=litellm.MaritTalkConfig.get_config()
93
+ for k, v in config.items():
94
+ if k not in optional_params: # completion(top_k=3) > maritalk_config(top_k=3) <- allows for dynamic variables to be passed in
95
+ optional_params[k] = v
96
+
97
+ data = {
98
+ "messages": messages,
99
+ **optional_params,
100
+ }
101
+
102
+ ## LOGGING
103
+ logging_obj.pre_call(
104
+ input=messages,
105
+ api_key=api_key,
106
+ additional_args={"complete_input_dict": data},
107
+ )
108
+ ## COMPLETION CALL
109
+ response = requests.post(
110
+ completion_url, headers=headers, data=json.dumps(data), stream=optional_params["stream"] if "stream" in optional_params else False
111
+ )
112
+ if "stream" in optional_params and optional_params["stream"] == True:
113
+ return response.iter_lines()
114
+ else:
115
+ ## LOGGING
116
+ logging_obj.post_call(
117
+ input=messages,
118
+ api_key=api_key,
119
+ original_response=response.text,
120
+ additional_args={"complete_input_dict": data},
121
+ )
122
+ print_verbose(f"raw model_response: {response.text}")
123
+ ## RESPONSE OBJECT
124
+ completion_response = response.json()
125
+ if "error" in completion_response:
126
+ raise MaritalkError(
127
+ message=completion_response["error"],
128
+ status_code=response.status_code,
129
+ )
130
+ else:
131
+ try:
132
+ if len(completion_response["answer"]) > 0:
133
+ model_response["choices"][0]["message"]["content"] = completion_response["answer"]
134
+ except Exception as e:
135
+ raise MaritalkError(message=response.text, status_code=response.status_code)
136
+
137
+ ## CALCULATING USAGE
138
+ prompt = "".join(m["content"] for m in messages)
139
+ prompt_tokens = len(
140
+ encoding.encode(prompt)
141
+ )
142
+ completion_tokens = len(
143
+ encoding.encode(model_response["choices"][0]["message"].get("content", ""))
144
+ )
145
+
146
+ model_response["created"] = int(time.time())
147
+ model_response["model"] = model
148
+ usage = Usage(
149
+ prompt_tokens=prompt_tokens,
150
+ completion_tokens=completion_tokens,
151
+ total_tokens=prompt_tokens + completion_tokens
152
+ )
153
+ model_response.usage = usage
154
+ return model_response
155
+
156
+ def embedding(
157
+ model: str,
158
+ input: list,
159
+ api_key: Optional[str] = None,
160
+ logging_obj=None,
161
+ model_response=None,
162
+ encoding=None,
163
+ ):
164
+ pass
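A usage sketch for the MariTalk handler above, assuming litellm routes the "maritalk" model name to this module; the key below is a placeholder and is sent as "Authorization: Key <api_key>" per validate_environment:

import litellm

# Defaults registered on MaritTalkConfig are merged into optional_params by completion().
litellm.MaritTalkConfig(max_tokens=512, temperature=0.7)

response = litellm.completion(
    model="maritalk",
    messages=[{"role": "user", "content": "Olá, tudo bem?"}],
    api_key="maritalk-api-key-placeholder",
)
print(response["choices"][0]["message"]["content"])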
litellm/llms/nlp_cloud.py ADDED
@@ -0,0 +1,212 @@
1
+ import os, types
2
+ import json
3
+ from enum import Enum
4
+ import requests
5
+ import time
6
+ from typing import Callable, Optional
7
+ import litellm
8
+ from litellm.utils import ModelResponse, Usage
9
+
10
+ class NLPCloudError(Exception):
11
+ def __init__(self, status_code, message):
12
+ self.status_code = status_code
13
+ self.message = message
14
+ super().__init__(
15
+ self.message
16
+ ) # Call the base class constructor with the parameters it needs
17
+
18
+ class NLPCloudConfig():
19
+ """
20
+ Reference: https://docs.nlpcloud.com/#generation
21
+
22
+ - `max_length` (int): Optional. The maximum number of tokens that the generated text should contain.
23
+
24
+ - `length_no_input` (boolean): Optional. Whether `min_length` and `max_length` should not include the length of the input text.
25
+
26
+ - `end_sequence` (string): Optional. A specific token that should be the end of the generated sequence.
27
+
28
+ - `remove_end_sequence` (boolean): Optional. Whether to remove the `end_sequence` string from the result.
29
+
30
+ - `remove_input` (boolean): Optional. Whether to remove the input text from the result.
31
+
32
+ - `bad_words` (list of strings): Optional. List of tokens that are not allowed to be generated.
33
+
34
+ - `temperature` (float): Optional. Temperature sampling. It modulates the next token probabilities.
35
+
36
+ - `top_p` (float): Optional. Top P sampling. Below 1, only the most probable tokens with probabilities that add up to top_p or higher are kept for generation.
37
+
38
+ - `top_k` (int): Optional. Top K sampling. The number of highest probability vocabulary tokens to keep for top k filtering.
39
+
40
+ - `repetition_penalty` (float): Optional. Prevents the same word from being repeated too many times.
41
+
42
+ - `num_beams` (int): Optional. Number of beams for beam search.
43
+
44
+ - `num_return_sequences` (int): Optional. The number of independently computed returned sequences.
45
+ """
46
+ max_length: Optional[int]=None
47
+ length_no_input: Optional[bool]=None
48
+ end_sequence: Optional[str]=None
49
+ remove_end_sequence: Optional[bool]=None
50
+ remove_input: Optional[bool]=None
51
+ bad_words: Optional[list]=None
52
+ temperature: Optional[float]=None
53
+ top_p: Optional[float]=None
54
+ top_k: Optional[int]=None
55
+ repetition_penalty: Optional[float]=None
56
+ num_beams: Optional[int]=None
57
+ num_return_sequences: Optional[int]=None
58
+
59
+
60
+ def __init__(self,
61
+ max_length: Optional[int]=None,
62
+ length_no_input: Optional[bool]=None,
63
+ end_sequence: Optional[str]=None,
64
+ remove_end_sequence: Optional[bool]=None,
65
+ remove_input: Optional[bool]=None,
66
+ bad_words: Optional[list]=None,
67
+ temperature: Optional[float]=None,
68
+ top_p: Optional[float]=None,
69
+ top_k: Optional[int]=None,
70
+ repetition_penalty: Optional[float]=None,
71
+ num_beams: Optional[int]=None,
72
+ num_return_sequences: Optional[int]=None) -> None:
73
+
74
+ locals_ = locals()
75
+ for key, value in locals_.items():
76
+ if key != 'self' and value is not None:
77
+ setattr(self.__class__, key, value)
78
+
79
+ @classmethod
80
+ def get_config(cls):
81
+ return {k: v for k, v in cls.__dict__.items()
82
+ if not k.startswith('__')
83
+ and not isinstance(v, (types.FunctionType, types.BuiltinFunctionType, classmethod, staticmethod))
84
+ and v is not None}
85
+
86
+
87
+ def validate_environment(api_key):
88
+ headers = {
89
+ "accept": "application/json",
90
+ "content-type": "application/json",
91
+ }
92
+ if api_key:
93
+ headers["Authorization"] = f"Token {api_key}"
94
+ return headers
95
+
96
+ def completion(
97
+ model: str,
98
+ messages: list,
99
+ api_base: str,
100
+ model_response: ModelResponse,
101
+ print_verbose: Callable,
102
+ encoding,
103
+ api_key,
104
+ logging_obj,
105
+ optional_params=None,
106
+ litellm_params=None,
107
+ logger_fn=None,
108
+ default_max_tokens_to_sample=None,
109
+ ):
110
+ headers = validate_environment(api_key)
111
+
112
+ ## Load Config
113
+ config = litellm.NLPCloudConfig.get_config()
114
+ for k, v in config.items():
115
+ if k not in optional_params: # completion(top_k=3) > togetherai_config(top_k=3) <- allows for dynamic variables to be passed in
116
+ optional_params[k] = v
117
+
118
+ completion_url_fragment_1 = api_base
119
+ completion_url_fragment_2 = "/generation"
120
+ model = model
121
+ text = " ".join(message["content"] for message in messages)
122
+
123
+ data = {
124
+ "text": text,
125
+ **optional_params,
126
+ }
127
+
128
+ completion_url = completion_url_fragment_1 + model + completion_url_fragment_2
129
+
130
+ ## LOGGING
131
+ logging_obj.pre_call(
132
+ input=text,
133
+ api_key=api_key,
134
+ additional_args={"complete_input_dict": data, "headers": headers, "api_base": completion_url},
135
+ )
136
+ ## COMPLETION CALL
137
+ response = requests.post(
138
+ completion_url, headers=headers, data=json.dumps(data), stream=optional_params["stream"] if "stream" in optional_params else False
139
+ )
140
+ if "stream" in optional_params and optional_params["stream"] == True:
141
+ return clean_and_iterate_chunks(response)
142
+ else:
143
+ ## LOGGING
144
+ logging_obj.post_call(
145
+ input=text,
146
+ api_key=api_key,
147
+ original_response=response.text,
148
+ additional_args={"complete_input_dict": data},
149
+ )
150
+ print_verbose(f"raw model_response: {response.text}")
151
+ ## RESPONSE OBJECT
152
+ try:
153
+ completion_response = response.json()
154
+ except:
155
+ raise NLPCloudError(message=response.text, status_code=response.status_code)
156
+ if "error" in completion_response:
157
+ raise NLPCloudError(
158
+ message=completion_response["error"],
159
+ status_code=response.status_code,
160
+ )
161
+ else:
162
+ try:
163
+ if len(completion_response["generated_text"]) > 0:
164
+ model_response["choices"][0]["message"]["content"] = completion_response["generated_text"]
165
+ except:
166
+ raise NLPCloudError(message=json.dumps(completion_response), status_code=response.status_code)
167
+
168
+ ## CALCULATING USAGE - NLP Cloud returns token counts directly in its response, so usage is read from nb_input_tokens / nb_generated_tokens.
169
+ prompt_tokens = completion_response["nb_input_tokens"]
170
+ completion_tokens = completion_response["nb_generated_tokens"]
171
+
172
+ model_response["created"] = int(time.time())
173
+ model_response["model"] = model
174
+ usage = Usage(
175
+ prompt_tokens=prompt_tokens,
176
+ completion_tokens=completion_tokens,
177
+ total_tokens=prompt_tokens + completion_tokens
178
+ )
179
+ model_response.usage = usage
180
+ return model_response
181
+
182
+
183
+ # def clean_and_iterate_chunks(response):
184
+ # def process_chunk(chunk):
185
+ # print(f"received chunk: {chunk}")
186
+ # cleaned_chunk = chunk.decode("utf-8")
187
+ # # Perform further processing based on your needs
188
+ # return cleaned_chunk
189
+
190
+ # for line in response.iter_lines():
191
+ # if line:
192
+ # yield process_chunk(line)
193
+ def clean_and_iterate_chunks(response):
194
+ buffer = b''
195
+
196
+ for chunk in response.iter_content(chunk_size=1024):
197
+ if not chunk:
198
+ break
199
+
200
+ buffer += chunk
201
+ while b'\x00' in buffer:
202
+ buffer = buffer.replace(b'\x00', b'')
203
+ yield buffer.decode('utf-8')
204
+ buffer = b''
205
+
206
+ # No more data expected, yield any remaining data in the buffer
207
+ if buffer:
208
+ yield buffer.decode('utf-8')
209
+
210
+ def embedding():
211
+ # logic for parsing in - calling - parsing out model embedding calls
212
+ pass
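A usage sketch for the NLP Cloud handler above, assuming litellm routes NLP Cloud model names (such as "dolphin") to this module; the key is a placeholder. The completion URL is built as api_base + model + "/generation" and the key is sent as a "Token" Authorization header:

import litellm, os

os.environ["NLP_CLOUD_API_KEY"] = "nlp-cloud-key-placeholder"  # placeholder key

response = litellm.completion(
    model="dolphin",  # NLP Cloud model name; provider routing happens outside this module
    messages=[{"role": "user", "content": "Summarize: LiteLLM wraps many LLM providers."}],
)
# Usage comes from nb_input_tokens / nb_generated_tokens in the provider response (see above).
print(response["choices"][0]["message"]["content"])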
litellm/llms/ollama.py ADDED
@@ -0,0 +1,231 @@
1
+ import requests, types
2
+ import json
3
+ import traceback
4
+ from typing import Optional
5
+ import litellm
6
+ import httpx
7
+
8
+ try:
9
+ from async_generator import async_generator, yield_ # optional dependency
10
+ async_generator_imported = True
11
+ except ImportError:
12
+ async_generator_imported = False # this should not throw an error, it will impact the 'import litellm' statement
13
+
14
+ class OllamaError(Exception):
15
+ def __init__(self, status_code, message):
16
+ self.status_code = status_code
17
+ self.message = message
18
+ self.request = httpx.Request(method="POST", url="http://localhost:11434")
19
+ self.response = httpx.Response(status_code=status_code, request=self.request)
20
+ super().__init__(
21
+ self.message
22
+ ) # Call the base class constructor with the parameters it needs
23
+
24
+ class OllamaConfig():
25
+ """
26
+ Reference: https://github.com/jmorganca/ollama/blob/main/docs/api.md#parameters
27
+
28
+ The class `OllamaConfig` provides the configuration for the Ollama API interface. Below are the parameters:
29
+
30
+ - `mirostat` (int): Enable Mirostat sampling for controlling perplexity. Default is 0, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0. Example usage: mirostat 0
31
+
32
+ - `mirostat_eta` (float): Influences how quickly the algorithm responds to feedback from the generated text. A lower learning rate will result in slower adjustments, while a higher learning rate will make the algorithm more responsive. Default: 0.1. Example usage: mirostat_eta 0.1
33
+
34
+ - `mirostat_tau` (float): Controls the balance between coherence and diversity of the output. A lower value will result in more focused and coherent text. Default: 5.0. Example usage: mirostat_tau 5.0
35
+
36
+ - `num_ctx` (int): Sets the size of the context window used to generate the next token. Default: 2048. Example usage: num_ctx 4096
37
+
38
+ - `num_gqa` (int): The number of GQA groups in the transformer layer. Required for some models, for example it is 8 for llama2:70b. Example usage: num_gqa 1
39
+
40
+ - `num_gpu` (int): The number of layers to send to the GPU(s). On macOS it defaults to 1 to enable metal support, 0 to disable. Example usage: num_gpu 0
41
+
42
+ - `num_thread` (int): Sets the number of threads to use during computation. By default, Ollama will detect this for optimal performance. It is recommended to set this value to the number of physical CPU cores your system has (as opposed to the logical number of cores). Example usage: num_thread 8
43
+
44
+ - `repeat_last_n` (int): Sets how far back for the model to look back to prevent repetition. Default: 64, 0 = disabled, -1 = num_ctx. Example usage: repeat_last_n 64
45
+
46
+ - `repeat_penalty` (float): Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. Default: 1.1. Example usage: repeat_penalty 1.1
47
+
48
+ - `temperature` (float): The temperature of the model. Increasing the temperature will make the model answer more creatively. Default: 0.8. Example usage: temperature 0.7
49
+
50
+ - `stop` (string[]): Sets the stop sequences to use. Example usage: stop "AI assistant:"
51
+
52
+ - `tfs_z` (float): Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting. Default: 1. Example usage: tfs_z 1
53
+
54
+ - `num_predict` (int): Maximum number of tokens to predict when generating text. Default: 128, -1 = infinite generation, -2 = fill context. Example usage: num_predict 42
55
+
56
+ - `top_k` (int): Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. Default: 40. Example usage: top_k 40
57
+
58
+ - `top_p` (float): Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. Default: 0.9. Example usage: top_p 0.9
59
+
60
+ - `system` (string): system prompt for model (overrides what is defined in the Modelfile)
61
+
62
+ - `template` (string): the full prompt or prompt template (overrides what is defined in the Modelfile)
63
+ """
64
+ mirostat: Optional[int]=None
65
+ mirostat_eta: Optional[float]=None
66
+ mirostat_tau: Optional[float]=None
67
+ num_ctx: Optional[int]=None
68
+ num_gqa: Optional[int]=None
69
+ num_thread: Optional[int]=None
70
+ repeat_last_n: Optional[int]=None
71
+ repeat_penalty: Optional[float]=None
72
+ temperature: Optional[float]=None
73
+ stop: Optional[list]=None # stop is a list based on this - https://github.com/jmorganca/ollama/pull/442
74
+ tfs_z: Optional[float]=None
75
+ num_predict: Optional[int]=None
76
+ top_k: Optional[int]=None
77
+ top_p: Optional[float]=None
78
+ system: Optional[str]=None
79
+ template: Optional[str]=None
80
+
81
+ def __init__(self,
82
+ mirostat: Optional[int]=None,
83
+ mirostat_eta: Optional[float]=None,
84
+ mirostat_tau: Optional[float]=None,
85
+ num_ctx: Optional[int]=None,
86
+ num_gqa: Optional[int]=None,
87
+ num_thread: Optional[int]=None,
88
+ repeat_last_n: Optional[int]=None,
89
+ repeat_penalty: Optional[float]=None,
90
+ temperature: Optional[float]=None,
91
+ stop: Optional[list]=None,
92
+ tfs_z: Optional[float]=None,
93
+ num_predict: Optional[int]=None,
94
+ top_k: Optional[int]=None,
95
+ top_p: Optional[float]=None,
96
+ system: Optional[str]=None,
97
+ template: Optional[str]=None) -> None:
98
+ locals_ = locals()
99
+ for key, value in locals_.items():
100
+ if key != 'self' and value is not None:
101
+ setattr(self.__class__, key, value)
102
+
103
+ @classmethod
104
+ def get_config(cls):
105
+ return {k: v for k, v in cls.__dict__.items()
106
+ if not k.startswith('__')
107
+ and not isinstance(v, (types.FunctionType, types.BuiltinFunctionType, classmethod, staticmethod))
108
+ and v is not None}
109
+
110
+
111
+ # ollama implementation
112
+ def get_ollama_response_stream(
113
+ api_base="http://localhost:11434",
114
+ model="llama2",
115
+ prompt="Why is the sky blue?",
116
+ optional_params=None,
117
+ logging_obj=None,
118
+ ):
119
+ if api_base.endswith("/api/generate"):
120
+ url = api_base
121
+ else:
122
+ url = f"{api_base}/api/generate"
123
+
124
+ ## Load Config
125
+ config=litellm.OllamaConfig.get_config()
126
+ for k, v in config.items():
127
+ if k not in optional_params: # completion(top_k=3) > cohere_config(top_k=3) <- allows for dynamic variables to be passed in
128
+ optional_params[k] = v
129
+
130
+ data = {
131
+ "model": model,
132
+ "prompt": prompt,
133
+ **optional_params
134
+ }
135
+ ## LOGGING
136
+ logging_obj.pre_call(
137
+ input=None,
138
+ api_key=None,
139
+ additional_args={"api_base": url, "complete_input_dict": data},
140
+ )
141
+ session = requests.Session()
142
+
143
+ with session.post(url, json=data, stream=True) as resp:
144
+ if resp.status_code != 200:
145
+ raise OllamaError(status_code=resp.status_code, message=resp.text)
146
+ for line in resp.iter_lines():
147
+ if line:
148
+ try:
149
+ json_chunk = line.decode("utf-8")
150
+ chunks = json_chunk.split("\n")
151
+ for chunk in chunks:
152
+ if chunk.strip() != "":
153
+ j = json.loads(chunk)
154
+ if "error" in j:
155
+ completion_obj = {
156
+ "role": "assistant",
157
+ "content": "",
158
+ "error": j
159
+ }
160
+ yield completion_obj
161
+ if "response" in j:
162
+ completion_obj = {
163
+ "role": "assistant",
164
+ "content": "",
165
+ }
166
+ completion_obj["content"] = j["response"]
167
+ yield completion_obj
168
+ except Exception as e:
169
+ traceback.print_exc()
170
+ session.close()
171
+
172
+ if async_generator_imported:
173
+ # ollama implementation
174
+ @async_generator
175
+ async def async_get_ollama_response_stream(
176
+ api_base="http://localhost:11434",
177
+ model="llama2",
178
+ prompt="Why is the sky blue?",
179
+ optional_params=None,
180
+ logging_obj=None,
181
+ ):
182
+ url = f"{api_base}/api/generate"
183
+
184
+ ## Load Config
185
+ config=litellm.OllamaConfig.get_config()
186
+ for k, v in config.items():
187
+ if k not in optional_params: # completion(top_k=3) > cohere_config(top_k=3) <- allows for dynamic variables to be passed in
188
+ optional_params[k] = v
189
+
190
+ data = {
191
+ "model": model,
192
+ "prompt": prompt,
193
+ **optional_params
194
+ }
195
+ ## LOGGING
196
+ logging_obj.pre_call(
197
+ input=None,
198
+ api_key=None,
199
+ additional_args={"api_base": url, "complete_input_dict": data},
200
+ )
201
+ session = requests.Session()
202
+
203
+ with session.post(url, json=data, stream=True) as resp:
204
+ if resp.status_code != 200:
205
+ raise OllamaError(status_code=resp.status_code, message=resp.text)
206
+ for line in resp.iter_lines():
207
+ if line:
208
+ try:
209
+ json_chunk = line.decode("utf-8")
210
+ chunks = json_chunk.split("\n")
211
+ for chunk in chunks:
212
+ if chunk.strip() != "":
213
+ j = json.loads(chunk)
214
+ if "error" in j:
215
+ completion_obj = {
216
+ "role": "assistant",
217
+ "content": "",
218
+ "error": j
219
+ }
220
+ await yield_({"choices": [{"delta": completion_obj}]})
221
+ if "response" in j:
222
+ completion_obj = {
223
+ "role": "assistant",
224
+ "content": "",
225
+ }
226
+ completion_obj["content"] = j["response"]
227
+ await yield_({"choices": [{"delta": completion_obj}]})
228
+ except Exception as e:
229
+ import logging
230
+ logging.debug(f"Error decoding JSON: {e}")
231
+ session.close()
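A streaming usage sketch for the Ollama handler above, assuming a local Ollama server on the default http://localhost:11434 used by this module; chunk access mirrors the bracket-style access used elsewhere in this commit, and the final chunk may carry no content:

import litellm

response = litellm.completion(
    model="ollama/llama2",
    messages=[{"role": "user", "content": "Why is the sky blue?"}],
    api_base="http://localhost:11434",  # get_ollama_response_stream appends /api/generate
    stream=True,
)
for chunk in response:
    # Mirrors chunk["choices"][0]["delta"]["content"] access used in huggingface_restapi.py above.
    piece = chunk["choices"][0]["delta"]["content"]
    if piece:
        print(piece, end="")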
litellm/llms/oobabooga.py ADDED
@@ -0,0 +1,124 @@
1
+ import os
2
+ import json
3
+ from enum import Enum
4
+ import requests
5
+ import time
6
+ from typing import Callable, Optional
7
+ from litellm.utils import ModelResponse, Usage
8
+ from .prompt_templates.factory import prompt_factory, custom_prompt
9
+
10
+ class OobaboogaError(Exception):
11
+ def __init__(self, status_code, message):
12
+ self.status_code = status_code
13
+ self.message = message
14
+ super().__init__(
15
+ self.message
16
+ ) # Call the base class constructor with the parameters it needs
17
+
18
+ def validate_environment(api_key):
19
+ headers = {
20
+ "accept": "application/json",
21
+ "content-type": "application/json",
22
+ }
23
+ if api_key:
24
+ headers["Authorization"] = f"Token {api_key}"
25
+ return headers
26
+
27
+ def completion(
28
+ model: str,
29
+ messages: list,
30
+ api_base: Optional[str],
31
+ model_response: ModelResponse,
32
+ print_verbose: Callable,
33
+ encoding,
34
+ api_key,
35
+ logging_obj,
36
+ custom_prompt_dict={},
37
+ optional_params=None,
38
+ litellm_params=None,
39
+ logger_fn=None,
40
+ default_max_tokens_to_sample=None,
41
+ ):
42
+ headers = validate_environment(api_key)
43
+ if "https" in model:
44
+ completion_url = model
45
+ elif api_base:
46
+ completion_url = api_base
47
+ else:
48
+ raise OobaboogaError(status_code=404, message="API Base not set. Set one via completion(..,api_base='your-api-url')")
49
+ model = model
50
+ if model in custom_prompt_dict:
51
+ # check if the model has a registered custom prompt
52
+ model_prompt_details = custom_prompt_dict[model]
53
+ prompt = custom_prompt(
54
+ role_dict=model_prompt_details["roles"],
55
+ initial_prompt_value=model_prompt_details["initial_prompt_value"],
56
+ final_prompt_value=model_prompt_details["final_prompt_value"],
57
+ messages=messages
58
+ )
59
+ else:
60
+ prompt = prompt_factory(model=model, messages=messages)
61
+
62
+ completion_url = completion_url + "/api/v1/generate"
63
+ data = {
64
+ "prompt": prompt,
65
+ **optional_params,
66
+ }
67
+ ## LOGGING
68
+ logging_obj.pre_call(
69
+ input=prompt,
70
+ api_key=api_key,
71
+ additional_args={"complete_input_dict": data},
72
+ )
73
+ ## COMPLETION CALL
74
+ response = requests.post(
75
+ completion_url, headers=headers, data=json.dumps(data), stream=optional_params["stream"] if "stream" in optional_params else False
76
+ )
77
+ if "stream" in optional_params and optional_params["stream"] == True:
78
+ return response.iter_lines()
79
+ else:
80
+ ## LOGGING
81
+ logging_obj.post_call(
82
+ input=prompt,
83
+ api_key=api_key,
84
+ original_response=response.text,
85
+ additional_args={"complete_input_dict": data},
86
+ )
87
+ print_verbose(f"raw model_response: {response.text}")
88
+ ## RESPONSE OBJECT
89
+ try:
90
+ completion_response = response.json()
91
+ except:
92
+ raise OobaboogaError(message=response.text, status_code=response.status_code)
93
+ if "error" in completion_response:
94
+ raise OobaboogaError(
95
+ message=completion_response["error"],
96
+ status_code=response.status_code,
97
+ )
98
+ else:
99
+ try:
100
+ model_response["choices"][0]["message"]["content"] = completion_response['results'][0]['text']
101
+ except:
102
+ raise OobaboogaError(message=json.dumps(completion_response), status_code=response.status_code)
103
+
104
+ ## CALCULATING USAGE
105
+ prompt_tokens = len(
106
+ encoding.encode(prompt)
107
+ )
108
+ completion_tokens = len(
109
+ encoding.encode(model_response["choices"][0]["message"]["content"])
110
+ )
111
+
112
+ model_response["created"] = int(time.time())
113
+ model_response["model"] = model
114
+ usage = Usage(
115
+ prompt_tokens=prompt_tokens,
116
+ completion_tokens=completion_tokens,
117
+ total_tokens=prompt_tokens + completion_tokens
118
+ )
119
+ model_response.usage = usage
120
+ return model_response
121
+
122
+ def embedding():
123
+ # logic for parsing in - calling - parsing out model embedding calls
124
+ pass
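The `custom_prompt_dict` lookup in `completion()` above expects entries keyed by model name, each carrying `roles`, `initial_prompt_value`, and `final_prompt_value`. A hedged sketch of such an entry (the model name and template strings are made up for illustration):

```python
# Hypothetical custom prompt registration for a model served behind Oobabooga.
# The nested keys mirror exactly what completion() reads before calling custom_prompt().
custom_prompt_dict = {
    "my-local-vicuna": {
        "roles": {
            "system":    {"pre_message": "SYSTEM: ",    "post_message": "\n"},
            "user":      {"pre_message": "USER: ",      "post_message": "\n"},
            "assistant": {"pre_message": "ASSISTANT: ", "post_message": "\n"},
        },
        "initial_prompt_value": "",
        "final_prompt_value": "ASSISTANT: ",
    }
}
```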
litellm/llms/openai.py ADDED
@@ -0,0 +1,590 @@
1
+ from typing import Optional, Union, Any
2
+ import types, time, json
3
+ import httpx
4
+ from .base import BaseLLM
5
+ from litellm.utils import ModelResponse, Choices, Message, CustomStreamWrapper, convert_to_model_response_object, Usage
6
+ from typing import Callable, Optional
7
+ import aiohttp, requests
8
+ import litellm
9
+ from .prompt_templates.factory import prompt_factory, custom_prompt
10
+ from openai import OpenAI, AsyncOpenAI
11
+
12
+ class OpenAIError(Exception):
13
+ def __init__(self, status_code, message, request: Optional[httpx.Request]=None, response: Optional[httpx.Response]=None):
14
+ self.status_code = status_code
15
+ self.message = message
16
+ if request:
17
+ self.request = request
18
+ else:
19
+ self.request = httpx.Request(method="POST", url="https://api.openai.com/v1")
20
+ if response:
21
+ self.response = response
22
+ else:
23
+ self.response = httpx.Response(status_code=status_code, request=self.request)
24
+ super().__init__(
25
+ self.message
26
+ ) # Call the base class constructor with the parameters it needs
27
+
28
+
29
+ class OpenAIConfig():
30
+ """
31
+ Reference: https://platform.openai.com/docs/api-reference/chat/create
32
+
33
+ The class `OpenAIConfig` provides configuration for OpenAI's Chat API interface. Below are the parameters:
34
+
35
+ - `frequency_penalty` (number or null): Defaults to 0. Allows a value between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, thereby minimizing repetition.
36
+
37
+ - `function_call` (string or object): This optional parameter controls how the model calls functions.
38
+
39
+ - `functions` (array): An optional parameter. It is a list of functions for which the model may generate JSON inputs.
40
+
41
+ - `logit_bias` (map): This optional parameter modifies the likelihood of specified tokens appearing in the completion.
42
+
43
+ - `max_tokens` (integer or null): This optional parameter helps to set the maximum number of tokens to generate in the chat completion.
44
+
45
+ - `n` (integer or null): This optional parameter helps to set how many chat completion choices to generate for each input message.
46
+
47
+ - `presence_penalty` (number or null): Defaults to 0. It penalizes new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
48
+
49
+ - `stop` (string / array / null): Specifies up to 4 sequences where the API will stop generating further tokens.
50
+
51
+ - `temperature` (number or null): Defines the sampling temperature to use, varying between 0 and 2.
52
+
53
+ - `top_p` (number or null): An alternative to sampling with temperature, used for nucleus sampling.
54
+ """
55
+ frequency_penalty: Optional[int]=None
56
+ function_call: Optional[Union[str, dict]]=None
57
+ functions: Optional[list]=None
58
+ logit_bias: Optional[dict]=None
59
+ max_tokens: Optional[int]=None
60
+ n: Optional[int]=None
61
+ presence_penalty: Optional[int]=None
62
+ stop: Optional[Union[str, list]]=None
63
+ temperature: Optional[int]=None
64
+ top_p: Optional[int]=None
65
+
66
+ def __init__(self,
67
+ frequency_penalty: Optional[int]=None,
68
+ function_call: Optional[Union[str, dict]]=None,
69
+ functions: Optional[list]=None,
70
+ logit_bias: Optional[dict]=None,
71
+ max_tokens: Optional[int]=None,
72
+ n: Optional[int]=None,
73
+ presence_penalty: Optional[int]=None,
74
+ stop: Optional[Union[str, list]]=None,
75
+ temperature: Optional[int]=None,
76
+ top_p: Optional[int]=None,) -> None:
77
+
78
+ locals_ = locals()
79
+ for key, value in locals_.items():
80
+ if key != 'self' and value is not None:
81
+ setattr(self.__class__, key, value)
82
+
83
+ @classmethod
84
+ def get_config(cls):
85
+ return {k: v for k, v in cls.__dict__.items()
86
+ if not k.startswith('__')
87
+ and not isinstance(v, (types.FunctionType, types.BuiltinFunctionType, classmethod, staticmethod))
88
+ and v is not None}
89
+
90
+ class OpenAITextCompletionConfig():
91
+ """
92
+ Reference: https://platform.openai.com/docs/api-reference/completions/create
93
+
94
+ The class `OpenAITextCompletionConfig` provides configuration for OpenAI's text completion API interface. Below are the parameters:
95
+
96
+ - `best_of` (integer or null): This optional parameter generates server-side completions and returns the one with the highest log probability per token.
97
+
98
+ - `echo` (boolean or null): This optional parameter will echo back the prompt in addition to the completion.
99
+
100
+ - `frequency_penalty` (number or null): Defaults to 0. It is a number from -2.0 to 2.0, where positive values decrease the model's likelihood to repeat the same line.
101
+
102
+ - `logit_bias` (map): This optional parameter modifies the likelihood of specified tokens appearing in the completion.
103
+
104
+ - `logprobs` (integer or null): This optional parameter includes the log probabilities on the most likely tokens as well as the chosen tokens.
105
+
106
+ - `max_tokens` (integer or null): This optional parameter sets the maximum number of tokens to generate in the completion.
107
+
108
+ - `n` (integer or null): This optional parameter sets how many completions to generate for each prompt.
109
+
110
+ - `presence_penalty` (number or null): Defaults to 0 and can be between -2.0 and 2.0. Positive values increase the model's likelihood to talk about new topics.
111
+
112
+ - `stop` (string / array / null): Specifies up to 4 sequences where the API will stop generating further tokens.
113
+
114
+ - `suffix` (string or null): Defines the suffix that comes after a completion of inserted text.
115
+
116
+ - `temperature` (number or null): This optional parameter defines the sampling temperature to use.
117
+
118
+ - `top_p` (number or null): An alternative to sampling with temperature, used for nucleus sampling.
119
+ """
120
+ best_of: Optional[int]=None
121
+ echo: Optional[bool]=None
122
+ frequency_penalty: Optional[int]=None
123
+ logit_bias: Optional[dict]=None
124
+ logprobs: Optional[int]=None
125
+ max_tokens: Optional[int]=None
126
+ n: Optional[int]=None
127
+ presence_penalty: Optional[int]=None
128
+ stop: Optional[Union[str, list]]=None
129
+ suffix: Optional[str]=None
130
+ temperature: Optional[float]=None
131
+ top_p: Optional[float]=None
132
+
133
+ def __init__(self,
134
+ best_of: Optional[int]=None,
135
+ echo: Optional[bool]=None,
136
+ frequency_penalty: Optional[int]=None,
137
+ logit_bias: Optional[dict]=None,
138
+ logprobs: Optional[int]=None,
139
+ max_tokens: Optional[int]=None,
140
+ n: Optional[int]=None,
141
+ presence_penalty: Optional[int]=None,
142
+ stop: Optional[Union[str, list]]=None,
143
+ suffix: Optional[str]=None,
144
+ temperature: Optional[float]=None,
145
+ top_p: Optional[float]=None) -> None:
146
+ locals_ = locals()
147
+ for key, value in locals_.items():
148
+ if key != 'self' and value is not None:
149
+ setattr(self.__class__, key, value)
150
+
151
+ @classmethod
152
+ def get_config(cls):
153
+ return {k: v for k, v in cls.__dict__.items()
154
+ if not k.startswith('__')
155
+ and not isinstance(v, (types.FunctionType, types.BuiltinFunctionType, classmethod, staticmethod))
156
+ and v is not None}
157
+
158
+ class OpenAIChatCompletion(BaseLLM):
159
+
160
+ def __init__(self) -> None:
161
+ super().__init__()
162
+
163
+ def completion(self,
164
+ model_response: ModelResponse,
165
+ timeout: float,
166
+ model: Optional[str]=None,
167
+ messages: Optional[list]=None,
168
+ print_verbose: Optional[Callable]=None,
169
+ api_key: Optional[str]=None,
170
+ api_base: Optional[str]=None,
171
+ acompletion: bool = False,
172
+ logging_obj=None,
173
+ optional_params=None,
174
+ litellm_params=None,
175
+ logger_fn=None,
176
+ headers: Optional[dict]=None,
177
+ custom_prompt_dict: dict={},
178
+ client=None
179
+ ):
180
+ super().completion()
181
+ exception_mapping_worked = False
182
+ try:
183
+ if headers:
184
+ optional_params["extra_headers"] = headers
185
+ if model is None or messages is None:
186
+ raise OpenAIError(status_code=422, message=f"Missing model or messages")
187
+
188
+ if not isinstance(timeout, float):
189
+ raise OpenAIError(status_code=422, message=f"Timeout needs to be a float")
190
+
191
+ for _ in range(2): # if call fails due to alternating messages, retry with reformatted message
192
+ data = {
193
+ "model": model,
194
+ "messages": messages,
195
+ **optional_params
196
+ }
197
+
198
+ ## LOGGING
199
+ logging_obj.pre_call(
200
+ input=messages,
201
+ api_key=api_key,
202
+ additional_args={"headers": headers, "api_base": api_base, "acompletion": acompletion, "complete_input_dict": data},
203
+ )
204
+
205
+ try:
206
+ max_retries = data.pop("max_retries", 2)
207
+ if acompletion is True:
208
+ if optional_params.get("stream", False):
209
+ return self.async_streaming(logging_obj=logging_obj, data=data, model=model, api_base=api_base, api_key=api_key, timeout=timeout, client=client, max_retries=max_retries)
210
+ else:
211
+ return self.acompletion(data=data, model_response=model_response, api_base=api_base, api_key=api_key, timeout=timeout, client=client, max_retries=max_retries)
212
+ elif optional_params.get("stream", False):
213
+ return self.streaming(logging_obj=logging_obj, data=data, model=model, api_base=api_base, api_key=api_key, timeout=timeout, client=client, max_retries=max_retries)
214
+ else:
215
+ if not isinstance(max_retries, int):
216
+ raise OpenAIError(status_code=422, message="max retries must be an int")
217
+ if client is None:
218
+ openai_client = OpenAI(api_key=api_key, base_url=api_base, http_client=litellm.client_session, timeout=timeout, max_retries=max_retries)
219
+ else:
220
+ openai_client = client
221
+ response = openai_client.chat.completions.create(**data) # type: ignore
222
+ logging_obj.post_call(
223
+ input=None,
224
+ api_key=api_key,
225
+ original_response=response,
226
+ additional_args={"complete_input_dict": data},
227
+ )
228
+ return convert_to_model_response_object(response_object=json.loads(response.model_dump_json()), model_response_object=model_response)
229
+ except Exception as e:
230
+ if "Conversation roles must alternate user/assistant" in str(e) or "user and assistant roles should be alternating" in str(e):
231
+ # reformat messages to ensure user/assistant are alternating, if there's either 2 consecutive 'user' messages or 2 consecutive 'assistant' message, add a blank 'user' or 'assistant' message to ensure compatibility
232
+ new_messages = []
233
+ for i in range(len(messages)-1):
234
+ new_messages.append(messages[i])
235
+ if messages[i]["role"] == messages[i+1]["role"]:
236
+ if messages[i]["role"] == "user":
237
+ new_messages.append({"role": "assistant", "content": ""})
238
+ else:
239
+ new_messages.append({"role": "user", "content": ""})
240
+ new_messages.append(messages[-1])
241
+ messages = new_messages
242
+ elif "Last message must have role `user`" in str(e):
243
+ new_messages = messages
244
+ new_messages.append({"role": "user", "content": ""})
245
+ messages = new_messages
246
+ else:
247
+ raise e
248
+ except OpenAIError as e:
249
+ exception_mapping_worked = True
250
+ raise e
251
+ except Exception as e:
252
+ raise e
253
+
254
+ async def acompletion(self,
255
+ data: dict,
256
+ model_response: ModelResponse,
257
+ timeout: float,
258
+ api_key: Optional[str]=None,
259
+ api_base: Optional[str]=None,
260
+ client=None,
261
+ max_retries=None,
262
+ ):
263
+ response = None
264
+ try:
265
+ if client is None:
266
+ openai_aclient = AsyncOpenAI(api_key=api_key, base_url=api_base, http_client=litellm.aclient_session, timeout=timeout, max_retries=max_retries)
267
+ else:
268
+ openai_aclient = client
269
+ response = await openai_aclient.chat.completions.create(**data)
270
+ return convert_to_model_response_object(response_object=json.loads(response.model_dump_json()), model_response_object=model_response)
271
+ except Exception as e:
272
+ if response and hasattr(response, "text"):
273
+ raise OpenAIError(status_code=500, message=f"{str(e)}\n\nOriginal Response: {response.text}")
274
+ else:
275
+ if type(e).__name__ == "ReadTimeout":
276
+ raise OpenAIError(status_code=408, message=f"{type(e).__name__}")
277
+ else:
278
+ raise OpenAIError(status_code=500, message=f"{str(e)}")
279
+
280
+ def streaming(self,
281
+ logging_obj,
282
+ timeout: float,
283
+ data: dict,
284
+ model: str,
285
+ api_key: Optional[str]=None,
286
+ api_base: Optional[str]=None,
287
+ client = None,
288
+ max_retries=None
289
+ ):
290
+ if client is None:
291
+ openai_client = OpenAI(api_key=api_key, base_url=api_base, http_client=litellm.client_session, timeout=timeout, max_retries=max_retries)
292
+ else:
293
+ openai_client = client
294
+ response = openai_client.chat.completions.create(**data)
295
+ streamwrapper = CustomStreamWrapper(completion_stream=response, model=model, custom_llm_provider="openai",logging_obj=logging_obj)
296
+ return streamwrapper
297
+
298
+ async def async_streaming(self,
299
+ logging_obj,
300
+ timeout: float,
301
+ data: dict,
302
+ model: str,
303
+ api_key: Optional[str]=None,
304
+ api_base: Optional[str]=None,
305
+ client=None,
306
+ max_retries=None,
307
+ ):
308
+ response = None
309
+ try:
310
+ if client is None:
311
+ openai_aclient = AsyncOpenAI(api_key=api_key, base_url=api_base, http_client=litellm.aclient_session, timeout=timeout, max_retries=max_retries)
312
+ else:
313
+ openai_aclient = client
314
+ response = await openai_aclient.chat.completions.create(**data)
315
+ streamwrapper = CustomStreamWrapper(completion_stream=response, model=model, custom_llm_provider="openai",logging_obj=logging_obj)
316
+ async for transformed_chunk in streamwrapper:
317
+ yield transformed_chunk
318
+ except Exception as e: # need to exception handle here. async exceptions don't get caught in sync functions.
319
+ if response is not None and hasattr(response, "text"):
320
+ raise OpenAIError(status_code=500, message=f"{str(e)}\n\nOriginal Response: {response.text}")
321
+ else:
322
+ if type(e).__name__ == "ReadTimeout":
323
+ raise OpenAIError(status_code=408, message=f"{type(e).__name__}")
324
+ else:
325
+ raise OpenAIError(status_code=500, message=f"{str(e)}")
326
+ async def aembedding(
327
+ self,
328
+ data: dict,
329
+ model_response: ModelResponse,
330
+ timeout: float,
331
+ api_key: Optional[str]=None,
332
+ api_base: Optional[str]=None,
333
+ client=None,
334
+ max_retries=None,
335
+ ):
336
+ response = None
337
+ try:
338
+ if client is None:
339
+ openai_aclient = AsyncOpenAI(api_key=api_key, base_url=api_base, http_client=litellm.aclient_session, timeout=timeout, max_retries=max_retries)
340
+ else:
341
+ openai_aclient = client
342
+ response = await openai_aclient.embeddings.create(**data) # type: ignore
343
+ return response
344
+ except Exception as e:
345
+ raise e
346
+ def embedding(self,
347
+ model: str,
348
+ input: list,
349
+ timeout: float,
350
+ api_key: Optional[str] = None,
351
+ api_base: Optional[str] = None,
352
+ model_response: Optional[litellm.utils.EmbeddingResponse] = None,
353
+ logging_obj=None,
354
+ optional_params=None,
355
+ client=None,
356
+ aembedding=None,
357
+ ):
358
+ super().embedding()
359
+ exception_mapping_worked = False
360
+ try:
361
+ model = model
362
+ data = {
363
+ "model": model,
364
+ "input": input,
365
+ **optional_params
366
+ }
367
+ max_retries = data.pop("max_retries", 2)
368
+ if not isinstance(max_retries, int):
369
+ raise OpenAIError(status_code=422, message="max retries must be an int")
370
+ if aembedding == True:
371
+ response = self.aembedding(data=data, model_response=model_response, api_base=api_base, api_key=api_key, timeout=timeout, client=client, max_retries=max_retries) # type: ignore
372
+ return response
373
+ if client is None:
374
+ openai_client = OpenAI(api_key=api_key, base_url=api_base, http_client=litellm.client_session, timeout=timeout, max_retries=max_retries)
375
+ else:
376
+ openai_client = client
377
+ ## LOGGING
378
+ logging_obj.pre_call(
379
+ input=input,
380
+ api_key=api_key,
381
+ additional_args={"complete_input_dict": data, "api_base": api_base},
382
+ )
383
+
384
+ ## COMPLETION CALL
385
+ response = openai_client.embeddings.create(**data) # type: ignore
386
+ ## LOGGING
387
+ logging_obj.post_call(
388
+ input=input,
389
+ api_key=api_key,
390
+ additional_args={"complete_input_dict": data},
391
+ original_response=response,
392
+ )
393
+
394
+ return convert_to_model_response_object(response_object=json.loads(response.model_dump_json()), model_response_object=model_response, response_type="embedding") # type: ignore
395
+ except OpenAIError as e:
396
+ exception_mapping_worked = True
397
+ raise e
398
+ except Exception as e:
399
+ if exception_mapping_worked:
400
+ raise e
401
+ else:
402
+ import traceback
403
+ raise OpenAIError(status_code=500, message=traceback.format_exc())
404
+
405
+
406
+ class OpenAITextCompletion(BaseLLM):
407
+ _client_session: httpx.Client
408
+
409
+ def __init__(self) -> None:
410
+ super().__init__()
411
+ self._client_session = self.create_client_session()
412
+
413
+ def validate_environment(self, api_key):
414
+ headers = {
415
+ "content-type": "application/json",
416
+ }
417
+ if api_key:
418
+ headers["Authorization"] = f"Bearer {api_key}"
419
+ return headers
420
+
421
+ def convert_to_model_response_object(self, response_object: Optional[dict]=None, model_response_object: Optional[ModelResponse]=None):
422
+ try:
423
+ ## RESPONSE OBJECT
424
+ if response_object is None or model_response_object is None:
425
+ raise ValueError("Error in response object format")
426
+ choice_list=[]
427
+ for idx, choice in enumerate(response_object["choices"]):
428
+ message = Message(content=choice["text"], role="assistant")
429
+ choice = Choices(finish_reason=choice["finish_reason"], index=idx, message=message)
430
+ choice_list.append(choice)
431
+ model_response_object.choices = choice_list
432
+
433
+ if "usage" in response_object:
434
+ model_response_object.usage = response_object["usage"]
435
+
436
+ if "id" in response_object:
437
+ model_response_object.id = response_object["id"]
438
+
439
+ if "model" in response_object:
440
+ model_response_object.model = response_object["model"]
441
+
442
+ model_response_object._hidden_params["original_response"] = response_object # track original response, if users make a litellm.text_completion() request, we can return the original response
443
+ return model_response_object
444
+ except Exception as e:
445
+ raise e
446
+
447
+ def completion(self,
448
+ model_response: ModelResponse,
449
+ api_key: str,
450
+ model: str,
451
+ messages: list,
452
+ print_verbose: Optional[Callable]=None,
453
+ api_base: Optional[str]=None,
454
+ logging_obj=None,
455
+ acompletion: bool = False,
456
+ optional_params=None,
457
+ litellm_params=None,
458
+ logger_fn=None,
459
+ headers: Optional[dict]=None):
460
+ super().completion()
461
+ exception_mapping_worked = False
462
+ try:
463
+ if headers is None:
464
+ headers = self.validate_environment(api_key=api_key)
465
+ if model is None or messages is None:
466
+ raise OpenAIError(status_code=422, message=f"Missing model or messages")
467
+
468
+ api_base = f"{api_base}/completions"
469
+
470
+ if len(messages)>0 and "content" in messages[0] and type(messages[0]["content"]) == list:
471
+ prompt = messages[0]["content"]
472
+ else:
473
+ prompt = " ".join([message["content"] for message in messages]) # type: ignore
474
+
475
+ data = {
476
+ "model": model,
477
+ "prompt": prompt,
478
+ **optional_params
479
+ }
480
+
481
+ ## LOGGING
482
+ logging_obj.pre_call(
483
+ input=messages,
484
+ api_key=api_key,
485
+ additional_args={"headers": headers, "api_base": api_base, "complete_input_dict": data},
486
+ )
487
+ if acompletion == True:
488
+ if optional_params.get("stream", False):
489
+ return self.async_streaming(logging_obj=logging_obj, api_base=api_base, data=data, headers=headers, model_response=model_response, model=model)
490
+ else:
491
+ return self.acompletion(api_base=api_base, data=data, headers=headers, model_response=model_response, prompt=prompt, api_key=api_key, logging_obj=logging_obj, model=model) # type: ignore
492
+ elif optional_params.get("stream", False):
493
+ return self.streaming(logging_obj=logging_obj, api_base=api_base, data=data, headers=headers, model_response=model_response, model=model)
494
+ else:
495
+ response = httpx.post(
496
+ url=f"{api_base}",
497
+ json=data,
498
+ headers=headers,
499
+ )
500
+ if response.status_code != 200:
501
+ raise OpenAIError(status_code=response.status_code, message=response.text)
502
+
503
+ ## LOGGING
504
+ logging_obj.post_call(
505
+ input=prompt,
506
+ api_key=api_key,
507
+ original_response=response,
508
+ additional_args={
509
+ "headers": headers,
510
+ "api_base": api_base,
511
+ },
512
+ )
513
+
514
+ ## RESPONSE OBJECT
515
+ return self.convert_to_model_response_object(response_object=response.json(), model_response_object=model_response)
516
+ except Exception as e:
517
+ raise e
518
+
519
+ async def acompletion(self,
520
+ logging_obj,
521
+ api_base: str,
522
+ data: dict,
523
+ headers: dict,
524
+ model_response: ModelResponse,
525
+ prompt: str,
526
+ api_key: str,
527
+ model: str):
528
+ async with httpx.AsyncClient() as client:
529
+ response = await client.post(api_base, json=data, headers=headers, timeout=litellm.request_timeout)
530
+ response_json = response.json()
531
+ if response.status_code != 200:
532
+ raise OpenAIError(status_code=response.status_code, message=response.text)
533
+
534
+ ## LOGGING
535
+ logging_obj.post_call(
536
+ input=prompt,
537
+ api_key=api_key,
538
+ original_response=response,
539
+ additional_args={
540
+ "headers": headers,
541
+ "api_base": api_base,
542
+ },
543
+ )
544
+
545
+ ## RESPONSE OBJECT
546
+ return self.convert_to_model_response_object(response_object=response_json, model_response_object=model_response)
547
+
548
+ def streaming(self,
549
+ logging_obj,
550
+ api_base: str,
551
+ data: dict,
552
+ headers: dict,
553
+ model_response: ModelResponse,
554
+ model: str
555
+ ):
556
+ with httpx.stream(
557
+ url=f"{api_base}",
558
+ json=data,
559
+ headers=headers,
560
+ method="POST",
561
+ timeout=litellm.request_timeout
562
+ ) as response:
563
+ if response.status_code != 200:
564
+ raise OpenAIError(status_code=response.status_code, message=response.text)
565
+
566
+ streamwrapper = CustomStreamWrapper(completion_stream=response.iter_lines(), model=model, custom_llm_provider="text-completion-openai",logging_obj=logging_obj)
567
+ for transformed_chunk in streamwrapper:
568
+ yield transformed_chunk
569
+
570
+ async def async_streaming(self,
571
+ logging_obj,
572
+ api_base: str,
573
+ data: dict,
574
+ headers: dict,
575
+ model_response: ModelResponse,
576
+ model: str):
577
+ client = httpx.AsyncClient()
578
+ async with client.stream(
579
+ url=f"{api_base}",
580
+ json=data,
581
+ headers=headers,
582
+ method="POST",
583
+ timeout=litellm.request_timeout
584
+ ) as response:
585
+ if response.status_code != 200:
586
+ raise OpenAIError(status_code=response.status_code, message=response.text)
587
+
588
+ streamwrapper = CustomStreamWrapper(completion_stream=response.aiter_lines(), model=model, custom_llm_provider="text-completion-openai",logging_obj=logging_obj)
589
+ async for transformed_chunk in streamwrapper:
590
+ yield transformed_chunk
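The retry path in `OpenAIChatCompletion.completion()` repairs conversations whose roles do not alternate by inserting blank messages of the opposite role. A self-contained sketch of that repair (the helper name is made up; litellm performs this inline):

```python
def repair_alternation(messages: list) -> list:
    """Insert a blank message of the opposite role wherever two consecutive
    messages share a role, so strict user/assistant providers accept them."""
    fixed = []
    for i in range(len(messages) - 1):
        fixed.append(messages[i])
        if messages[i]["role"] == messages[i + 1]["role"]:
            filler = "assistant" if messages[i]["role"] == "user" else "user"
            fixed.append({"role": filler, "content": ""})
    fixed.append(messages[-1])
    return fixed

print(repair_alternation([
    {"role": "user", "content": "hi"},
    {"role": "user", "content": "are you still there?"},
]))
# -> user, assistant(""), user
```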
litellm/llms/palm.py ADDED
@@ -0,0 +1,177 @@
1
+ import os, types, traceback, copy
2
+ import json
3
+ from enum import Enum
4
+ import time
5
+ from typing import Callable, Optional
6
+ from litellm.utils import ModelResponse, get_secret, Choices, Message, Usage
7
+ import litellm
8
+ import sys, httpx
9
+
10
+ class PalmError(Exception):
11
+ def __init__(self, status_code, message):
12
+ self.status_code = status_code
13
+ self.message = message
14
+ self.request = httpx.Request(method="POST", url="https://developers.generativeai.google/api/python/google/generativeai/chat")
15
+ self.response = httpx.Response(status_code=status_code, request=self.request)
16
+ super().__init__(
17
+ self.message
18
+ ) # Call the base class constructor with the parameters it needs
19
+
20
+ class PalmConfig():
21
+ """
22
+ Reference: https://developers.generativeai.google/api/python/google/generativeai/chat
23
+
24
+ The class `PalmConfig` provides configuration for the PaLM API interface. Here are the parameters:
25
+
26
+ - `context` (string): Text that should be provided to the model first, to ground the response. This could be a prompt to guide the model's responses.
27
+
28
+ - `examples` (list): Examples of what the model should generate. They are treated identically to conversation messages except that they take precedence over the history in messages if the total input size exceeds the model's input_token_limit.
29
+
30
+ - `temperature` (float): Controls the randomness of the output. Must be positive. Higher values produce a more random and varied response. A temperature of zero will be deterministic.
31
+
32
+ - `candidate_count` (int): Maximum number of generated response messages to return. This value must be between [1, 8], inclusive. Only unique candidates are returned.
33
+
34
+ - `top_k` (int): The API uses combined nucleus and top-k sampling. `top_k` sets the maximum number of tokens to sample from on each step.
35
+
36
+ - `top_p` (float): The API uses combined nucleus and top-k sampling. `top_p` configures the nucleus sampling. It sets the maximum cumulative probability of tokens to sample from.
37
+
38
+ - `max_output_tokens` (int): Sets the maximum number of tokens to be returned in the output
39
+ """
40
+ context: Optional[str]=None
41
+ examples: Optional[list]=None
42
+ temperature: Optional[float]=None
43
+ candidate_count: Optional[int]=None
44
+ top_k: Optional[int]=None
45
+ top_p: Optional[float]=None
46
+ max_output_tokens: Optional[int]=None
47
+
48
+ def __init__(self,
49
+ context: Optional[str]=None,
50
+ examples: Optional[list]=None,
51
+ temperature: Optional[float]=None,
52
+ candidate_count: Optional[int]=None,
53
+ top_k: Optional[int]=None,
54
+ top_p: Optional[float]=None,
55
+ max_output_tokens: Optional[int]=None) -> None:
56
+
57
+ locals_ = locals()
58
+ for key, value in locals_.items():
59
+ if key != 'self' and value is not None:
60
+ setattr(self.__class__, key, value)
61
+
62
+ @classmethod
63
+ def get_config(cls):
64
+ return {k: v for k, v in cls.__dict__.items()
65
+ if not k.startswith('__')
66
+ and not isinstance(v, (types.FunctionType, types.BuiltinFunctionType, classmethod, staticmethod))
67
+ and v is not None}
68
+
69
+
70
+
71
+ def completion(
72
+ model: str,
73
+ messages: list,
74
+ model_response: ModelResponse,
75
+ print_verbose: Callable,
76
+ api_key,
77
+ encoding,
78
+ logging_obj,
79
+ optional_params=None,
80
+ litellm_params=None,
81
+ logger_fn=None,
82
+ ):
83
+ try:
84
+ import google.generativeai as palm
85
+ except:
86
+ raise Exception("Importing google.generativeai failed, please run 'pip install -q google-generativeai'")
87
+ palm.configure(api_key=api_key)
88
+
89
+ model = model
90
+
91
+ ## Load Config
92
+ inference_params = copy.deepcopy(optional_params)
93
+ inference_params.pop("stream", None) # palm does not support streaming, so we handle this by fake streaming in main.py
94
+ config = litellm.PalmConfig.get_config()
95
+ for k, v in config.items():
96
+ if k not in inference_params: # completion(top_k=3) > palm_config(top_k=3) <- allows for dynamic variables to be passed in
97
+ inference_params[k] = v
98
+
99
+ prompt = ""
100
+ for message in messages:
101
+ if "role" in message:
102
+ if message["role"] == "user":
103
+ prompt += (
104
+ f"{message['content']}"
105
+ )
106
+ else:
107
+ prompt += (
108
+ f"{message['content']}"
109
+ )
110
+ else:
111
+ prompt += f"{message['content']}"
112
+
113
+ ## LOGGING
114
+ logging_obj.pre_call(
115
+ input=prompt,
116
+ api_key="",
117
+ additional_args={"complete_input_dict": {"inference_params": inference_params}},
118
+ )
119
+ ## COMPLETION CALL
120
+ try:
121
+ response = palm.generate_text(prompt=prompt, **inference_params)
122
+ except Exception as e:
123
+ raise PalmError(
124
+ message=str(e),
125
+ status_code=500,
126
+ )
127
+
128
+ ## LOGGING
129
+ logging_obj.post_call(
130
+ input=prompt,
131
+ api_key="",
132
+ original_response=response,
133
+ additional_args={"complete_input_dict": {}},
134
+ )
135
+ print_verbose(f"raw model_response: {response}")
136
+ ## RESPONSE OBJECT
137
+ completion_response = response
138
+ try:
139
+ choices_list = []
140
+ for idx, item in enumerate(completion_response.candidates):
141
+ if len(item["output"]) > 0:
142
+ message_obj = Message(content=item["output"])
143
+ else:
144
+ message_obj = Message(content=None)
145
+ choice_obj = Choices(index=idx+1, message=message_obj)
146
+ choices_list.append(choice_obj)
147
+ model_response["choices"] = choices_list
148
+ except Exception as e:
149
+ traceback.print_exc()
150
+ raise PalmError(message=traceback.format_exc(), status_code=500) # the palm response object carries no HTTP status code
151
+
152
+ try:
153
+ completion_response = model_response["choices"][0]["message"].get("content")
154
+ except:
155
+ raise PalmError(status_code=400, message=f"No response received. Original response - {response}")
156
+
157
+ ## CALCULATING USAGE - baseten charges on time, not tokens - have some mapping of cost here.
158
+ prompt_tokens = len(
159
+ encoding.encode(prompt)
160
+ )
161
+ completion_tokens = len(
162
+ encoding.encode(model_response["choices"][0]["message"].get("content", ""))
163
+ )
164
+
165
+ model_response["created"] = int(time.time())
166
+ model_response["model"] = "palm/" + model
167
+ usage = Usage(
168
+ prompt_tokens=prompt_tokens,
169
+ completion_tokens=completion_tokens,
170
+ total_tokens=prompt_tokens + completion_tokens
171
+ )
172
+ model_response.usage = usage
173
+ return model_response
174
+
175
+ def embedding():
176
+ # logic for parsing in - calling - parsing out model embedding calls
177
+ pass
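`PalmConfig` repeats the pattern shared by every provider config in this commit: `__init__` writes non-None arguments onto the class itself, and `get_config()` reads back only the values that were actually set. A minimal self-contained reimplementation of that pattern (the class and field names are illustrative, not litellm's):

```python
import types
from typing import Optional

class ExampleConfig:
    temperature: Optional[float] = None
    top_p: Optional[float] = None

    def __init__(self, temperature: Optional[float] = None,
                 top_p: Optional[float] = None) -> None:
        for key, value in locals().items():
            if key != "self" and value is not None:
                setattr(self.__class__, key, value)   # stored on the class, so it persists

    @classmethod
    def get_config(cls):
        return {
            k: v for k, v in cls.__dict__.items()
            if not k.startswith("__")
            and not isinstance(v, (types.FunctionType, types.BuiltinFunctionType,
                                   classmethod, staticmethod))
            and v is not None
        }

ExampleConfig(temperature=0.5)                 # acts as a process-wide default setter
assert ExampleConfig.get_config() == {"temperature": 0.5}
```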
litellm/llms/petals.py ADDED
@@ -0,0 +1,189 @@
1
+ import os, types
2
+ import json
3
+ from enum import Enum
4
+ import requests
5
+ import time
6
+ from typing import Callable, Optional
7
+ import litellm
8
+ from litellm.utils import ModelResponse, Usage
9
+ from .prompt_templates.factory import prompt_factory, custom_prompt
10
+
11
+ class PetalsError(Exception):
12
+ def __init__(self, status_code, message):
13
+ self.status_code = status_code
14
+ self.message = message
15
+ super().__init__(
16
+ self.message
17
+ ) # Call the base class constructor with the parameters it needs
18
+
19
+ class PetalsConfig():
20
+ """
21
+ Reference: https://github.com/petals-infra/chat.petals.dev#post-apiv1generate
22
+ The `PetalsConfig` class encapsulates the configuration for the Petals API. The properties of this class are described below:
23
+
24
+ - `max_length` (integer): This represents the maximum length of the generated text (including the prefix) in tokens.
25
+
26
+ - `max_new_tokens` (integer): This represents the maximum number of newly generated tokens (excluding the prefix).
27
+
28
+ The generation parameters are compatible with `.generate()` from Hugging Face's Transformers library:
29
+
30
+ - `do_sample` (boolean, optional): If set to 0 (default), the API runs greedy generation. If set to 1, the API performs sampling using the parameters below:
31
+
32
+ - `temperature` (float, optional): This value sets the temperature for sampling.
33
+
34
+ - `top_k` (integer, optional): This value sets the limit for top-k sampling.
35
+
36
+ - `top_p` (float, optional): This value sets the limit for top-p (nucleus) sampling.
37
+
38
+ - `repetition_penalty` (float, optional): This helps apply the repetition penalty during text generation, as discussed in this paper.
39
+ """
40
+ max_length: Optional[int]=None
41
+ max_new_tokens: Optional[int]=litellm.max_tokens # petals requires max tokens to be set
42
+ do_sample: Optional[bool]=None
43
+ temperature: Optional[float]=None
44
+ top_k: Optional[int]=None
45
+ top_p: Optional[float]=None
46
+ repetition_penalty: Optional[float]=None
47
+
48
+ def __init__(self,
49
+ max_length: Optional[int]=None,
50
+ max_new_tokens: Optional[int]=litellm.max_tokens, # petals requires max tokens to be set
51
+ do_sample: Optional[bool]=None,
52
+ temperature: Optional[float]=None,
53
+ top_k: Optional[int]=None,
54
+ top_p: Optional[float]=None,
55
+ repetition_penalty: Optional[float]=None) -> None:
56
+ locals_ = locals()
57
+ for key, value in locals_.items():
58
+ if key != 'self' and value is not None:
59
+ setattr(self.__class__, key, value)
60
+
61
+ @classmethod
62
+ def get_config(cls):
63
+ return {k: v for k, v in cls.__dict__.items()
64
+ if not k.startswith('__')
65
+ and not isinstance(v, (types.FunctionType, types.BuiltinFunctionType, classmethod, staticmethod))
66
+ and v is not None}
67
+
68
+ def completion(
69
+ model: str,
70
+ messages: list,
71
+ api_base: Optional[str],
72
+ model_response: ModelResponse,
73
+ print_verbose: Callable,
74
+ encoding,
75
+ logging_obj,
76
+ optional_params=None,
77
+ stream=False,
78
+ litellm_params=None,
79
+ logger_fn=None,
80
+ ):
81
+ ## Load Config
82
+ config = litellm.PetalsConfig.get_config()
83
+ for k, v in config.items():
84
+ if k not in optional_params: # completion(top_k=3) > petals_config(top_k=3) <- allows for dynamic variables to be passed in
85
+ optional_params[k] = v
86
+
87
+ if model in litellm.custom_prompt_dict:
88
+ # check if the model has a registered custom prompt
89
+ model_prompt_details = litellm.custom_prompt_dict[model]
90
+ prompt = custom_prompt(
91
+ role_dict=model_prompt_details["roles"],
92
+ initial_prompt_value=model_prompt_details["initial_prompt_value"],
93
+ final_prompt_value=model_prompt_details["final_prompt_value"],
94
+ messages=messages
95
+ )
96
+ else:
97
+ prompt = prompt_factory(model=model, messages=messages)
98
+
99
+ if api_base:
100
+ ## LOGGING
101
+ logging_obj.pre_call(
102
+ input=prompt,
103
+ api_key="",
104
+ additional_args={"complete_input_dict": optional_params, "api_base": api_base},
105
+ )
106
+ data = {
107
+ "model": model,
108
+ "inputs": prompt,
109
+ **optional_params
110
+ }
111
+
112
+ ## COMPLETION CALL
113
+ response = requests.post(api_base, data=data)
114
+
115
+ ## LOGGING
116
+ logging_obj.post_call(
117
+ input=prompt,
118
+ api_key="",
119
+ original_response=response.text,
120
+ additional_args={"complete_input_dict": optional_params},
121
+ )
122
+
123
+ ## RESPONSE OBJECT
124
+ try:
125
+ output_text = response.json()["outputs"]
126
+ except Exception as e:
127
+ raise PetalsError(status_code=response.status_code, message=str(e))
128
+
129
+ else:
130
+ try:
131
+ import torch
132
+ from transformers import AutoTokenizer
133
+ from petals import AutoDistributedModelForCausalLM # type: ignore
134
+ except:
135
+ raise Exception(
136
+ "Importing torch, transformers, petals failed\nTry pip installing petals \npip install git+https://github.com/bigscience-workshop/petals"
137
+ )
138
+
139
+ model = model
140
+
141
+ tokenizer = AutoTokenizer.from_pretrained(model, use_fast=False, add_bos_token=False)
142
+ model_obj = AutoDistributedModelForCausalLM.from_pretrained(model)
143
+
144
+ ## LOGGING
145
+ logging_obj.pre_call(
146
+ input=prompt,
147
+ api_key="",
148
+ additional_args={"complete_input_dict": optional_params},
149
+ )
150
+
151
+ ## COMPLETION CALL
152
+ inputs = tokenizer(prompt, return_tensors="pt")["input_ids"]
153
+
154
+ # optional params: max_new_tokens=1,temperature=0.9, top_p=0.6
155
+ outputs = model_obj.generate(inputs, **optional_params)
156
+
157
+ ## LOGGING
158
+ logging_obj.post_call(
159
+ input=prompt,
160
+ api_key="",
161
+ original_response=outputs,
162
+ additional_args={"complete_input_dict": optional_params},
163
+ )
164
+ ## RESPONSE OBJECT
165
+ output_text = tokenizer.decode(outputs[0])
166
+
167
+ if len(output_text) > 0:
168
+ model_response["choices"][0]["message"]["content"] = output_text
169
+
170
+ prompt_tokens = len(
171
+ encoding.encode(prompt)
172
+ )
173
+ completion_tokens = len(
174
+ encoding.encode(model_response["choices"][0]["message"].get("content"))
175
+ )
176
+
177
+ model_response["created"] = int(time.time())
178
+ model_response["model"] = model
179
+ usage = Usage(
180
+ prompt_tokens=prompt_tokens,
181
+ completion_tokens=completion_tokens,
182
+ total_tokens=prompt_tokens + completion_tokens
183
+ )
184
+ model_response.usage = usage
185
+ return model_response
186
+
187
+ def embedding():
188
+ # logic for parsing in - calling - parsing out model embedding calls
189
+ pass
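Each provider here finishes by counting prompt and completion tokens with the `encoding` object litellm passes in, then attaching a `Usage` record. A rough standalone sketch of that accounting using tiktoken directly (the "cl100k_base" encoder is an assumption for illustration; litellm chooses the encoder per model):

```python
import tiktoken

encoding = tiktoken.get_encoding("cl100k_base")   # illustrative encoder choice

prompt = "Hello, how are you?"
completion_text = "I'm doing well, thank you!"

prompt_tokens = len(encoding.encode(prompt))
completion_tokens = len(encoding.encode(completion_text))

usage = {
    "prompt_tokens": prompt_tokens,
    "completion_tokens": completion_tokens,
    "total_tokens": prompt_tokens + completion_tokens,
}
print(usage)
```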
litellm/llms/prompt_templates/factory.py ADDED
@@ -0,0 +1,360 @@
1
+ from enum import Enum
2
+ import requests, traceback
3
+ import json
4
+ from jinja2 import Template, exceptions, Environment, meta
5
+ from typing import Optional
6
+
7
+ def default_pt(messages):
8
+ return " ".join(message["content"] for message in messages)
9
+
10
+ # alpaca prompt template - for models like mythomax, etc.
11
+ def alpaca_pt(messages):
12
+ prompt = custom_prompt(
13
+ role_dict={
14
+ "system": {
15
+ "pre_message": "### Instruction:\n",
16
+ "post_message": "\n\n",
17
+ },
18
+ "user": {
19
+ "pre_message": "### Instruction:\n",
20
+ "post_message": "\n\n",
21
+ },
22
+ "assistant": {
23
+ "pre_message": "### Response:\n",
24
+ "post_message": "\n\n"
25
+ }
26
+ },
27
+ bos_token="<s>",
28
+ eos_token="</s>",
29
+ messages=messages
30
+ )
31
+ return prompt
32
+
33
+ # Llama2 prompt template
34
+ def llama_2_chat_pt(messages):
35
+ prompt = custom_prompt(
36
+ role_dict={
37
+ "system": {
38
+ "pre_message": "[INST] <<SYS>>\n",
39
+ "post_message": "\n<</SYS>>\n [/INST]\n"
40
+ },
41
+ "user": { # follow this format https://github.com/facebookresearch/llama/blob/77062717054710e352a99add63d160274ce670c6/llama/generation.py#L348
42
+ "pre_message": "[INST] ",
43
+ "post_message": " [/INST]\n"
44
+ },
45
+ "assistant": {
46
+ "post_message": "\n" # follows this - https://replicate.com/blog/how-to-prompt-llama
47
+ }
48
+ },
49
+ messages=messages,
50
+ bos_token="<s>",
51
+ eos_token="</s>"
52
+ )
53
+ return prompt
54
+
55
+ def ollama_pt(model, messages): # https://github.com/jmorganca/ollama/blob/af4cf55884ac54b9e637cd71dadfe9b7a5685877/docs/modelfile.md#template
56
+
57
+ if "instruct" in model:
58
+ prompt = custom_prompt(
59
+ role_dict={
60
+ "system": {
61
+ "pre_message": "### System:\n",
62
+ "post_message": "\n"
63
+ },
64
+ "user": {
65
+ "pre_message": "### User:\n",
66
+ "post_message": "\n",
67
+ },
68
+ "assistant": {
69
+ "pre_message": "### Response:\n",
70
+ "post_message": "\n",
71
+ }
72
+ },
73
+ final_prompt_value="### Response:",
74
+ messages=messages
75
+ )
76
+ else:
77
+ prompt = "".join(m["content"] for m in messages)
78
+ return prompt
79
+
80
+ def mistral_instruct_pt(messages):
81
+ prompt = custom_prompt(
82
+ initial_prompt_value="<s>",
83
+ role_dict={
84
+ "system": {
85
+ "pre_message": "[INST]",
86
+ "post_message": "[/INST]"
87
+ },
88
+ "user": {
89
+ "pre_message": "[INST]",
90
+ "post_message": "[/INST]"
91
+ },
92
+ "assistant": {
93
+ "pre_message": "[INST]",
94
+ "post_message": "[/INST]"
95
+ }
96
+ },
97
+ final_prompt_value="</s>",
98
+ messages=messages
99
+ )
100
+ return prompt
101
+
102
+ # Falcon prompt template - from https://github.com/lm-sys/FastChat/blob/main/fastchat/conversation.py#L110
103
+ def falcon_instruct_pt(messages):
104
+ prompt = ""
105
+ for message in messages:
106
+ if message["role"] == "system":
107
+ prompt += message["content"]
108
+ else:
109
+ prompt += message['role']+":"+ message["content"].replace("\r\n", "\n").replace("\n\n", "\n")
110
+ prompt += "\n\n"
111
+
112
+ return prompt
113
+
114
+ def falcon_chat_pt(messages):
115
+ prompt = ""
116
+ for message in messages:
117
+ if message["role"] == "system":
118
+ prompt += "System: " + message["content"]
119
+ elif message["role"] == "assistant":
120
+ prompt += "Falcon: " + message["content"]
121
+ elif message["role"] == "user":
122
+ prompt += "User: " + message["content"]
123
+
124
+ return prompt
125
+
126
+ # MPT prompt template - from https://github.com/lm-sys/FastChat/blob/main/fastchat/conversation.py#L110
127
+ def mpt_chat_pt(messages):
128
+ prompt = ""
129
+ for message in messages:
130
+ if message["role"] == "system":
131
+ prompt += "<|im_start|>system" + message["content"] + "<|im_end|>" + "\n"
132
+ elif message["role"] == "assistant":
133
+ prompt += "<|im_start|>assistant" + message["content"] + "<|im_end|>" + "\n"
134
+ elif message["role"] == "user":
135
+ prompt += "<|im_start|>user" + message["content"] + "<|im_end|>" + "\n"
136
+ return prompt
137
+
138
+ # WizardCoder prompt template - https://huggingface.co/WizardLM/WizardCoder-Python-34B-V1.0#prompt-format
139
+ def wizardcoder_pt(messages):
140
+ prompt = ""
141
+ for message in messages:
142
+ if message["role"] == "system":
143
+ prompt += message["content"] + "\n\n"
144
+ elif message["role"] == "user": # map to 'Instruction'
145
+ prompt += "### Instruction:\n" + message["content"] + "\n\n"
146
+ elif message["role"] == "assistant": # map to 'Response'
147
+ prompt += "### Response:\n" + message["content"] + "\n\n"
148
+ return prompt
149
+
150
+ # Phind-CodeLlama prompt template - https://huggingface.co/Phind/Phind-CodeLlama-34B-v2#how-to-prompt-the-model
151
+ def phind_codellama_pt(messages):
152
+ prompt = ""
153
+ for message in messages:
154
+ if message["role"] == "system":
155
+ prompt += "### System Prompt\n" + message["content"] + "\n\n"
156
+ elif message["role"] == "user":
157
+ prompt += "### User Message\n" + message["content"] + "\n\n"
158
+ elif message["role"] == "assistant":
159
+ prompt += "### Assistant\n" + message["content"] + "\n\n"
160
+ return prompt
161
+
162
+ def hf_chat_template(model: str, messages: list):
163
+ ## get the tokenizer config from huggingface
164
+ def _get_tokenizer_config(hf_model_name):
165
+ url = f"https://huggingface.co/{hf_model_name}/raw/main/tokenizer_config.json"
166
+ # Make a GET request to fetch the JSON data
167
+ response = requests.get(url)
168
+ if response.status_code == 200:
169
+ # Parse the JSON data
170
+ tokenizer_config = json.loads(response.content)
171
+ return {"status": "success", "tokenizer": tokenizer_config}
172
+ else:
173
+ return {"status": "failure"}
174
+ tokenizer_config = _get_tokenizer_config(model)
175
+ if tokenizer_config["status"] == "failure" or "chat_template" not in tokenizer_config["tokenizer"]:
176
+ raise Exception("No chat template found")
177
+ ## read the bos token, eos token and chat template from the json
178
+ tokenizer_config = tokenizer_config["tokenizer"]
179
+ bos_token = tokenizer_config["bos_token"]
180
+ eos_token = tokenizer_config["eos_token"]
181
+ chat_template = tokenizer_config["chat_template"]
182
+
183
+ def raise_exception(message):
184
+ raise Exception(f"Error message - {message}")
185
+
186
+ # Create a template object from the template text
187
+ env = Environment()
188
+ env.globals['raise_exception'] = raise_exception
189
+ template = env.from_string(chat_template)
190
+
191
+ def _is_system_in_template():
192
+ try:
193
+ # Try rendering the template with a system message
194
+ response = template.render(messages=[{"role": "system", "content": "test"}], eos_token= "<eos>", bos_token= "<bos>")
195
+ return True
196
+
197
+ # This will be raised if Jinja attempts to render the system message and it can't
198
+ except:
199
+ return False
200
+
201
+ try:
202
+ # Render the template with the provided values
203
+ if _is_system_in_template():
204
+ rendered_text = template.render(bos_token=bos_token, eos_token=eos_token, messages=messages)
205
+ else:
206
+ # treat a system message as a user message, if system not in template
207
+ try:
208
+ reformatted_messages = []
209
+ for message in messages:
210
+ if message["role"] == "system":
211
+ reformatted_messages.append({"role": "user", "content": message["content"]})
212
+ else:
213
+ reformatted_messages.append(message)
214
+ rendered_text = template.render(bos_token=bos_token, eos_token=eos_token, messages=reformatted_messages)
215
+ except Exception as e:
216
+ if "Conversation roles must alternate user/assistant" in str(e):
217
+ # reformat messages to ensure user/assistant are alternating, if there's either 2 consecutive 'user' messages or 2 consecutive 'assistant' message, add a blank 'user' or 'assistant' message to ensure compatibility
218
+ new_messages = []
219
+ for i in range(len(reformatted_messages)-1):
220
+ new_messages.append(reformatted_messages[i])
221
+ if reformatted_messages[i]["role"] == reformatted_messages[i+1]["role"]:
222
+ if reformatted_messages[i]["role"] == "user":
223
+ new_messages.append({"role": "assistant", "content": ""})
224
+ else:
225
+ new_messages.append({"role": "user", "content": ""})
226
+ new_messages.append(reformatted_messages[-1])
227
+ rendered_text = template.render(bos_token=bos_token, eos_token=eos_token, messages=new_messages)
228
+ return rendered_text
229
+ except:
230
+ raise Exception("Error rendering template")
231
+
232
+ # Anthropic template
233
+ def claude_2_1_pt(messages: list): # format - https://docs.anthropic.com/claude/docs/how-to-use-system-prompts
234
+ class AnthropicConstants(Enum):
235
+ HUMAN_PROMPT = "\n\nHuman: "
236
+ AI_PROMPT = "\n\nAssistant: "
237
+
238
+ prompt = ""
239
+ for idx, message in enumerate(messages): # needs to start with `\n\nHuman: ` and end with `\n\nAssistant: `
240
+ if message["role"] == "user":
241
+ prompt += (
242
+ f"{AnthropicConstants.HUMAN_PROMPT.value}{message['content']}"
243
+ )
244
+ elif message["role"] == "system":
245
+ prompt += (
246
+ f"{message['content']}"
247
+ )
248
+ else:
249
+ prompt += (
250
+ f"{AnthropicConstants.AI_PROMPT.value}{message['content']}"
251
+ )
252
+ if idx == 0 and message["role"] == "assistant": # ensure the prompt always starts with `\n\nHuman: `
253
+ prompt = f"{AnthropicConstants.HUMAN_PROMPT.value}" + prompt
254
+ prompt += f"{AnthropicConstants.AI_PROMPT.value}"
255
+ return prompt
256
+
257
+ def anthropic_pt(messages: list): # format - https://docs.anthropic.com/claude/reference/complete_post
258
+ class AnthropicConstants(Enum):
259
+ HUMAN_PROMPT = "\n\nHuman: "
260
+ AI_PROMPT = "\n\nAssistant: "
261
+
262
+ prompt = ""
263
+ for idx, message in enumerate(messages): # needs to start with `\n\nHuman: ` and end with `\n\nAssistant: `
264
+ if message["role"] == "user":
265
+ prompt += (
266
+ f"{AnthropicConstants.HUMAN_PROMPT.value}{message['content']}"
267
+ )
268
+ elif message["role"] == "system":
269
+ prompt += (
270
+ f"{AnthropicConstants.HUMAN_PROMPT.value}<admin>{message['content']}</admin>"
271
+ )
272
+ else:
273
+ prompt += (
274
+ f"{AnthropicConstants.AI_PROMPT.value}{message['content']}"
275
+ )
276
+ if idx == 0 and message["role"] == "assistant": # ensure the prompt always starts with `\n\nHuman: `
277
+ prompt = f"{AnthropicConstants.HUMAN_PROMPT.value}" + prompt
278
+ prompt += f"{AnthropicConstants.AI_PROMPT.value}"
279
+ return prompt
280
+
281
+ # Function call template
282
+ def function_call_prompt(messages: list, functions: list):
283
+ function_prompt = "The following functions are available to you:"
284
+ for function in functions:
285
+ function_prompt += f"""\n{function}\n"""
286
+
287
+ function_added_to_prompt = False
288
+ for message in messages:
289
+ if "system" in message["role"]:
290
+ message['content'] += f"""{function_prompt}"""
291
+ function_added_to_prompt = True
292
+
293
+ if function_added_to_prompt == False:
294
+ messages.append({'role': 'system', 'content': f"""{function_prompt}"""})
295
+
296
+ return messages
297
+
298
+
299
+ # Custom prompt template
300
+ def custom_prompt(role_dict: dict, messages: list, initial_prompt_value: str="", final_prompt_value: str="", bos_token: str="", eos_token: str=""):
301
+ prompt = bos_token + initial_prompt_value
302
+ bos_open = True
303
+ ## a bos token is at the start of a system / human message
304
+ ## an eos token is at the end of the assistant response to the message
305
+ for message in messages:
306
+ role = message["role"]
307
+
308
+ if role in ["system", "human"] and not bos_open:
309
+ prompt += bos_token
310
+ bos_open = True
311
+
312
+ pre_message_str = role_dict[role]["pre_message"] if role in role_dict and "pre_message" in role_dict[role] else ""
313
+ post_message_str = role_dict[role]["post_message"] if role in role_dict and "post_message" in role_dict[role] else ""
314
+ prompt += pre_message_str + message["content"] + post_message_str
315
+
316
+ if role == "assistant":
317
+ prompt += eos_token
318
+ bos_open = False
319
+
320
+ prompt += final_prompt_value
321
+ return prompt
322
+
323
+ def prompt_factory(model: str, messages: list, custom_llm_provider: Optional[str]=None):
324
+ original_model_name = model
325
+ model = model.lower()
326
+ if custom_llm_provider == "ollama":
327
+ return ollama_pt(model=model, messages=messages)
328
+ elif custom_llm_provider == "anthropic":
329
+ if "claude-2.1" in model:
330
+ return claude_2_1_pt(messages=messages)
331
+ else:
332
+ return anthropic_pt(messages=messages)
333
+
334
+ try:
335
+ if "meta-llama/llama-2" in model and "chat" in model:
336
+ return llama_2_chat_pt(messages=messages)
337
+ elif "tiiuae/falcon" in model: # Note: for the instruct models, it's best to use a User: .., Assistant:.. approach in your prompt template.
338
+ if model == "tiiuae/falcon-180B-chat":
339
+ return falcon_chat_pt(messages=messages)
340
+ elif "instruct" in model:
341
+ return falcon_instruct_pt(messages=messages)
342
+ elif "mosaicml/mpt" in model:
343
+ if "chat" in model:
344
+ return mpt_chat_pt(messages=messages)
345
+ elif "codellama/codellama" in model:
346
+ if "instruct" in model:
347
+ return llama_2_chat_pt(messages=messages) # https://huggingface.co/blog/codellama#conversational-instructions
348
+ elif "wizardlm/wizardcoder" in model:
349
+ return wizardcoder_pt(messages=messages)
350
+ elif "phind/phind-codellama" in model:
351
+ return phind_codellama_pt(messages=messages)
352
+ elif "togethercomputer/llama-2" in model and ("instruct" in model or "chat" in model):
353
+ return llama_2_chat_pt(messages=messages)
354
+ elif model in ["gryphe/mythomax-l2-13b", "gryphe/mythomix-l2-13b", "gryphe/mythologic-l2-13b"]:
355
+ return alpaca_pt(messages=messages)
356
+ else:
357
+ return hf_chat_template(original_model_name, messages)
358
+ except:
359
+ return default_pt(messages=messages) # default that covers Bloom, T-5, any non-chat tuned model (e.g. base Llama2)
360
+
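A hedged usage sketch of `custom_prompt()` from the factory above, showing how a role dictionary is flattened into a single prompt string; the import path assumes the package layout added in this commit, and the template strings are illustrative:

```python
from litellm.llms.prompt_templates.factory import custom_prompt

prompt = custom_prompt(
    role_dict={
        "system":    {"pre_message": "<<SYS>>\n", "post_message": "\n<</SYS>>\n"},
        "user":      {"pre_message": "[INST] ",   "post_message": " [/INST]\n"},
        "assistant": {"post_message": "\n"},
    },
    messages=[
        {"role": "system", "content": "Be brief."},
        {"role": "user", "content": "Hi"},
    ],
    bos_token="<s>",
    eos_token="</s>",
)
print(prompt)
# <s><<SYS>>
# Be brief.
# <</SYS>>
# [INST] Hi [/INST]
```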
litellm/llms/replicate.py ADDED
@@ -0,0 +1,302 @@
1
+ import os, types
2
+ import json
3
+ import requests
4
+ import time
5
+ from typing import Callable, Optional
6
+ from litellm.utils import ModelResponse, Usage
7
+ import litellm
8
+ import httpx
9
+ from .prompt_templates.factory import prompt_factory, custom_prompt
10
+
11
+ class ReplicateError(Exception):
12
+ def __init__(self, status_code, message):
13
+ self.status_code = status_code
14
+ self.message = message
15
+ self.request = httpx.Request(method="POST", url="https://api.replicate.com/v1/deployments")
16
+ self.response = httpx.Response(status_code=status_code, request=self.request)
17
+ super().__init__(
18
+ self.message
19
+ ) # Call the base class constructor with the parameters it needs
20
+
21
+ class ReplicateConfig():
22
+ """
23
+ Reference: https://replicate.com/meta/llama-2-70b-chat/api
24
+ - `prompt` (string): The prompt to send to the model.
25
+
26
+ - `system_prompt` (string): The system prompt to send to the model. This is prepended to the prompt and helps guide system behavior. Default value: `You are a helpful assistant`.
27
+
28
+ - `max_new_tokens` (integer): Maximum number of tokens to generate. Typically, a word is made up of 2-3 tokens. Default value: `128`.
29
+
30
+ - `min_new_tokens` (integer): Minimum number of tokens to generate. To disable, set to `-1`. A word is usually 2-3 tokens. Default value: `-1`.
31
+
32
+ - `temperature` (number): Adjusts the randomness of outputs. Values greater than 1 increase randomness, 0 is deterministic, and 0.75 is a reasonable starting value. Default value: `0.75`.
33
+
34
+ - `top_p` (number): During text decoding, it samples from the top `p` percentage of most likely tokens. Reduce this to ignore less probable tokens. Default value: `0.9`.
35
+
36
+ - `top_k` (integer): During text decoding, samples from the top `k` most likely tokens. Reduce this to ignore less probable tokens. Default value: `50`.
37
+
38
+ - `stop_sequences` (string): A comma-separated list of sequences to stop generation at. For example, inputting '<end>,<stop>' will cease generation at the first occurrence of either '<end>' or '<stop>'.
39
+
40
+ - `seed` (integer): This is the seed for the random generator. Leave it blank to randomize the seed.
41
+
42
+ - `debug` (boolean): If set to `True`, it provides debugging output in logs.
43
+
44
+ Please note that Replicate's mapping of these parameters can be inconsistent across models, so not all of these parameters are available for every model.
45
+ """
46
+ system_prompt: Optional[str]=None
47
+ max_new_tokens: Optional[int]=None
48
+ min_new_tokens: Optional[int]=None
49
+ temperature: Optional[float]=None
50
+ top_p: Optional[float]=None
51
+ top_k: Optional[int]=None
52
+ stop_sequences: Optional[str]=None
53
+ seed: Optional[int]=None
54
+ debug: Optional[bool]=None
55
+
56
+ def __init__(self,
57
+ system_prompt: Optional[str]=None,
58
+ max_new_tokens: Optional[int]=None,
59
+ min_new_tokens: Optional[int]=None,
60
+ temperature: Optional[float]=None,
61
+ top_p: Optional[float]=None,
62
+ top_k: Optional[int]=None,
63
+ stop_sequences: Optional[str]=None,
64
+ seed: Optional[int]=None,
65
+ debug: Optional[bool]=None) -> None:
66
+ locals_ = locals()
67
+ for key, value in locals_.items():
68
+ if key != 'self' and value is not None:
69
+ setattr(self.__class__, key, value)
70
+
71
+ @classmethod
72
+ def get_config(cls):
73
+ return {k: v for k, v in cls.__dict__.items()
74
+ if not k.startswith('__')
75
+ and not isinstance(v, (types.FunctionType, types.BuiltinFunctionType, classmethod, staticmethod))
76
+ and v is not None}
77
+
78
+
79
+
80
+ # Function to start a prediction and get the prediction URL
81
+ def start_prediction(version_id, input_data, api_token, api_base, logging_obj, print_verbose):
82
+ base_url = api_base
83
+ if "deployments" in version_id:
84
+ print_verbose("\nLiteLLM: Request to custom replicate deployment")
85
+ version_id = version_id.replace("deployments/", "")
86
+ base_url = f"https://api.replicate.com/v1/deployments/{version_id}"
87
+ print_verbose(f"Deployment base URL: {base_url}\n")
88
+
89
+ headers = {
90
+ "Authorization": f"Token {api_token}",
91
+ "Content-Type": "application/json"
92
+ }
93
+
94
+ initial_prediction_data = {
95
+ "version": version_id,
96
+ "input": input_data,
97
+ }
98
+
99
+ ## LOGGING
100
+ logging_obj.pre_call(
101
+ input=input_data["prompt"],
102
+ api_key="",
103
+ additional_args={"complete_input_dict": initial_prediction_data, "headers": headers, "api_base": base_url},
104
+ )
105
+
106
+ response = requests.post(f"{base_url}/predictions", json=initial_prediction_data, headers=headers)
107
+ if response.status_code == 201:
108
+ response_data = response.json()
109
+ return response_data.get("urls", {}).get("get")
110
+ else:
111
+ raise ReplicateError(response.status_code, f"Failed to start prediction {response.text}")
112
+
113
+ # Function to handle prediction response (non-streaming)
114
+ def handle_prediction_response(prediction_url, api_token, print_verbose):
115
+ output_string = ""
116
+ headers = {
117
+ "Authorization": f"Token {api_token}",
118
+ "Content-Type": "application/json"
119
+ }
120
+
121
+ status = ""
122
+ logs = ""
123
+ while status not in ["succeeded", "failed", "canceled"]:
124
+ print_verbose(f"replicate: polling endpoint: {prediction_url}")
125
+ time.sleep(0.5)
126
+ response = requests.get(prediction_url, headers=headers)
127
+ if response.status_code == 200:
128
+ response_data = response.json()
129
+ if "output" in response_data:
130
+ output_string = "".join(response_data['output'])
131
+ print_verbose(f"Non-streamed output:{output_string}")
132
+ status = response_data.get('status', None)
133
+ logs = response_data.get("logs", "")
134
+ if status == "failed":
135
+ replicate_error = response_data.get("error", "")
136
+ raise ReplicateError(status_code=400, message=f"Error: {replicate_error}, \nReplicate logs:{logs}")
137
+ else:
138
+ # this status poll can fail temporarily; it does not mean the replicate request failed. The replicate request only fails when status == "failed"
139
+ print_verbose("Replicate: Failed to fetch prediction status and output.")
140
+ return output_string, logs
141
+
142
+ # Function to handle prediction response (streaming)
143
+ def handle_prediction_response_streaming(prediction_url, api_token, print_verbose):
144
+ previous_output = ""
145
+ output_string = ""
146
+
147
+ headers = {
148
+ "Authorization": f"Token {api_token}",
149
+ "Content-Type": "application/json"
150
+ }
151
+ status = ""
152
+ while status not in ["succeeded", "failed", "canceled"]:
153
+ time.sleep(0.5) # prevent being rate limited by replicate
154
+ print_verbose(f"replicate: polling endpoint: {prediction_url}")
155
+ response = requests.get(prediction_url, headers=headers)
156
+ if response.status_code == 200:
157
+ response_data = response.json()
158
+ status = response_data['status']
159
+ if "output" in response_data:
160
+ output_string = "".join(response_data['output'])
161
+ new_output = output_string[len(previous_output):]
162
+ print_verbose(f"New chunk: {new_output}")
163
+ yield {"output": new_output, "status": status}
164
+ previous_output = output_string
165
+ status = response_data['status']
166
+ if status == "failed":
167
+ replicate_error = response_data.get("error", "")
168
+ raise ReplicateError(status_code=400, message=f"Error: {replicate_error}")
169
+ else:
170
+ # this status poll can fail temporarily; it does not mean the replicate request failed. The replicate request only fails when status == "failed"
171
+ print_verbose(f"Replicate: Failed to fetch prediction status and output.{response.status_code}{response.text}")
172
+
173
+
174
+ # Function to extract version ID from model string
175
+ def model_to_version_id(model):
176
+ if ":" in model:
177
+ split_model = model.split(":")
178
+ return split_model[1]
179
+ return model
180
+
181
+ # Main function for prediction completion
182
+ def completion(
183
+ model: str,
184
+ messages: list,
185
+ api_base: str,
186
+ model_response: ModelResponse,
187
+ print_verbose: Callable,
188
+ logging_obj,
189
+ api_key,
190
+ encoding,
191
+ custom_prompt_dict={},
192
+ optional_params=None,
193
+ litellm_params=None,
194
+ logger_fn=None,
195
+ ):
196
+ # Start a prediction and get the prediction URL
197
+ version_id = model_to_version_id(model)
198
+ ## Load Config
199
+ config = litellm.ReplicateConfig.get_config()
200
+ for k, v in config.items():
201
+ if k not in optional_params: # completion(top_k=3) > replicate_config(top_k=3) <- allows for dynamic variables to be passed in
202
+ optional_params[k] = v
203
+
204
+ system_prompt = None
205
+ if optional_params is not None and "supports_system_prompt" in optional_params:
206
+ supports_sys_prompt = optional_params.pop("supports_system_prompt")
207
+ else:
208
+ supports_sys_prompt = False
209
+
210
+ if supports_sys_prompt:
211
+ for i in range(len(messages)):
212
+ if messages[i]["role"] == "system":
213
+ first_sys_message = messages.pop(i)
214
+ system_prompt = first_sys_message["content"]
215
+ break
216
+
217
+ if model in custom_prompt_dict:
218
+ # check if the model has a registered custom prompt
219
+ model_prompt_details = custom_prompt_dict[model]
220
+ prompt = custom_prompt(
221
+ role_dict=model_prompt_details.get("roles", {}),
222
+ initial_prompt_value=model_prompt_details.get("initial_prompt_value", ""),
223
+ final_prompt_value=model_prompt_details.get("final_prompt_value", ""),
224
+ bos_token=model_prompt_details.get("bos_token", ""),
225
+ eos_token=model_prompt_details.get("eos_token", ""),
226
+ messages=messages,
227
+ )
228
+ else:
229
+ prompt = prompt_factory(model=model, messages=messages)
230
+
231
+ # If system prompt is supported, and a system prompt is provided, use it
232
+ if system_prompt is not None:
233
+ input_data = {
234
+ "prompt": prompt,
235
+ "system_prompt": system_prompt
236
+ }
237
+ # Otherwise, use the prompt as is
238
+ else:
239
+ input_data = {
240
+ "prompt": prompt,
241
+ **optional_params
242
+ }
243
+
244
+
245
+ ## COMPLETION CALL
246
+ ## Replicate Completion calls have 2 steps
247
+ ## Step1: Start Prediction: gets a prediction url
248
+ ## Step2: Poll prediction url for response
249
+ ## Step2: is handled with and without streaming
250
+ model_response["created"] = int(time.time()) # for pricing this must remain right before calling api
251
+ prediction_url = start_prediction(version_id, input_data, api_key, api_base, logging_obj=logging_obj, print_verbose=print_verbose)
252
+ print_verbose(prediction_url)
253
+
254
+ # Handle the prediction response (streaming or non-streaming)
255
+ if "stream" in optional_params and optional_params["stream"] == True:
256
+ print_verbose("streaming request")
257
+ return handle_prediction_response_streaming(prediction_url, api_key, print_verbose)
258
+ else:
259
+ result, logs = handle_prediction_response(prediction_url, api_key, print_verbose)
260
+ model_response["ended"] = time.time() # for pricing this must remain right after calling api
261
+ ## LOGGING
262
+ logging_obj.post_call(
263
+ input=prompt,
264
+ api_key="",
265
+ original_response=result,
266
+ additional_args={"complete_input_dict": input_data,"logs": logs, "api_base": prediction_url, },
267
+ )
268
+
269
+ print_verbose(f"raw model_response: {result}")
270
+
271
+ if len(result) == 0: # edge case, where result from replicate is empty
272
+ result = " "
273
+
274
+ ## Building RESPONSE OBJECT
275
+ if len(result) > 1:
276
+ model_response["choices"][0]["message"]["content"] = result
277
+
278
+ # Calculate usage
279
+ prompt_tokens = len(encoding.encode(prompt))
280
+ completion_tokens = len(encoding.encode(model_response["choices"][0]["message"].get("content", "")))
281
+ model_response["model"] = "replicate/" + model
282
+ usage = Usage(
283
+ prompt_tokens=prompt_tokens,
284
+ completion_tokens=completion_tokens,
285
+ total_tokens=prompt_tokens + completion_tokens
286
+ )
287
+ model_response.usage = usage
288
+ return model_response
289
+
290
+
291
+ # # Example usage:
292
+ # response = completion(
293
+ # api_key="",
294
+ # messages=[{"content": "good morning"}],
295
+ # model="replicate/llama-2-70b-chat:2796ee9483c3fd7aa2e171d38f4ca12251a30609463dcfd4cd76703f22e96cdf",
296
+ # model_response=ModelResponse(),
297
+ # print_verbose=print,
298
+ # logging_obj=print, # stub logging_obj
299
+ # optional_params={"stream": False}
300
+ # )
301
+
302
+ # print(response)
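The `## Load Config` block in `completion` above is the pattern these provider modules share: values set on `ReplicateConfig` act as package-level defaults, and anything passed per-call wins. A minimal sketch of that merge, assuming `ReplicateConfig` is exposed as `litellm.ReplicateConfig` (which the code above expects) and using illustrative values:

import litellm

# Set defaults once; __init__ stores non-None values as class attributes.
litellm.ReplicateConfig(max_new_tokens=256, temperature=0.7)

# Per-call params take precedence; the config only fills in missing keys.
optional_params = {"temperature": 0.2}
for k, v in litellm.ReplicateConfig.get_config().items():
    if k not in optional_params:
        optional_params[k] = v

print(optional_params)  # {'temperature': 0.2, 'max_new_tokens': 256}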
litellm/llms/sagemaker.py ADDED
@@ -0,0 +1,190 @@
1
+ import os, types
2
+ from enum import Enum
3
+ import json
4
+ import requests
5
+ import time
6
+ from typing import Callable, Optional
7
+ import litellm
8
+ from litellm.utils import ModelResponse, get_secret, Usage
9
+ import sys
10
+ from copy import deepcopy
11
+ import httpx
12
+
13
+ class SagemakerError(Exception):
14
+ def __init__(self, status_code, message):
15
+ self.status_code = status_code
16
+ self.message = message
17
+ self.request = httpx.Request(method="POST", url="https://us-west-2.console.aws.amazon.com/sagemaker")
18
+ self.response = httpx.Response(status_code=status_code, request=self.request)
19
+ super().__init__(
20
+ self.message
21
+ ) # Call the base class constructor with the parameters it needs
22
+
23
+ class SagemakerConfig():
24
+ """
25
+ Reference: https://d-uuwbxj1u4cnu.studio.us-west-2.sagemaker.aws/jupyter/default/lab/workspaces/auto-q/tree/DemoNotebooks/meta-textgeneration-llama-2-7b-SDK_1.ipynb
26
+ """
27
+ max_new_tokens: Optional[int]=None
28
+ top_p: Optional[float]=None
29
+ temperature: Optional[float]=None
30
+ return_full_text: Optional[bool]=None
31
+
32
+ def __init__(self,
33
+ max_new_tokens: Optional[int]=None,
34
+ top_p: Optional[float]=None,
35
+ temperature: Optional[float]=None,
36
+ return_full_text: Optional[bool]=None) -> None:
37
+ locals_ = locals()
38
+ for key, value in locals_.items():
39
+ if key != 'self' and value is not None:
40
+ setattr(self.__class__, key, value)
41
+
42
+ @classmethod
43
+ def get_config(cls):
44
+ return {k: v for k, v in cls.__dict__.items()
45
+ if not k.startswith('__')
46
+ and not isinstance(v, (types.FunctionType, types.BuiltinFunctionType, classmethod, staticmethod))
47
+ and v is not None}
48
+
49
+ """
50
+ SAGEMAKER AUTH Keys/Vars
51
+ os.environ['AWS_ACCESS_KEY_ID'] = ""
52
+ os.environ['AWS_SECRET_ACCESS_KEY'] = ""
53
+ """
54
+
55
+ # set os.environ['AWS_REGION_NAME'] = <your-region_name>
56
+
57
+ def completion(
58
+ model: str,
59
+ messages: list,
60
+ model_response: ModelResponse,
61
+ print_verbose: Callable,
62
+ encoding,
63
+ logging_obj,
64
+ optional_params=None,
65
+ litellm_params=None,
66
+ logger_fn=None,
67
+ ):
68
+ import boto3
69
+
70
+ # pop aws_secret_access_key, aws_access_key_id, aws_region_name from kwargs, since completion calls fail with them
71
+ aws_secret_access_key = optional_params.pop("aws_secret_access_key", None)
72
+ aws_access_key_id = optional_params.pop("aws_access_key_id", None)
73
+ aws_region_name = optional_params.pop("aws_region_name", None)
74
+
75
+ if aws_access_key_id != None:
76
+ # uses auth params passed to completion
77
+ # aws_access_key_id is not None, assume user is trying to auth using litellm.completion
78
+ client = boto3.client(
79
+ service_name="sagemaker-runtime",
80
+ aws_access_key_id=aws_access_key_id,
81
+ aws_secret_access_key=aws_secret_access_key,
82
+ region_name=aws_region_name,
83
+ )
84
+ else:
85
+ # aws_access_key_id is None, assume user is trying to auth using env variables
86
+ # boto3 automaticaly reads env variables
87
+
88
+ # we need to read region name from env
89
+ # I assume majority of users use .env for auth
90
+ region_name = (
91
+ get_secret("AWS_REGION_NAME") or
92
+ "us-west-2" # default to us-west-2 if user not specified
93
+ )
94
+ client = boto3.client(
95
+ service_name="sagemaker-runtime",
96
+ region_name=region_name,
97
+ )
98
+
99
+ # pop streaming if it's in the optional params as 'stream' raises an error with sagemaker
100
+ inference_params = deepcopy(optional_params)
101
+ inference_params.pop("stream", None)
102
+
103
+ ## Load Config
104
+ config = litellm.SagemakerConfig.get_config()
105
+ for k, v in config.items():
106
+ if k not in inference_params: # completion(top_k=3) > sagemaker_config(top_k=3) <- allows for dynamic variables to be passed in
107
+ inference_params[k] = v
108
+
109
+ model = model
110
+ prompt = ""
111
+ for message in messages:
112
+ if "role" in message:
113
+ if message["role"] == "user":
114
+ prompt += (
115
+ f"{message['content']}"
116
+ )
117
+ else:
118
+ prompt += (
119
+ f"{message['content']}"
120
+ )
121
+ else:
122
+ prompt += f"{message['content']}"
123
+
124
+ data = json.dumps({
125
+ "inputs": prompt,
126
+ "parameters": inference_params
127
+ }).encode('utf-8')
128
+
129
+ ## LOGGING
130
+ request_str = f"""
131
+ response = client.invoke_endpoint(
132
+ EndpointName={model},
133
+ ContentType="application/json",
134
+ Body={data},
135
+ CustomAttributes="accept_eula=true",
136
+ )
137
+ """ # type: ignore
138
+ logging_obj.pre_call(
139
+ input=prompt,
140
+ api_key="",
141
+ additional_args={"complete_input_dict": data, "request_str": request_str},
142
+ )
143
+ ## COMPLETION CALL
144
+ response = client.invoke_endpoint(
145
+ EndpointName=model,
146
+ ContentType="application/json",
147
+ Body=data,
148
+ CustomAttributes="accept_eula=true",
149
+ )
150
+ response = response["Body"].read().decode("utf8")
151
+ ## LOGGING
152
+ logging_obj.post_call(
153
+ input=prompt,
154
+ api_key="",
155
+ original_response=response,
156
+ additional_args={"complete_input_dict": data},
157
+ )
158
+ print_verbose(f"raw model_response: {response}")
159
+ ## RESPONSE OBJECT
160
+ completion_response = json.loads(response)
161
+ try:
162
+ completion_response_choices = completion_response[0]
163
+ if "generation" in completion_response_choices:
164
+ model_response["choices"][0]["message"]["content"] = completion_response_choices["generation"]
165
+ elif "generated_text" in completion_response_choices:
166
+ model_response["choices"][0]["message"]["content"] = completion_response_choices["generated_text"]
167
+ except:
168
+ raise SagemakerError(message=f"LiteLLM Error: Unable to parse sagemaker RAW RESPONSE {json.dumps(completion_response)}", status_code=500)
169
+
170
+ ## CALCULATING USAGE - baseten charges on time, not tokens - have some mapping of cost here.
171
+ prompt_tokens = len(
172
+ encoding.encode(prompt)
173
+ )
174
+ completion_tokens = len(
175
+ encoding.encode(model_response["choices"][0]["message"].get("content", ""))
176
+ )
177
+
178
+ model_response["created"] = int(time.time())
179
+ model_response["model"] = model
180
+ usage = Usage(
181
+ prompt_tokens=prompt_tokens,
182
+ completion_tokens=completion_tokens,
183
+ total_tokens=prompt_tokens + completion_tokens
184
+ )
185
+ model_response.usage = usage
186
+ return model_response
187
+
188
+ def embedding():
189
+ # logic for parsing in - calling - parsing out model embedding calls
190
+ pass
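For orientation, a minimal sketch of the raw SageMaker runtime call that `completion` above wraps. The endpoint name is a placeholder, the region defaults to `us-west-2` as in the code above, and credentials are assumed to come from the environment (`AWS_ACCESS_KEY_ID` / `AWS_SECRET_ACCESS_KEY`).

import json
import boto3

# boto3 reads AWS credentials from the environment or shared config files.
client = boto3.client(service_name="sagemaker-runtime", region_name="us-west-2")

payload = json.dumps({
    "inputs": "good morning",
    "parameters": {"max_new_tokens": 64, "temperature": 0.7},
}).encode("utf-8")

response = client.invoke_endpoint(
    EndpointName="my-llama-2-7b-endpoint",  # placeholder endpoint name
    ContentType="application/json",
    Body=payload,
    CustomAttributes="accept_eula=true",
)
print(response["Body"].read().decode("utf8"))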
litellm/llms/together_ai.py ADDED
@@ -0,0 +1,198 @@
1
+ import os, types
2
+ import json
3
+ from enum import Enum
4
+ import requests
5
+ import time
6
+ from typing import Callable, Optional
7
+ import litellm
8
+ import httpx
9
+ from litellm.utils import ModelResponse, Usage
10
+ from .prompt_templates.factory import prompt_factory, custom_prompt
11
+
12
+ class TogetherAIError(Exception):
13
+ def __init__(self, status_code, message):
14
+ self.status_code = status_code
15
+ self.message = message
16
+ self.request = httpx.Request(method="POST", url="https://api.together.xyz/inference")
17
+ self.response = httpx.Response(status_code=status_code, request=self.request)
18
+ super().__init__(
19
+ self.message
20
+ ) # Call the base class constructor with the parameters it needs
21
+
22
+ class TogetherAIConfig():
23
+ """
24
+ Reference: https://docs.together.ai/reference/inference
25
+
26
+ The class `TogetherAIConfig` provides configuration for the TogetherAI's API interface. Here are the parameters:
27
+
28
+ - `max_tokens` (int32, required): The maximum number of tokens to generate.
29
+
30
+ - `stop` (string, optional): A string sequence that will truncate (stop) the inference text output. For example, "\n\n" will stop generation as soon as the model generates two newlines.
31
+
32
+ - `temperature` (float, optional): A decimal number that determines the degree of randomness in the response. A value of 1 will always yield the same output. A temperature less than 1 favors more correctness and is appropriate for question answering or summarization. A value greater than 1 introduces more randomness in the output.
33
+
34
+ - `top_p` (float, optional): The `top_p` (nucleus) parameter is used to dynamically adjust the number of choices for each predicted token based on the cumulative probabilities. It specifies a probability threshold, below which all less likely tokens are filtered out. This technique helps to maintain diversity and generate more fluent and natural-sounding text.
35
+
36
+ - `top_k` (int32, optional): The `top_k` parameter is used to limit the number of choices for the next predicted word or token. It specifies the maximum number of tokens to consider at each step, based on their probability of occurrence. This technique helps to speed up the generation process and can improve the quality of the generated text by focusing on the most likely options.
37
+
38
+ - `repetition_penalty` (float, optional): A number that controls the diversity of generated text by reducing the likelihood of repeated sequences. Higher values decrease repetition.
39
+
40
+ - `logprobs` (int32, optional): This parameter is not documented in the API reference.
41
+ """
42
+ max_tokens: Optional[int]=None
43
+ stop: Optional[str]=None
44
+ temperature: Optional[float]=None
45
+ top_p: Optional[float]=None
46
+ top_k: Optional[int]=None
47
+ repetition_penalty: Optional[float]=None
48
+ logprobs: Optional[int]=None
49
+
50
+ def __init__(self,
51
+ max_tokens: Optional[int]=None,
52
+ stop: Optional[str]=None,
53
+ temperature: Optional[float]=None,
54
+ top_p: Optional[float]=None,
55
+ top_k: Optional[int]=None,
56
+ repetition_penalty: Optional[float]=None,
57
+ logprobs: Optional[int]=None) -> None:
58
+ locals_ = locals()
59
+ for key, value in locals_.items():
60
+ if key != 'self' and value is not None:
61
+ setattr(self.__class__, key, value)
62
+
63
+ @classmethod
64
+ def get_config(cls):
65
+ return {k: v for k, v in cls.__dict__.items()
66
+ if not k.startswith('__')
67
+ and not isinstance(v, (types.FunctionType, types.BuiltinFunctionType, classmethod, staticmethod))
68
+ and v is not None}
69
+
70
+
71
+ def validate_environment(api_key):
72
+ if api_key is None:
73
+ raise ValueError(
74
+ "Missing TogetherAI API Key - A call is being made to together_ai but no key is set either in the environment variables or via params"
75
+ )
76
+ headers = {
77
+ "accept": "application/json",
78
+ "content-type": "application/json",
79
+ "Authorization": "Bearer " + api_key,
80
+ }
81
+ return headers
82
+
83
+ def completion(
84
+ model: str,
85
+ messages: list,
86
+ api_base: str,
87
+ model_response: ModelResponse,
88
+ print_verbose: Callable,
89
+ encoding,
90
+ api_key,
91
+ logging_obj,
92
+ custom_prompt_dict={},
93
+ optional_params=None,
94
+ litellm_params=None,
95
+ logger_fn=None,
96
+ ):
97
+ headers = validate_environment(api_key)
98
+
99
+ ## Load Config
100
+ config = litellm.TogetherAIConfig.get_config()
101
+ for k, v in config.items():
102
+ if k not in optional_params: # completion(top_k=3) > togetherai_config(top_k=3) <- allows for dynamic variables to be passed in
103
+ optional_params[k] = v
104
+
105
+ print_verbose(f"CUSTOM PROMPT DICT: {custom_prompt_dict}; model: {model}")
106
+ if model in custom_prompt_dict:
107
+ # check if the model has a registered custom prompt
108
+ model_prompt_details = custom_prompt_dict[model]
109
+ prompt = custom_prompt(
110
+ role_dict=model_prompt_details.get("roles", {}),
111
+ initial_prompt_value=model_prompt_details.get("initial_prompt_value", ""),
112
+ final_prompt_value=model_prompt_details.get("final_prompt_value", ""),
113
+ bos_token=model_prompt_details.get("bos_token", ""),
114
+ eos_token=model_prompt_details.get("eos_token", ""),
115
+ messages=messages,
116
+ )
117
+ else:
118
+ prompt = prompt_factory(model=model, messages=messages)
119
+
120
+ data = {
121
+ "model": model,
122
+ "prompt": prompt,
123
+ "request_type": "language-model-inference",
124
+ **optional_params,
125
+ }
126
+
127
+ ## LOGGING
128
+ logging_obj.pre_call(
129
+ input=prompt,
130
+ api_key=api_key,
131
+ additional_args={"complete_input_dict": data, "headers": headers, "api_base": api_base},
132
+ )
133
+ ## COMPLETION CALL
134
+ if (
135
+ "stream_tokens" in optional_params
136
+ and optional_params["stream_tokens"] == True
137
+ ):
138
+ response = requests.post(
139
+ api_base,
140
+ headers=headers,
141
+ data=json.dumps(data),
142
+ stream=optional_params["stream_tokens"],
143
+ )
144
+ return response.iter_lines()
145
+ else:
146
+ response = requests.post(
147
+ api_base,
148
+ headers=headers,
149
+ data=json.dumps(data)
150
+ )
151
+ ## LOGGING
152
+ logging_obj.post_call(
153
+ input=prompt,
154
+ api_key=api_key,
155
+ original_response=response.text,
156
+ additional_args={"complete_input_dict": data},
157
+ )
158
+ print_verbose(f"raw model_response: {response.text}")
159
+ ## RESPONSE OBJECT
160
+ if response.status_code != 200:
161
+ raise TogetherAIError(
162
+ status_code=response.status_code, message=response.text
163
+ )
164
+ completion_response = response.json()
165
+
166
+ if "error" in completion_response:
167
+ raise TogetherAIError(
168
+ message=json.dumps(completion_response),
169
+ status_code=response.status_code,
170
+ )
171
+ elif "error" in completion_response["output"]:
172
+ raise TogetherAIError(
173
+ message=json.dumps(completion_response["output"]), status_code=response.status_code
174
+ )
175
+
176
+ if len(completion_response["output"]["choices"][0]["text"]) > 0:
177
+ model_response["choices"][0]["message"]["content"] = completion_response["output"]["choices"][0]["text"]
178
+
179
+ ## CALCULATING USAGE
180
+ prompt_tokens = len(encoding.encode(prompt))
181
+ completion_tokens = len(
182
+ encoding.encode(model_response["choices"][0]["message"].get("content", ""))
183
+ )
184
+ if "finish_reason" in completion_response["output"]["choices"][0]:
185
+ model_response.choices[0].finish_reason = completion_response["output"]["choices"][0]["finish_reason"]
186
+ model_response["created"] = int(time.time())
187
+ model_response["model"] = model
188
+ usage = Usage(
189
+ prompt_tokens=prompt_tokens,
190
+ completion_tokens=completion_tokens,
191
+ total_tokens=prompt_tokens + completion_tokens
192
+ )
193
+ model_response.usage = usage
194
+ return model_response
195
+
196
+ def embedding():
197
+ # logic for parsing in - calling - parsing out model embedding calls
198
+ pass
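A minimal sketch of the HTTP request the Together AI `completion` above issues. The API base comes from the error class above; the model id is illustrative, and the environment variable name used for the key is an assumption, not something this module defines.

import json
import os
import requests

api_base = "https://api.together.xyz/inference"
headers = {
    "accept": "application/json",
    "content-type": "application/json",
    "Authorization": "Bearer " + os.environ["TOGETHERAI_API_KEY"],  # assumed env var name
}

data = {
    "model": "togethercomputer/llama-2-7b-chat",  # illustrative model id
    "prompt": "[INST] good morning [/INST]",
    "request_type": "language-model-inference",
    "max_tokens": 64,
    "temperature": 0.7,
}

response = requests.post(api_base, headers=headers, data=json.dumps(data))
response.raise_for_status()
print(response.json()["output"]["choices"][0]["text"])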
litellm/llms/tokenizers/anthropic_tokenizer.json ADDED
The diff for this file is too large to render. See raw diff