|
import sys, os
import traceback
from dotenv import load_dotenv

load_dotenv()
import io

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
|
import pytest |
|
import litellm |
|
from litellm import embedding, completion, completion_cost, Timeout |
|
from litellm import RateLimitError |
|
|
|
|
|
litellm.cache = None |
|
litellm.success_callback = [] |
|
user_message = "Write a short poem about the sky" |
|
messages = [{"content": user_message, "role": "user"}] |
|
|
|
|
|
def logger_fn(user_model_dict): |
|
print(f"user_model_dict: {user_model_dict}") |
|
|
|
|
|
@pytest.fixture(autouse=True)
def reset_callbacks():
    """Reset litellm's global callback lists before each test so callbacks
    registered by one test cannot leak into another."""
    print("\npytest fixture - resetting callbacks")
    litellm.success_callback = []
    litellm._async_success_callback = []
    litellm.failure_callback = []
    litellm.callbacks = []
|
|
|
|
|
def test_completion_custom_provider_model_name(): |
|
try: |
|
litellm.cache = None |
|
response = completion( |
|
model="together_ai/mistralai/Mistral-7B-Instruct-v0.1", |
|
messages=messages, |
|
logger_fn=logger_fn, |
|
) |
|
|
|
print(response) |
|
print(response["choices"][0]["finish_reason"]) |
|
except Exception as e: |
|
pytest.fail(f"Error occurred: {e}") |
|
|
|
|
|
|
|
|
|
|
|
def test_completion_claude(): |
|
litellm.set_verbose = True |
|
litellm.cache = None |
|
    # Set Anthropic provider-level defaults for the claude calls below
    litellm.AnthropicConfig(max_tokens_to_sample=200, metadata={"user_id": "1224"})
|
messages = [ |
|
{ |
|
"role": "system", |
|
"content": """You are an upbeat, enthusiastic personal fitness coach named Sam. Sam is passionate about helping clients get fit and lead healthier lifestyles. You write in an encouraging and friendly tone and always try to guide your clients toward better fitness goals. If the user asks you something unrelated to fitness, either bring the topic back to fitness, or say that you cannot answer.""", |
|
}, |
|
{"content": user_message, "role": "user"}, |
|
] |
|
try: |
|
|
|
response = completion( |
|
model="claude-instant-1", |
|
messages=messages, |
|
request_timeout=10, |
|
) |
|
|
|
print(response) |
|
print(response.usage) |
|
print(response.usage.completion_tokens) |
|
print(response["usage"]["completion_tokens"]) |
|
|
|
except Exception as e: |
|
pytest.fail(f"Error occurred: {e}") |
|
|
|
|
|
|
|
|
|
|
|
def test_completion_mistral_api(): |
|
try: |
|
litellm.set_verbose = True |
|
response = completion( |
|
model="mistral/mistral-tiny", |
|
max_tokens=5, |
|
messages=[ |
|
{ |
|
"role": "user", |
|
"content": "Hey, how's it going?", |
|
} |
|
], |
|
) |
|
|
|
print(response) |
|
|
|
cost = litellm.completion_cost(completion_response=response) |
|
print("cost to make mistral completion=", cost) |
|
assert cost > 0.0 |
|
except Exception as e: |
|
pytest.fail(f"Error occurred: {e}") |
|
|
|
|
|
# test_completion_mistral_api()
|
|
|
|
|
def test_completion_claude2_1(): |
|
try: |
|
print("claude2.1 test request") |
|
        messages = [
            {
                "role": "system",
                "content": "Your goal is to generate a joke on the topic the user gives",
            },
            {"role": "assistant", "content": "Hi, how can I assist you today?"},
            {"role": "user", "content": "Generate a 3 liner joke for me"},
        ]
|
|
|
response = completion( |
|
model="claude-2.1", messages=messages, request_timeout=10, max_tokens=10 |
|
) |
|
|
|
print(response) |
|
print(response.usage) |
|
print(response.usage.completion_tokens) |
|
print(response["usage"]["completion_tokens"]) |
|
|
|
except Exception as e: |
|
pytest.fail(f"Error occurred: {e}") |
|
|
|
|
|
|
import openai |
|
|
|
|
|
def test_completion_gpt4_turbo(): |
|
try: |
|
response = completion( |
|
model="gpt-4-1106-preview", |
|
messages=messages, |
|
max_tokens=10, |
|
) |
|
print(response) |
|
except openai.RateLimitError: |
|
        print("got a rate limit error")
|
pass |
|
except Exception as e: |
|
pytest.fail(f"Error occurred: {e}") |
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.skip(reason="this test is flaky") |
|
def test_completion_gpt4_vision(): |
|
try: |
|
litellm.set_verbose = True |
|
response = completion( |
|
model="gpt-4-vision-preview", |
|
messages=[ |
|
{ |
|
"role": "user", |
|
"content": [ |
|
                        {"type": "text", "text": "What's in this image?"},
|
{ |
|
"type": "image_url", |
|
"image_url": { |
|
"url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" |
|
}, |
|
}, |
|
], |
|
} |
|
], |
|
) |
|
print(response) |
|
except openai.RateLimitError: |
|
        print("got a rate limit error")
|
pass |
|
except Exception as e: |
|
pytest.fail(f"Error occurred: {e}") |
|
|
|
|
|
|
|
|
|
|
|
def test_completion_azure_gpt4_vision(): |
|
|
|
try: |
|
litellm.set_verbose = True |
|
response = completion( |
|
model="azure/gpt-4-vision", |
|
timeout=1, |
|
messages=[ |
|
{ |
|
"role": "user", |
|
"content": [ |
|
                        {"type": "text", "text": "What's in this image?"},
|
{ |
|
"type": "image_url", |
|
"image_url": { |
|
"url": "https://avatars.githubusercontent.com/u/29436595?v=4" |
|
}, |
|
}, |
|
], |
|
} |
|
], |
|
base_url="https://gpt-4-vision-resource.openai.azure.com/", |
|
api_key=os.getenv("AZURE_VISION_API_KEY"), |
|
) |
|
print(response) |
|
except openai.APITimeoutError: |
|
print("got a timeout error") |
|
pass |
|
except openai.RateLimitError: |
|
        print("got a rate limit error")
|
pass |
|
except Exception as e: |
|
pytest.fail(f"Error occurred: {e}") |
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.skip(reason="this test is flaky") |
|
def test_completion_perplexity_api(): |
|
try: |
|
|
|
messages = [ |
|
{"role": "system", "content": "You're a good bot"}, |
|
{ |
|
"role": "user", |
|
"content": "Hey", |
|
}, |
|
{ |
|
"role": "user", |
|
"content": "Hey", |
|
}, |
|
] |
|
response = completion( |
|
model="mistral-7b-instruct", |
|
messages=messages, |
|
api_base="https://api.perplexity.ai", |
|
) |
|
print(response) |
|
except Exception as e: |
|
pytest.fail(f"Error occurred: {e}") |
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.skip(reason="this test is flaky") |
|
def test_completion_perplexity_api_2(): |
|
try: |
|
|
|
messages = [ |
|
{"role": "system", "content": "You're a good bot"}, |
|
{ |
|
"role": "user", |
|
"content": "Hey", |
|
}, |
|
{ |
|
"role": "user", |
|
"content": "Hey", |
|
}, |
|
] |
|
response = completion(model="perplexity/mistral-7b-instruct", messages=messages) |
|
print(response) |
|
except Exception as e: |
|
pytest.fail(f"Error occurred: {e}") |
|
|
|
|
|
|
""" |
|
HF Tests we should pass |
|
- TGI: |
|
- Pro Inference API |
|
- Deployed Endpoint |
|
- Coversational |
|
- Free Inference API |
|
- Deployed Endpoint |
|
- Neither TGI or Coversational |
|
- Free Inference API |
|
- Deployed Endpoint |
|
""" |
|
|
|
|
|
|
|
|
|
|
|
def test_get_hf_task_for_model(): |
|
model = "glaiveai/glaive-coder-7b" |
|
model_type = litellm.llms.huggingface_restapi.get_hf_task_for_model(model) |
|
print(f"model:{model}, model type: {model_type}") |
|
assert model_type == "text-generation-inference" |
|
|
|
model = "meta-llama/Llama-2-7b-hf" |
|
model_type = litellm.llms.huggingface_restapi.get_hf_task_for_model(model) |
|
print(f"model:{model}, model type: {model_type}") |
|
assert model_type == "text-generation-inference" |
|
|
|
model = "facebook/blenderbot-400M-distill" |
|
model_type = litellm.llms.huggingface_restapi.get_hf_task_for_model(model) |
|
print(f"model:{model}, model type: {model_type}") |
|
assert model_type == "conversational" |
|
|
|
model = "facebook/blenderbot-3B" |
|
model_type = litellm.llms.huggingface_restapi.get_hf_task_for_model(model) |
|
print(f"model:{model}, model type: {model_type}") |
|
assert model_type == "conversational" |
|
|
|
|
|
model = "roneneldan/TinyStories-3M" |
|
model_type = litellm.llms.huggingface_restapi.get_hf_task_for_model(model) |
|
print(f"model:{model}, model type: {model_type}") |
|
    assert model_type is None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def hf_test_completion_tgi(): |
|
|
|
try: |
|
response = completion( |
|
model="huggingface/HuggingFaceH4/zephyr-7b-beta", |
|
messages=[{"content": "Hello, how are you?", "role": "user"}], |
|
) |
|
|
|
print(response) |
|
except Exception as e: |
|
pytest.fail(f"Error occurred: {e}") |
|
|
|
|
|
|
def test_completion_cohere(): |
|
try: |
|
litellm.CohereConfig(max_tokens=1000, stop_sequences=["a"]) |
|
response = completion( |
|
model="command-nightly", messages=messages, logger_fn=logger_fn |
|
) |
|
|
|
print(response) |
|
        response_str = response["choices"][0]["message"]["content"]
        response_str_2 = response.choices[0].message.content
        if not isinstance(response_str, str):
            pytest.fail(f"Expected str content, got {type(response_str)}")
        if not isinstance(response_str_2, str):
            pytest.fail(f"Expected str content, got {type(response_str_2)}")
|
except Exception as e: |
|
pytest.fail(f"Error occurred: {e}") |
|
|
|
|
|
|
|
|
|
|
|
def test_completion_openai(): |
|
try: |
|
litellm.set_verbose = True |
|
        litellm.api_key = os.environ["OPENAI_API_KEY"]  # avoid printing the raw key to test logs
|
response = completion( |
|
model="gpt-3.5-turbo", |
|
messages=messages, |
|
max_tokens=10, |
|
request_timeout=1, |
|
metadata={"hi": "bye"}, |
|
) |
|
print("This is the response object\n", response) |
|
|
|
response_str = response["choices"][0]["message"]["content"] |
|
response_str_2 = response.choices[0].message.content |
|
|
|
cost = completion_cost(completion_response=response) |
|
print("Cost for completion call with gpt-3.5-turbo: ", f"${float(cost):.10f}") |
|
assert response_str == response_str_2 |
|
        assert isinstance(response_str, str)
|
assert len(response_str) > 1 |
|
|
|
litellm.api_key = None |
|
except Timeout as e: |
|
pass |
|
except Exception as e: |
|
pytest.fail(f"Error occurred: {e}") |
|
|
|
|
|
|
|
|
|
|
|
def test_completion_text_openai(): |
|
try: |
|
|
|
response = completion(model="gpt-3.5-turbo-instruct", messages=messages) |
|
print(response["choices"][0]["message"]["content"]) |
|
except Exception as e: |
|
print(e) |
|
pytest.fail(f"Error occurred: {e}") |
|
|
|
|
|
|
|
|
|
|
|
def custom_callback( |
|
kwargs, |
|
completion_response, |
|
start_time, |
|
end_time, |
|
): |
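    """Success callback for test_completion_openai_with_optional_params; verifies
    the optional params passed to completion() show up in the callback kwargs."""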
|
|
|
try: |
|
print("LITELLM: in custom callback function") |
|
print("\nkwargs\n", kwargs) |
|
model = kwargs["model"] |
|
messages = kwargs["messages"] |
|
user = kwargs.get("user") |
|
|
|
|
|
|
|
print( |
|
f""" |
|
Model: {model}, |
|
Messages: {messages}, |
|
User: {user}, |
|
Seed: {kwargs["seed"]}, |
|
temperature: {kwargs["temperature"]}, |
|
""" |
|
) |
|
|
|
assert kwargs["user"] == "ishaans app" |
|
assert kwargs["model"] == "gpt-3.5-turbo-1106" |
|
assert kwargs["seed"] == 12 |
|
assert kwargs["temperature"] == 0.5 |
|
except Exception as e: |
|
pytest.fail(f"Error occurred: {e}") |
|
|
|
|
|
def test_completion_openai_with_optional_params(): |
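    # Exercises optional params (seed, temperature, user, response_format, ...) and
    # relies on custom_callback above to assert they are forwarded on success.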
|
|
|
|
|
|
|
|
|
try: |
|
litellm.set_verbose = True |
|
litellm.success_callback = [custom_callback] |
|
response = completion( |
|
model="gpt-3.5-turbo-1106", |
|
messages=[ |
|
{"role": "user", "content": "respond in valid, json - what is the day"} |
|
], |
|
temperature=0.5, |
|
top_p=0.1, |
|
seed=12, |
|
response_format={"type": "json_object"}, |
|
logit_bias=None, |
|
user="ishaans app", |
|
) |
|
|
|
|
|
print(response) |
|
litellm.success_callback = [] |
|
|
|
except Exception as e: |
|
pytest.fail(f"Error occurred: {e}") |
|
|
|
|
|
|
|
|
|
|
|
def test_completion_logprobs(): |
|
""" |
|
This function is used to test the litellm.completion logprobs functionality. |
|
|
|
Parameters: |
|
None |
|
|
|
Returns: |
|
None |
|
""" |
|
try: |
|
litellm.set_verbose = True |
|
response = completion( |
|
model="gpt-3.5-turbo", |
|
messages=[{"role": "user", "content": "what is the time"}], |
|
temperature=0.5, |
|
top_p=0.1, |
|
seed=12, |
|
logit_bias=None, |
|
user="ishaans app", |
|
logprobs=True, |
|
top_logprobs=3, |
|
) |
|
|
|
|
|
print(response) |
|
print(len(response.choices[0].logprobs["content"][0]["top_logprobs"])) |
|
assert "logprobs" in response.choices[0] |
|
assert "content" in response.choices[0]["logprobs"] |
|
assert len(response.choices[0].logprobs["content"][0]["top_logprobs"]) == 3 |
|
|
|
except Exception as e: |
|
pytest.fail(f"Error occurred: {e}") |
|
|
|
|
|
|
|
|
|
|
|
def test_completion_logprobs_stream(): |
|
""" |
|
This function is used to test the litellm.completion logprobs functionality. |
|
|
|
Parameters: |
|
None |
|
|
|
Returns: |
|
None |
|
""" |
|
try: |
|
litellm.set_verbose = False |
|
response = completion( |
|
model="gpt-3.5-turbo", |
|
messages=[{"role": "user", "content": "what is the time"}], |
|
temperature=0.5, |
|
top_p=0.1, |
|
seed=12, |
|
max_tokens=5, |
|
logit_bias=None, |
|
user="ishaans app", |
|
logprobs=True, |
|
top_logprobs=3, |
|
stream=True, |
|
) |
|
|
|
|
|
print(response) |
|
|
|
found_logprob = False |
|
for chunk in response: |
|
|
|
print(chunk) |
|
if "logprobs" in chunk.choices[0]: |
|
|
|
assert len(chunk.choices[0].logprobs.content[0].top_logprobs) == 3 |
|
found_logprob = True |
|
break |
|
print(chunk) |
|
        assert found_logprob is True
|
except Exception as e: |
|
pytest.fail(f"Error occurred: {e}") |
|
|
|
|
|
|
|
|
|
|
|
def test_completion_openai_litellm_key(): |
|
try: |
|
litellm.set_verbose = True |
|
litellm.num_retries = 0 |
|
litellm.api_key = os.environ["OPENAI_API_KEY"] |
|
|
|
|
|
os.environ["OPENAI_API_KEY"] = "" |
|
import openai |
|
|
|
openai.api_key = "" |
|
|
|
|
|
response = completion( |
|
model="gpt-3.5-turbo", |
|
messages=messages, |
|
temperature=0.5, |
|
top_p=0.1, |
|
max_tokens=10, |
|
user="ishaan_dev@berri.ai", |
|
) |
|
|
|
print(response) |
|
|
|
|
|
os.environ["OPENAI_API_KEY"] = litellm.api_key |
|
|
|
|
|
litellm.api_key = None |
|
except Timeout as e: |
|
pass |
|
except Exception as e: |
|
pytest.fail(f"Error occurred: {e}") |
|
|
|
|
|
|
|
|
|
|
|
def test_completion_ollama_hosted(): |
|
try: |
|
litellm.request_timeout = 20 |
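        # 20s client-side timeout; the hosted endpoint can be slow to respond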
|
litellm.set_verbose = True |
|
response = completion( |
|
model="ollama/phi", |
|
messages=messages, |
|
max_tokens=2, |
|
api_base="https://test-ollama-endpoint.onrender.com", |
|
) |
|
|
|
print(response) |
|
except openai.APITimeoutError as e: |
|
print("got a timeout error. Passed ! ") |
|
litellm.request_timeout = None |
|
pass |
|
except Exception as e: |
|
pytest.fail(f"Error occurred: {e}") |
|
|
|
|
|
|
|
|
|
|
|
def test_completion_openrouter1(): |
|
try: |
|
response = completion( |
|
model="openrouter/google/palm-2-chat-bison", |
|
messages=messages, |
|
max_tokens=5, |
|
) |
|
|
|
print(response) |
|
except Exception as e: |
|
pytest.fail(f"Error occurred: {e}") |
|
|
|
|
|
|
|
|
|
|
|
def test_completion_hf_model_no_provider(): |
|
try: |
|
response = completion( |
|
model="WizardLM/WizardLM-70B-V1.0", |
|
messages=messages, |
|
max_tokens=5, |
|
) |
|
|
|
print(response) |
|
        pytest.fail("Expected an exception - model name has no provider prefix")
|
except Exception as e: |
|
pass |
|
|
|
|
|
|
|
|
|
|
|
def test_completion_anyscale_with_functions(): |
|
function1 = [ |
|
{ |
|
"name": "get_current_weather", |
|
"description": "Get the current weather in a given location", |
|
"parameters": { |
|
"type": "object", |
|
"properties": { |
|
"location": { |
|
"type": "string", |
|
"description": "The city and state, e.g. San Francisco, CA", |
|
}, |
|
"unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}, |
|
}, |
|
"required": ["location"], |
|
}, |
|
} |
|
] |
|
try: |
|
messages = [{"role": "user", "content": "What is the weather like in Boston?"}] |
|
response = completion( |
|
model="anyscale/mistralai/Mistral-7B-Instruct-v0.1", |
|
messages=messages, |
|
functions=function1, |
|
) |
|
|
|
print(response) |
|
|
|
cost = litellm.completion_cost(completion_response=response) |
|
print("cost to make anyscale completion=", cost) |
|
assert cost > 0.0 |
|
except Exception as e: |
|
pytest.fail(f"Error occurred: {e}") |
|
|
|
|
|
|
|
|
|
|
|
def test_completion_azure_key_completion_arg(): |
|
|
|
|
|
|
|
|
|
    # Remove the env var so the api_key passed directly to completion() must be used
    old_key = os.environ["AZURE_API_KEY"]
    os.environ.pop("AZURE_API_KEY", None)
|
try: |
|
print("azure gpt-3.5 test\n\n") |
|
litellm.set_verbose = True |
|
|
|
response = completion( |
|
model="azure/chatgpt-v-2", |
|
messages=messages, |
|
api_key=old_key, |
|
max_tokens=10, |
|
) |
|
print(f"response: {response}") |
|
|
|
print("Hidden Params", response._hidden_params) |
|
assert response._hidden_params["custom_llm_provider"] == "azure" |
|
os.environ["AZURE_API_KEY"] = old_key |
|
except Exception as e: |
|
os.environ["AZURE_API_KEY"] = old_key |
|
pytest.fail(f"Error occurred: {e}") |
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_re_use_azure_async_client():
|
try: |
|
        print("azure gpt-3.5 ASYNC with client test\n\n")
|
litellm.set_verbose = True |
|
import openai |
|
|
|
client = openai.AsyncAzureOpenAI( |
|
azure_endpoint=os.environ["AZURE_API_BASE"], |
|
api_key=os.environ["AZURE_API_KEY"], |
|
api_version="2023-07-01-preview", |
|
) |
|
|
|
for _ in range(3): |
|
response = await litellm.acompletion( |
|
model="azure/chatgpt-v-2", messages=messages, client=client |
|
) |
|
print(f"response: {response}") |
|
except Exception as e: |
|
        pytest.fail(f"got Exception: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_re_use_openaiClient(): |
|
try: |
|
print("gpt-3.5 with client test\n\n") |
|
litellm.set_verbose = True |
|
import openai |
|
|
|
client = openai.OpenAI( |
|
api_key=os.environ["OPENAI_API_KEY"], |
|
) |
|
|
|
for _ in range(2): |
|
response = litellm.completion( |
|
model="gpt-3.5-turbo", messages=messages, client=client |
|
) |
|
print(f"response: {response}") |
|
except Exception as e: |
|
        pytest.fail(f"got Exception: {e}")
|
|
|
|
|
|
|
|
|
|
|
def test_completion_azure(): |
|
try: |
|
print("azure gpt-3.5 test\n\n") |
|
litellm.set_verbose = False |
|
|
|
response = completion( |
|
model="azure/chatgpt-v-2", |
|
messages=messages, |
|
            api_key="os.environ/AZURE_API_KEY",  # litellm resolves "os.environ/"-prefixed secrets from the environment
|
) |
|
print(f"response: {response}") |
|
|
print(response) |
|
|
|
cost = completion_cost(completion_response=response) |
|
assert cost > 0.0 |
|
print("Cost for azure completion request", cost) |
|
except Exception as e: |
|
pytest.fail(f"Error occurred: {e}") |
|
|
|
|
|
|
|
|
|
|
|
def test_azure_openai_ad_token(): |
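    # Uses an input_callback to inspect the outgoing request and confirm the
    # azure_ad_token is sent as the Authorization bearer token.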
|
|
|
|
|
|
|
def tester( |
|
kwargs, |
|
): |
|
print(kwargs["additional_args"]) |
|
if kwargs["additional_args"]["headers"]["Authorization"] != "Bearer gm": |
|
pytest.fail("AZURE AD TOKEN Passed but not set in request header") |
|
return |
|
|
|
litellm.input_callback = [tester] |
|
try: |
|
response = litellm.completion( |
|
model="azure/chatgpt-v-2", |
|
messages=[ |
|
{ |
|
"role": "user", |
|
"content": "what is your name", |
|
}, |
|
], |
|
azure_ad_token="gm", |
|
) |
|
        print("azure ad token response\n")
|
print(response) |
|
litellm.input_callback = [] |
|
    except Exception:
|
litellm.input_callback = [] |
|
pass |
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_completion_azure2(): |
|
|
|
try: |
|
print("azure gpt-3.5 test\n\n") |
|
litellm.set_verbose = False |
|
api_base = os.environ["AZURE_API_BASE"] |
|
api_key = os.environ["AZURE_API_KEY"] |
|
api_version = os.environ["AZURE_API_VERSION"] |
|
|
|
os.environ["AZURE_API_BASE"] = "" |
|
os.environ["AZURE_API_VERSION"] = "" |
|
os.environ["AZURE_API_KEY"] = "" |
|
|
|
|
|
response = completion( |
|
model="azure/chatgpt-v-2", |
|
messages=messages, |
|
api_base=api_base, |
|
api_key=api_key, |
|
api_version=api_version, |
|
max_tokens=10, |
|
) |
|
|
|
|
|
print(response) |
|
|
|
os.environ["AZURE_API_BASE"] = api_base |
|
os.environ["AZURE_API_VERSION"] = api_version |
|
os.environ["AZURE_API_KEY"] = api_key |
|
|
|
except Exception as e: |
|
pytest.fail(f"Error occurred: {e}") |
|
|
|
|
|
|
|
|
|
|
|
def test_completion_azure3(): |
|
|
|
try: |
|
print("azure gpt-3.5 test\n\n") |
|
litellm.set_verbose = True |
|
litellm.api_base = os.environ["AZURE_API_BASE"] |
|
litellm.api_key = os.environ["AZURE_API_KEY"] |
|
litellm.api_version = os.environ["AZURE_API_VERSION"] |
|
|
|
os.environ["AZURE_API_BASE"] = "" |
|
os.environ["AZURE_API_VERSION"] = "" |
|
os.environ["AZURE_API_KEY"] = "" |
|
|
|
|
|
response = completion( |
|
model="azure/chatgpt-v-2", |
|
messages=messages, |
|
max_tokens=10, |
|
) |
|
|
|
|
|
print(response) |
|
|
|
os.environ["AZURE_API_BASE"] = litellm.api_base |
|
os.environ["AZURE_API_VERSION"] = litellm.api_version |
|
os.environ["AZURE_API_KEY"] = litellm.api_key |
|
|
|
except Exception as e: |
|
pytest.fail(f"Error occurred: {e}") |
|
|
|
|
def test_completion_azure_with_litellm_key(): |
|
try: |
|
print("azure gpt-3.5 test\n\n") |
|
import openai |
|
|
|
|
|
litellm.api_type = "azure" |
|
litellm.api_base = os.environ["AZURE_API_BASE"] |
|
litellm.api_version = os.environ["AZURE_API_VERSION"] |
|
litellm.api_key = os.environ["AZURE_API_KEY"] |
|
|
|
|
|
os.environ["AZURE_API_BASE"] = "" |
|
os.environ["AZURE_API_VERSION"] = "" |
|
os.environ["AZURE_API_KEY"] = "" |
|
|
|
|
|
openai.api_type = "" |
|
openai.api_base = "gm" |
|
openai.api_version = "333" |
|
openai.api_key = "ymca" |
|
|
|
response = completion( |
|
model="azure/chatgpt-v-2", |
|
messages=messages, |
|
) |
|
|
|
print(response) |
|
|
|
|
|
os.environ["AZURE_API_BASE"] = litellm.api_base |
|
os.environ["AZURE_API_VERSION"] = litellm.api_version |
|
os.environ["AZURE_API_KEY"] = litellm.api_key |
|
|
|
|
|
litellm.api_type = None |
|
litellm.api_base = None |
|
litellm.api_version = None |
|
litellm.api_key = None |
|
|
|
except Exception as e: |
|
pytest.fail(f"Error occurred: {e}") |
|
|
|
|
|
|
|
|
|
|
|
def test_completion_azure_deployment_id(): |
|
try: |
|
litellm.set_verbose = True |
|
response = completion( |
|
deployment_id="chatgpt-v-2", |
|
model="gpt-3.5-turbo", |
|
messages=messages, |
|
) |
|
|
|
print(response) |
|
except Exception as e: |
|
pytest.fail(f"Error occurred: {e}") |
|
|
|
|
@pytest.mark.skip(reason="replicate endpoints take +2 mins just for this request") |
|
def test_completion_replicate_vicuna(): |
|
print("TESTING REPLICATE") |
|
litellm.set_verbose = True |
|
model_name = "replicate/meta/llama-2-7b-chat:f1d50bb24186c52daae319ca8366e53debdaa9e0ae7ff976e918df752732ccc4" |
|
try: |
|
response = completion( |
|
model=model_name, |
|
messages=messages, |
|
temperature=0.5, |
|
top_k=20, |
|
repetition_penalty=1, |
|
min_tokens=1, |
|
seed=-1, |
|
max_tokens=2, |
|
) |
|
print(response) |
|
|
|
response_str = response["choices"][0]["message"]["content"] |
|
print("RESPONSE STRING\n", response_str) |
|
        if not isinstance(response_str, str):
            pytest.fail(f"Expected str content, got {type(response_str)}")
|
except Exception as e: |
|
pytest.fail(f"Error occurred: {e}") |
|
|
|
|
|
|
|
|
|
|
|
def test_replicate_custom_prompt_dict(): |
|
litellm.set_verbose = True |
|
model_name = "replicate/meta/llama-2-7b-chat:13c3cdee13ee059ab779f0291d29054dab00a47dad8261375654de5540165fb0" |
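    # Register a llama-2 style template; litellm wraps each role's messages with the
    # pre/post strings below when building the final prompt for this model.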
|
litellm.register_prompt_template( |
|
model="replicate/meta/llama-2-7b-chat:13c3cdee13ee059ab779f0291d29054dab00a47dad8261375654de5540165fb0", |
|
initial_prompt_value="You are a good assistant", |
|
roles={ |
|
"system": { |
|
"pre_message": "[INST] <<SYS>>\n", |
|
"post_message": "\n<</SYS>>\n [/INST]\n", |
|
}, |
|
"user": { |
|
"pre_message": "[INST] ", |
|
"post_message": " [/INST]", |
|
}, |
|
"assistant": { |
|
"pre_message": "\n", |
|
"post_message": "\n", |
|
}, |
|
}, |
|
final_prompt_value="Now answer as best you can:", |
|
) |
|
response = completion( |
|
model=model_name, |
|
messages=[ |
|
{ |
|
"role": "user", |
|
"content": "what is yc write 1 paragraph", |
|
} |
|
], |
|
num_retries=3, |
|
) |
|
print(f"response: {response}") |
|
litellm.custom_prompt_dict = {} |
|
|
|
|
def test_completion_together_ai(): |
|
model_name = "together_ai/togethercomputer/CodeLlama-13b-Instruct" |
|
try: |
|
messages = [ |
|
{"role": "user", "content": "Who are you"}, |
|
{"role": "assistant", "content": "I am your helpful assistant."}, |
|
{"role": "user", "content": "Tell me a joke"}, |
|
] |
|
response = completion( |
|
model=model_name, |
|
messages=messages, |
|
max_tokens=256, |
|
n=1, |
|
logger_fn=logger_fn, |
|
) |
|
|
|
print(response) |
|
cost = completion_cost(completion_response=response) |
|
assert cost > 0.0 |
|
        print(
            f"Cost for completion call {model_name}: ",
            f"${float(cost):.10f}",
        )
|
except Exception as e: |
|
pytest.fail(f"Error occurred: {e}") |
|
|
|
|
|
def test_completion_together_ai_mixtral(): |
|
model_name = "together_ai/DiscoResearch/DiscoLM-mixtral-8x7b-v2" |
|
try: |
|
messages = [ |
|
{"role": "user", "content": "Who are you"}, |
|
{"role": "assistant", "content": "I am your helpful assistant."}, |
|
{"role": "user", "content": "Tell me a joke"}, |
|
] |
|
response = completion( |
|
model=model_name, |
|
messages=messages, |
|
max_tokens=256, |
|
n=1, |
|
logger_fn=logger_fn, |
|
) |
|
|
|
print(response) |
|
cost = completion_cost(completion_response=response) |
|
assert cost > 0.0 |
|
        print(
            f"Cost for completion call {model_name}: ",
            f"${float(cost):.10f}",
        )
|
except litellm.Timeout as e: |
|
pass |
|
except Exception as e: |
|
pytest.fail(f"Error occurred: {e}") |
|
|
|
|
|
|
|
|
|
|
|
def test_completion_together_ai_yi_chat(): |
|
litellm.set_verbose = True |
|
model_name = "together_ai/zero-one-ai/Yi-34B-Chat" |
|
try: |
|
messages = [ |
|
{"role": "user", "content": "What llm are you?"}, |
|
] |
|
response = completion(model=model_name, messages=messages, max_tokens=5) |
|
|
|
print(response) |
|
cost = completion_cost(completion_response=response) |
|
assert cost > 0.0 |
|
        print(
            f"Cost for completion call {model_name}: ",
            f"${float(cost):.10f}",
        )
|
except litellm.Timeout as e: |
|
pass |
|
except Exception as e: |
|
pytest.fail(f"Error occurred: {e}") |
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_customprompt_together_ai(): |
|
try: |
|
litellm.set_verbose = False |
|
litellm.num_retries = 0 |
|
print("in test_customprompt_together_ai") |
|
print(litellm.success_callback) |
|
print(litellm._async_success_callback) |
|
response = completion( |
|
model="together_ai/mistralai/Mistral-7B-Instruct-v0.1", |
|
messages=messages, |
|
roles={ |
|
"system": { |
|
"pre_message": "<|im_start|>system\n", |
|
"post_message": "<|im_end|>", |
|
}, |
|
"assistant": { |
|
"pre_message": "<|im_start|>assistant\n", |
|
"post_message": "<|im_end|>", |
|
}, |
|
"user": { |
|
"pre_message": "<|im_start|>user\n", |
|
"post_message": "<|im_end|>", |
|
}, |
|
}, |
|
) |
|
print(response) |
|
except litellm.exceptions.Timeout as e: |
|
        print("Timeout Error")
|
pass |
|
except Exception as e: |
|
print(f"ERROR TYPE {type(e)}") |
|
pytest.fail(f"Error occurred: {e}") |
|
|
|
|
|
|
|
|
|
|
|
def test_completion_sagemaker(): |
|
try: |
|
print("testing sagemaker") |
|
litellm.set_verbose = True |
|
response = completion( |
|
model="sagemaker/berri-benchmarking-Llama-2-70b-chat-hf-4", |
|
messages=messages, |
|
temperature=0.2, |
|
max_tokens=80, |
|
) |
|
|
|
print(response) |
|
except Exception as e: |
|
pytest.fail(f"Error occurred: {e}") |
|
|
|
|
|
|
|
|
|
|
|
def test_completion_chat_sagemaker(): |
|
try: |
|
messages = [{"role": "user", "content": "Hey, how's it going?"}] |
|
litellm.set_verbose = True |
|
response = completion( |
|
model="sagemaker/berri-benchmarking-Llama-2-70b-chat-hf-4", |
|
messages=messages, |
|
max_tokens=100, |
|
temperature=0.7, |
|
stream=True, |
|
) |
|
|
|
complete_response = "" |
|
for chunk in response: |
|
complete_response += chunk.choices[0].delta.content or "" |
|
print(f"complete_response: {complete_response}") |
|
assert len(complete_response) > 0 |
|
except Exception as e: |
|
pytest.fail(f"Error occurred: {e}") |
|
|
|
|
|
|
|
|
|
|
|
def test_completion_chat_sagemaker_mistral(): |
|
try: |
|
messages = [{"role": "user", "content": "Hey, how's it going?"}] |
|
|
|
response = completion( |
|
model="sagemaker/jumpstart-dft-hf-llm-mistral-7b-instruct", |
|
messages=messages, |
|
max_tokens=100, |
|
) |
|
|
|
print(response) |
|
except Exception as e: |
|
pytest.fail(f"An error occurred: {str(e)}") |
|
|
|
|
|
|
|
def test_completion_bedrock_titan(): |
|
try: |
|
response = completion( |
|
model="bedrock/amazon.titan-tg1-large", |
|
messages=messages, |
|
temperature=0.2, |
|
max_tokens=200, |
|
top_p=0.8, |
|
logger_fn=logger_fn, |
|
) |
|
|
|
print(response) |
|
except RateLimitError: |
|
pass |
|
except Exception as e: |
|
pytest.fail(f"Error occurred: {e}") |
|
|
|
|
|
|
|
|
|
|
|
def test_completion_bedrock_claude(): |
|
print("calling claude") |
|
try: |
|
response = completion( |
|
model="anthropic.claude-instant-v1", |
|
messages=messages, |
|
max_tokens=10, |
|
temperature=0.1, |
|
logger_fn=logger_fn, |
|
) |
|
|
|
print(response) |
|
except RateLimitError: |
|
pass |
|
except Exception as e: |
|
pytest.fail(f"Error occurred: {e}") |
|
|
|
|
|
|
|
|
|
|
|
def test_completion_bedrock_cohere(): |
|
print("calling bedrock cohere") |
|
litellm.set_verbose = True |
|
try: |
|
response = completion( |
|
model="bedrock/cohere.command-text-v14", |
|
messages=[{"role": "user", "content": "hi"}], |
|
temperature=0.1, |
|
max_tokens=10, |
|
stream=True, |
|
) |
|
|
|
print(response) |
|
for chunk in response: |
|
print(chunk) |
|
except RateLimitError: |
|
pass |
|
except Exception as e: |
|
pytest.fail(f"Error occurred: {e}") |
|
|
|
|
def test_completion_with_fallbacks(): |
|
    print("RUNNING TEST COMPLETION WITH FALLBACKS - test_completion_with_fallbacks")
|
fallbacks = ["gpt-3.5-turbo", "gpt-3.5-turbo", "command-nightly"] |
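    # "bad-model" fails on purpose; completion() should then try each fallback in order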
|
try: |
|
response = completion( |
|
model="bad-model", messages=messages, force_timeout=120, fallbacks=fallbacks |
|
) |
|
|
|
print(response) |
|
except Exception as e: |
|
pytest.fail(f"Error occurred: {e}") |
|
|
|
|
|
|
|
def test_completion_anyscale_api(): |
|
try: |
|
|
|
messages = [ |
|
{"role": "system", "content": "You're a good bot"}, |
|
{ |
|
"role": "user", |
|
"content": "Hey", |
|
}, |
|
{ |
|
"role": "user", |
|
"content": "Hey", |
|
}, |
|
] |
|
response = completion( |
|
model="anyscale/meta-llama/Llama-2-7b-chat-hf", |
|
messages=messages, |
|
) |
|
print(response) |
|
except Exception as e: |
|
pytest.fail(f"Error occurred: {e}") |
|
|
|
|
|
|
|
|
|
|
|
def test_azure_cloudflare_api(): |
|
litellm.set_verbose = True |
|
try: |
|
messages = [ |
|
{ |
|
"role": "user", |
|
"content": "How do I output all files in a directory using Python?", |
|
}, |
|
] |
|
response = completion( |
|
model="azure/gpt-turbo", |
|
messages=messages, |
|
base_url=os.getenv("CLOUDFLARE_AZURE_BASE_URL"), |
|
api_key=os.getenv("AZURE_FRANCE_API_KEY"), |
|
) |
|
print(f"response: {response}") |
|
    except Exception as e:
        traceback.print_exc()
        pytest.fail(f"Error occurred: {e}")
|
|
|
|
|
# test_azure_cloudflare_api()
|
|
|
|
|
def test_completion_anyscale_2(): |
|
try: |
|
|
|
messages = [ |
|
{"role": "system", "content": "You're a good bot"}, |
|
{ |
|
"role": "user", |
|
"content": "Hey", |
|
}, |
|
{ |
|
"role": "user", |
|
"content": "Hey", |
|
}, |
|
] |
|
response = completion( |
|
model="anyscale/meta-llama/Llama-2-7b-chat-hf", messages=messages |
|
) |
|
print(response) |
|
except Exception as e: |
|
pytest.fail(f"Error occurred: {e}") |
|
|
|
|
|
def test_mistral_anyscale_stream(): |
|
litellm.set_verbose = False |
|
response = completion( |
|
model="anyscale/mistralai/Mistral-7B-Instruct-v0.1", |
|
messages=[{"content": "hello, good morning", "role": "user"}], |
|
stream=True, |
|
) |
|
for chunk in response: |
|
|
|
print(chunk["choices"][0]["delta"].get("content", ""), end="") |
|
|
|
|
def test_completion_ai21(): |
|
print("running ai21 j2light test") |
|
litellm.set_verbose = True |
|
model_name = "j2-light" |
|
try: |
|
response = completion( |
|
model=model_name, messages=messages, max_tokens=100, temperature=0.8 |
|
) |
|
|
|
print(response) |
|
except Exception as e: |
|
pytest.fail(f"Error occurred: {e}") |
|
|
|
|
|
|
|
|
|
|
|
def test_completion_deep_infra(): |
|
litellm.set_verbose = False |
|
model_name = "deepinfra/meta-llama/Llama-2-70b-chat-hf" |
|
try: |
|
response = completion( |
|
model=model_name, messages=messages, temperature=0, max_tokens=10 |
|
) |
|
|
|
print(response) |
|
except Exception as e: |
|
pytest.fail(f"Error occurred: {e}") |
|
|
|
|
|
|
|
|
|
|
|
def test_completion_deep_infra_mistral(): |
|
print("deep infra test with temp=0") |
|
model_name = "deepinfra/mistralai/Mistral-7B-Instruct-v0.1" |
|
try: |
|
response = completion( |
|
model=model_name, |
|
messages=messages, |
|
temperature=0.01, |
|
max_tokens=10, |
|
) |
|
|
|
print(response) |
|
except litellm.exceptions.Timeout as e: |
|
pass |
|
except Exception as e: |
|
pytest.fail(f"Error occurred: {e}") |
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_completion_gemini(): |
|
litellm.set_verbose = True |
|
model_name = "gemini/gemini-pro" |
|
messages = [{"role": "user", "content": "Hey, how's it going?"}] |
|
try: |
|
response = completion(model=model_name, messages=messages) |
|
|
|
print(response) |
|
except Exception as e: |
|
pytest.fail(f"Error occurred: {e}") |
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_completion_palm(): |
|
litellm.set_verbose = True |
|
model_name = "palm/chat-bison" |
|
messages = [{"role": "user", "content": "Hey, how's it going?"}] |
|
try: |
|
response = completion(model=model_name, messages=messages) |
|
|
|
print(response) |
|
except Exception as e: |
|
pytest.fail(f"Error occurred: {e}") |
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_completion_palm_stream(): |
|
|
|
model_name = "palm/chat-bison" |
|
try: |
|
response = completion( |
|
model=model_name, |
|
messages=messages, |
|
stop=["stop"], |
|
stream=True, |
|
max_tokens=20, |
|
) |
|
|
|
for chunk in response: |
|
print(chunk) |
|
except Exception as e: |
|
pytest.fail(f"Error occurred: {e}") |
|
|
|
|
def test_completion_together_ai_stream(): |
|
user_message = "Write 1pg about YC & litellm" |
|
messages = [{"content": user_message, "role": "user"}] |
|
try: |
|
response = completion( |
|
model="together_ai/mistralai/Mistral-7B-Instruct-v0.1", |
|
messages=messages, |
|
stream=True, |
|
max_tokens=5, |
|
) |
|
print(response) |
|
for chunk in response: |
|
print(chunk) |
|
|
|
except Exception as e: |
|
pytest.fail(f"Error occurred: {e}") |
|
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.skip(reason="Flaky test-cloudflare is very unstable") |
|
def test_completion_cloudflare(): |
|
try: |
|
litellm.set_verbose = True |
|
response = completion( |
|
model="cloudflare/@cf/meta/llama-2-7b-chat-int8", |
|
messages=[{"content": "what llm are you", "role": "user"}], |
|
max_tokens=15, |
|
num_retries=3, |
|
) |
|
print(response) |
|
|
|
except Exception as e: |
|
pytest.fail(f"Error occurred: {e}") |
|
|
|
|
|
# test_completion_cloudflare()  # test is marked skip (flaky); do not invoke directly
|
|
|
|
|
def test_moderation(): |
|
import openai |
|
|
|
    # Intentionally set bogus global openai config - litellm.moderation should ignore it
    openai.api_type = "azure"
    openai.api_version = "GM"
|
response = litellm.moderation(input="i'm ishaan cto of litellm") |
|
print(response) |
|
output = response.results[0] |
|
print(output) |
|
return output |
|
|
|
|
|
|
|
|