#### What this tests ####
# This tests routing completions across multiple deployments of the same model name,
# sequentially, in parallel, and with per-deployment cooldowns

import asyncio
import os
import sys
import time
import traceback

import pytest

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import concurrent

from dotenv import load_dotenv

import litellm
from litellm import Router

load_dotenv()

model_list = [
    {  # list of model deployments
        "model_name": "gpt-3.5-turbo",  # openai model name
        "litellm_params": {  # params for litellm completion/embedding call
            "model": "azure/chatgpt-v-3",
            "api_key": "bad-key",
            "api_version": os.getenv("AZURE_API_VERSION"),
            "api_base": os.getenv("AZURE_API_BASE"),
        },
        "tpm": 240000,
        "rpm": 1800,
    },
    {
        "model_name": "gpt-3.5-turbo",  # openai model name
        "litellm_params": {  # params for litellm completion/embedding call
            "model": "gpt-3.5-turbo",
            "api_key": os.getenv("OPENAI_API_KEY"),
        },
        "tpm": 1000000,
        "rpm": 9000,
    },
]

kwargs = {
    "model": "gpt-3.5-turbo",
    "messages": [{"role": "user", "content": "Hey, how's it going?"}],
}


def test_multiple_deployments_sync():
    import concurrent
    import time

    litellm.set_verbose = False
    results = []
    router = Router(
        model_list=model_list,
        redis_host=os.getenv("REDIS_HOST"),
        redis_password=os.getenv("REDIS_PASSWORD"),
        redis_port=int(os.getenv("REDIS_PORT")),  # type: ignore
        routing_strategy="simple-shuffle",
        set_verbose=True,
        num_retries=1,
    )  # type: ignore
    try:
        for _ in range(3):
            response = router.completion(**kwargs)
            results.append(response)
        print(results)
        router.reset()
    except Exception as e:
| print(f"FAILED TEST!") | |
| pytest.fail(f"An error occurred - {traceback.format_exc()}") | |


# test_multiple_deployments_sync()


def test_multiple_deployments_parallel():
    litellm.set_verbose = False
    results = []
    futures = {}
    start_time = time.time()
    router = Router(
        model_list=model_list,
        redis_host=os.getenv("REDIS_HOST"),
        redis_password=os.getenv("REDIS_PASSWORD"),
        redis_port=int(os.getenv("REDIS_PORT")),  # type: ignore
        routing_strategy="simple-shuffle",
        set_verbose=True,
        num_retries=1,
    )  # type: ignore
    # Fan the requests out across a thread pool
    with concurrent.futures.ThreadPoolExecutor() as executor:
        for _ in range(5):
            future = executor.submit(router.completion, **kwargs)
            futures[future] = future

        # Retrieve the results from the futures
        while futures:
            done, not_done = concurrent.futures.wait(
                futures.values(),
                timeout=10,
                return_when=concurrent.futures.FIRST_COMPLETED,
            )
            for future in done:
                try:
                    result = future.result()
                    results.append(result)
                    del futures[future]  # Remove the done future
                except Exception as e:
                    print(f"Exception: {e}; traceback: {traceback.format_exc()}")
                    del futures[future]  # Remove the done future with exception

            print(f"Remaining futures: {len(futures)}")

    router.reset()
    end_time = time.time()
    print(results)
    print(f"ELAPSED TIME: {end_time - start_time}")


# test_multiple_deployments_parallel()
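

# --- Hedged sketch (not part of the original tests) ----------------------------------
# The same fan-out can also be written with asyncio instead of a thread pool, using
# Router.acompletion. Minimal illustration, assuming the Redis settings are optional
# for "simple-shuffle" routing; the helper name `_async_parallel_sketch` is invented
# for this example.
async def _async_parallel_sketch():
    router = Router(
        model_list=model_list,
        routing_strategy="simple-shuffle",
        num_retries=1,
    )  # type: ignore
    try:
        # Fire 5 concurrent requests; return_exceptions=True collects failures
        # instead of cancelling the whole batch on the first error.
        responses = await asyncio.gather(
            *(router.acompletion(**kwargs) for _ in range(5)),
            return_exceptions=True,
        )
        print(responses)
    finally:
        router.reset()


# asyncio.run(_async_parallel_sketch())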


@pytest.mark.parametrize("sync_mode", [True, False])
@pytest.mark.asyncio
async def test_cooldown_same_model_name(sync_mode):
    # users could have the same model deployed with different api_base values,
    # e.g.
    #   azure/chatgpt, api_base: 1234
    #   azure/chatgpt, api_base: 1235
    # if 1234 fails, only 1234 should be cooled down, and the request retried on 1235
    # (an illustrative two-base config sketch follows at the end of this file)
    litellm.set_verbose = False
    try:
        print("testing cooldown same model name")
        model_list = [
            {
                "model_name": "gpt-3.5-turbo",
                "litellm_params": {
                    "model": "azure/chatgpt-v-3",
                    "api_key": "bad-key",
                    "api_version": os.getenv("AZURE_API_VERSION"),
                    "api_base": os.getenv("AZURE_API_BASE"),
                    "tpm": 90,
                },
            },
            {
                "model_name": "gpt-3.5-turbo",
                "litellm_params": {
                    "model": "azure/chatgpt-v-3",
                    "api_key": os.getenv("AZURE_API_KEY"),
                    "api_version": os.getenv("AZURE_API_VERSION"),
                    "api_base": os.getenv("AZURE_API_BASE"),
                    "tpm": 1,
                },
            },
        ]
        router = Router(
            model_list=model_list,
            redis_host=os.getenv("REDIS_HOST"),
            redis_password=os.getenv("REDIS_PASSWORD"),
            redis_port=int(os.getenv("REDIS_PORT")),
            routing_strategy="simple-shuffle",
            set_verbose=True,
            num_retries=3,
            allowed_fails=0,
        )  # type: ignore
        if sync_mode:
            response = router.completion(
                model="gpt-3.5-turbo",
                messages=[{"role": "user", "content": "hello this request will pass"}],
            )
            print(router.model_list)
            model_ids = []
            for model in router.model_list:
                model_ids.append(model["model_info"]["id"])
            print("\n litellm model ids ", model_ids)
            # example litellm_model_names ['azure/chatgpt-v-3-ModelID-64321', 'azure/chatgpt-v-3-ModelID-63960']
            assert (
                model_ids[0] != model_ids[1]
            )  # ensure both models have a uuid added, and they have different names
            print("\ngot response\n", response)
        else:
            response = await router.acompletion(
                model="gpt-3.5-turbo",
                messages=[{"role": "user", "content": "hello this request will pass"}],
            )
            print(router.model_list)
            model_ids = []
            for model in router.model_list:
                model_ids.append(model["model_info"]["id"])
            print("\n litellm model ids ", model_ids)
            # example litellm_model_names ['azure/chatgpt-v-3-ModelID-64321', 'azure/chatgpt-v-3-ModelID-63960']
            assert (
                model_ids[0] != model_ids[1]
            )  # ensure both models have a uuid added, and they have different names
            print("\ngot response\n", response)
    except Exception as e:
        pytest.fail(f"Got unexpected exception on router! - {e}")


# asyncio.run(test_cooldown_same_model_name(sync_mode=True))
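

# --- Hedged sketch (illustration only, not an executed test) --------------------------
# The comments in test_cooldown_same_model_name describe two deployments that share a
# model_name but point at different api_base values. Such a config could look like the
# list below; the base URLs are placeholders invented for this sketch, and the variable
# name `_same_name_different_base_model_list` is not part of the original file.
_same_name_different_base_model_list = [
    {
        "model_name": "gpt-3.5-turbo",
        "litellm_params": {
            "model": "azure/chatgpt-v-3",
            "api_key": os.getenv("AZURE_API_KEY"),
            "api_version": os.getenv("AZURE_API_VERSION"),
            "api_base": "https://endpoint-1234.example.com",  # placeholder base #1
        },
    },
    {
        "model_name": "gpt-3.5-turbo",
        "litellm_params": {
            "model": "azure/chatgpt-v-3",
            "api_key": os.getenv("AZURE_API_KEY"),
            "api_version": os.getenv("AZURE_API_VERSION"),
            "api_base": "https://endpoint-1235.example.com",  # placeholder base #2
        },
    },
]
# If the deployment at base #1 fails, the expectation is that only that deployment is
# cooled down and the request is retried against base #2.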