import sys, os

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path

import litellm
import pytest
from litellm import get_max_tokens, model_cost, open_ai_chat_completion_models


def test_get_gpt3_tokens():
    max_tokens = get_max_tokens("gpt-3.5-turbo")
    print(max_tokens)
    assert max_tokens == 4097


# test_get_gpt3_tokens()


def test_get_palm_tokens():
    # # 🦄🦄🦄🦄🦄🦄🦄🦄
    max_tokens = get_max_tokens("palm/chat-bison")
    assert max_tokens == 4096
    print(max_tokens)


# test_get_palm_tokens()


def test_zephyr_hf_tokens():
    max_tokens = get_max_tokens("huggingface/HuggingFaceH4/zephyr-7b-beta")
    print(max_tokens)
    assert max_tokens == 32768


# test_zephyr_hf_tokens()


def test_cost_ft_gpt_35():
    try:
        # this tests if litellm.completion_cost can calculate the cost for
        # ft:gpt-3.5-turbo:my-org:custom_suffix:id; it needs to look up
        # ft:gpt-3.5-turbo in the litellm model_cost map to get the correct pricing
        from litellm import ModelResponse, Choices, Message
        from litellm.utils import Usage

        resp = ModelResponse(
            id="chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac",
            choices=[
                Choices(
                    finish_reason=None,
                    index=0,
                    message=Message(
                        content=" Sure! Here is a short poem about the sky:\n\nA canvas of blue, a",
                        role="assistant",
                    ),
                )
            ],
            created=1700775391,
            model="ft:gpt-3.5-turbo:my-org:custom_suffix:id",
            object="chat.completion",
            system_fingerprint=None,
            usage=Usage(prompt_tokens=21, completion_tokens=17, total_tokens=38),
        )

        cost = litellm.completion_cost(completion_response=resp)
        print("\n Calculated Cost for ft:gpt-3.5", cost)
        input_cost = model_cost["ft:gpt-3.5-turbo"]["input_cost_per_token"]
        output_cost = model_cost["ft:gpt-3.5-turbo"]["output_cost_per_token"]
        print(input_cost, output_cost)
        expected_cost = (input_cost * resp.usage.prompt_tokens) + (
            output_cost * resp.usage.completion_tokens
        )
        print("\n Expected cost", expected_cost)
        assert cost == expected_cost
    except Exception as e:
        # report the exception itself; `cost`/`expected_cost` may be unbound
        # if the failure happened before they were assigned
        pytest.fail(f"Cost calc failed for ft:gpt-3.5. Error: {e}")


# test_cost_ft_gpt_35()


def test_cost_azure_gpt_35():
    try:
        # this tests if litellm.completion_cost can calculate the cost for
        # azure/chatgpt-deployment-2, which maps to azure/gpt-3.5-turbo;
        # it checks that passing `model` to completion_cost overrides the
        # model on the completion response
        from litellm import ModelResponse, Choices, Message
        from litellm.utils import Usage

        resp = ModelResponse(
            id="chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac",
            choices=[
                Choices(
                    finish_reason=None,
                    index=0,
                    message=Message(
                        content=" Sure! Here is a short poem about the sky:\n\nA canvas of blue, a",
                        role="assistant",
                    ),
                )
            ],
            model="gpt-35-turbo",  # azure always writes the model name like this
            usage=Usage(prompt_tokens=21, completion_tokens=17, total_tokens=38),
        )

        cost = litellm.completion_cost(
            completion_response=resp, model="azure/gpt-35-turbo"
        )
        print("\n Calculated Cost for azure/gpt-3.5-turbo", cost)
        input_cost = model_cost["azure/gpt-35-turbo"]["input_cost_per_token"]
        output_cost = model_cost["azure/gpt-35-turbo"]["output_cost_per_token"]
        expected_cost = (input_cost * resp.usage.prompt_tokens) + (
            output_cost * resp.usage.completion_tokens
        )
        print("\n Expected cost", expected_cost)
        assert cost == expected_cost
    except Exception as e:
        pytest.fail(f"Cost calc failed for azure/gpt-3.5-turbo. Error: {e}")


# test_cost_azure_gpt_35()
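# A minimal cross-check sketch (not part of the original suite): the two tests
# above recompute expected cost by hand from the model_cost map; litellm also
# exposes a cost_per_token helper, and assuming it accepts model/prompt_tokens/
# completion_tokens keywords and returns a (prompt_cost, completion_cost) tuple,
# the same per-token math can be verified against the map directly. The test
# name and token counts here are illustrative, not from the original file.
def test_cost_per_token_sketch():
    try:
        prompt_tokens, completion_tokens = 21, 17
        # hypothetical cross-check of the helper against the raw map entries
        prompt_cost, completion_cost = litellm.cost_per_token(
            model="gpt-3.5-turbo",
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
        )
        expected_prompt_cost = (
            model_cost["gpt-3.5-turbo"]["input_cost_per_token"] * prompt_tokens
        )
        expected_completion_cost = (
            model_cost["gpt-3.5-turbo"]["output_cost_per_token"] * completion_tokens
        )
        assert prompt_cost == expected_prompt_cost
        assert completion_cost == expected_completion_cost
    except Exception as e:
        pytest.fail(f"cost_per_token sketch failed. Error: {e}")


# test_cost_per_token_sketch()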
def test_cost_azure_embedding():
    try:
        import asyncio

        litellm.set_verbose = True

        async def _test():
            response = await litellm.aembedding(
                model="azure/azure-embedding-model",
                input=["good morning from litellm", "gm"],
            )
            print(response)
            return response

        response = asyncio.run(_test())

        cost = litellm.completion_cost(completion_response=response)
        print("Cost", cost)
        expected_cost = 7e-07
        assert cost == expected_cost
    except Exception as e:
        pytest.fail(f"Cost calc failed for azure embedding. Error: {e}")


# test_cost_azure_embedding()
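# A minimal coverage sketch (an assumption, not from the original file): since
# open_ai_chat_completion_models is imported above, a check like this can
# assert that every OpenAI chat model has a pricing entry in the model_cost
# map, which is the data completion_cost relies on in the tests above.
def test_openai_models_have_cost_entries():
    try:
        missing = [m for m in open_ai_chat_completion_models if m not in model_cost]
        print("models missing cost entries:", missing)
        assert missing == []
    except Exception as e:
        pytest.fail(f"model_cost coverage check failed. Error: {e}")


# test_openai_models_have_cost_entries()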