import os
import sys

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the repo root (two directories up) to the system path

import pytest

import litellm
from litellm import get_max_tokens, model_cost, open_ai_chat_completion_models

def test_get_gpt3_tokens():
    # get_max_tokens should return the context window for gpt-3.5-turbo
    max_tokens = get_max_tokens("gpt-3.5-turbo")
    print(max_tokens)
    assert max_tokens == 4097

test_get_gpt3_tokens()

def test_get_palm_tokens():
    # get_max_tokens should return the context window for palm/chat-bison
    max_tokens = get_max_tokens("palm/chat-bison")
    print(max_tokens)
    assert max_tokens == 4096

test_get_palm_tokens()

def test_zephyr_hf_tokens():
    # get_max_tokens should resolve the context window for a Hugging Face model
    max_tokens = get_max_tokens("huggingface/HuggingFaceH4/zephyr-7b-beta")
    print(max_tokens)
    assert max_tokens == 32768

test_zephyr_hf_tokens()
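
# Hedged cross-check (not in the original file): this sketch assumes that
# get_max_tokens reads from the same model_cost map imported above, and that
# these entries carry a "max_tokens" field, so the two lookups should agree.
def test_max_tokens_matches_cost_map():
    for model_name in ["gpt-3.5-turbo", "palm/chat-bison"]:
        assert get_max_tokens(model_name) == model_cost[model_name]["max_tokens"]

test_max_tokens_matches_cost_map()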

def test_cost_ft_gpt_35():
    try:
        # Check that litellm.completion_cost can calculate cost for
        # ft:gpt-3.5-turbo:my-org:custom_suffix:id. It needs to look up
        # ft:gpt-3.5-turbo in the litellm model_cost map to get the correct cost.
        from litellm import ModelResponse, Choices, Message
        from litellm.utils import Usage

        resp = ModelResponse(
            id="chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac",
            choices=[
                Choices(
                    finish_reason=None,
                    index=0,
                    message=Message(
                        content=" Sure! Here is a short poem about the sky:\n\nA canvas of blue, a",
                        role="assistant",
                    ),
                )
            ],
            created=1700775391,
            model="ft:gpt-3.5-turbo:my-org:custom_suffix:id",
            object="chat.completion",
            system_fingerprint=None,
            usage=Usage(prompt_tokens=21, completion_tokens=17, total_tokens=38),
        )

        cost = litellm.completion_cost(completion_response=resp)
        print("\n Calculated cost for ft:gpt-3.5", cost)
        input_cost = model_cost["ft:gpt-3.5-turbo"]["input_cost_per_token"]
        output_cost = model_cost["ft:gpt-3.5-turbo"]["output_cost_per_token"]
        print(input_cost, output_cost)
        expected_cost = (input_cost * resp.usage.prompt_tokens) + (
            output_cost * resp.usage.completion_tokens
        )
        print("\n Expected cost", expected_cost)
        assert cost == expected_cost
    except Exception as e:
        # Report the exception itself; referencing cost/expected_cost here
        # would raise a NameError if the failure happened before assignment.
        pytest.fail(f"Cost calc failed for ft:gpt-3.5. Error: {e}")

test_cost_ft_gpt_35()
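
# Hedged sketch (not in the original file): litellm also exposes a lower-level
# cost_per_token helper, assumed here to return a (prompt_cost, completion_cost)
# tuple in USD for a given model and token counts. It makes the arithmetic
# behind completion_cost explicit: cost = per-token rate * token count.
def test_cost_per_token_matches_manual_math():
    prompt_cost, completion_cost = litellm.cost_per_token(
        model="gpt-3.5-turbo", prompt_tokens=21, completion_tokens=17
    )
    assert prompt_cost == model_cost["gpt-3.5-turbo"]["input_cost_per_token"] * 21
    assert completion_cost == model_cost["gpt-3.5-turbo"]["output_cost_per_token"] * 17

test_cost_per_token_matches_manual_math()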

def test_cost_azure_gpt_35():
    try:
        # Check that litellm.completion_cost can calculate cost for an Azure
        # deployment that maps to azure/gpt-3.5-turbo. Here we check that
        # passing `model` to completion_cost overrides the model name recorded
        # on the completion response.
        from litellm import ModelResponse, Choices, Message
        from litellm.utils import Usage

        resp = ModelResponse(
            id="chatcmpl-e41836bb-bb8b-4df2-8e70-8f3e160155ac",
            choices=[
                Choices(
                    finish_reason=None,
                    index=0,
                    message=Message(
                        content=" Sure! Here is a short poem about the sky:\n\nA canvas of blue, a",
                        role="assistant",
                    ),
                )
            ],
            model="azure/gpt-35-turbo",  # Azure always reports the model name like this
            usage=Usage(prompt_tokens=21, completion_tokens=17, total_tokens=38),
        )

        cost = litellm.completion_cost(
            completion_response=resp, model="azure/gpt-3.5-turbo"
        )
        print("\n Calculated cost for azure/gpt-3.5-turbo", cost)
        input_cost = model_cost["azure/gpt-3.5-turbo"]["input_cost_per_token"]
        output_cost = model_cost["azure/gpt-3.5-turbo"]["output_cost_per_token"]
        expected_cost = (input_cost * resp.usage.prompt_tokens) + (
            output_cost * resp.usage.completion_tokens
        )
        print("\n Expected cost", expected_cost)
        assert cost == expected_cost
    except Exception as e:
        pytest.fail(f"Cost calc failed for azure/gpt-3.5-turbo. Error: {e}")

test_cost_azure_gpt_35()
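
# Hedged usage sketch (not in the original file): completion_cost can also be
# called with raw prompt/completion strings instead of a ModelResponse; in
# that path litellm tokenizes the text itself. The resulting token counts are
# an assumption here, so this only asserts the cost is positive.
def test_cost_from_strings():
    cost = litellm.completion_cost(
        model="gpt-3.5-turbo",
        prompt="Hey, how's it going?",
        completion="I'm doing well, thank you!",
    )
    print("\n Cost computed from raw strings", cost)
    assert cost > 0

test_cost_from_strings()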