import sys, os
import traceback
from dotenv import load_dotenv

load_dotenv()
import io

sys.path.insert(0, os.path.abspath("../.."))  # Adds the parent directory to the system path
import pytest, asyncio
import litellm
from litellm import embedding, completion, completion_cost, Timeout, acompletion
from litellm import RateLimitError
import json
import tempfile

litellm.num_retries = 3
litellm.cache = None
user_message = "Write a short poem about the sky"
messages = [{"content": user_message, "role": "user"}]


def load_vertex_ai_credentials():
    # Define the path to the vertex_key.json file
    print("loading vertex ai credentials")
    filepath = os.path.dirname(os.path.abspath(__file__))
    vertex_key_path = filepath + "/vertex_key.json"

    # Read the existing content of the file, or start with an empty dictionary
    try:
        with open(vertex_key_path, "r") as file:
            # Read the file content
            print("Read vertexai file path")
            content = file.read()

            # If the file is empty or blank, use an empty dictionary
            if not content or not content.strip():
                service_account_key_data = {}
            else:
                # Attempt to load the existing JSON content
                file.seek(0)
                service_account_key_data = json.load(file)
    except FileNotFoundError:
        # If the file doesn't exist, use an empty dictionary
        service_account_key_data = {}

    # Update the key data with credentials from environment variables
    private_key_id = os.environ.get("VERTEX_AI_PRIVATE_KEY_ID", "")
    private_key = os.environ.get("VERTEX_AI_PRIVATE_KEY", "")
    private_key = private_key.replace("\\n", "\n")
    service_account_key_data["private_key_id"] = private_key_id
    service_account_key_data["private_key"] = private_key

    # Write the updated content to a temporary file
    with tempfile.NamedTemporaryFile(mode="w+", delete=False) as temp_file:
        json.dump(service_account_key_data, temp_file, indent=2)

    # Export the temporary file as GOOGLE_APPLICATION_CREDENTIALS
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = os.path.abspath(temp_file.name)


@pytest.mark.asyncio
async def get_response():
    load_vertex_ai_credentials()
    prompt = '\ndef count_nums(arr):\n    """\n    Write a function count_nums which takes an array of integers and returns\n    the number of elements which has a sum of digits > 0.\n    If a number is negative, then its first signed digit will be negative:\n    e.g. -123 has signed digits -1, 2, and 3.\n    >>> count_nums([]) == 0\n    >>> count_nums([-1, 11, -11]) == 1\n    >>> count_nums([1, 1, 2]) == 3\n    """\n'
    try:
        response = await acompletion(
            model="gemini-pro",
            messages=[
                {
                    "role": "system",
                    "content": "Complete the given code with no more explanation. Remember that there is a 4-space indent before the first line of your generated code.",
                },
                {"role": "user", "content": prompt},
            ],
        )
        return response
    except litellm.UnprocessableEntityError as e:
        pass
    except Exception as e:
        pytest.fail(f"An error occurred - {str(e)}")


def test_vertex_ai():
    import random

    load_vertex_ai_credentials()
    test_models = (
        litellm.vertex_chat_models
        + litellm.vertex_code_chat_models
        + litellm.vertex_text_models
        + litellm.vertex_code_text_models
    )
    litellm.set_verbose = False
    litellm.vertex_project = "reliablekeys"

    # Test one randomly sampled model plus the vertex language models
    test_models = random.sample(test_models, 1)
    test_models += litellm.vertex_language_models
    for model in test_models:
        try:
            if model in [
                "code-gecko",
                "code-gecko@001",
                "code-gecko@002",
                "code-gecko@latest",
                "code-bison@001",
                "text-bison@001",
            ]:
                # skip the legacy code/text models
                continue
            print("making request", model)
            response = completion(
                model=model,
                messages=[{"role": "user", "content": "hi"}],
                temperature=0.7,
            )
            print("\nModel Response", response)
            assert isinstance(response.choices[0].message.content, str)
            assert len(response.choices[0].message.content) > 1
        except Exception as e:
            pytest.fail(f"Error occurred: {e}")


def test_vertex_ai_stream():
    load_vertex_ai_credentials()
    litellm.set_verbose = False
    litellm.vertex_project = "reliablekeys"
    import random

    test_models = (
        litellm.vertex_chat_models
        + litellm.vertex_code_chat_models
        + litellm.vertex_text_models
        + litellm.vertex_code_text_models
    )
    test_models = random.sample(test_models, 1)
    test_models += litellm.vertex_language_models
    for model in test_models:
        try:
            if model in [
                "code-gecko",
                "code-gecko@001",
                "code-gecko@002",
                "code-gecko@latest",
                "code-bison@001",
                "text-bison@001",
            ]:
                # skip the legacy code/text models
                continue
            print("making request", model)
            response = completion(
                model=model,
                messages=[
                    {"role": "user", "content": "write 10 lines of code for saying hi"}
                ],
                stream=True,
            )
            completed_str = ""
            for chunk in response:
                print(chunk)
                content = chunk.choices[0].delta.content or ""
                print("\n content", content)
                completed_str += content
                assert isinstance(content, str)
            assert len(completed_str) > 4
        except Exception as e:
            pytest.fail(f"Error occurred: {e}")


@pytest.mark.asyncio
async def test_async_vertexai_response():
    import random

    load_vertex_ai_credentials()
    test_models = (
        litellm.vertex_chat_models
        + litellm.vertex_code_chat_models
        + litellm.vertex_text_models
        + litellm.vertex_code_text_models
    )
    test_models = random.sample(test_models, 1)
    test_models += litellm.vertex_language_models
    for model in test_models:
        print(f"model being tested in async call: {model}")
        if model in [
            "code-gecko",
            "code-gecko@001",
            "code-gecko@002",
            "code-gecko@latest",
            "code-bison@001",
            "text-bison@001",
        ]:
            continue
        try:
            user_message = "Hello, how are you?"
            messages = [{"content": user_message, "role": "user"}]
            response = await acompletion(
                model=model, messages=messages, temperature=0.7, timeout=5
            )
            print(f"response: {response}")
        except litellm.Timeout as e:
            pass
        except Exception as e:
            pytest.fail(f"An exception occurred: {e}")


@pytest.mark.asyncio
async def test_async_vertexai_streaming_response():
    import random

    load_vertex_ai_credentials()
    test_models = (
        litellm.vertex_chat_models
        + litellm.vertex_code_chat_models
        + litellm.vertex_text_models
        + litellm.vertex_code_text_models
    )
    test_models = random.sample(test_models, 1)
    test_models += litellm.vertex_language_models
    for model in test_models:
        if model in [
            "code-gecko",
            "code-gecko@001",
            "code-gecko@002",
            "code-gecko@latest",
            "code-bison@001",
            "text-bison@001",
        ]:
            continue
        try:
            user_message = "Hello, how are you?"
            messages = [{"content": user_message, "role": "user"}]
            response = await acompletion(
                model=model,  # use the model selected for this iteration
                messages=messages,
                temperature=0.7,
                timeout=5,
                stream=True,
            )
            print(f"response: {response}")
            complete_response = ""
            async for chunk in response:
                print(f"chunk: {chunk}")
                # delta.content can be None on the final chunk
                complete_response += chunk.choices[0].delta.content or ""
            print(f"complete_response: {complete_response}")
            assert len(complete_response) > 0
        except litellm.Timeout as e:
            pass
        except Exception as e:
            print(e)
            pytest.fail(f"An exception occurred: {e}")


def test_gemini_pro_vision():
    try:
        load_vertex_ai_credentials()
        litellm.set_verbose = True
        litellm.num_retries = 0
        resp = litellm.completion(
            model="vertex_ai/gemini-pro-vision",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": "What's in this image?"},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": "gs://cloud-samples-data/generative-ai/image/boats.jpeg"
                            },
                        },
                    ],
                }
            ],
        )
        print(resp)

        prompt_tokens = resp.usage.prompt_tokens

        # the token count the Vertex AI API reports for this image prompt
        assert prompt_tokens == 263
    except Exception as e:
        traceback.print_exc()
        raise e


def test_gemini_pro_function_calling():
    load_vertex_ai_credentials()
    tools = [
        {
            "type": "function",
            "function": {
                "name": "get_current_weather",
                "description": "Get the current weather in a given location",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "location": {
                            "type": "string",
                            "description": "The city and state, e.g. San Francisco, CA",
                        },
                        "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                    },
                    "required": ["location"],
                },
            },
        }
    ]
    messages = [{"role": "user", "content": "What's the weather like in Boston today?"}]
    response = litellm.completion(
        model="gemini-pro", messages=messages, tools=tools, tool_choice="auto"
    )
    print(f"completion: {response}")


@pytest.mark.asyncio
async def test_gemini_pro_async_function_calling():
    load_vertex_ai_credentials()
    tools = [
        {
            "type": "function",
            "function": {
                "name": "get_current_weather",
                "description": "Get the current weather in a given location",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "location": {
                            "type": "string",
                            "description": "The city and state, e.g. San Francisco, CA",
                        },
                        "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                    },
                    "required": ["location"],
                },
            },
        }
    ]
    messages = [{"role": "user", "content": "What's the weather like in Boston today?"}]
    response = await litellm.acompletion(
        model="gemini-pro", messages=messages, tools=tools, tool_choice="auto"
    )
    print(f"completion: {response}")


# asyncio.run(test_gemini_pro_async_function_calling())