import sys, os
import traceback
from dotenv import load_dotenv

load_dotenv()
import io

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import pytest, asyncio
import litellm
from litellm import embedding, completion, completion_cost, Timeout, acompletion
from litellm import RateLimitError
import json
import tempfile

litellm.num_retries = 3
litellm.cache = None
user_message = "Write a short poem about the sky"
messages = [{"content": user_message, "role": "user"}]


def load_vertex_ai_credentials():
    # Define the path to the vertex_key.json file
    print("loading vertex ai credentials")
    filepath = os.path.dirname(os.path.abspath(__file__))
    vertex_key_path = filepath + "/vertex_key.json"

    # Read the existing content of the file or create an empty dictionary
    try:
        with open(vertex_key_path, "r") as file:
            # Read the file content
            print("Read vertexai file path")
            content = file.read()

            # If the file is empty or not valid JSON, create an empty dictionary
            if not content or not content.strip():
                service_account_key_data = {}
            else:
                # Attempt to load the existing JSON content
                file.seek(0)
                service_account_key_data = json.load(file)
    except FileNotFoundError:
        # If the file doesn't exist, create an empty dictionary
        service_account_key_data = {}

    # Update the service_account_key_data with environment variables
    private_key_id = os.environ.get("VERTEX_AI_PRIVATE_KEY_ID", "")
    private_key = os.environ.get("VERTEX_AI_PRIVATE_KEY", "")
    private_key = private_key.replace("\\n", "\n")
    service_account_key_data["private_key_id"] = private_key_id
    service_account_key_data["private_key"] = private_key

    # Create a temporary file
    with tempfile.NamedTemporaryFile(mode="w+", delete=False) as temp_file:
        # Write the updated content to the temporary file
        json.dump(service_account_key_data, temp_file, indent=2)

    # Export the temporary file as GOOGLE_APPLICATION_CREDENTIALS
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = os.path.abspath(temp_file.name)


@pytest.mark.asyncio
async def get_response():
    load_vertex_ai_credentials()
    prompt = '\ndef count_nums(arr):\n    """\n    Write a function count_nums which takes an array of integers and returns\n    the number of elements which has a sum of digits > 0.\n    If a number is negative, then its first signed digit will be negative:\n    e.g. -123 has signed digits -1, 2, and 3.\n    >>> count_nums([]) == 0\n    >>> count_nums([-1, 11, -11]) == 1\n    >>> count_nums([1, 1, 2]) == 3\n    """\n'
    try:
        response = await acompletion(
            model="gemini-pro",
            messages=[
                {
                    "role": "system",
                    "content": "Complete the given code with no more explanation. Remember that there is a 4-space indent before the first line of your generated code.",
                },
                {"role": "user", "content": prompt},
            ],
        )
        return response
    except litellm.UnprocessableEntityError as e:
        pass
    except Exception as e:
        pytest.fail(f"An error occurred - {str(e)}")


def test_vertex_ai():
    import random

    load_vertex_ai_credentials()
    test_models = (
        litellm.vertex_chat_models
        + litellm.vertex_code_chat_models
        + litellm.vertex_text_models
        + litellm.vertex_code_text_models
    )
    litellm.set_verbose = False
    litellm.vertex_project = "reliablekeys"

    test_models = random.sample(test_models, 1)
    # test_models += litellm.vertex_language_models  # always test gemini-pro
    test_models = litellm.vertex_language_models  # always test gemini-pro
    for model in test_models:
        try:
            if model in [
                "code-gecko",
                "code-gecko@001",
                "code-gecko@002",
                "code-gecko@latest",
                "code-bison@001",
                "text-bison@001",
            ]:
                # our account does not have access to this model
                continue
            print("making request", model)
            response = completion(
                model=model,
                messages=[{"role": "user", "content": "hi"}],
                temperature=0.7,
            )
            print("\nModel Response", response)
            print(response)

            assert type(response.choices[0].message.content) == str
            assert len(response.choices[0].message.content) > 1
        except Exception as e:
            pytest.fail(f"Error occurred: {e}")


# test_vertex_ai()


def test_vertex_ai_stream():
    load_vertex_ai_credentials()
    litellm.set_verbose = False
    litellm.vertex_project = "reliablekeys"
    import random

    test_models = (
        litellm.vertex_chat_models
        + litellm.vertex_code_chat_models
        + litellm.vertex_text_models
        + litellm.vertex_code_text_models
    )
    test_models = random.sample(test_models, 1)
    test_models += litellm.vertex_language_models  # always test gemini-pro
    for model in test_models:
        try:
            if model in [
                "code-gecko",
                "code-gecko@001",
                "code-gecko@002",
                "code-gecko@latest",
                "code-bison@001",
                "text-bison@001",
            ]:
                # our account does not have access to this model
                continue
            print("making request", model)
            response = completion(
                model=model,
                messages=[
                    {"role": "user", "content": "write 10 lines of code for saying hi"}
                ],
                stream=True,
            )
            completed_str = ""
            for chunk in response:
                print(chunk)
                content = chunk.choices[0].delta.content or ""
                print("\n content", content)
                completed_str += content
                assert type(content) == str
                # pass
            assert len(completed_str) > 4
        except Exception as e:
            pytest.fail(f"Error occurred: {e}")


# test_vertex_ai_stream()


@pytest.mark.asyncio
async def test_async_vertexai_response():
    import random

    load_vertex_ai_credentials()
    test_models = (
        litellm.vertex_chat_models
        + litellm.vertex_code_chat_models
        + litellm.vertex_text_models
        + litellm.vertex_code_text_models
    )
    test_models = random.sample(test_models, 1)
    test_models += litellm.vertex_language_models  # always test gemini-pro
    for model in test_models:
        print(f"model being tested in async call: {model}")
        if model in [
            "code-gecko",
            "code-gecko@001",
            "code-gecko@002",
            "code-gecko@latest",
            "code-bison@001",
            "text-bison@001",
        ]:
            # our account does not have access to this model
            continue
        try:
            user_message = "Hello, how are you?"
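            # acompletion is litellm's async counterpart to completion(); with
            # timeout=5 a slow model surfaces litellm.Timeout, which this test
            # deliberately treats as an acceptable outcome below.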
messages = [{"content": user_message, "role": "user"}] response = await acompletion( model=model, messages=messages, temperature=0.7, timeout=5 ) print(f"response: {response}") except litellm.Timeout as e: pass except Exception as e: pytest.fail(f"An exception occurred: {e}") # asyncio.run(test_async_vertexai_response()) @pytest.mark.asyncio async def test_async_vertexai_streaming_response(): import random load_vertex_ai_credentials() test_models = ( litellm.vertex_chat_models + litellm.vertex_code_chat_models + litellm.vertex_text_models + litellm.vertex_code_text_models ) test_models = random.sample(test_models, 1) test_models += litellm.vertex_language_models # always test gemini-pro for model in test_models: if model in [ "code-gecko", "code-gecko@001", "code-gecko@002", "code-gecko@latest", "code-bison@001", "text-bison@001", ]: # our account does not have access to this model continue try: user_message = "Hello, how are you?" messages = [{"content": user_message, "role": "user"}] response = await acompletion( model="gemini-pro", messages=messages, temperature=0.7, timeout=5, stream=True, ) print(f"response: {response}") complete_response = "" async for chunk in response: print(f"chunk: {chunk}") complete_response += chunk.choices[0].delta.content print(f"complete_response: {complete_response}") assert len(complete_response) > 0 except litellm.Timeout as e: pass except Exception as e: print(e) pytest.fail(f"An exception occurred: {e}") # asyncio.run(test_async_vertexai_streaming_response()) def test_gemini_pro_vision(): try: load_vertex_ai_credentials() litellm.set_verbose = True litellm.num_retries = 0 resp = litellm.completion( model="vertex_ai/gemini-pro-vision", messages=[ { "role": "user", "content": [ {"type": "text", "text": "Whats in this image?"}, { "type": "image_url", "image_url": { "url": "gs://cloud-samples-data/generative-ai/image/boats.jpeg" }, }, ], } ], ) print(resp) prompt_tokens = resp.usage.prompt_tokens # DO Not DELETE this ASSERT # Google counts the prompt tokens for us, we should ensure we use the tokens from the orignal response assert prompt_tokens == 263 # the gemini api returns 263 to us except Exception as e: import traceback traceback.print_exc() raise e # test_gemini_pro_vision() def gemini_pro_function_calling(): load_vertex_ai_credentials() tools = [ { "type": "function", "function": { "name": "get_current_weather", "description": "Get the current weather in a given location", "parameters": { "type": "object", "properties": { "location": { "type": "string", "description": "The city and state, e.g. San Francisco, CA", }, "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}, }, "required": ["location"], }, }, } ] messages = [{"role": "user", "content": "What's the weather like in Boston today?"}] completion = litellm.completion( model="gemini-pro", messages=messages, tools=tools, tool_choice="auto" ) print(f"completion: {completion}") # gemini_pro_function_calling() async def gemini_pro_async_function_calling(): load_vertex_ai_credentials() tools = [ { "type": "function", "function": { "name": "get_current_weather", "description": "Get the current weather in a given location", "parameters": { "type": "object", "properties": { "location": { "type": "string", "description": "The city and state, e.g. 
San Francisco, CA", }, "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}, }, "required": ["location"], }, }, } ] messages = [{"role": "user", "content": "What's the weather like in Boston today?"}] completion = await litellm.acompletion( model="gemini-pro", messages=messages, tools=tools, tool_choice="auto" ) print(f"completion: {completion}") asyncio.run(gemini_pro_async_function_calling()) # Extra gemini Vision tests for completion + stream, async, async + stream # if we run into issues with gemini, we will also add these to our ci/cd pipeline # def test_gemini_pro_vision_stream(): # try: # litellm.set_verbose = False # litellm.num_retries=0 # print("streaming response from gemini-pro-vision") # resp = litellm.completion( # model = "vertex_ai/gemini-pro-vision", # messages=[ # { # "role": "user", # "content": [ # { # "type": "text", # "text": "Whats in this image?" # }, # { # "type": "image_url", # "image_url": { # "url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" # } # } # ] # } # ], # stream=True # ) # print(resp) # for chunk in resp: # print(chunk) # except Exception as e: # import traceback # traceback.print_exc() # raise e # test_gemini_pro_vision_stream() # def test_gemini_pro_vision_async(): # try: # litellm.set_verbose = True # litellm.num_retries=0 # async def test(): # resp = await litellm.acompletion( # model = "vertex_ai/gemini-pro-vision", # messages=[ # { # "role": "user", # "content": [ # { # "type": "text", # "text": "Whats in this image?" # }, # { # "type": "image_url", # "image_url": { # "url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" # } # } # ] # } # ], # ) # print("async response gemini pro vision") # print(resp) # asyncio.run(test()) # except Exception as e: # import traceback # traceback.print_exc() # raise e # test_gemini_pro_vision_async() # def test_gemini_pro_vision_async_stream(): # try: # litellm.set_verbose = True # litellm.num_retries=0 # async def test(): # resp = await litellm.acompletion( # model = "vertex_ai/gemini-pro-vision", # messages=[ # { # "role": "user", # "content": [ # { # "type": "text", # "text": "Whats in this image?" # }, # { # "type": "image_url", # "image_url": { # "url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" # } # } # ] # } # ], # stream=True # ) # print("async response gemini pro vision") # print(resp) # for chunk in resp: # print(chunk) # asyncio.run(test()) # except Exception as e: # import traceback # traceback.print_exc() # raise e # test_gemini_pro_vision_async()