|
|
|
|
|
# Load-test script setup: configure environment, import path, and the shared
# test input used by the driver code at the bottom of the file.

import sys, os

import traceback

from dotenv import load_dotenv

# Pull environment variables (e.g. OPENAI_API_KEY) from a local .env file.
load_dotenv()

import os, io  # NOTE(review): `os` is re-imported here — redundant but harmless.

# Make the repository root importable so the local `litellm` checkout is used
# instead of any installed copy; must run before `import litellm` below.
sys.path.insert(
    0, os.path.abspath("../..")
)

import pytest

import litellm

# Keep litellm quiet so the load-test output is readable.
litellm.set_verbose = False

# A deliberately long input (~2,500 chars) embedded on every request.
question = "embed this very long text" * 100

import concurrent.futures
import random  # NOTE(review): imported but unused in the visible code — confirm before removing.
import time
|
|
|
|
|
|
|
def make_openai_completion(question):
    """Request an embedding for *question* from the OpenAI API.

    On success, appends a one-line latency record to ``request_log.txt``;
    on any failure, appends the traceback to ``error_log.txt``. The driver
    code at the bottom of this file reads both files for its summary, so
    these writes must happen here.

    Args:
        question: Text to embed (sent as a single-item input list).

    Returns:
        The embedding response object, or ``None`` if anything failed
        (missing API key, import error, API error, ...).
    """
    try:
        start_time = time.time()
        import openai

        client = openai.OpenAI(
            api_key=os.environ["OPENAI_API_KEY"]
        )
        response = client.embeddings.create(
            model="text-embedding-ada-002",
            input=[question],
        )
        print(response)
        end_time = time.time()

        # Record the latency so the driver's log dump has data to show.
        with open("request_log.txt", "a") as log_file:
            log_file.write(
                f"Embedding request completed in {end_time - start_time:.2f} seconds\n"
            )
        return response
    except Exception:
        # Best-effort load test: log the failure and keep going rather than
        # letting one bad call abort the remaining concurrent requests.
        with open("error_log.txt", "a") as error_log_file:
            error_log_file.write(f"Error: {traceback.format_exc()}\n")
        return None
|
|
|
|
|
# ---- Driver: fan out `concurrent_calls` embedding requests and summarize ----

start_time = time.time()

# Number of simultaneous requests to fire (one worker thread per request,
# appropriate because the work is entirely I/O-bound).
concurrent_calls = 500

futures = []

with concurrent.futures.ThreadPoolExecutor(max_workers=concurrent_calls) as executor:
    for _ in range(concurrent_calls):
        futures.append(executor.submit(make_openai_completion, question))

concurrent.futures.wait(futures)

# Tally outcomes: make_openai_completion returns None on any failure.
successful_calls = sum(1 for future in futures if future.result() is not None)
failed_calls = len(futures) - successful_calls

end_time = time.time()
duration = end_time - start_time

print("Load test Summary:")
print(f"Total Requests: {concurrent_calls}")
print(f"Successful Calls: {successful_calls}")
print(f"Failed Calls: {failed_calls}")
print(f"Total Time: {duration:.2f} seconds")

# The worker writes these logs lazily (success -> request_log.txt, failure ->
# error_log.txt). If a file was never created, report that instead of
# crashing with FileNotFoundError.
for label, path in (("Request Log", "request_log.txt"), ("Error Log", "error_log.txt")):
    try:
        with open(path, "r") as log_file:
            print(f"\n{label}:\n", log_file.read())
    except FileNotFoundError:
        print(f"\n{label}: (no {path} written)")
|
|