#### What this does #### # On success + failure, log events to aispend.io import dotenv, os import requests dotenv.load_dotenv() # Loading env variables using dotenv import traceback import datetime model_cost = { "gpt-3.5-turbo": { "max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002, }, "gpt-35-turbo": { "max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002, }, # azure model name "gpt-3.5-turbo-0613": { "max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002, }, "gpt-3.5-turbo-0301": { "max_tokens": 4000, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002, }, "gpt-3.5-turbo-16k": { "max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004, }, "gpt-35-turbo-16k": { "max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004, }, # azure model name "gpt-3.5-turbo-16k-0613": { "max_tokens": 16000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004, }, "gpt-4": { "max_tokens": 8000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.00006, }, "gpt-4-0613": { "max_tokens": 8000, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.00006, }, "gpt-4-32k": { "max_tokens": 8000, "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012, }, "claude-instant-1": { "max_tokens": 100000, "input_cost_per_token": 0.00000163, "output_cost_per_token": 0.00000551, }, "claude-2": { "max_tokens": 100000, "input_cost_per_token": 0.00001102, "output_cost_per_token": 0.00003268, }, "text-bison-001": { "max_tokens": 8192, "input_cost_per_token": 0.000004, "output_cost_per_token": 0.000004, }, "chat-bison-001": { "max_tokens": 4096, "input_cost_per_token": 0.000002, "output_cost_per_token": 0.000002, }, "command-nightly": { "max_tokens": 4096, "input_cost_per_token": 0.000015, "output_cost_per_token": 0.000015, }, } class BerriSpendLogger: # Class variables or attributes def __init__(self): # Instance variables self.account_id = os.getenv("BERRISPEND_ACCOUNT_ID") def price_calculator(self, model, response_obj, start_time, end_time): # try and find if the model is in the model_cost map # else default to the average of the costs prompt_tokens_cost_usd_dollar = 0 completion_tokens_cost_usd_dollar = 0 if model in model_cost: prompt_tokens_cost_usd_dollar = ( model_cost[model]["input_cost_per_token"] * response_obj["usage"]["prompt_tokens"] ) completion_tokens_cost_usd_dollar = ( model_cost[model]["output_cost_per_token"] * response_obj["usage"]["completion_tokens"] ) elif "replicate" in model: # replicate models are charged based on time # llama 2 runs on an nvidia a100 which costs $0.0032 per second - https://replicate.com/replicate/llama-2-70b-chat model_run_time = end_time - start_time # assuming time in seconds cost_usd_dollar = model_run_time * 0.0032 prompt_tokens_cost_usd_dollar = cost_usd_dollar / 2 completion_tokens_cost_usd_dollar = cost_usd_dollar / 2 else: # calculate average input cost input_cost_sum = 0 output_cost_sum = 0 for model in model_cost: input_cost_sum += model_cost[model]["input_cost_per_token"] output_cost_sum += model_cost[model]["output_cost_per_token"] avg_input_cost = input_cost_sum / len(model_cost.keys()) avg_output_cost = output_cost_sum / len(model_cost.keys()) prompt_tokens_cost_usd_dollar = ( model_cost[model]["input_cost_per_token"] * response_obj["usage"]["prompt_tokens"] ) completion_tokens_cost_usd_dollar = ( model_cost[model]["output_cost_per_token"] * response_obj["usage"]["completion_tokens"] ) return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar def log_event( self, model, messages, response_obj, start_time, end_time, print_verbose ): # Method definition try: print_verbose( f"BerriSpend Logging - Enters logging function for model {model}" ) url = f"https://berrispend.berri.ai/spend" headers = {"Content-Type": "application/json"} ( prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar, ) = self.price_calculator(model, response_obj, start_time, end_time) total_cost = ( prompt_tokens_cost_usd_dollar + completion_tokens_cost_usd_dollar ) response_time = (end_time - start_time).total_seconds() if "response" in response_obj: data = [ { "response_time": response_time, "model_id": response_obj["model"], "total_cost": total_cost, "messages": messages, "response": response_obj["choices"][0]["message"]["content"], "account_id": self.account_id, } ] elif "error" in response_obj: data = [ { "response_time": response_time, "model_id": response_obj["model"], "total_cost": total_cost, "messages": messages, "error": response_obj["error"], "account_id": self.account_id, } ] print_verbose(f"BerriSpend Logging - final data object: {data}") response = requests.post(url, headers=headers, json=data) except: # traceback.print_exc() print_verbose(f"BerriSpend Logging Error - {traceback.format_exc()}") pass