Spaces:

Shyamnath
/

inferencing-llm

Sleeping

App Files Files Community

inferencing-llm / litellm /llms /fireworks_ai /cost_calculator.py

Shyamnath

Push core package and essential files

469eae6 9 days ago

raw

history blame contribute delete

2.83 kB

	"""
	For calculating cost of fireworks ai serverless inference models.
	"""

	from typing import Tuple

	from litellm.constants import (
	FIREWORKS_AI_4_B,
	FIREWORKS_AI_16_B,
	FIREWORKS_AI_56_B_MOE,
	FIREWORKS_AI_176_B_MOE,
	)
	from litellm.types.utils import Usage
	from litellm.utils import get_model_info


	# Map a model name to a Fireworks AI pricing tier using the number of
	# billion parameters in the name; only used for fireworks_ai LLMs
	def get_base_model_for_pricing(model_name: str) -> str:
	    """
	    Helper function for calculating fireworks ai pricing.

	    Maps a model name to a pricing category based on the parameter count
	    (in billions) embedded in the name. Matching is case-insensitive.

	    - Mixture-of-experts names of the form ``<experts>x<size>b`` (e.g. "8x7b")
	      are priced on ``experts * size``.
	    - Otherwise the first ``<size>b`` token (e.g. "8b", "0.5b") is used.
	      Decimal sizes are parsed in full, so "1.5b" is 1.5 and not 5.

	    Returns:
	    - str: model pricing category if a size could be extracted from the name,
	      else "fireworks-ai-default"
	    """
	    import re

	    model_name = model_name.lower()

	    # Check for MoE models in the form <number>x<number>b
	    moe_match = re.search(r"(\d+)x(\d+(?:\.\d+)?)b", model_name)
	    if moe_match:
	        total_billion = int(moe_match.group(1)) * float(moe_match.group(2))
	        if total_billion <= FIREWORKS_AI_56_B_MOE:
	            return "fireworks-ai-moe-up-to-56b"
	        elif total_billion <= FIREWORKS_AI_176_B_MOE:
	            return "fireworks-ai-56b-to-176b"
	        # A MoE total above the largest MoE tier falls through to the
	        # standard check below, which prices it on the per-expert size.

	    # Check for standard models in the form <number>b; the optional fractional
	    # part keeps "0.5b" from being read as "5b".
	    re_params_match = re.search(r"(\d+(?:\.\d+)?)b", model_name)
	    if re_params_match is not None:
	        params_billion = float(re_params_match.group(1))

	        # Determine the category based on the number of parameters
	        if params_billion <= FIREWORKS_AI_4_B:
	            return "fireworks-ai-up-to-4b"
	        if params_billion <= FIREWORKS_AI_16_B:
	            return "fireworks-ai-4.1b-to-16b"
	        return "fireworks-ai-above-16b"

	    # No parameter count found in the name: use the default pricing category
	    return "fireworks-ai-default"


	def cost_per_token(model: str, usage: Usage) -> Tuple[float, float]:
	    """
	    Compute the prompt and completion cost for a fireworks ai request.

	    Pricing is first looked up for `model` itself. If that lookup raises, the
	    model name is mapped to a pricing category with
	    `get_base_model_for_pricing` and that category's pricing is used instead.

	    Input:
	        - model: str, the model name without provider prefix
	        - usage: LiteLLM Usage block; its "prompt_tokens" and
	          "completion_tokens" entries are read

	    Returns:
	        Tuple[float, float] - prompt_cost_in_usd, completion_cost_in_usd
	    """
	    provider = "fireworks_ai"

	    ## RESOLVE PRICING: exact model if mapped, else its pricing category
	    try:
	        pricing = get_model_info(model=model, custom_llm_provider=provider)
	    except Exception:
	        pricing_category = get_base_model_for_pricing(model_name=model)
	        pricing = get_model_info(
	            model=pricing_category, custom_llm_provider=provider
	        )

	    ## INPUT COST = prompt tokens * per-token input rate
	    input_total: float = usage["prompt_tokens"] * pricing["input_cost_per_token"]

	    ## OUTPUT COST = completion tokens * per-token output rate
	    output_total = usage["completion_tokens"] * pricing["output_cost_per_token"]

	    return input_total, output_total