File size: 3,116 Bytes
c5a9402 d7c13d5 3c5cb2c d7c13d5 3c5cb2c d7c13d5 3c5cb2c d7c13d5 3c5cb2c d7c13d5 3c5cb2c 6e42a29 c5a9402 3c5cb2c 6e42a29 c5a9402 3c5cb2c 6e42a29 c5a9402 3c5cb2c 6e42a29 af413b3 c5a9402 88aceaf c5a9402 5931289 c5a9402 6e42a29 88aceaf 9e1a125 88aceaf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 |
# Google AI Studio Gemini free-tier rate limits (https://ai.google.dev/pricing#1_5pro):
# gemini-1.5-pro: 2 RPM, 32,000 TPM, 50 RPD
# gemini-1.5-flash: 15 RPM, 1,000,000 TPM, 1,500 RPD
# Deployments exposed by the proxy.
# Every backend appears twice: once under a unique alias (…-northflank, …-k1,
# …-k2) so it can be addressed directly, and once under the shared model name.
# NOTE(review): entries sharing a model_name are presumably load-balanced as one
# group by the router — confirm against the LiteLLM routing docs.
model_list:
  # northflank (OpenAI-compatible endpoint)
  - model_name: gpt-4o-mini-northflank
    litellm_params:
      model: openai/gpt-4o-mini
      api_base: https://yc--northflank-duckapi--b69bn8cbbs7k.code.run/v1
      api_key: os.environ/NF_API_KEY
  - model_name: gpt-4o-mini
    litellm_params:
      model: openai/gpt-4o-mini
      api_base: https://yc--northflank-duckapi--b69bn8cbbs7k.code.run/v1
      api_key: os.environ/NF_API_KEY
  - model_name: gpt-3.5-turbo-northflank
    litellm_params:
      model: openai/gpt-3.5-turbo
      api_base: https://yc--northflank-duckapi--b69bn8cbbs7k.code.run/v1
      api_key: os.environ/NF_API_KEY
  - model_name: gpt-3.5-turbo
    litellm_params:
      model: openai/gpt-3.5-turbo
      api_base: https://yc--northflank-duckapi--b69bn8cbbs7k.code.run/v1
      api_key: os.environ/NF_API_KEY

  # gemini, key 1 (GEMINI_API_KEY1)
  - model_name: gemini-1.5-pro-k1
    litellm_params:
      model: gemini/gemini-1.5-pro
      api_key: os.environ/GEMINI_API_KEY1
  - model_name: gemini-1.5-pro
    litellm_params:
      model: gemini/gemini-1.5-pro
      api_key: os.environ/GEMINI_API_KEY1
      rpm: 4  # 2 * # of keys
      tpm: 64000  # 32,000 * # of keys
  - model_name: gemini-1.5-flash-k1
    litellm_params:
      model: gemini/gemini-1.5-flash
      api_key: os.environ/GEMINI_API_KEY1
  - model_name: gemini-1.5-flash
    litellm_params:
      model: gemini/gemini-1.5-flash
      api_key: os.environ/GEMINI_API_KEY1
      rpm: 30  # 15 * # of keys
      tpm: 2000000  # 1,000,000 * # of keys

  # gemini, key 2 (GEMINI_API_KEY2)
  - model_name: gemini-1.5-pro-k2
    litellm_params:
      model: gemini/gemini-1.5-pro
      api_key: os.environ/GEMINI_API_KEY2
  - model_name: gemini-1.5-pro
    litellm_params:
      model: gemini/gemini-1.5-pro
      api_key: os.environ/GEMINI_API_KEY2
      rpm: 4  # 2 * # of keys
      tpm: 64000  # 32,000 * # of keys
  - model_name: gemini-1.5-flash-k2
    litellm_params:
      model: gemini/gemini-1.5-flash
      api_key: os.environ/GEMINI_API_KEY2
  - model_name: gemini-1.5-flash
    litellm_params:
      model: gemini/gemini-1.5-flash
      api_key: os.environ/GEMINI_API_KEY2
      rpm: 30  # 15 * # of keys
      tpm: 2000000  # 1,000,000 * # of keys
# Library-wide LiteLLM settings applied by the proxy.
litellm_settings:
  # Networking settings
  # (int) LLM request timeout in seconds; a Timeout error is raised if a call
  # takes longer than this. Sets litellm.request_timeout.
  request_timeout: 20
  num_retries: 3
  # When gemini-1.5-pro fails, retry the request on gemini-1.5-flash.
  fallbacks:
    - gemini-1.5-pro: [gemini-1.5-flash]
  # Cool down a model once it fails more than this many calls in a minute.
  allowed_fails: 3
  # How long (seconds) to cool down a model when fails/min > allowed_fails.
  cooldown_time: 30
  drop_params: true
# Proxy-server settings.
general_settings:
  # [OPTIONAL] Only use this if you require all calls to contain this key
  # (Authorization: Bearer sk-1234). The value is read from the MASTER_KEY
  # environment variable, e.g. sk-1234.
  master_key: os.environ/MASTER_KEY
# Router behaviour: cross-group fallbacks, aliases and load-balancing strategy.
router_settings:
  # If a northflank-backed model group fails, fall back to gemini-1.5-flash.
  fallbacks:
    - gpt-4o-mini: [gemini-1.5-flash]
    - gpt-3.5-turbo: [gemini-1.5-flash]
  # Requests for "gpt-4" are served by the gemini-1.5-pro model group.
  model_group_alias:
    gpt-4: gemini-1.5-pro
  routing_strategy: simple-shuffle