update latency to better fit use scenario
- .gitignore +1 -0
- crm-results/hf_leaderboard_latency_cost.csv +37 -37
.gitignore CHANGED
@@ -11,3 +11,4 @@ eval-results/
 eval-queue-bk/
 eval-results-bk/
 logs/
+**/.DS_Store
crm-results/hf_leaderboard_latency_cost.csv CHANGED
@@ -1,37 +1,37 @@
-Model Name,Cost and Speed: Flavor,Version,Platform,
-AI21 Jamba-Instruct,Long,,AI21,
-AI21 Jamba-Instruct,Short,,AI21,
-Claude 3 Haiku,Long,,Bedrock,
-Claude 3 Haiku,Short,,Bedrock,
-Claude 3 Opus,Long,,Bedrock,
-Claude 3 Opus,Short,,Bedrock,
-Cohere Command R+,Long,,Bedrock,
-Cohere Command R+,Short,,Bedrock,
-Cohere Command Text,Long,,Bedrock,
-Cohere Command Text,Short,,Bedrock,
-Gemini Pro 1.5,Long,,Google,
-Gemini Pro 1.5,Short,,Google,
-Gemini Pro 1,Long,,Google,
-Gemini Pro 1,Short,,Google,
-GPT 3.5 Turbo,Long,,OpenAI,
-GPT 3.5 Turbo,Short,,OpenAI,
-GPT 4 Turbo,Long,,OpenAI,
-GPT 4 Turbo,Short,,OpenAI,
-GPT4-o,Long,,OpenAI,
-GPT4-o,Short,,OpenAI,
-Mistral 7B,Long,Mistral-7B-Instruct-v0.2,Self-host (g5.48xlarge),
-Mistral 7B,Short,Mistral-7B-Instruct-v0.2,Self-host (g5.48xlarge),
-LLaMA 3 8B,Long,Meta-Llama-3-8B-Instruct,Self-host (g5.48xlarge),
-LLaMA 3 8B,Short,Meta-Llama-3-8B-Instruct,Self-host (g5.48xlarge),
-LLaMA 3 70B,Long,llama-3-70b-instruct,Self-host (p4d.24xlarge),
-LLaMA 3 70B,Short,llama-3-70b-instruct,Self-host (p4d.24xlarge),
-Mixtral 8x7B,Long,mixtral-8x7b-instruct,Self-host (p4d.24xlarge),
-Mixtral 8x7B,Short,mixtral-8x7b-instruct,Self-host (p4d.24xlarge),
-SF-TextBase 7B,Long,CRM-TextBase-7b-22k-g5 (endpoint),Self-host (g5.48xlarge),
-SF-TextBase 7B,Short,CRM-TextBase-7b-22k-g5 (endpoint),Self-host (g5.48xlarge),
-SF-TextBase 70B,Long,TextBase-70B-8K,Self-host (p4de.24xlarge),
-SF-TextBase 70B,Short,TextBase-70B-8K,Self-host (p4de.24xlarge),
-SF-TextSum,Long,CRM-TSUM-7b-22k-g5 (endpoint),Self-host (g5.48xlarge),
-SF-TextSum,Short,CRM-TSUM-7b-22k-g5 (endpoint),Self-host (g5.48xlarge),
-XGen 2,Long,EinsteinXgen2E4DSStreaming (endpoint),Self-host (p4de.24xlarge),
-XGen 2,Short,EinsteinXgen2E4DSStreaming (endpoint),Self-host (p4de.24xlarge),
+Model Name,Cost and Speed: Flavor,Version,Platform,Mean Output Tokens,Mean Cost per 1K Requests,Cost Band,Response Time (Sec)
+AI21 Jamba-Instruct,Long,,AI21,232.9,1.6,Medium,4.0
+AI21 Jamba-Instruct,Short,,AI21,243.9,0.5,Low,4.0
+Claude 3 Haiku,Long,,Bedrock,236.9,1.0,Medium,2.8
+Claude 3 Haiku,Short,,Bedrock,245.4,0.4,Low,2.2
+Claude 3 Opus,Long,,Bedrock,242.7,61.1,High,12.2
+Claude 3 Opus,Short,,Bedrock,243.2,25.4,High,8.4
+Cohere Command R+,Long,,Bedrock,245.7,11.7,High,7.7
+Cohere Command R+,Short,,Bedrock,249.9,5.1,High,7.1
+Cohere Command Text,Long,,Bedrock,238.7,4.3,High,12.9
+Cohere Command Text,Short,,Bedrock,245.6,1.1,Medium,9.6
+Gemini Pro 1.5,Long,,Google,245.7,11.0,High,5.5
+Gemini Pro 1.5,Short,,Google,247.5,3.3,Medium,5.4
+Gemini Pro 1,Long,,Google,228.9,1.7,Medium,6.0
+Gemini Pro 1,Short,,Google,247.4,0.6,Low,4.4
+GPT 3.5 Turbo,Long,,OpenAI,249.9,1.6,Medium,4.5
+GPT 3.5 Turbo,Short,,OpenAI,238.3,0.6,Low,4.2
+GPT 4 Turbo,Long,,OpenAI,247.6,32.0,High,12.3
+GPT 4 Turbo,Short,,OpenAI,250.0,11.7,High,12.3
+GPT4-o,Long,,OpenAI,248.4,15.9,High,5.1
+GPT4-o,Short,,OpenAI,250.0,5.8,High,5.0
+Mistral 7B,Long,Mistral-7B-Instruct-v0.2,Self-host (g5.48xlarge),242.0,16.5,Low,3.74
+Mistral 7B,Short,Mistral-7B-Instruct-v0.2,Self-host (g5.48xlarge),247.0,15.5,Low,3.68
+LLaMA 3 8B,Long,Meta-Llama-3-8B-Instruct,Self-host (g5.48xlarge),251.5,7.0,Medium,3.42
+LLaMA 3 8B,Short,Meta-Llama-3-8B-Instruct,Self-host (g5.48xlarge),243.6,6.0,Low,3.33
+LLaMA 3 70B,Long,llama-3-70b-instruct,Self-host (p4d.24xlarge),243.9,67.7,High,6.78
+LLaMA 3 70B,Short,llama-3-70b-instruct,Self-host (p4d.24xlarge),251.2,99.0,Medium,6.25
+Mixtral 8x7B,Long,mixtral-8x7b-instruct,Self-host (p4d.24xlarge),248.5,8.22,Medium,4.90
+Mixtral 8x7B,Short,mixtral-8x7b-instruct,Self-host (p4d.24xlarge),250.0,8.11,Low,4.54
+SF-TextBase 7B,Long,CRM-TextBase-7b-22k-g5 (endpoint),Self-host (g5.48xlarge),248.5,16.80,Low,8.99
+SF-TextBase 7B,Short,CRM-TextBase-7b-22k-g5 (endpoint),Self-host (g5.48xlarge),248.7,15.50,Low,8.29
+SF-TextBase 70B,Long,TextBase-70B-8K,Self-host (p4de.24xlarge),253.7,28.17,High,6.52
+SF-TextBase 70B,Short,TextBase-70B-8K,Self-host (p4de.24xlarge),249.7,26.96,Medium,6.24
+SF-TextSum,Long,CRM-TSUM-7b-22k-g5 (endpoint),Self-host (g5.48xlarge),244.0,16.55,Low,3.43
+SF-TextSum,Short,CRM-TSUM-7b-22k-g5 (endpoint),Self-host (g5.48xlarge),250.4,15.60,Low,3.38
+XGen 2,Long,EinsteinXgen2E4DSStreaming (endpoint),Self-host (p4de.24xlarge),250.0,16.03,Medium,5.04
+XGen 2,Short,EinsteinXgen2E4DSStreaming (endpoint),Self-host (p4de.24xlarge),250.0,11.40,Medium,4.66
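This commit replaces the placeholder rows with measured values, adding four columns to the CSV: Mean Output Tokens, Mean Cost per 1K Requests, Cost Band, and Response Time (Sec). A minimal sketch of how the updated file might be read and ranked, assuming pandas; only the file path and column names come from the diff above, the rest is illustrative and not necessarily how the Space itself consumes the data:

```python
# Sketch only: load the updated leaderboard CSV and list the fastest configurations.
import pandas as pd

df = pd.read_csv("crm-results/hf_leaderboard_latency_cost.csv")

# Sort by the new latency column; keep cost context alongside it.
fastest = df.sort_values("Response Time (Sec)")[
    ["Model Name", "Cost and Speed: Flavor", "Cost Band", "Response Time (Sec)"]
]
print(fastest.head(10).to_string(index=False))
```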