Spaces:
Running
Running
update latency to better fit use scenario
Browse files
crm-results/hf_leaderboard_latency_cost.csv
CHANGED
@@ -27,11 +27,11 @@ LLaMA 3 70B,Long,llama-3-70b-instruct,Self-host (p4d.24xlarge),243.9,67.7,High,6
|
|
27 |
LLaMA 3 70B,Short,llama-3-70b-instruct,Self-host (p4d.24xlarge),251.2,99.0,Medium,6.25
|
28 |
Mixtral 8x7B,Long,mixtral-8x7b-instruct,Self-host (p4d.24xlarge),248.5,8.22,Medium,4.90
|
29 |
Mixtral 8x7B,Short,mixtral-8x7b-instruct,Self-host (p4d.24xlarge),250.0,8.11,Low,4.54
|
30 |
-
SF-TextBase 7B,Long,CRM-TextBase-7b-22k-g5 (endpoint),Self-host (g5.48xlarge),248.5,16.80,Low,
|
31 |
-
SF-TextBase 7B,Short,CRM-TextBase-7b-22k-g5 (endpoint),Self-host (g5.48xlarge),248.7,15.50,Low,
|
32 |
-
SF-TextBase 70B,Long,TextBase-70B-8K,Self-host (p4de.24xlarge),253.7,28.17,High,
|
33 |
-
SF-TextBase 70B,Short,TextBase-70B-8K,Self-host (p4de.24xlarge),249.7,26.96,Medium,
|
34 |
SF-TextSum,Long,CRM-TSUM-7b-22k-g5 (endpoint),Self-host (g5.48xlarge),244.0,16.55,Low,3.43
|
35 |
SF-TextSum,Short,CRM-TSUM-7b-22k-g5 (endpoint),Self-host (g5.48xlarge),250.4,15.60,Low,3.38
|
36 |
XGen 2,Long,EinsteinXgen2E4DSStreaming (endpoint),Self-host (p4de.24xlarge),250.0,16.03,Medium,5.04
|
37 |
-
XGen 2,Short,EinsteinXgen2E4DSStreaming (endpoint),Self-host (p4de.24xlarge),250.0,11.40,Medium,4.66
|
|
|
27 |
LLaMA 3 70B,Short,llama-3-70b-instruct,Self-host (p4d.24xlarge),251.2,99.0,Medium,6.25
|
28 |
Mixtral 8x7B,Long,mixtral-8x7b-instruct,Self-host (p4d.24xlarge),248.5,8.22,Medium,4.90
|
29 |
Mixtral 8x7B,Short,mixtral-8x7b-instruct,Self-host (p4d.24xlarge),250.0,8.11,Low,4.54
|
30 |
+
SF-TextBase 7B,Long,CRM-TextBase-7b-22k-g5 (endpoint),Self-host (g5.48xlarge),248.5,16.80,Low,3.50
|
31 |
+
SF-TextBase 7B,Short,CRM-TextBase-7b-22k-g5 (endpoint),Self-host (g5.48xlarge),248.7,15.50,Low,3.45
|
32 |
+
SF-TextBase 70B,Long,TextBase-70B-8K,Self-host (p4de.24xlarge),253.7,28.17,High,7.76
|
33 |
+
SF-TextBase 70B,Short,TextBase-70B-8K,Self-host (p4de.24xlarge),249.7,26.96,Medium,7.48
|
34 |
SF-TextSum,Long,CRM-TSUM-7b-22k-g5 (endpoint),Self-host (g5.48xlarge),244.0,16.55,Low,3.43
|
35 |
SF-TextSum,Short,CRM-TSUM-7b-22k-g5 (endpoint),Self-host (g5.48xlarge),250.4,15.60,Low,3.38
|
36 |
XGen 2,Long,EinsteinXgen2E4DSStreaming (endpoint),Self-host (p4de.24xlarge),250.0,16.03,Medium,5.04
|
37 |
+
XGen 2,Short,EinsteinXgen2E4DSStreaming (endpoint),Self-host (p4de.24xlarge),250.0,11.40,Medium,4.66
|