Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -50,27 +50,30 @@ model_table = """
|
|
50 |
|
51 |
## Rubra Benchmarks
|
52 |
|
53 |
-
| Model
|
54 |
-
|
55 |
-
| GPT-4o
|
56 |
-
| Claude-3.5 Sonnet
|
57 |
-
|
|
58 |
-
|
|
59 |
-
|
|
60 |
-
|
|
61 |
-
|
|
62 |
-
|
|
63 |
-
|
|
64 |
-
|
|
65 |
-
|
|
66 |
-
|
|
67 |
-
|
|
68 |
-
|
|
69 |
-
| Llama-3
|
70 |
-
|
|
71 |
-
|
|
72 |
-
|
|
73 |
-
|
|
|
|
|
|
|
|
74 |
"""
|
75 |
|
76 |
LICENSE = """
|
|
|
50 |
|
51 |
## Rubra Benchmarks
|
52 |
|
53 |
+
| Model | Params (in billions) | Function Calling | MMLU (5-shot) | GPQA (0-shot) | GSM-8K (8-shot, CoT) | MATH (4-shot, CoT) | MT-bench |
|
54 |
+
|------------------------------------------|----------------------|------------------|---------------|---------------|----------------------|--------------------|----------|
|
55 |
+
| GPT-4o | - | 98.57% | - | 53.6 | - | - | - |
|
56 |
+
| Claude-3.5 Sonnet | - | 98.57% | 88.7 | 59.4 | - | - | - |
|
57 |
+
| Rubra Llama-3 70B Instruct | 70.6 | 97.85% | 75.90 | 33.93 | 82.26 | 34.24 | 8.36 |
|
58 |
+
| Rubra Llama-3 8B Instruct | 8.9 | 89.28% | 64.39 | 31.70 | 68.99 | 23.76 | 8.03 |
|
59 |
+
| Rubra Qwen2-7B-Instruct | 8.55 | 85.71% | 68.88 | 30.36 | 75.82 | 28.72 | 8.08 |
|
60 |
+
| Rubra Mistral 7B Instruct v0.3 | 8.12 | 73.57% | 59.12 | 29.91 | 43.29 | 11.14 | 7.69 |
|
61 |
+
| Rubra Phi-3 Mini 128k Instruct | 4.73 | 70.00% | 67.87 | 29.69 | 79.45 | 30.80 | 8.21 |
|
62 |
+
| Rubra Mistral 7B Instruct v0.2 | 8.11 | 69.28% | 58.90 | 29.91 | 34.12 | 8.36 | 7.36 |
|
63 |
+
| meetkai/functionary-small-v2.5 | 8.03 | 57.14% | 63.92 | 32.14 | 66.11 | 20.54 | 7.09 |
|
64 |
+
| Nexusflow/NexusRaven-V2-13B | 13.0 | 53.75% | 43.23 | 28.79 | 22.67 | 7.12 | 5.36 |
|
65 |
+
| Mistral Large (closed-source) | - | 48.60% | - | - | 91.21 | 45.0 | - |
|
66 |
+
| Rubra Gemma-1.1 2B Instruct | 2.84 | 45.00% | 38.85 | 24.55 | 6.14 | 2.38 | 5.75 |
|
67 |
+
| meetkai/functionary-medium-v3.0 | 70.6 | 46.43% | 79.85 | 38.39 | 89.54 | 43.02 | 5.49 |
|
68 |
+
| gorilla-llm/gorilla-openfunctions-v2 | 6.91 | 41.25% | 49.14 | 23.66 | 48.29 | 17.54 | 5.13 |
|
69 |
+
| NousResearch/Hermes-2-Pro-Llama-3-8B | 8.03 | 41.25% | 64.16 | 31.92 | 73.92 | 21.58 | 7.83 |
|
70 |
+
| Mistral 7B Instruct v0.3 | 7.25 | 22.5% | 62.10 | 30.58 | 53.07 | 12.98 | 7.50 |
|
71 |
+
| Gemma-1.1 2B Instruct | 2.51 | - | 37.84 | 22.99 | 6.29 | 6.14 | 5.82 |
|
72 |
+
| Llama-3 8B Instruct | 8.03 | - | 65.69 | 31.47 | 77.41 | 27.58 | 8.07 |
|
73 |
+
| Llama-3 70B Instruct | 70.6 | - | 79.90 | 38.17 | 90.67 | 44.24 | 8.88 |
|
74 |
+
| Mistral 7B Instruct v0.2 | 7.24 | - | 59.27 | 27.68 | 43.21 | 10.30 | 7.50 |
|
75 |
+
| Phi-3 Mini 128k Instruct | 3.82 | - | 69.36 | 27.01 | 83.7 | 32.92 | 8.02 |
|
76 |
+
| Qwen2-7B-Instruct | 7.62 | - | 70.78 | 32.14 | 78.54 | 30.10 | 8.29 |
|
77 |
"""
|
78 |
|
79 |
LICENSE = """
|