sanjay920 committed on
Commit
b2adbbf
1 Parent(s): d2ecff3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -21
app.py CHANGED
@@ -50,27 +50,30 @@ model_table = """
50
 
51
  ## Rubra Benchmarks
52
 
53
- | Model | Params (in billions) | Function Calling | MMLU (5-shot) | GPQA (0-shot) | GSM-8K (8-shot, CoT) | MATH (4-shot, CoT) | MT-bench |
54
- |----------------------------------|----------------------|------------------|---------------|---------------|----------------------|--------------------|----------|
55
- | GPT-4o | - | 98.57% | - | 53.6 | - | - | - |
56
- | Claude-3.5 Sonnet | - | 98.57% | 88.7 | 59.4 | - | - | - |
57
- | [**Rubra Llama-3 70B Instruct**](https://huggingface.co/rubra-ai/Meta-Llama-3-70B-Instruct) | 70.6 | 97.85% | 75.90 | 33.93 | 82.26 | 34.24 | 8.36 |
58
- | [**Rubra Llama-3 8B Instruct**](https://huggingface.co/rubra-ai/Meta-Llama-3-8B-Instruct) | 8.9 | 89.28% | 64.39 | 31.70 | 68.99 | 23.76 | 8.03 |
59
- | [**Rubra Qwen2 7B Instruct**](https://huggingface.co/rubra-ai/Qwen2-7B-Instruct) | 8.55 | 85.71% | 68.88 | 30.36 | 75.82 | 28.72 | 8.08 |
60
- | [**Rubra Mistral 7B Instruct v0.3**](https://huggingface.co/rubra-ai/Mistral-7B-Instruct-v0.3) | 8.12 | 73.57% | 59.12 | 29.91 | 43.29 | 11.14 | 7.69 |
61
- | [**Rubra Phi-3 Mini 128k Instruct**](https://huggingface.co/rubra-ai/Phi-3-mini-128k-instruct) | 4.27 | 65.71% | 66.66 | 29.24 | 74.09 | 26.84 | 7.45 |
62
- | [**Rubra Mistral 7B Instruct v0.2**](https://huggingface.co/rubra-ai/Mistral-7B-Instruct-v0.2) | 8.11 | 69.28% | 58.90 | 29.91 | 34.12 | 8.36 | 7.36 |
63
- | Nexusflow/NexusRaven-V2-13B | 13.0 | 53.75%| 43.23 | 28.79 | 22.67 | 7.12 | 5.36 |
64
- | [**Rubra Gemma-1.1 2B Instruct**](https://huggingface.co/rubra-ai/gemma-1.1-2b-it) | 2.84 | 45.00% | 38.85 | 24.55 | 6.14 | 2.38 | 5.75 |
65
- | NousResearch/Hermes-2-Pro-Llama-3-8B | 8.03 | 41.25% | 64.16 | 31.92 | 73.92 | 21.58 | 7.83 |
66
- | gorilla-llm/gorilla-openfunctions-v2 | 6.91 | 41.25%| 49.14 | 23.66 | 48.29 | 17.54 | 5.13 |
67
- | Mistral 7B Instruct v0.3 | 7.25 | 22.5% | 62.10 | 30.58 | 53.07 | 12.98 | 7.50 |
68
- | Qwen2-7B-Instruct | 7.62 | - | 70.78 | 32.14 | 78.54 | 30.10 | 8.29 |
69
- | Llama-3 70B Instruct | 70.6 | - | 79.90 | 38.17 | 90.67 | 44.24 | 8.88 |
70
- | Llama-3 8B Instruct | 8.03 | - | 65.69 | 31.47 | 77.41 | 27.58 | 8.07 |
71
- | Mistral 7B Instruct v0.2 | 7.24 | - | 59.27 | 27.68 | 43.21 | 10.30 | 7.50 |
72
- | Phi-3 Mini 128k Instruct | 3.82 | - | 68.17 | 30.58 | 80.44 | 28.12 | 7.92 |
73
- | Gemma-1.1 2B Instruct | 2.51 | - | 37.84 | 22.99 | 6.29 | 6.14 | 5.82 |
 
 
 
74
  """
75
 
76
  LICENSE = """
 
50
 
51
  ## Rubra Benchmarks
52
 
53
+ | Model | Params (in billions) | Function Calling | MMLU (5-shot) | GPQA (0-shot) | GSM-8K (8-shot, CoT) | MATH (4-shot, CoT) | MT-bench |
54
+ |------------------------------------------|----------------------|------------------|---------------|---------------|----------------------|--------------------|----------|
55
+ | GPT-4o | - | 98.57% | - | 53.6 | - | - | - |
56
+ | Claude-3.5 Sonnet | - | 98.57% | 88.7 | 59.4 | - | - | - |
57
+ | Rubra Llama-3 70B Instruct | 70.6 | 97.85% | 75.90 | 33.93 | 82.26 | 34.24 | 8.36 |
58
+ | Rubra Llama-3 8B Instruct | 8.9 | 89.28% | 64.39 | 31.70 | 68.99 | 23.76 | 8.03 |
59
+ | Rubra Qwen2-7B-Instruct | 8.55 | 85.71% | 68.88 | 30.36 | 75.82 | 28.72 | 8.08 |
60
+ | Rubra Mistral 7B Instruct v0.3 | 8.12 | 73.57% | 59.12 | 29.91 | 43.29 | 11.14 | 7.69 |
61
+ | Rubra Phi-3 Mini 128k Instruct | 4.73 | 70.00% | 67.87 | 29.69 | 79.45 | 30.80 | 8.21 |
62
+ | Rubra Mistral 7B Instruct v0.2 | 8.11 | 69.28% | 58.90 | 29.91 | 34.12 | 8.36 | 7.36 |
63
+ | meetkai/functionary-small-v2.5 | 8.03 | 57.14% | 63.92 | 32.14 | 66.11 | 20.54 | 7.09 |
64
+ | Nexusflow/NexusRaven-V2-13B | 13.0 | 53.75% | 43.23 | 28.79 | 22.67 | 7.12 | 5.36 |
65
+ | Mistral Large (closed-source) | - | 48.60% | - | - | 91.21 | 45.0 | - |
66
+ | Rubra Gemma-1.1 2B Instruct | 2.84 | 45.00% | 38.85 | 24.55 | 6.14 | 2.38 | 5.75 |
67
+ | meetkai/functionary-medium-v3.0 | 70.6 | 46.43% | 79.85 | 38.39 | 89.54 | 43.02 | 5.49 |
68
+ | gorilla-llm/gorilla-openfunctions-v2 | 6.91 | 41.25% | 49.14 | 23.66 | 48.29 | 17.54 | 5.13 |
69
+ | NousResearch/Hermes-2-Pro-Llama-3-8B | 8.03 | 41.25% | 64.16 | 31.92 | 73.92 | 21.58 | 7.83 |
70
+ | Mistral 7B Instruct v0.3 | 7.25 | 22.5% | 62.10 | 30.58 | 53.07 | 12.98 | 7.50 |
71
+ | Gemma-1.1 2B Instruct | 2.51 | - | 37.84 | 22.99 | 6.29 | 6.14 | 5.82 |
72
+ | Llama-3 8B Instruct | 8.03 | - | 65.69 | 31.47 | 77.41 | 27.58 | 8.07 |
73
+ | Llama-3 70B Instruct | 70.6 | - | 79.90 | 38.17 | 90.67 | 44.24 | 8.88 |
74
+ | Mistral 7B Instruct v0.2 | 7.24 | - | 59.27 | 27.68 | 43.21 | 10.30 | 7.50 |
75
+ | Phi-3 Mini 128k Instruct | 3.82 | - | 69.36 | 27.01 | 83.7 | 32.92 | 8.02 |
76
+ | Qwen2-7B-Instruct | 7.62 | - | 70.78 | 32.14 | 78.54 | 30.10 | 8.29 |
77
  """
78
 
79
  LICENSE = """