shanchen committed on
Commit
b54a4db
•
1 Parent(s): 5ba0e61

Update data/csv/models_data.csv

Browse files
Files changed (1) hide show
  1. data/csv/models_data.csv +23 -23
data/csv/models_data.csv CHANGED
@@ -1,27 +1,27 @@
1
  T,Model,b4bqa,b4b,medmcqa_g2b,medmcqa_orig_filtered,medmcqa_diff,medqa_4options_g2b,medqa_4options_orig_filtered,medqa_diff
2
- 🔶,"<a target=""_blank"" href=""https://huggingface.co/01-ai/Yi-1.5-34B"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">01-ai-Yi-1.5-34B</a>",85.16,75.37,59.77,69.25,-9.48,59.79,64.55,-4.76
3
- 🔶,"<a target=""_blank"" href=""https://huggingface.co/aaditya/Llama3-OpenBioLLM-70B"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">aaditya-Llama3-OpenBioLLM-70B</a>",85.1,78.76,63.22,73.85,-10.63,70.9,75.4,-4.5
4
- 🔶,"<a target=""_blank"" href=""https://huggingface.co/CohereForAI/aya-23-35B"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">CohereForAI-aya-23-35B</a>",78.4,65.72,48.56,52.87,-4.31,47.88,51.06,-3.18
5
- 💬,"<a target=""_blank"" href=""https://huggingface.co/CohereForAI/c4ai-command-r-plus"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">CohereForAI-c4ai-command-r-plus</a>",84.93,72.41,49.14,61.49,-12.35,56.61,60.32,-3.71
6
- 🔶,"<a target=""_blank"" href=""https://huggingface.co/johnsnowlabs/JSL-MedLlama-3-8B-v9"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">johnsnowlabs-JSL-MedLlama-3-8B-v9</a>",75.17,74.45,64.08,77.01,-12.93,70.63,82.01,-11.38
7
- 🟢,"<a target=""_blank"" href=""https://huggingface.co/meta-llama/Llama-2-70B-hf"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">meta-llama-Llama-2-70B-hf</a>",77.01,65.63,45.98,52.3,-6.32,52.65,55.03,-2.38
8
- 🟢,"<a target=""_blank"" href=""https://huggingface.co/meta-llama/Llama-2-7b-hf"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">meta-llama-Llama-2-7b-hf</a>",36.83,36.0,33.91,34.2,-0.29,34.39,37.3,-2.91
9
- 🟢,"<a target=""_blank"" href=""https://huggingface.co/meta-llama/Meta-Llama-3-70B"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">meta-llama-Meta-Llama-3-70B</a>",90.12,82.55,66.67,78.16,-11.49,72.75,75.13,-2.38
10
- 🟢,"<a target=""_blank"" href=""https://huggingface.co/meta-llama/Meta-Llama-3-8B"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">meta-llama-Meta-Llama-3-8B</a>",82.7,71.21,52.87,59.2,-6.33,55.03,60.85,-5.82
11
- 🟢,"<a target=""_blank"" href=""https://huggingface.co/microsoft/phi-1_5"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">microsoft-phi-1_5</a>",28.01,30.24,31.61,30.46,1.15,34.92,34.66,0.26
12
- 🟢,"<a target=""_blank"" href=""https://huggingface.co/microsoft/phi-1"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">microsoft-phi-1</a>",19.64,21.18,24.14,25.86,-1.72,21.69,20.9,0.79
13
- 🟢,"<a target=""_blank"" href=""https://huggingface.co/microsoft/phi-2"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">microsoft-phi-2</a>",47.49,44.79,37.64,42.24,-4.6,41.8,43.92,-2.12
14
- 💬,"<a target=""_blank"" href=""https://huggingface.co/microsoft/Phi-3-medium-4k-instruct"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">microsoft-Phi-3-medium-4k-instruct</a>",69.98,65.94,60.34,72.41,-12.07,53.44,58.47,-5.03
15
- 🟢,"<a target=""_blank"" href=""https://huggingface.co/mistralai/Mistral-7B-v0.3"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">mistralai-Mistral-7B-v0.3</a>",70.31,61.99,48.28,56.9,-8.62,48.68,53.17,-4.49
16
- 🟢,"<a target=""_blank"" href=""https://huggingface.co/mistralai/Mixtral-8x22B-v0.1"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">mistralai-Mixtral-8x22B-v0.1</a>",87.72,78.82,61.78,70.4,-8.62,67.46,71.43,-3.97
17
- 🟢,"<a target=""_blank"" href=""https://huggingface.co/mistralai/Mixtral-8x7B-v0.1"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">mistralai-Mixtral-8x7B-v0.1</a>",86.1,74.75,55.46,64.94,-9.48,60.05,62.43,-2.38
18
- 🔶,"<a target=""_blank"" href=""https://huggingface.co/ProbeMedicalYonseiMAILab/medllama3-v20"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">ProbeMedicalYonseiMAILab-medllama3-v20</a>",71.93,74.75,65.23,80.17,-14.94,76.46,90.21,-13.75
19
- 🟢,"<a target=""_blank"" href=""https://huggingface.co/Qwen/Qwen2-72B"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">Qwen-Qwen2-72B</a>",91.02,83.72,71.55,77.87,-6.32,74.07,75.4,-1.33
20
- 🟢,"<a target=""_blank"" href=""https://huggingface.co/Qwen/Qwen2-7B"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">Qwen-Qwen2-7B</a>",80.41,70.28,55.17,63.51,-8.34,53.7,58.99,-5.29
21
- 💬,"<a target=""_blank"" href=""https://platform.openai.com/docs/models"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">GPT-4</a>",94.92,,88.79,91.67,-2.88,89.95,92.33,-2.38
22
- 💬,"<a target=""_blank"" href=""https://platform.openai.com/docs/models"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">GPT-4o</a>",96.48,,86.49,90.52,-4.03,88.36,90.21,-1.85
23
- 💬,"<a target=""_blank"" href=""https://platform.openai.com/docs/models"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">GPT-3.5 Turbo</a>",91.74,,97.7,98.28,-0.58,96.03,96.3,-0.27
24
- 💬,"<a target=""_blank"" href=""https://www.anthropic.com/api"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">Claude Opus</a>",92.19,,79.89,86.49,-6.6,83.33,85.71,-2.38
25
  💬,"<a target=""_blank"" href=""https://ai.google.dev/"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">Gemini 1.5 Pro</a>",0.0,,82.47,86.49,-4.02,87.3,88.62,-1.32
26
  💬,"<a target=""_blank"" href=""https://ai.google.dev/"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">Gemini Pro 1</a>",0.0,,73.85,68.1,5.75,73.02,70.63,2.39
27
  💬,"<a target=""_blank"" href=""https://ai.google.dev/"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">Gemini 1.5 Flash</a>",0.0,,94.83,97.41,-2.58,96.03,97.09,-1.06
 
1
  T,Model,b4bqa,b4b,medmcqa_g2b,medmcqa_orig_filtered,medmcqa_diff,medqa_4options_g2b,medqa_4options_orig_filtered,medqa_diff
2
+ 🔶,"<a target=""_blank"" href=""https://huggingface.co/01-ai/Yi-1.5-34B"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">01-ai-Yi-1.5-34B</a>",97.81,75.37,59.77,69.25,-9.48,59.79,64.55,-4.76
3
+ 🔶,"<a target=""_blank"" href=""https://huggingface.co/aaditya/Llama3-OpenBioLLM-70B"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">aaditya-Llama3-OpenBioLLM-70B</a>",97.42,78.76,63.22,73.85,-10.63,70.9,75.4,-4.5
4
+ 🔶,"<a target=""_blank"" href=""https://huggingface.co/CohereForAI/aya-23-35B"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">CohereForAI-aya-23-35B</a>",95.81,65.72,48.56,52.87,-4.31,47.88,51.06,-3.18
5
+ 💬,"<a target=""_blank"" href=""https://huggingface.co/CohereForAI/c4ai-command-r-plus"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">CohereForAI-c4ai-command-r-plus</a>",98.28,72.41,49.14,61.49,-12.35,56.61,60.32,-3.71
6
+ 🔶,"<a target=""_blank"" href=""https://huggingface.co/johnsnowlabs/JSL-MedLlama-3-8B-v9"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">johnsnowlabs-JSL-MedLlama-3-8B-v9</a>",93.41,74.45,64.08,77.01,-12.93,70.63,82.01,-11.38
7
+ 🟢,"<a target=""_blank"" href=""https://huggingface.co/meta-llama/Llama-2-70B-hf"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">meta-llama-Llama-2-70B-hf</a>",93.61,65.63,45.98,52.3,-6.32,52.65,55.03,-2.38
8
+ 🟢,"<a target=""_blank"" href=""https://huggingface.co/meta-llama/Llama-2-7b-hf"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">meta-llama-Llama-2-7b-hf</a>",68.61,36.0,33.91,34.2,-0.29,34.39,37.3,-2.91
9
+ 🟢,"<a target=""_blank"" href=""https://huggingface.co/meta-llama/Meta-Llama-3-70B"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">meta-llama-Meta-Llama-3-70B</a>",98.57,82.55,66.67,78.16,-11.49,72.75,75.13,-2.38
10
+ 🟢,"<a target=""_blank"" href=""https://huggingface.co/meta-llama/Meta-Llama-3-8B"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">meta-llama-Meta-Llama-3-8B</a>",96.18,71.21,52.87,59.2,-6.33,55.03,60.85,-5.82
11
+ 🟢,"<a target=""_blank"" href=""https://huggingface.co/microsoft/phi-1_5"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">microsoft-phi-1_5</a>",37.88,30.24,31.61,30.46,1.15,34.92,34.66,0.26
12
+ 🟢,"<a target=""_blank"" href=""https://huggingface.co/microsoft/phi-1"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">microsoft-phi-1</a>",25.76,21.18,24.14,25.86,-1.72,21.69,20.9,0.79
13
+ 🟢,"<a target=""_blank"" href=""https://huggingface.co/microsoft/phi-2"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">microsoft-phi-2</a>",71.66,44.79,37.64,42.24,-4.6,41.8,43.92,-2.12
14
+ 💬,"<a target=""_blank"" href=""https://huggingface.co/microsoft/Phi-3-medium-4k-instruct"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">microsoft-Phi-3-medium-4k-instruct</a>",87.12,65.94,60.34,72.41,-12.07,53.44,58.47,-5.03
15
+ 🟢,"<a target=""_blank"" href=""https://huggingface.co/mistralai/Mistral-7B-v0.3"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">mistralai-Mistral-7B-v0.3</a>",90.65,61.99,48.28,56.9,-8.62,48.68,53.17,-4.49
16
+ 🟢,"<a target=""_blank"" href=""https://huggingface.co/mistralai/Mixtral-8x22B-v0.1"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">mistralai-Mixtral-8x22B-v0.1</a>",98.66,78.82,61.78,70.4,-8.62,67.46,71.43,-3.97
17
+ 🟢,"<a target=""_blank"" href=""https://huggingface.co/mistralai/Mixtral-8x7B-v0.1"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">mistralai-Mixtral-8x7B-v0.1</a>",97.14,74.75,55.46,64.94,-9.48,60.05,62.43,-2.38
18
+ 🔶,"<a target=""_blank"" href=""https://huggingface.co/ProbeMedicalYonseiMAILab/medllama3-v20"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">ProbeMedicalYonseiMAILab-medllama3-v20</a>",92.46,74.75,65.23,80.17,-14.94,76.46,90.21,-13.75
19
+ 🟢,"<a target=""_blank"" href=""https://huggingface.co/Qwen/Qwen2-72B"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">Qwen-Qwen2-72B</a>",99.05,83.72,71.55,77.87,-6.32,74.07,75.4,-1.33
20
+ 🟢,"<a target=""_blank"" href=""https://huggingface.co/Qwen/Qwen2-7B"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">Qwen-Qwen2-7B</a>",93.13,70.28,55.17,63.51,-8.34,53.7,58.99,-5.29
21
+ 💬,"<a target=""_blank"" href=""https://platform.openai.com/docs/models"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">GPT-4</a>",99.71,,88.79,91.67,-2.88,89.95,92.33,-2.38
22
+ 💬,"<a target=""_blank"" href=""https://platform.openai.com/docs/models"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">GPT-4o</a>",99.61,,86.49,90.52,-4.03,88.36,90.21,-1.85
23
+ 💬,"<a target=""_blank"" href=""https://platform.openai.com/docs/models"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">GPT-3.5 Turbo</a>",99.43,,97.7,98.28,-0.58,96.03,96.3,-0.27
24
+ 💬,"<a target=""_blank"" href=""https://www.anthropic.com/api"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">Claude Opus</a>",99.14,,79.89,86.49,-6.6,83.33,85.71,-2.38
25
  💬,"<a target=""_blank"" href=""https://ai.google.dev/"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">Gemini 1.5 Pro</a>",0.0,,82.47,86.49,-4.02,87.3,88.62,-1.32
26
  💬,"<a target=""_blank"" href=""https://ai.google.dev/"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">Gemini Pro 1</a>",0.0,,73.85,68.1,5.75,73.02,70.63,2.39
27
  💬,"<a target=""_blank"" href=""https://ai.google.dev/"" style=""color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;"">Gemini 1.5 Flash</a>",0.0,,94.83,97.41,-2.58,96.03,97.09,-1.06