# magilogi
# new data
# cbf54c8
# raw
# history blame
# 1.3 kB
# Corrected and cleaned data
# Score table stored under the name `gpt4`: one float per evaluation-set key,
# each written as a fraction between 0 and 1.
# NOTE(review): presumably accuracies, with `_og` / `_g2b` being two variants
# of the same question set — confirm against the code that produced them.
gpt4 = dict(
    b4bqa=0.94921875,
    medqa_og=0.9232804232804233,
    medqa_g2b=0.8994708994708994,
    medmcqa_og=0.9166666666666666,
    medmcqa_g2b=0.8879310344827587,
)
# Score table stored under the name `gpt4o`: one float per evaluation-set key,
# each written as a fraction between 0 and 1.
# NOTE(review): presumably accuracies — confirm the metric with the data source.
gpt4o = dict(
    b4bqa=0.96484375,
    medqa_og=0.9021164021164021,
    medqa_g2b=0.8835978835978836,
    medmcqa_og=0.9051724137931034,
    medmcqa_g2b=0.8649425287356322,
)
# Score table stored under the name `gpt35turbo`: one float per evaluation-set
# key, each written as a fraction between 0 and 1.
# The medmcqa entries come before the medqa entries here; that insertion order
# is kept as-is because anything iterating over the dict would observe it.
# NOTE(review): the medqa/medmcqa values are higher than the corresponding
# gpt4 and gpt4o entries — confirm they were transcribed correctly.
gpt35turbo = dict(
    b4bqa=0.9174107142857143,
    medmcqa_og=0.9827586206896551,
    medmcqa_g2b=0.9770114942528736,
    medqa_og=0.9629629629629629,
    medqa_g2b=0.9603174603174603,
)
# Score table stored under the name `claude_opus`: one float per
# evaluation-set key, each written as a fraction between 0 and 1.
# NOTE(review): presumably accuracies — confirm the metric with the data source.
claude_opus = dict(
    b4bqa=0.921875,
    medqa_og=0.8571428571428571,
    medqa_g2b=0.8333333333333334,
    medmcqa_og=0.8649425287356322,
    medmcqa_g2b=0.7988505747126436,
)
# Score table stored under the name `gemini_15_pro`: one float per
# evaluation-set key, each written as a fraction between 0 and 1.
# This table has no 'b4bqa' key; only the medqa and medmcqa entries exist.
gemini_15_pro = dict(
    medqa_og=0.8862433862433863,
    medqa_g2b=0.873015873015873,
    medmcqa_og=0.8649425287356322,
    medmcqa_g2b=0.8247126436781609,
)
# Score table stored under the name `gemini_pro_1`: one float per
# evaluation-set key, each written as a fraction between 0 and 1.
# This table has no 'b4bqa' key; only the medqa and medmcqa entries exist.
gemini_pro_1 = dict(
    medqa_og=0.7063492063492064,
    medqa_g2b=0.7301587301587301,
    medmcqa_og=0.6810344827586207,
    medmcqa_g2b=0.7385057471264368,
)
# Score table stored under the name `gemini_15_flash`: one float per
# evaluation-set key, each written as a fraction between 0 and 1.
# This table has no 'b4bqa' key; only the medqa and medmcqa entries exist.
# NOTE(review): every value here is higher than the matching gemini_15_pro
# entry — confirm they were transcribed correctly.
gemini_15_flash = dict(
    medqa_og=0.9708994708994709,
    medqa_g2b=0.9603174603174603,
    medmcqa_og=0.9741379310344828,
    medmcqa_g2b=0.9482758620689655,
)