effibench-leaderboard / leaderboard_table_20240525.csv
DONG19's picture
update
eaf182c
raw
history blame
10.3 kB
model,Dataset,Timeout,ET,NET,Max_NET,NET>5,MU,NMU,Max_NMU,NMU>5,TMU,NTMU,Max_TMU,TMU>5,pass1
Artigenz-Coder-DS-6.7B,EffiBench,10,0.65,4.86,97.02,6.0,71.74,1.85,317.51,0.8,207.87,25.6,27332.76,6.3,38.2
claude-3-haiku_1,EffiBench,10,0.48,3.69,100.28,3.6,38.33,1.07,40.12,0.5,32.52,4.85,3524.68,3.8,44.3
claude-3-sonnet,EffiBench,10,0.7,5.19,100.31,5.7,73.52,1.97,539.82,0.4,248.98,33.08,48799.93,5.7,45.5
CodeFuse-DeepSeek-33B,EffiBench,10,0.48,3.74,96.19,3.3,80.44,2.3,259.7,1.0,305.94,48.22,28532.0,3.3,30.1
codegemma-7b-it,EffiBench,10,1.32,10.04,100.45,12.8,406.94,11.92,910.63,2.7,1772.13,327.14,59405.83,13.4,18.7
codegemma-7b,EffiBench,10,0.77,6.36,93.87,6.6,39.37,1.22,43.21,0.7,62.26,14.93,1286.75,6.6,13.7
CodeLlama-7b-hf,EffiBench,10,5.25,46.68,96.38,52.4,336.22,12.41,248.66,9.5,1617.73,800.47,17563.1,52.4,2.2
CodeLlama-7b-Instruct-hf,EffiBench,10,3.33,21.97,98.15,33.3,63.96,1.37,48.16,1.4,191.57,14.99,3564.4,33.3,7.0
CodeLlama-7b-Python-hf,EffiBench,10,2.4,17.68,97.84,22.6,114.89,2.86,207.42,6.0,449.52,56.99,13685.67,25.0,8.6
CodeLlama-13b-hf,EffiBench,10,5.8,36.46,95.26,57.7,127.59,2.44,67.15,7.7,531.94,31.1,4600.56,57.7,2.6
CodeLlama-13b-Instruct-hf,EffiBench,10,1.49,11.17,98.89,14.7,117.34,2.87,304.16,1.1,608.26,68.3,30376.45,14.7,9.7
CodeLlama-13b-Python-hf,EffiBench,10,1.77,12.33,94.58,15.7,90.72,2.22,134.59,2.6,277.59,31.52,8324.75,15.7,11.5
CodeLlama-34b-hf,EffiBench,10,2.07,16.45,98.57,19.0,61.34,1.81,44.34,3.8,201.6,35.33,3160.63,19.0,10.5
CodeLlama-34b-Instruct-hf,EffiBench,10,1.96,14.26,97.48,18.5,99.05,2.46,148.0,3.7,458.56,55.67,13278.69,19.3,13.5
CodeLlama-34b-Python-hf,EffiBench,10,1.61,11.76,97.37,20.5,41.49,1.0,1.39,0.0,88.52,9.55,162.73,16.4,12.2
CodeLlama-70b-hf,EffiBench,10,1.75,13.4,97.57,45.1,39.96,1.07,11.87,1.0,61.82,9.15,745.81,26.5,10.2
CodeLlama-70b-Instruct-hf,EffiBench,10,1.28,10.49,94.94,56.4,109.05,3.56,259.78,1.3,361.24,106.76,16667.78,34.6,7.8
deepseek-coder-1.3b-instruct,EffiBench,10,1.09,8.57,94.8,35.4,38.38,1.13,9.69,2.1,43.81,10.14,687.67,25.0,4.8
deepseek-coder-6.7b-base,EffiBench,10,1.35,10.11,97.9,46.6,40.14,1.04,14.57,0.6,50.94,6.13,1107.79,29.2,17.9
deepseek-coder-6.7b-instruct,EffiBench,10,1.24,7.83,92.04,62.5,77.28,1.45,9.53,1.4,181.46,12.18,183.82,47.2,7.2
deepseek-coder-33b-base,EffiBench,10,1.36,11.02,95.37,63.1,98.3,2.99,259.02,3.2,302.75,55.15,16479.52,53.2,25.2
gpt-3.5-turbo-0301,EffiBench,10,1.18,8.92,93.26,64.2,77.19,2.08,336.7,0.9,238.96,32.52,24636.2,56.3,44.0
gpt-3.5-turbo-0613,EffiBench,10,1.19,9.08,93.69,68.5,41.64,1.15,111.14,0.2,74.45,10.61,8009.72,63.7,48.2
gpt-3.5-turbo-1106,EffiBench,10,1.08,8.3,83.23,72.1,36.04,1.01,7.76,0.2,45.23,6.64,255.16,67.1,50.2
gpt-4,EffiBench,10,1.18,8.89,91.94,73.5,68.52,1.84,246.91,1.3,193.6,23.12,15521.36,69.3,52.5
gpt-4-turbo-preview,EffiBench,10,1.12,8.53,94.13,77.2,35.07,1.0,7.78,0.2,46.75,6.58,141.19,73.9,66.6
Magicoder-S-DS-6.7B,EffiBench,10,1.07,8.19,90.3,79.3,38.23,1.03,15.74,0.5,47.72,6.64,953.97,76.6,36.8
Mistral-7B-codealpaca-lora,EffiBench,10,3.19,27.73,94.55,77.8,27.35,1.03,2.22,0.0,72.67,36.67,142.55,72.2,3.6
octocoder,EffiBench,10,3.62,30.38,92.39,83.3,31.49,1.0,1.01,0.0,82.33,32.71,139.5,50.0,0.6
OpenCodeInterpreter-DS-1.3B,EffiBench,10,1.84,11.01,90.12,73.3,59.27,1.03,3.13,0.0,175.01,6.84,318.38,68.3,6.2
OpenCodeInterpreter-DS-6.7B,EffiBench,10,1.4,10.22,95.3,82.5,39.89,1.0,1.38,0.0,60.03,7.11,140.4,78.1,13.7
OpenCodeInterpreter-DS-33B,EffiBench,10,1.41,10.23,91.03,77.8,39.06,1.0,2.3,0.0,59.67,5.93,136.37,73.8,24.9
Phind-CodeLlama-34B-Python-v1,EffiBench,10,2.21,14.62,91.62,77.5,76.64,1.58,153.3,1.3,271.78,19.87,10583.35,74.8,15.1
Phind-CodeLlama-34B-v1,EffiBench,10,2.25,16.26,94.3,83.3,158.79,3.78,215.99,5.8,588.07,61.3,16601.89,81.2,13.8
Phind-CodeLlama-34B-v2,EffiBench,10,1.85,12.26,90.88,85.0,97.79,1.99,148.92,2.9,350.9,24.91,9532.94,82.1,20.9
speechless-starcoder2-15b,EffiBench,10,2.09,19.65,82.35,83.3,26.27,1.0,1.01,0.0,45.12,25.67,127.38,83.3,0.6
starcoder,EffiBench,10,3.3,23.31,94.42,85.4,39.96,1.08,5.5,2.1,104.5,14.65,138.13,85.4,4.8
starcoder2-3b,EffiBench,10,2.82,13.29,85.73,94.1,83.53,1.12,7.03,5.9,288.56,5.3,379.23,88.2,1.7
starcoder2-7b,EffiBench,10,2.31,21.74,86.4,100.0,29.46,1.2,4.79,0.0,66.1,38.95,276.52,100.0,1.9
starcoder2-15b,EffiBench,10,3.43,17.46,86.44,100.0,93.19,1.09,3.71,0.0,282.84,10.2,392.53,100.0,0.9
starcoderbase,EffiBench,10,3.93,23.75,96.02,90.3,196.64,4.57,152.91,12.9,764.66,78.21,8540.21,87.1,3.2
WizardCoder-15B-V1.0,EffiBench,10,3.11,22.17,92.78,87.2,168.1,3.74,139.88,7.7,821.23,86.88,9659.92,87.2,4.0
WizardCoder-Python-13B-V1.0-GPTQ,EffiBench,10,2.83,15.33,93.53,95.0,93.92,1.75,66.6,2.5,334.38,23.03,4540.18,90.0,4.0
XwinCoder-13B,EffiBench,10,2.81,23.55,93.76,91.4,53.97,1.66,83.49,1.9,177.3,37.61,4347.14,88.6,10.8
XwinCoder-34B,EffiBench,10,2.23,16.54,96.23,89.3,42.92,1.16,26.85,1.9,103.16,11.58,1352.04,88.8,21.5
Yi-34B-200K,EffiBench,10,2.84,25.52,93.66,91.5,73.99,2.79,74.99,4.3,288.27,149.95,5222.09,91.5,4.7
Yi-34B-Chat,EffiBench,10,2.37,12.72,90.75,90.9,67.5,1.0,1.02,0.0,202.05,5.59,135.56,87.9,3.3
Yi-34B,EffiBench,10,4.21,24.62,93.25,95.3,135.57,2.18,133.09,2.3,540.82,23.26,8138.38,93.0,4.3
Artigenz-Coder-DS-6.7B,HumanEval,10,2.66,11.38,54.23,91.5,64.1,1.1,3.5,0.0,123.54,15.89,240.56,100.0,96.3
CodeFuse-DeepSeek-33B,HumanEval,10,2.49,10.64,52.69,93.8,64.03,1.1,3.51,0.0,112.73,14.54,228.11,100.0,95.1
CodeLlama-7b-Instruct-hf,HumanEval,10,2.22,9.39,43.51,95.2,65.04,1.14,3.51,0.0,99.06,12.45,187.92,95.2,48.8
CodeLlama-13b-Instruct-hf,HumanEval,10,2.48,10.32,15.87,91.7,63.61,1.06,1.14,0.0,111.7,13.91,20.63,91.7,68.9
CodeLlama-34b-hf,HumanEval,10,2.57,11.27,61.03,92.5,63.94,1.11,3.51,0.0,113.78,15.19,229.79,100.0,82.3
CodeLlama-34b-Instruct-hf,HumanEval,10,2.4,10.62,52.72,97.4,64.48,1.12,3.5,0.0,109.61,14.8,218.12,100.0,76.8
CodeLlama-70b-Instruct-hf,HumanEval,10,1.46,6.48,9.57,90.9,63.52,1.07,1.09,0.0,61.2,8.54,12.03,90.9,18.3
deepseek-coder-1.3b-instruct,HumanEval,10,1.55,6.81,8.65,90.0,64.33,1.06,1.09,0.0,65.9,8.98,12.83,90.0,22.6
deepseek-coder-6.7b-instruct,HumanEval,10,1.8,7.71,31.36,100.0,64.32,1.21,3.39,0.0,79.09,10.71,129.23,100.0,21.3
Magicoder-S-DS-6.7B,HumanEval,10,2.66,11.24,61.6,91.7,63.94,1.09,3.49,0.0,119.65,15.25,234.52,100.0,95.7
Mistral-7B-codealpaca-lora,HumanEval,10,2.4,10.38,52.99,93.0,64.05,1.11,3.5,0.0,113.59,14.82,233.85,93.0,89.0
OpenCodeInterpreter-DS-1.3B,HumanEval,10,1.9,8.2,36.57,95.9,63.67,1.1,3.39,0.0,85.06,11.06,153.99,100.0,40.2
OpenCodeInterpreter-DS-6.7B,HumanEval,10,2.18,9.71,48.95,96.2,62.91,1.11,3.51,0.0,95.65,13.29,210.58,96.2,63.4
OpenCodeInterpreter-DS-33B,HumanEval,10,2.57,11.07,57.58,91.5,64.05,1.1,3.51,0.0,114.67,14.89,248.48,100.0,89.0
Phind-CodeLlama-34B-v2,HumanEval,10,2.34,10.08,47.35,95.2,63.63,1.11,3.53,0.0,102.55,13.48,212.01,100.0,81.7
speechless-starcoder2-15b,HumanEval,10,1.35,5.93,7.07,83.3,62.26,1.06,1.08,0.0,57.92,7.88,9.53,83.3,10.4
starcoder2-3b,HumanEval,10,1.72,6.59,10.08,88.9,64.02,1.06,1.09,0.0,76.79,8.79,13.0,88.9,23.2
starcoder2-7b,HumanEval,10,1.55,6.04,8.99,75.0,66.69,1.06,1.08,0.0,67.85,7.5,10.61,100.0,9.8
starcoder2-15b,HumanEval,10,1.88,6.99,6.99,100.0,70.76,1.08,1.08,0.0,88.23,8.8,8.8,100.0,4.9
WizardCoder-Python-7B-V1.0-GPTQ,HumanEval,10,2.56,11.09,18.94,100.0,63.75,1.08,1.73,0.0,114.61,15.23,22.51,100.0,64.0
WizardCoder-Python-13B-V1.0-GPTQ,HumanEval,10,1.78,7.34,10.8,93.3,62.65,1.09,1.73,0.0,77.92,9.88,14.21,93.3,31.7
WizardCoder-Python-34B-V1.0-GPTQ,HumanEval,10,2.47,10.57,17.08,96.7,63.71,1.08,1.73,0.0,109.55,14.37,21.73,96.7,68.3
XwinCoder-7B,HumanEval,10,2.7,11.63,56.15,92.0,64.03,1.1,3.5,0.0,122.6,15.95,227.69,98.0,100.0
XwinCoder-13B,HumanEval,10,2.56,11.02,54.75,91.7,64.04,1.1,3.51,0.0,114.46,14.9,250.79,95.8,93.3
XwinCoder-34B,HumanEval,10,2.61,11.23,59.91,92.0,63.91,1.1,3.51,0.0,116.84,15.2,233.53,100.0,95.7
Artigenz-Coder-DS-6.7B,MBPP,10,2.7,9.02,43.11,98.7,67.65,1.0,1.1,0.0,132.95,3.51,70.75,98.7,63.2
CodeFuse-DeepSeek-33B,MBPP,10,2.96,9.74,48.61,98.2,62.39,1.07,1.1,0.0,129.82,10.57,87.21,98.2,59.0
CodeLlama-7b-Instruct-hf,MBPP,10,1.97,8.94,38.89,100.0,61.76,1.06,1.1,0.0,81.83,11.07,53.15,100.0,13.8
CodeLlama-13b-Instruct-hf,MBPP,10,2.12,8.38,43.06,95.6,61.22,1.05,1.15,0.0,90.76,9.8,75.23,95.6,24.1
CodeLlama-34b-hf,MBPP,10,2.55,8.39,46.86,97.4,61.68,1.06,1.41,0.0,109.46,8.94,80.83,97.4,30.2
CodeLlama-34b-Instruct-hf,MBPP,10,2.76,7.81,40.61,97.0,62.16,1.06,2.61,0.0,118.78,7.91,96.77,96.3,35.4
CodeLlama-70b-Instruct-hf,MBPP,10,1.54,7.08,9.8,94.6,62.74,1.06,1.09,0.0,62.97,8.62,13.32,97.3,9.8
deepseek-coder-1.3b-instruct,MBPP,10,2.28,6.25,48.07,93.8,131.54,2.25,38.52,3.1,397.41,25.13,1240.85,96.9,8.5
deepseek-coder-6.7b-instruct,MBPP,10,1.92,9.2,49.89,100.0,62.47,1.07,1.1,0.0,86.58,12.59,93.37,100.0,4.8
Magicoder-S-DS-6.7B,MBPP,10,2.68,9.13,44.19,97.4,62.48,1.06,1.09,0.0,114.14,9.22,72.37,97.4,60.1
Mistral-7B-codealpaca-lora,MBPP,10,1.99,8.36,14.71,98.9,62.33,1.07,1.1,0.0,83.74,10.0,20.28,98.9,23.5
OpenCodeInterpreter-DS-1.3B,MBPP,10,2.16,7.3,39.51,96.6,62.74,1.07,1.98,0.0,93.47,8.0,52.95,96.6,23.3
OpenCodeInterpreter-DS-6.7B,MBPP,10,2.27,9.46,31.76,98.6,62.37,1.07,1.09,0.0,93.45,11.03,52.63,98.6,37.8
OpenCodeInterpreter-DS-33B,MBPP,10,2.71,8.88,48.27,98.5,62.66,1.06,1.41,0.0,117.53,8.91,85.46,98.5,52.6
Phind-CodeLlama-34B-v2,MBPP,10,2.64,8.66,42.58,98.5,62.4,1.06,1.1,0.0,113.38,8.67,71.13,98.5,52.6
starcoder2-3b,MBPP,10,1.31,6.28,9.77,66.7,57.97,0.99,1.15,0.0,51.05,7.52,13.58,72.2,4.8
starcoder2-7b,MBPP,10,2.34,11.72,45.21,50.0,49.07,0.84,1.07,0.0,98.86,15.22,63.59,50.0,1.6
starcoder2-15b,MBPP,10,1.59,8.7,8.7,100.0,62.45,1.08,1.08,0.0,60.54,10.25,10.25,100.0,0.3
WizardCoder-Python-7B-V1.0-GPTQ,MBPP,10,1.76,8.29,13.73,100.0,62.26,1.06,1.07,0.0,70.39,10.05,16.64,100.0,3.4
WizardCoder-Python-13B-V1.0-GPTQ,MBPP,10,2.21,4.43,49.49,93.4,62.05,1.06,1.1,0.0,98.26,4.15,91.41,93.4,16.1
WizardCoder-Python-34B-V1.0-GPTQ,MBPP,10,2.68,6.79,36.35,88.5,61.04,1.04,1.08,0.0,122.51,7.01,64.28,88.5,6.9
XwinCoder-7B,MBPP,10,2.56,12.82,13.49,100.0,62.39,1.06,1.07,0.0,109.08,16.68,17.88,100.0,0.8
XwinCoder-13B,MBPP,10,2.88,11.31,45.38,98.1,62.35,1.07,1.08,0.0,125.2,13.29,83.29,98.1,41.5
XwinCoder-34B,MBPP,10,2.73,10.38,46.05,98.6,62.29,1.07,1.1,0.0,116.42,11.76,78.19,99.1,58.2