rrr-leaderboard / test_all.csv
pasha
Tunes
eae9362
raw
history blame
13.9 kB
model,model_name,model_size,model_quant,dataset_name,dataset_split,total_tests,valid_responses,correct_responses,accuracy,avg_response_time,avg_token_count
llama3.1:8b-instruct-q4_K_M,llama3.1,8b,q4_k_m,evilfreelancer/rrr-benchmark,generic,130,130,123,0.9462,0.421,53.3
llama3.1:8b-instruct-q8_0,llama3.1,8b,q8_0,evilfreelancer/rrr-benchmark,generic,130,130,122,0.9385,0.619,54.5
llama3.1:8b-instruct-fp16,llama3.1,8b,fp16,evilfreelancer/rrr-benchmark,generic,130,130,126,0.9692,1.039,55.7
llama3.2:1b-instruct-q4_K_M,llama3.2,1b,q4_k_m,evilfreelancer/rrr-benchmark,generic,130,38,12,0.0923,0.451,32.9
llama3.2:1b-instruct-q8_0,llama3.2,1b,q8_0,evilfreelancer/rrr-benchmark,generic,130,20,14,0.1077,0.441,30.4
llama3.2:1b-instruct-fp16,llama3.2,1b,fp16,evilfreelancer/rrr-benchmark,generic,130,24,10,0.0769,0.492,28.4
llama3.2:3b-instruct-q4_K_M,llama3.2,3b,q4_k_m,evilfreelancer/rrr-benchmark,generic,130,130,121,0.9308,0.275,58.5
llama3.2:3b-instruct-q8_0,llama3.2,3b,q8_0,evilfreelancer/rrr-benchmark,generic,130,130,122,0.9385,0.376,58.8
llama3.2:3b-instruct-fp16,llama3.2,3b,fp16,evilfreelancer/rrr-benchmark,generic,130,130,127,0.9769,0.566,60.1
qwen3:8b-q4_K_M,qwen3,8b,q4_k_m,evilfreelancer/rrr-benchmark,generic,130,130,126,0.9692,0.547,62.6
qwen3:8b-q8_0,qwen3,8b,q8_0,evilfreelancer/rrr-benchmark,generic,130,128,126,0.9692,0.818,67.8
qwen3:8b-fp16,qwen3,8b,fp16,evilfreelancer/rrr-benchmark,generic,130,130,129,0.9923,2.279,64.8
deepseek-r1:7b-qwen-distill-q4_K_M,deepseek-r1,7b,q4_k_m,evilfreelancer/rrr-benchmark,generic,130,0,0,0.0,0,0
deepseek-r1:7b-qwen-distill-q8_0,deepseek-r1,7b,q8_0,evilfreelancer/rrr-benchmark,generic,130,0,0,0.0,0,0
deepseek-r1:7b-qwen-distill-fp16,deepseek-r1,7b,fp16,evilfreelancer/rrr-benchmark,generic,130,0,0,0.0,0,0
deepseek-r1:8b-llama-distill-q4_K_M,deepseek-r1,8b,q4_k_m,evilfreelancer/rrr-benchmark,generic,130,130,120,0.9231,0.539,67.6
deepseek-r1:8b-llama-distill-q8_0,deepseek-r1,8b,q8_0,evilfreelancer/rrr-benchmark,generic,130,130,124,0.9538,0.73,62.9
deepseek-r1:8b-llama-distill-fp16,deepseek-r1,8b,fp16,evilfreelancer/rrr-benchmark,generic,130,130,123,0.9462,1.239,63.9
deepseek-v2:16b-lite-chat-q4_K_M,deepseek-v2,16b,q4_k_m,evilfreelancer/rrr-benchmark,generic,130,130,111,0.8538,1.24,108.8
deepseek-v2:16b-lite-chat-q8_0,deepseek-v2,16b,q8_0,evilfreelancer/rrr-benchmark,generic,130,130,113,0.8692,2.284,124.3
deepseek-v2:16b-lite-chat-fp16,deepseek-v2,16b,fp16,evilfreelancer/rrr-benchmark,generic,130,130,111,0.8538,9.833,129.5
hf.co/t-tech/T-pro-it-1.0-Q4_K_M-GGUF,T-pro-it-1.0,32b,q4_k_m,evilfreelancer/rrr-benchmark,generic,130,130,128,0.9846,1.418,47.7
hf.co/t-tech/T-pro-it-1.0-Q8_0-GGUF,T-pro-it-1.0,32b,q8_0,evilfreelancer/rrr-benchmark,generic,130,130,128,0.9846,19.59,48.4
hf.co/NikolayKozloff/T-pro-it-1.0-Q2_K-GGUF,T-pro-it-1.0,32b,q2_k,evilfreelancer/rrr-benchmark,generic,130,130,128,0.9846,1.157,56.1
hf.co/mradermacher/T-lite-it-1.0-GGUF:Q4_K_M,T-lite-it-1.0,7b,q4_k_m,evilfreelancer/rrr-benchmark,generic,130,0,0,0.0,0,0
hf.co/ai-sage/GigaChat-20B-A3B-instruct-v1.5-GGUF:Q4_K_M,GigaChat-v1.5,20b,q4_k_m,evilfreelancer/rrr-benchmark,generic,130,0,0,0.0,0,0
llama3.1:8b-instruct-q4_K_M,llama3.1,8b,q4_k_m,evilfreelancer/rrr-benchmark,routes_3,100,99,93,0.93,0.42,53.2
llama3.1:8b-instruct-q8_0,llama3.1,8b,q8_0,evilfreelancer/rrr-benchmark,routes_3,100,100,92,0.92,0.612,53.3
llama3.1:8b-instruct-fp16,llama3.1,8b,fp16,evilfreelancer/rrr-benchmark,routes_3,100,100,90,0.9,1.023,52.8
llama3.2:1b-instruct-q4_K_M,llama3.2,1b,q4_k_m,evilfreelancer/rrr-benchmark,routes_3,100,25,17,0.17,0.443,36.5
llama3.2:1b-instruct-q8_0,llama3.2,1b,q8_0,evilfreelancer/rrr-benchmark,routes_3,100,7,6,0.06,1.861,163.0
llama3.2:1b-instruct-fp16,llama3.2,1b,fp16,evilfreelancer/rrr-benchmark,routes_3,100,12,11,0.11,0.476,34.5
llama3.2:3b-instruct-q4_K_M,llama3.2,3b,q4_k_m,evilfreelancer/rrr-benchmark,routes_3,100,99,89,0.89,0.299,57.8
llama3.2:3b-instruct-q8_0,llama3.2,3b,q8_0,evilfreelancer/rrr-benchmark,routes_3,100,100,89,0.89,0.412,59.9
llama3.2:3b-instruct-fp16,llama3.2,3b,fp16,evilfreelancer/rrr-benchmark,routes_3,100,100,92,0.92,0.631,62.9
qwen3:8b-q4_K_M,qwen3,8b,q4_k_m,evilfreelancer/rrr-benchmark,routes_3,100,100,93,0.93,0.577,61.8
qwen3:8b-q8_0,qwen3,8b,q8_0,evilfreelancer/rrr-benchmark,routes_3,100,100,94,0.94,0.871,67.0
qwen3:8b-fp16,qwen3,8b,fp16,evilfreelancer/rrr-benchmark,routes_3,100,100,93,0.93,1.426,67.6
deepseek-r1:7b-qwen-distill-q4_K_M,deepseek-r1,7b,q4_k_m,evilfreelancer/rrr-benchmark,routes_3,100,0,0,0.0,0,0
deepseek-r1:7b-qwen-distill-q8_0,deepseek-r1,7b,q8_0,evilfreelancer/rrr-benchmark,routes_3,100,0,0,0.0,0,0
deepseek-r1:7b-qwen-distill-fp16,deepseek-r1,7b,fp16,evilfreelancer/rrr-benchmark,routes_3,100,0,0,0.0,0,0
deepseek-r1:8b-llama-distill-q4_K_M,deepseek-r1,8b,q4_k_m,evilfreelancer/rrr-benchmark,routes_3,100,100,90,0.9,0.578,67.5
deepseek-r1:8b-llama-distill-q8_0,deepseek-r1,8b,q8_0,evilfreelancer/rrr-benchmark,routes_3,100,100,90,0.9,0.851,68.6
deepseek-r1:8b-llama-distill-fp16,deepseek-r1,8b,fp16,evilfreelancer/rrr-benchmark,routes_3,100,100,92,0.92,1.285,62.4
deepseek-v2:16b-lite-chat-q4_K_M,deepseek-v2,16b,q4_k_m,evilfreelancer/rrr-benchmark,routes_3,100,100,78,0.78,0.647,97.2
deepseek-v2:16b-lite-chat-q8_0,deepseek-v2,16b,q8_0,evilfreelancer/rrr-benchmark,routes_3,100,100,88,0.88,0.896,114.1
deepseek-v2:16b-lite-chat-fp16,deepseek-v2,16b,fp16,evilfreelancer/rrr-benchmark,routes_3,100,100,85,0.85,8.552,110.9
hf.co/NikolayKozloff/T-pro-it-1.0-Q2_K-GGUF,T-pro-it-1.0,32b,q2_k,evilfreelancer/rrr-benchmark,routes_3,100,100,93,0.93,1.094,52.1
hf.co/t-tech/T-pro-it-1.0-Q4_K_M-GGUF,T-pro-it-1.0,32b,q4_k_m,evilfreelancer/rrr-benchmark,routes_3,100,100,93,0.93,1.44,48.5
hf.co/t-tech/T-pro-it-1.0-Q8_0-GGUF,T-pro-it-1.0,32b,q8_0,evilfreelancer/rrr-benchmark,routes_3,100,100,93,0.93,19.95,48.4
hf.co/mradermacher/T-lite-it-1.0-GGUF:Q4_K_M,T-lite-it-1.0,7b,q4_k_m,evilfreelancer/rrr-benchmark,routes_3,100,0,0,0.0,0,0
hf.co/ai-sage/GigaChat-20B-A3B-instruct-v1.5-GGUF:Q4_K_M,GigaChat-v1.5,20b,q4_k_m,evilfreelancer/rrr-benchmark,routes_3,100,0,0,0.0,0,0
deepseek-r1:8b-llama-distill-fp16,deepseek-r1,8b,fp16,evilfreelancer/rrr-benchmark,routes_5,100,100,94,0.94,1.363,66.5
deepseek-r1:8b-llama-distill-q4_K_M,deepseek-r1,8b,q4_k_m,evilfreelancer/rrr-benchmark,routes_5,100,100,91,0.91,0.591,68.3
deepseek-r1:8b-llama-distill-q8_0,deepseek-r1,8b,q8_0,evilfreelancer/rrr-benchmark,routes_5,100,100,92,0.92,0.842,67.4
deepseek-v2:16b-lite-chat-fp16,deepseek-v2,16b,fp16,evilfreelancer/rrr-benchmark,routes_5,100,100,86,0.86,9.394,119.6
deepseek-v2:16b-lite-chat-q4_K_M,deepseek-v2,16b,q4_k_m,evilfreelancer/rrr-benchmark,routes_5,100,100,80,0.8,0.68,106.5
deepseek-v2:16b-lite-chat-q8_0,deepseek-v2,16b,q8_0,evilfreelancer/rrr-benchmark,routes_5,100,100,87,0.87,0.955,123.7
hf.co/NikolayKozloff/T-pro-it-1.0-Q2_K-GGUF,T-pro-it-1.0,32b,q2_k,evilfreelancer/rrr-benchmark,routes_5,100,100,96,0.96,1.1,50.5
hf.co/t-tech/T-pro-it-1.0-Q4_K_M-GGUF,T-pro-it-1.0,32b,q4_k_m,evilfreelancer/rrr-benchmark,routes_5,100,100,97,0.97,1.428,47.4
hf.co/t-tech/T-pro-it-1.0-Q8_0-GGUF,T-pro-it-1.0,32b,q8_0,evilfreelancer/rrr-benchmark,routes_5,100,100,98,0.98,20.004,48.7
llama3.1:8b-instruct-fp16,llama3.1,8b,fp16,evilfreelancer/rrr-benchmark,routes_5,100,100,95,0.95,1.145,53.8
llama3.1:8b-instruct-q4_K_M,llama3.1,8b,q4_k_m,evilfreelancer/rrr-benchmark,routes_5,100,100,93,0.93,0.478,56.0
llama3.1:8b-instruct-q8_0,llama3.1,8b,q8_0,evilfreelancer/rrr-benchmark,routes_5,100,100,95,0.95,0.681,54.8
llama3.2:1b-instruct-fp16,llama3.2,1b,fp16,evilfreelancer/rrr-benchmark,routes_5,100,16,13,0.13,0.488,33.1
llama3.2:1b-instruct-q4_K_M,llama3.2,1b,q4_k_m,evilfreelancer/rrr-benchmark,routes_5,100,30,18,0.18,0.355,24.9
llama3.2:1b-instruct-q8_0,llama3.2,1b,q8_0,evilfreelancer/rrr-benchmark,routes_5,100,14,8,0.08,0.448,32.2
llama3.2:3b-instruct-fp16,llama3.2,3b,fp16,evilfreelancer/rrr-benchmark,routes_5,100,100,92,0.92,0.622,61.4
llama3.2:3b-instruct-q4_K_M,llama3.2,3b,q4_k_m,evilfreelancer/rrr-benchmark,routes_5,100,100,90,0.9,0.306,59.5
llama3.2:3b-instruct-q8_0,llama3.2,3b,q8_0,evilfreelancer/rrr-benchmark,routes_5,100,100,95,0.95,0.375,53.8
qwen3:8b-fp16,qwen3,8b,fp16,evilfreelancer/rrr-benchmark,routes_5,100,100,97,0.97,1.45,67.9
qwen3:8b-q4_K_M,qwen3,8b,q4_k_m,evilfreelancer/rrr-benchmark,routes_5,100,100,97,0.97,0.585,63.2
qwen3:8b-q8_0,qwen3,8b,q8_0,evilfreelancer/rrr-benchmark,routes_5,100,100,96,0.96,0.852,64.7
deepseek-r1:8b-llama-distill-fp16,deepseek-r1,8b,fp16,evilfreelancer/rrr-benchmark,routes_7,100,100,90,0.9,1.526,75.2
deepseek-r1:8b-llama-distill-q4_K_M,deepseek-r1,8b,q4_k_m,evilfreelancer/rrr-benchmark,routes_7,100,100,92,0.92,0.626,73.8
deepseek-r1:8b-llama-distill-q8_0,deepseek-r1,8b,q8_0,evilfreelancer/rrr-benchmark,routes_7,100,100,90,0.9,0.964,78.8
deepseek-v2:16b-lite-chat-fp16,deepseek-v2,16b,fp16,evilfreelancer/rrr-benchmark,routes_7,100,100,81,0.81,10.05,128.2
deepseek-v2:16b-lite-chat-q4_K_M,deepseek-v2,16b,q4_k_m,evilfreelancer/rrr-benchmark,routes_7,100,100,75,0.75,0.744,118.5
deepseek-v2:16b-lite-chat-q8_0,deepseek-v2,16b,q8_0,evilfreelancer/rrr-benchmark,routes_7,100,100,83,0.83,1.026,133.1
hf.co/NikolayKozloff/T-pro-it-1.0-Q2_K-GGUF,T-pro-it-1.0,32b,q2_k,evilfreelancer/rrr-benchmark,routes_7,100,100,92,0.92,1.152,53.9
hf.co/t-tech/T-pro-it-1.0-Q4_K_M-GGUF,T-pro-it-1.0,32b,q4_k_m,evilfreelancer/rrr-benchmark,routes_7,100,100,94,0.94,1.469,48.7
hf.co/t-tech/T-pro-it-1.0-Q8_0-GGUF,T-pro-it-1.0,32b,q8_0,evilfreelancer/rrr-benchmark,routes_7,100,100,94,0.94,20.257,49.4
llama3.1:8b-instruct-fp16,llama3.1,8b,fp16,evilfreelancer/rrr-benchmark,routes_7,100,100,91,0.91,1.199,58.8
llama3.1:8b-instruct-q4_K_M,llama3.1,8b,q4_k_m,evilfreelancer/rrr-benchmark,routes_7,100,99,89,0.89,0.465,55.4
llama3.1:8b-instruct-q8_0,llama3.1,8b,q8_0,evilfreelancer/rrr-benchmark,routes_7,100,100,91,0.91,0.727,59.3
llama3.2:1b-instruct-fp16,llama3.2,1b,fp16,evilfreelancer/rrr-benchmark,routes_7,100,14,8,0.08,0.786,32.3
llama3.2:1b-instruct-q4_K_M,llama3.2,1b,q4_k_m,evilfreelancer/rrr-benchmark,routes_7,100,31,14,0.14,0.548,38.6
llama3.2:1b-instruct-q8_0,llama3.2,1b,q8_0,evilfreelancer/rrr-benchmark,routes_7,100,11,8,0.08,0.586,44.0
llama3.2:3b-instruct-fp16,llama3.2,3b,fp16,evilfreelancer/rrr-benchmark,routes_7,100,100,87,0.87,0.577,64.1
llama3.2:3b-instruct-q4_K_M,llama3.2,3b,q4_k_m,evilfreelancer/rrr-benchmark,routes_7,100,100,89,0.89,0.29,56.0
llama3.2:3b-instruct-q8_0,llama3.2,3b,q8_0,evilfreelancer/rrr-benchmark,routes_7,100,100,91,0.91,0.405,59.8
qwen3:8b-fp16,qwen3,8b,fp16,evilfreelancer/rrr-benchmark,routes_7,100,100,89,0.89,1.53,74.5
qwen3:8b-q4_K_M,qwen3,8b,q4_k_m,evilfreelancer/rrr-benchmark,routes_7,100,100,90,0.9,0.649,71.5
qwen3:8b-q8_0,qwen3,8b,q8_0,evilfreelancer/rrr-benchmark,routes_7,100,100,91,0.91,0.904,69.5
deepseek-r1:8b-llama-distill-fp16,deepseek-r1,8b,fp16,evilfreelancer/rrr-benchmark,routes_9,100,100,92,0.92,1.435,69.5
deepseek-r1:8b-llama-distill-q4_K_M,deepseek-r1,8b,q4_k_m,evilfreelancer/rrr-benchmark,routes_9,100,100,90,0.9,0.622,72.1
deepseek-r1:8b-llama-distill-q8_0,deepseek-r1,8b,q8_0,evilfreelancer/rrr-benchmark,routes_9,100,100,95,0.95,0.929,75.0
deepseek-v2:16b-lite-chat-fp16,deepseek-v2,16b,fp16,evilfreelancer/rrr-benchmark,routes_9,100,100,81,0.81,10.405,131.0
deepseek-v2:16b-lite-chat-q4_K_M,deepseek-v2,16b,q4_k_m,evilfreelancer/rrr-benchmark,routes_9,100,100,75,0.75,0.776,122.1
deepseek-v2:16b-lite-chat-q8_0,deepseek-v2,16b,q8_0,evilfreelancer/rrr-benchmark,routes_9,100,100,82,0.82,1.005,130.2
hf.co/NikolayKozloff/T-pro-it-1.0-Q2_K-GGUF,T-pro-it-1.0,32b,q2_k,evilfreelancer/rrr-benchmark,routes_9,100,100,95,0.95,1.168,52.9
hf.co/t-tech/T-pro-it-1.0-Q4_K_M-GGUF,T-pro-it-1.0,32b,q4_k_m,evilfreelancer/rrr-benchmark,routes_9,100,100,95,0.95,1.498,50.0
hf.co/t-tech/T-pro-it-1.0-Q8_0-GGUF,T-pro-it-1.0,32b,q8_0,evilfreelancer/rrr-benchmark,routes_9,100,100,94,0.94,20.849,51.0
llama3.1:8b-instruct-fp16,llama3.1,8b,fp16,evilfreelancer/rrr-benchmark,routes_9,100,100,92,0.92,1.256,60.3
llama3.1:8b-instruct-q4_K_M,llama3.1,8b,q4_k_m,evilfreelancer/rrr-benchmark,routes_9,100,100,89,0.89,0.46,52.9
llama3.1:8b-instruct-q8_0,llama3.1,8b,q8_0,evilfreelancer/rrr-benchmark,routes_9,100,100,93,0.93,0.697,55.6
llama3.2:1b-instruct-fp16,llama3.2,1b,fp16,evilfreelancer/rrr-benchmark,routes_9,100,21,9,0.09,0.598,40.0
llama3.2:1b-instruct-q4_K_M,llama3.2,1b,q4_k_m,evilfreelancer/rrr-benchmark,routes_9,100,30,8,0.08,0.802,60.7
llama3.2:1b-instruct-q8_0,llama3.2,1b,q8_0,evilfreelancer/rrr-benchmark,routes_9,100,17,9,0.09,0.431,25.1
llama3.2:3b-instruct-fp16,llama3.2,3b,fp16,evilfreelancer/rrr-benchmark,routes_9,100,99,90,0.9,0.595,58.8
llama3.2:3b-instruct-q4_K_M,llama3.2,3b,q4_k_m,evilfreelancer/rrr-benchmark,routes_9,100,99,83,0.83,0.303,58.7
llama3.2:3b-instruct-q8_0,llama3.2,3b,q8_0,evilfreelancer/rrr-benchmark,routes_9,100,100,93,0.93,0.406,60.5
qwen3:8b-fp16,qwen3,8b,fp16,evilfreelancer/rrr-benchmark,routes_9,100,100,94,0.94,1.425,68.8
qwen3:8b-q4_K_M,qwen3,8b,q4_k_m,evilfreelancer/rrr-benchmark,routes_9,100,100,95,0.95,0.604,65.2
qwen3:8b-q8_0,qwen3,8b,q8_0,evilfreelancer/rrr-benchmark,routes_9,100,100,95,0.95,0.895,69.2
hf.co/NikolayKozloff/ReZero-v0.1-llama-3.2-3b-it-grpo-250404-Q8_0-GGUF,ReZero-v0.1,3b,q8_0,evilfreelancer/rrr-benchmark,generic,130,130,124,0.9538,0.366,61.1
hf.co/NikolayKozloff/ReZero-v0.1-llama-3.2-3b-it-grpo-250404-Q8_0-GGUF,ReZero-v0.1,3b,q8_0,evilfreelancer/rrr-benchmark,routes_3,100,100,91,0.91,0.405,68.5
hf.co/NikolayKozloff/ReZero-v0.1-llama-3.2-3b-it-grpo-250404-Q8_0-GGUF,ReZero-v0.1,3b,q8_0,evilfreelancer/rrr-benchmark,routes_5,100,100,93,0.93,0.39,65.5
hf.co/NikolayKozloff/ReZero-v0.1-llama-3.2-3b-it-grpo-250404-Q8_0-GGUF,ReZero-v0.1,3b,q8_0,evilfreelancer/rrr-benchmark,routes_7,100,100,94,0.94,0.392,65.5
hf.co/NikolayKozloff/ReZero-v0.1-llama-3.2-3b-it-grpo-250404-Q8_0-GGUF,ReZero-v0.1,3b,q8_0,evilfreelancer/rrr-benchmark,routes_9,100,100,89,0.89,0.381,63.3