catalog-extract / data /evaluation_results.csv
RobertoBarrosoLuque
Add Qwen 3 8B TO vizz
474491f
,model,category,accuracy,precision,recall,num_samples
0,Qwen2-VL-72B-BASE,masterCategory,0.968968968968969,0.9711267688093788,0.968968968968969,999
1,Qwen2-VL-72B-BASE,gender,0.7607607607607607,0.9354341592843324,0.7607607607607607,999
2,Qwen2-VL-72B-BASE,subCategory,0.3413413413413413,0.6784829173652965,0.3413413413413413,999
3,Qwen2-VL-72B-SFT,masterCategory,0.993993993993994,0.9940108529582212,0.993993993993994,999
4,Qwen2-VL-72B-SFT,gender,0.9169169169169168,0.9144956029794004,0.9169169169169168,999
5,Qwen2-VL-72B-SFT,subCategory,0.941941941941942,0.951274349522218,0.941941941941942,999
12,Qwen3-VL-8B-BASE,masterCategory,0.962,0.9629406792952264,0.962,1000
13,Qwen3-VL-8B-BASE,gender,0.562,0.9290041404605954,0.562,1000
14,Qwen3-VL-8B-BASE,subCategory,0.282,0.6802663471772906,0.282,1000
15,Qwen3-VL-8B-SFT,masterCategory,0.9749498997995992,0.9758055299979992,0.9749498997995992,998
16,Qwen3-VL-8B-SFT,gender,0.8617234468937875,0.8933007948085799,0.8617234468937875,998
17,Qwen3-VL-8B-SFT,subCategory,0.905811623246493,0.9399882328428748,0.905811623246493,998
18,Qwen3-VL-8B-BASE,masterCategory,0.962,0.9634927343171824,0.962,1000
19,Qwen3-VL-8B-BASE,gender,0.551,0.922472822742475,0.551,1000
20,Qwen3-VL-8B-BASE,subCategory,0.282,0.6486835591969566,0.282,1000
21,GPT-5-Mini,masterCategory,0.981,0.9810138759482104,0.981,1000
22,GPT-5-Mini,gender,0.907,0.9260515702929444,0.907,1000
23,GPT-5-Mini,subCategory,0.897,0.944355065421394,0.897,1000