{ "metadata": { "timestamp": "2025-01-31T14:00:16.349261", "vidore_benchmark_version": "4.0.3.dev20+g2d72668" }, "metrics": { "data_dir/eval_vidore/arxivqa_test_subsampled": { "ndcg_at_1": 0.856, "ndcg_at_3": 0.88745, "ndcg_at_5": 0.89227, "ndcg_at_10": 0.90332, "ndcg_at_20": 0.90941, "ndcg_at_50": 0.91153, "ndcg_at_100": 0.91252, "map_at_1": 0.856, "map_at_3": 0.87967, "map_at_5": 0.88227, "map_at_10": 0.88687, "map_at_20": 0.88854, "map_at_50": 0.88895, "map_at_100": 0.88904, "recall_at_1": 0.856, "recall_at_3": 0.91, "recall_at_5": 0.922, "recall_at_10": 0.956, "recall_at_20": 0.98, "recall_at_50": 0.99, "recall_at_100": 0.996, "precision_at_1": 0.856, "precision_at_3": 0.30333, "precision_at_5": 0.1844, "precision_at_10": 0.0956, "precision_at_20": 0.049, "precision_at_50": 0.0198, "precision_at_100": 0.00996, "mrr_at_1": 0.854, "mrr_at_3": 0.8773333333333334, "mrr_at_5": 0.8809333333333332, "mrr_at_10": 0.8855674603174604, "mrr_at_20": 0.8873665921453847, "mrr_at_50": 0.8876643128304966, "mrr_at_100": 0.887751797810069, "naucs_at_1_max": 0.8146679814134421, "naucs_at_1_std": 0.07396801383185674, "naucs_at_1_diff1": 0.9423560082126644, "naucs_at_3_max": 0.8066396929142029, "naucs_at_3_std": 0.07559912854030806, "naucs_at_3_diff1": 0.9181969083929873, "naucs_at_5_max": 0.8034068328185975, "naucs_at_5_std": 0.02020637314754663, "naucs_at_5_diff1": 0.9160860925566815, "naucs_at_10_max": 0.825566590272474, "naucs_at_10_std": -0.15187590187589842, "naucs_at_10_diff1": 0.9390968508615584, "naucs_at_20_max": 0.8921568627450932, "naucs_at_20_std": 0.0641923436041116, "naucs_at_20_diff1": 0.9738562091503187, "naucs_at_50_max": 0.947712418300658, "naucs_at_50_std": 0.37030812324930756, "naucs_at_50_diff1": 0.9738562091503188, "naucs_at_100_max": 1.0, "naucs_at_100_std": 1.0, "naucs_at_100_diff1": 1.0 }, "data_dir/eval_vidore/docvqa_test_subsampled": { "ndcg_at_1": 0.55876, "ndcg_at_3": 0.61485, "ndcg_at_5": 0.63222, "ndcg_at_10": 0.65493, "ndcg_at_20": 0.66746, "ndcg_at_50": 0.67657, "ndcg_at_100": 0.68329, "map_at_1": 0.55876, "map_at_3": 0.60126, "map_at_5": 0.6109, "map_at_10": 0.62013, "map_at_20": 0.62368, "map_at_50": 0.62509, "map_at_100": 0.62564, "recall_at_1": 0.55876, "recall_at_3": 0.6541, "recall_at_5": 0.69623, "recall_at_10": 0.76718, "recall_at_20": 0.81596, "recall_at_50": 0.86253, "recall_at_100": 0.90466, "precision_at_1": 0.55876, "precision_at_3": 0.21803, "precision_at_5": 0.13925, "precision_at_10": 0.07672, "precision_at_20": 0.0408, "precision_at_50": 0.01725, "precision_at_100": 0.00905, "mrr_at_1": 0.5543237250554324, "mrr_at_3": 0.599039172209904, "mrr_at_5": 0.609349593495935, "mrr_at_10": 0.6183401963889769, "mrr_at_20": 0.6209344044461912, "mrr_at_50": 0.6227106859869997, "mrr_at_100": 0.6232889837626537, "naucs_at_1_max": 0.29569387287927856, "naucs_at_1_std": 0.6842470452244832, "naucs_at_1_diff1": 0.9092758189268736, "naucs_at_3_max": 0.21005633503267365, "naucs_at_3_std": 0.805068083051039, "naucs_at_3_diff1": 0.8754563618133548, "naucs_at_5_max": 0.19259019824979384, "naucs_at_5_std": 0.8409490067520664, "naucs_at_5_diff1": 0.8726101181684977, "naucs_at_10_max": 0.048646855770537796, "naucs_at_10_std": 0.8833213018935924, "naucs_at_10_diff1": 0.8643899050698751, "naucs_at_20_max": -0.06225799483664109, "naucs_at_20_std": 0.9127300541361831, "naucs_at_20_diff1": 0.8597071895030789, "naucs_at_50_max": -0.24442668101014947, "naucs_at_50_std": 0.930294669379938, "naucs_at_50_diff1": 0.8714057920484658, "naucs_at_100_max": -0.3617257884810223, "naucs_at_100_std": 0.9386292283530702, "naucs_at_100_diff1": 0.8597379463433718 }, "data_dir/eval_vidore/syntheticDocQA_energy_test": { "ndcg_at_1": 0.93, "ndcg_at_3": 0.95893, "ndcg_at_5": 0.95893, "ndcg_at_10": 0.95893, "ndcg_at_20": 0.96163, "ndcg_at_50": 0.96365, "ndcg_at_100": 0.96365, "map_at_1": 0.93, "map_at_3": 0.95167, "map_at_5": 0.95167, "map_at_10": 0.95167, "map_at_20": 0.9525, "map_at_50": 0.95283, "map_at_100": 0.95283, "recall_at_1": 0.93, "recall_at_3": 0.98, "recall_at_5": 0.98, "recall_at_10": 0.98, "recall_at_20": 0.99, "recall_at_50": 1.0, "recall_at_100": 1.0, "precision_at_1": 0.93, "precision_at_3": 0.32667, "precision_at_5": 0.196, "precision_at_10": 0.098, "precision_at_20": 0.0495, "precision_at_50": 0.02, "precision_at_100": 0.01, "mrr_at_1": 0.93, "mrr_at_3": 0.9516666666666667, "mrr_at_5": 0.9516666666666667, "mrr_at_10": 0.9516666666666667, "mrr_at_20": 0.9525757575757575, "mrr_at_50": 0.9529205851619644, "mrr_at_100": 0.9529205851619644, "naucs_at_1_max": 0.39482459650526885, "naucs_at_1_std": -0.31419234360410914, "naucs_at_1_diff1": 1.0, "naucs_at_3_max": 0.6790382819794457, "naucs_at_3_std": -0.9556489262371661, "naucs_at_3_diff1": 1.0, "naucs_at_5_max": 0.6790382819794609, "naucs_at_5_std": -0.9556489262371534, "naucs_at_5_diff1": 1.0, "naucs_at_10_max": 0.6790382819794609, "naucs_at_10_std": -0.9556489262371534, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": -0.1713352007469681, "naucs_at_20_diff1": 1.0, "naucs_at_50_max": null, "naucs_at_50_std": null, "naucs_at_50_diff1": null, "naucs_at_100_max": null, "naucs_at_100_std": null, "naucs_at_100_diff1": null }, "data_dir/eval_vidore/tatdqa_test": { "ndcg_at_1": 0.69927, "ndcg_at_3": 0.79372, "ndcg_at_5": 0.81105, "ndcg_at_10": 0.82459, "ndcg_at_20": 0.82878, "ndcg_at_50": 0.83303, "ndcg_at_100": 0.83442, "map_at_1": 0.69927, "map_at_3": 0.77106, "map_at_5": 0.78072, "map_at_10": 0.78653, "map_at_20": 0.7877, "map_at_50": 0.78839, "map_at_100": 0.78852, "recall_at_1": 0.69927, "recall_at_3": 0.85905, "recall_at_5": 0.90097, "recall_at_10": 0.94168, "recall_at_20": 0.95808, "recall_at_50": 0.97934, "recall_at_100": 0.98785, "precision_at_1": 0.69927, "precision_at_3": 0.28635, "precision_at_5": 0.18019, "precision_at_10": 0.09417, "precision_at_20": 0.0479, "precision_at_50": 0.01959, "precision_at_100": 0.00988, "mrr_at_1": 0.7004860267314702, "mrr_at_3": 0.7722762251923856, "mrr_at_5": 0.7813284730660186, "mrr_at_10": 0.7874199598835079, "mrr_at_20": 0.7885261582425689, "mrr_at_50": 0.7892193634018624, "mrr_at_100": 0.7893291443742743, "naucs_at_1_max": 0.2682416909968819, "naucs_at_1_std": -0.1346098281401034, "naucs_at_1_diff1": 0.8296403667835969, "naucs_at_3_max": 0.36050493227494845, "naucs_at_3_std": -0.01834048113954263, "naucs_at_3_diff1": 0.7445505884597561, "naucs_at_5_max": 0.36523716935916267, "naucs_at_5_std": 0.06489405058952001, "naucs_at_5_diff1": 0.7142046612217674, "naucs_at_10_max": 0.4293315802144752, "naucs_at_10_std": 0.2564808038730297, "naucs_at_10_diff1": 0.6873136665710184, "naucs_at_20_max": 0.4688188620078513, "naucs_at_20_std": 0.26917226500908054, "naucs_at_20_diff1": 0.6720615167289586, "naucs_at_50_max": 0.5671944664000176, "naucs_at_50_std": 0.4964253483275758, "naucs_at_50_diff1": 0.722603370462453, "naucs_at_100_max": 0.6585255212623138, "naucs_at_100_std": 0.5978336814194427, "naucs_at_100_diff1": 0.8138609714332596 }, "data_dir/eval_vidore/infovqa_test_subsampled": { "ndcg_at_1": 0.88462, "ndcg_at_3": 0.91698, "ndcg_at_5": 0.92378, "ndcg_at_10": 0.92838, "ndcg_at_20": 0.93146, "ndcg_at_50": 0.93263, "ndcg_at_100": 0.93362, "map_at_1": 0.88462, "map_at_3": 0.90924, "map_at_5": 0.91309, "map_at_10": 0.91501, "map_at_20": 0.91585, "map_at_50": 0.91603, "map_at_100": 0.91612, "recall_at_1": 0.88462, "recall_at_3": 0.93927, "recall_at_5": 0.95547, "recall_at_10": 0.96964, "recall_at_20": 0.98178, "recall_at_50": 0.98785, "recall_at_100": 0.99393, "precision_at_1": 0.88462, "precision_at_3": 0.31309, "precision_at_5": 0.19109, "precision_at_10": 0.09696, "precision_at_20": 0.04909, "precision_at_50": 0.01976, "precision_at_100": 0.00994, "mrr_at_1": 0.8846153846153846, "mrr_at_3": 0.9092442645074225, "mrr_at_5": 0.9129892037786774, "mrr_at_10": 0.9147717049032839, "mrr_at_20": 0.9154333931688503, "mrr_at_50": 0.9156112743493511, "mrr_at_100": 0.915698522470477, "naucs_at_1_max": 0.6002033777762705, "naucs_at_1_std": -0.07102965572007143, "naucs_at_1_diff1": 0.9544715151522033, "naucs_at_3_max": 0.8025453360230541, "naucs_at_3_std": 0.19885112605813188, "naucs_at_3_diff1": 0.960819563780572, "naucs_at_5_max": 0.9121856227472513, "naucs_at_5_std": 0.3583912666662475, "naucs_at_5_diff1": 0.9584449918884831, "naucs_at_10_max": 0.9368484108193146, "naucs_at_10_std": 0.44097650713388975, "naucs_at_10_diff1": 0.9651729455827266, "naucs_at_20_max": 0.9709774546522766, "naucs_at_20_std": 0.6007213904253922, "naucs_at_20_diff1": 0.9854887273261383, "naucs_at_50_max": 0.9782330909892136, "naucs_at_50_std": 0.7577252323561762, "naucs_at_50_diff1": 0.9782330909892136, "naucs_at_100_max": 1.0, "naucs_at_100_std": 0.7075525547215259, "naucs_at_100_diff1": 0.9564661819784259 }, "data_dir/eval_vidore/syntheticDocQA_healthcare_industry_test": { "ndcg_at_1": 0.95, "ndcg_at_3": 0.97893, "ndcg_at_5": 0.97893, "ndcg_at_10": 0.97893, "ndcg_at_20": 0.97893, "ndcg_at_50": 0.97893, "ndcg_at_100": 0.97893, "map_at_1": 0.95, "map_at_3": 0.97167, "map_at_5": 0.97167, "map_at_10": 0.97167, "map_at_20": 0.97167, "map_at_50": 0.97167, "map_at_100": 0.97167, "recall_at_1": 0.95, "recall_at_3": 1.0, "recall_at_5": 1.0, "recall_at_10": 1.0, "recall_at_20": 1.0, "recall_at_50": 1.0, "recall_at_100": 1.0, "precision_at_1": 0.95, "precision_at_3": 0.33333, "precision_at_5": 0.2, "precision_at_10": 0.1, "precision_at_20": 0.05, "precision_at_50": 0.02, "precision_at_100": 0.01, "mrr_at_1": 0.96, "mrr_at_3": 0.9783333333333333, "mrr_at_5": 0.9783333333333333, "mrr_at_10": 0.9783333333333333, "mrr_at_20": 0.9783333333333333, "mrr_at_50": 0.9783333333333333, "mrr_at_100": 0.9783333333333333, "naucs_at_1_max": 0.7605042016806716, "naucs_at_1_std": -0.35732959850606716, "naucs_at_1_diff1": 0.9738562091503253, "naucs_at_3_max": 1.0, "naucs_at_3_std": 1.0, "naucs_at_3_diff1": 1.0, "naucs_at_5_max": 1.0, "naucs_at_5_std": 1.0, "naucs_at_5_diff1": 1.0, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_50_max": null, "naucs_at_50_std": null, "naucs_at_50_diff1": null, "naucs_at_100_max": null, "naucs_at_100_std": null, "naucs_at_100_diff1": null }, "data_dir/eval_vidore/tabfquad_test_subsampled": { "ndcg_at_1": 0.85714, "ndcg_at_3": 0.90523, "ndcg_at_5": 0.91122, "ndcg_at_10": 0.91807, "ndcg_at_20": 0.92163, "ndcg_at_50": 0.92385, "ndcg_at_100": 0.92385, "map_at_1": 0.85714, "map_at_3": 0.89345, "map_at_5": 0.89685, "map_at_10": 0.89962, "map_at_20": 0.90057, "map_at_50": 0.90097, "map_at_100": 0.90097, "recall_at_1": 0.85714, "recall_at_3": 0.93929, "recall_at_5": 0.95357, "recall_at_10": 0.975, "recall_at_20": 0.98929, "recall_at_50": 1.0, "recall_at_100": 1.0, "precision_at_1": 0.85714, "precision_at_3": 0.3131, "precision_at_5": 0.19071, "precision_at_10": 0.0975, "precision_at_20": 0.04946, "precision_at_50": 0.02, "precision_at_100": 0.01, "mrr_at_1": 0.8571428571428571, "mrr_at_3": 0.8928571428571429, "mrr_at_5": 0.8955357142857143, "mrr_at_10": 0.8989866780045351, "mrr_at_20": 0.8999608080411652, "mrr_at_50": 0.9003545809349381, "mrr_at_100": 0.9003545809349381, "naucs_at_1_max": 0.44880952380952394, "naucs_at_1_std": 0.06635082604470473, "naucs_at_1_diff1": 0.9110301263362479, "naucs_at_3_max": 0.7531443950129105, "naucs_at_3_std": 0.4460647003899606, "naucs_at_3_diff1": 0.9288735101883916, "naucs_at_5_max": 0.8075127486892179, "naucs_at_5_std": 0.529124470300943, "naucs_at_5_diff1": 0.917043740573152, "naucs_at_10_max": 0.7902494331065706, "naucs_at_10_std": 0.6329198346005056, "naucs_at_10_diff1": 0.9626517273576021, "naucs_at_20_max": 0.9564270152505505, "naucs_at_20_std": 0.8638344226579515, "naucs_at_20_diff1": 0.9564270152505505, "naucs_at_50_max": 1.0, "naucs_at_50_std": 1.0, "naucs_at_50_diff1": 1.0, "naucs_at_100_max": 1.0, "naucs_at_100_std": 1.0, "naucs_at_100_diff1": 1.0 }, "data_dir/eval_vidore/syntheticDocQA_government_reports_test": { "ndcg_at_1": 0.92, "ndcg_at_3": 0.96417, "ndcg_at_5": 0.96417, "ndcg_at_10": 0.9675, "ndcg_at_20": 0.9675, "ndcg_at_50": 0.9675, "ndcg_at_100": 0.9675, "map_at_1": 0.92, "map_at_3": 0.955, "map_at_5": 0.955, "map_at_10": 0.95643, "map_at_20": 0.95643, "map_at_50": 0.95643, "map_at_100": 0.95643, "recall_at_1": 0.92, "recall_at_3": 0.99, "recall_at_5": 0.99, "recall_at_10": 1.0, "recall_at_20": 1.0, "recall_at_50": 1.0, "recall_at_100": 1.0, "precision_at_1": 0.92, "precision_at_3": 0.33, "precision_at_5": 0.198, "precision_at_10": 0.1, "precision_at_20": 0.05, "precision_at_50": 0.02, "precision_at_100": 0.01, "mrr_at_1": 0.92, "mrr_at_3": 0.955, "mrr_at_5": 0.955, "mrr_at_10": 0.9564285714285714, "mrr_at_20": 0.9564285714285714, "mrr_at_50": 0.9564285714285714, "mrr_at_100": 0.9564285714285714, "naucs_at_1_max": 0.8768674136321195, "naucs_at_1_std": 0.41940943043884304, "naucs_at_1_diff1": 0.9673202614379083, "naucs_at_3_max": 1.0, "naucs_at_3_std": 0.8692810457516356, "naucs_at_3_diff1": 1.0, "naucs_at_5_max": 1.0, "naucs_at_5_std": 0.8692810457516413, "naucs_at_5_diff1": 1.0, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_50_max": null, "naucs_at_50_std": null, "naucs_at_50_diff1": null, "naucs_at_100_max": null, "naucs_at_100_std": null, "naucs_at_100_diff1": null }, "data_dir/eval_vidore/shiftproject_test": { "ndcg_at_1": 0.78, "ndcg_at_3": 0.8644, "ndcg_at_5": 0.87302, "ndcg_at_10": 0.8828, "ndcg_at_20": 0.88795, "ndcg_at_50": 0.88997, "ndcg_at_100": 0.88997, "map_at_1": 0.78, "map_at_3": 0.845, "map_at_5": 0.85, "map_at_10": 0.8541, "map_at_20": 0.85555, "map_at_50": 0.85589, "map_at_100": 0.85589, "recall_at_1": 0.78, "recall_at_3": 0.92, "recall_at_5": 0.94, "recall_at_10": 0.97, "recall_at_20": 0.99, "recall_at_50": 1.0, "recall_at_100": 1.0, "precision_at_1": 0.78, "precision_at_3": 0.30667, "precision_at_5": 0.188, "precision_at_10": 0.097, "precision_at_20": 0.0495, "precision_at_50": 0.02, "precision_at_100": 0.01, "mrr_at_1": 0.81, "mrr_at_3": 0.8633333333333333, "mrr_at_5": 0.8658333333333332, "mrr_at_10": 0.8699285714285715, "mrr_at_20": 0.871412087912088, "mrr_at_50": 0.8717454212454213, "mrr_at_100": 0.8717454212454213, "naucs_at_1_max": -0.24755413363008374, "naucs_at_1_std": -0.6164510594890337, "naucs_at_1_diff1": 0.8162006769601703, "naucs_at_3_max": 0.2243814192343627, "naucs_at_3_std": -0.23225957049486393, "naucs_at_3_diff1": 0.8544000933706815, "naucs_at_5_max": -0.012371615312794324, "naucs_at_5_std": -0.5504201680672273, "naucs_at_5_diff1": 0.9128540305010897, "naucs_at_10_max": -0.049486461251166146, "naucs_at_10_std": -0.27591036414565706, "naucs_at_10_diff1": 0.9128540305010848, "naucs_at_20_max": 0.7222222222222276, "naucs_at_20_std": 0.5541549953314738, "naucs_at_20_diff1": 0.8692810457516413, "naucs_at_50_max": null, "naucs_at_50_std": null, "naucs_at_50_diff1": null, "naucs_at_100_max": null, "naucs_at_100_std": null, "naucs_at_100_diff1": null }, "data_dir/eval_vidore/syntheticDocQA_artificial_intelligence_test": { "ndcg_at_1": 0.99, "ndcg_at_3": 0.99631, "ndcg_at_5": 0.99631, "ndcg_at_10": 0.99631, "ndcg_at_20": 0.99631, "ndcg_at_50": 0.99631, "ndcg_at_100": 0.99631, "map_at_1": 0.99, "map_at_3": 0.995, "map_at_5": 0.995, "map_at_10": 0.995, "map_at_20": 0.995, "map_at_50": 0.995, "map_at_100": 0.995, "recall_at_1": 0.99, "recall_at_3": 1.0, "recall_at_5": 1.0, "recall_at_10": 1.0, "recall_at_20": 1.0, "recall_at_50": 1.0, "recall_at_100": 1.0, "precision_at_1": 0.99, "precision_at_3": 0.33333, "precision_at_5": 0.2, "precision_at_10": 0.1, "precision_at_20": 0.05, "precision_at_50": 0.02, "precision_at_100": 0.01, "mrr_at_1": 0.99, "mrr_at_3": 0.995, "mrr_at_5": 0.995, "mrr_at_10": 0.995, "mrr_at_20": 0.995, "mrr_at_50": 0.995, "mrr_at_100": 0.995, "naucs_at_1_max": 0.8692810457516276, "naucs_at_1_std": -0.5634920634920657, "naucs_at_1_diff1": 1.0, "naucs_at_3_max": 1.0, "naucs_at_3_std": 1.0, "naucs_at_3_diff1": 1.0, "naucs_at_5_max": 1.0, "naucs_at_5_std": 1.0, "naucs_at_5_diff1": 1.0, "naucs_at_10_max": 1.0, "naucs_at_10_std": 1.0, "naucs_at_10_diff1": 1.0, "naucs_at_20_max": 1.0, "naucs_at_20_std": 1.0, "naucs_at_20_diff1": 1.0, "naucs_at_50_max": null, "naucs_at_50_std": null, "naucs_at_50_diff1": null, "naucs_at_100_max": null, "naucs_at_100_std": null, "naucs_at_100_diff1": null } } }