File size: 39,441 Bytes
7cb4732 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 |
Input File,sequence_matcher,cosine,jaccard,precision,recall,f1_score,model,parser_type
benchmark,0.9943117178612059,0.9976483450234231,0.9519230769230769,0.9658536585365853,0.9850746268656716,0.9753694581280787,claude-3-5-sonnet-20241022,LLM_PARSE1752195527
benchmark,0.9775376741541086,0.9964731546055415,0.9468599033816425,0.9560975609756097,0.98989898989899,0.9727047146401985,claude-3-7-sonnet-20250219,LLM_PARSE1752192118
benchmark,0.9823663253697383,0.9976483450234231,0.9660194174757282,0.9707317073170731,0.995,0.982716049382716,claude-opus-4-20250514,LLM_PARSE1752191645
benchmark,0.9872122762148338,0.9953021449322264,0.9471153846153846,0.9609756097560975,0.985,0.9728395061728395,claude-sonnet-4-20250514,LLM_PARSE1752191876
costco_bill,0.9504,0.9843196907197536,0.9333333333333333,0.9545454545454546,0.9767441860465116,0.9655172413793104,claude-3-5-sonnet-20241022,LLM_PARSE1752195570
costco_bill,0.06652512384996462,0.2313271194718138,0.16463414634146342,0.3068181818181818,0.2621359223300971,0.28272251308900526,claude-3-7-sonnet-20250219,LLM_PARSE1752192159
costco_bill,0.9686520376175548,0.9604564350002722,0.9021739130434783,0.9431818181818182,0.9540229885057471,0.9485714285714286,claude-opus-4-20250514,LLM_PARSE1752191693
costco_bill,0.9704236610711431,0.9428666305684691,0.9,0.9204545454545454,0.9759036144578314,0.9473684210526315,claude-sonnet-4-20250514,LLM_PARSE1752191931
cvs_coupon,0.9435665914221218,0.9114379397804238,0.868421052631579,0.8918918918918919,0.9705882352941176,0.9295774647887325,claude-3-5-sonnet-20241022,LLM_PARSE1752195586
cvs_coupon,0.1813186813186813,0.263433026844427,0.1797752808988764,0.43243243243243246,0.23529411764705882,0.30476190476190473,claude-3-7-sonnet-20250219,LLM_PARSE1752192169
cvs_coupon,0.9435665914221218,0.9114379397804238,0.868421052631579,0.8918918918918919,0.9705882352941176,0.9295774647887325,claude-opus-4-20250514,LLM_PARSE1752191715
cvs_coupon,0.577639751552795,0.5994541118316568,0.42105263157894735,0.43243243243243246,0.9411764705882353,0.5925925925925927,claude-sonnet-4-20250514,LLM_PARSE1752191954
grocery_bill,0.8917819365337672,0.9539973123930129,0.824468085106383,0.9011627906976745,0.9064327485380117,0.9037900874635568,claude-3-5-sonnet-20241022,LLM_PARSE1752195595
grocery_bill,0.9136807817589576,0.9595981354624561,0.8695652173913043,0.9302325581395349,0.9302325581395349,0.9302325581395349,claude-3-7-sonnet-20250219,LLM_PARSE1752192176
grocery_bill,0.5111492281303602,0.613928759048487,0.3791208791208791,0.4011627906976744,0.8734177215189873,0.549800796812749,claude-opus-4-20250514,LLM_PARSE1752191725
grocery_bill,0.7186358099878197,0.9022129995709839,0.7424242424242424,0.8546511627906976,0.8497109826589595,0.8521739130434782,claude-sonnet-4-20250514,LLM_PARSE1752191970
medical_invoice_sample1,0.43752629364745477,0.8753582889384084,0.6850828729281768,0.6850828729281768,1.0,0.8131147540983606,claude-3-5-sonnet-20241022,LLM_PARSE1752195617
medical_invoice_sample1,0.544229149115417,0.8067712761863249,0.7142857142857143,0.7182320441988951,0.9923664122137404,0.8333333333333334,claude-3-7-sonnet-20250219,LLM_PARSE1752192196
medical_invoice_sample1,0.450374531835206,0.68955623868024,0.6042780748663101,0.6243093922651933,0.9495798319327731,0.7533333333333334,claude-opus-4-20250514,LLM_PARSE1752191751
medical_invoice_sample1,0.6186770428015564,0.9481130966260292,0.9230769230769231,0.9281767955801105,0.9940828402366864,0.9599999999999999,claude-sonnet-4-20250514,LLM_PARSE1752192003
medical_travel_request_OWCP_957,0.9809800850302081,0.9928651210459996,0.9514563106796117,0.9751243781094527,0.9751243781094527,0.9751243781094527,claude-3-5-sonnet-20241022,LLM_PARSE1752195633
medical_travel_request_OWCP_957,0.7990593153906455,0.9111930756739456,0.7922705314009661,0.8159203980099502,0.9647058823529412,0.8840970350404312,claude-3-7-sonnet-20250219,LLM_PARSE1752192213
medical_travel_request_OWCP_957,0.6848351648351648,0.9897516503852182,0.9468599033816425,0.9751243781094527,0.9702970297029703,0.9727047146401985,claude-opus-4-20250514,LLM_PARSE1752191774
medical_travel_request_OWCP_957,0.6881483116309866,0.9906247813089888,0.9514563106796117,0.9751243781094527,0.9751243781094527,0.9751243781094527,claude-sonnet-4-20250514,LLM_PARSE1752192028
test_1,1.0,1.0000000000000009,1.0,1.0,1.0,1.0,claude-3-5-sonnet-20241022,LLM_PARSE1752195653
test_1,1.0,1.0000000000000009,1.0,1.0,1.0,1.0,claude-3-7-sonnet-20250219,LLM_PARSE1752192831
test_1,1.0,1.0000000000000009,1.0,1.0,1.0,1.0,claude-opus-4-20250514,LLM_PARSE1752191803
test_1,1.0,1.0000000000000009,1.0,1.0,1.0,1.0,claude-sonnet-4-20250514,LLM_PARSE1752192055
test_2,0.8635294117647059,0.9344803961620254,0.9384615384615385,0.953125,0.9838709677419355,0.9682539682539683,claude-3-5-sonnet-20241022,LLM_PARSE1752195662
test_2,0.017057569296375266,0.5182028975919346,0.30327868852459017,0.578125,0.3894736842105263,0.46540880503144655,claude-3-7-sonnet-20250219,LLM_PARSE1752192840
test_2,0.7800729040097205,0.9076057641198195,0.875,0.875,1.0,0.9333333333333333,claude-opus-4-20250514,LLM_PARSE1752191815
test_2,0.9107344632768362,0.956789666727399,1.0,1.0,1.0,1.0,claude-sonnet-4-20250514,LLM_PARSE1752192065
test_3,0.54868041694389,0.6800251173856684,0.47194719471947194,0.4897260273972603,0.9285714285714286,0.6412556053811659,claude-3-5-sonnet-20241022,LLM_PARSE1752195674
test_3,0.47942238267148013,0.6104071875503646,0.40273037542662116,0.4041095890410959,0.9915966386554622,0.5742092457420924,claude-3-7-sonnet-20250219,LLM_PARSE1752192849
test_3,0.4668769716088328,0.6112992185446862,0.3835616438356164,0.3835616438356164,1.0,0.5544554455445544,claude-opus-4-20250514,LLM_PARSE1752191829
test_3,0.4896007650011953,0.6130325357013243,0.4246575342465753,0.4246575342465753,1.0,0.5961538461538461,claude-sonnet-4-20250514,LLM_PARSE1752192079
test_4,0.994413407821229,0.9792016292549367,0.9682539682539683,0.9838709677419355,0.9838709677419355,0.9838709677419355,claude-3-5-sonnet-20241022,LLM_PARSE1752195694
test_4,0.994413407821229,0.9792016292549367,0.9682539682539683,0.9838709677419355,0.9838709677419355,0.9838709677419355,claude-3-7-sonnet-20250219,LLM_PARSE1752192867
test_4,0.994413407821229,0.9792016292549367,0.9682539682539683,0.9838709677419355,0.9838709677419355,0.9838709677419355,claude-opus-4-20250514,LLM_PARSE1752191849
test_4,0.994413407821229,0.9792016292549367,0.9682539682539683,0.9838709677419355,0.9838709677419355,0.9838709677419355,claude-sonnet-4-20250514,LLM_PARSE1752192097
test_5,1.0,1.0,1.0,1.0,1.0,1.0,claude-3-5-sonnet-20241022,LLM_PARSE1752195703
test_5,1.0,1.0,1.0,1.0,1.0,1.0,claude-3-7-sonnet-20250219,LLM_PARSE1752192877
test_5,1.0,1.0,1.0,1.0,1.0,1.0,claude-opus-4-20250514,LLM_PARSE1752191860
test_5,1.0,1.0,1.0,1.0,1.0,1.0,claude-sonnet-4-20250514,LLM_PARSE1752192108
benchmark,0.7154370737755734,0.9749846454998445,0.9516908212560387,0.9609756097560975,0.9899497487437185,0.9752475247524752,accounts/fireworks/models/llama4-maverick-instruct-basic,LLM_PARSE1752193580
benchmark,0.9695945945945946,0.987761239745018,0.9471153846153846,0.9609756097560975,0.985,0.9728395061728395,accounts/fireworks/models/llama4-scout-instruct-basic,LLM_PARSE1752193698
costco_bill,0.9304987735077678,0.9804331969030851,0.8666666666666667,0.8863636363636364,0.975,0.9285714285714285,accounts/fireworks/models/llama4-maverick-instruct-basic,LLM_PARSE1752193607
costco_bill,0.9761146496815286,0.9763315683176068,0.9438202247191011,0.9545454545454546,0.9882352941176471,0.9710982658959537,accounts/fireworks/models/llama4-scout-instruct-basic,LLM_PARSE1752193719
cvs_coupon,0.8640776699029126,0.8657283989931209,0.8157894736842105,0.8378378378378378,0.96875,0.8985507246376812,accounts/fireworks/models/llama4-maverick-instruct-basic,LLM_PARSE1752193617
cvs_coupon,0.9684684684684685,0.9640488658284413,0.918918918918919,0.918918918918919,1.0,0.9577464788732395,accounts/fireworks/models/llama4-scout-instruct-basic,LLM_PARSE1752193726
grocery_bill,0.6153023006955591,0.7030506693946105,0.4722222222222222,0.4941860465116279,0.9139784946236559,0.641509433962264,accounts/fireworks/models/llama4-maverick-instruct-basic,LLM_PARSE1752193627
grocery_bill,0.6180528182893181,0.879040239872227,0.7628865979381443,0.8604651162790697,0.8705882352941177,0.8654970760233918,accounts/fireworks/models/llama4-scout-instruct-basic,LLM_PARSE1752193732
medical_invoice_sample1,0.5519765739385066,0.9838399456156867,0.8846153846153846,0.8895027624309392,0.9938271604938271,0.9387755102040816,accounts/fireworks/models/llama4-maverick-instruct-basic,LLM_PARSE1752193639
medical_invoice_sample1,0.5095588235294117,0.9302010671353808,0.8415300546448088,0.850828729281768,0.9871794871794872,0.913946587537092,accounts/fireworks/models/llama4-scout-instruct-basic,LLM_PARSE1752193743
medical_travel_request_OWCP_957,0.9807520143240823,0.9918980164869274,0.9420289855072463,0.9701492537313433,0.9701492537313433,0.9701492537313433,accounts/fireworks/models/llama4-maverick-instruct-basic,LLM_PARSE1752193648
medical_travel_request_OWCP_957,0.8171557562076749,0.9225747170771591,0.8823529411764706,0.8955223880597015,0.9836065573770492,0.9375,accounts/fireworks/models/llama4-scout-instruct-basic,LLM_PARSE1752193749
test_1,1.0,1.0000000000000009,1.0,1.0,1.0,1.0,accounts/fireworks/models/llama4-maverick-instruct-basic,LLM_PARSE1752193655
test_1,1.0,1.0000000000000009,1.0,1.0,1.0,1.0,accounts/fireworks/models/llama4-scout-instruct-basic,LLM_PARSE1752193759
test_2,0.5625744934445769,0.9399869182588547,0.9375,0.9375,1.0,0.967741935483871,accounts/fireworks/models/llama4-maverick-instruct-basic,LLM_PARSE1752193661
test_2,0.2836990595611285,0.8354157828288629,0.7261904761904762,0.953125,0.7530864197530864,0.8413793103448276,accounts/fireworks/models/llama4-scout-instruct-basic,LLM_PARSE1752193764
test_3,0.49320295730980207,0.6359668796370604,0.4349315068493151,0.4349315068493151,1.0,0.6062052505966588,accounts/fireworks/models/llama4-maverick-instruct-basic,LLM_PARSE1752193673
test_3,0.7360435712896324,0.8013312224709405,0.7431506849315068,0.7431506849315068,1.0,0.8526522593320236,accounts/fireworks/models/llama4-scout-instruct-basic,LLM_PARSE1752193771
test_4,0.994413407821229,0.9792016292549367,0.9682539682539683,0.9838709677419355,0.9838709677419355,0.9838709677419355,accounts/fireworks/models/llama4-maverick-instruct-basic,LLM_PARSE1752193684
test_4,0.9670014347202296,0.9484458356080679,0.9206349206349206,0.9354838709677419,0.9830508474576272,0.9586776859504132,accounts/fireworks/models/llama4-scout-instruct-basic,LLM_PARSE1752193795
test_5,1.0,1.0,1.0,1.0,1.0,1.0,accounts/fireworks/models/llama4-maverick-instruct-basic,LLM_PARSE1752193692
test_5,1.0,1.0,1.0,1.0,1.0,1.0,accounts/fireworks/models/llama4-scout-instruct-basic,LLM_PARSE1752193800
benchmark,0.9845183486238532,0.9946795114428338,0.9565217391304348,0.9658536585365853,0.99,0.9777777777777777,gemini-1.5-flash,LLM_PARSE1752192567
benchmark,0.989401317674019,0.9970404887955766,0.9707317073170731,0.9707317073170731,1.0,0.9851485148514851,gemini-1.5-pro,LLM_PARSE1752192756
benchmark,0.9874500855675984,0.9940983091457294,0.9567307692307693,0.9707317073170731,0.9851485148514851,0.9778869778869779,gemini-2.0-flash,LLM_PARSE1752191518
benchmark,0.9937250427837993,0.9964515385167041,0.966183574879227,0.975609756097561,0.9900990099009901,0.9828009828009828,gemini-2.5-flash,LLM_PARSE1752189460
benchmark,0.9339783722253842,0.9952735461930056,0.9758454106280193,0.9853658536585366,0.9901960784313726,0.9877750611246944,gemini-2.5-pro,LLM_PARSE1752190362
costco_bill,0.9760383386581469,1.0000000000000002,0.9772727272727273,0.9772727272727273,1.0,0.9885057471264368,gemini-1.5-flash,LLM_PARSE1752192601
costco_bill,0.9760383386581469,1.0000000000000002,0.9772727272727273,0.9772727272727273,1.0,0.9885057471264368,gemini-1.5-pro,LLM_PARSE1752192794
costco_bill,0.9984375,1.0000000000000002,1.0,1.0,1.0,1.0,gemini-2.0-flash,LLM_PARSE1752191535
costco_bill,0.9984375,1.0000000000000002,1.0,1.0,1.0,1.0,gemini-2.5-flash,LLM_PARSE1752189576
costco_bill,0.9808135072908672,0.9883546339951822,0.967032967032967,1.0,0.967032967032967,0.9832402234636871,gemini-2.5-pro,LLM_PARSE1752190408
cvs_coupon,0.9217002237136466,0.8655333872506977,0.825,0.8918918918918919,0.9166666666666666,0.9041095890410958,gemini-1.5-flash,LLM_PARSE1752192618
cvs_coupon,0.9774774774774775,0.9640488658284413,0.8947368421052632,0.918918918918919,0.9714285714285714,0.9444444444444445,gemini-1.5-pro,LLM_PARSE1752192807
cvs_coupon,0.9819819819819819,0.9640488658284413,0.9459459459459459,0.9459459459459459,1.0,0.9722222222222222,gemini-2.0-flash,LLM_PARSE1752191547
cvs_coupon,0.9910714285714286,0.9818700292070942,0.972972972972973,0.972972972972973,1.0,0.9863013698630138,gemini-2.5-flash,LLM_PARSE1752189629
cvs_coupon,0.9535864978902954,0.9472018579507341,0.9230769230769231,0.972972972972973,0.9473684210526315,0.9599999999999999,gemini-2.5-pro,LLM_PARSE1752190427
grocery_bill,0.9405537459283387,0.9750275100306482,0.8736263736263736,0.9244186046511628,0.9408284023668639,0.9325513196480938,gemini-1.5-flash,LLM_PARSE1752192631
grocery_bill,0.9508599508599509,0.9793457694002203,0.8833333333333333,0.9244186046511628,0.9520958083832335,0.9380530973451326,gemini-1.5-pro,LLM_PARSE1752192816
grocery_bill,0.9414654113794515,0.9812402317724886,0.8888888888888888,0.9302325581395349,0.9523809523809523,0.9411764705882352,gemini-2.0-flash,LLM_PARSE1752191554
grocery_bill,0.9500818330605565,0.9849687838909444,0.9050279329608939,0.9418604651162791,0.9585798816568047,0.9501466275659824,gemini-2.5-flash,LLM_PARSE1752189662
grocery_bill,0.9258353708231459,0.9868702347954403,0.9106145251396648,0.9476744186046512,0.9588235294117647,0.9532163742690059,gemini-2.5-pro,LLM_PARSE1752190444
medical_invoice_sample1,0.42026117926394935,0.947978241231951,0.8121546961325967,0.8121546961325967,1.0,0.8963414634146342,gemini-1.5-flash,LLM_PARSE1752192667
medical_invoice_sample1,0.3044982698961938,0.45924767143506656,0.3021978021978022,0.30386740331491713,0.9821428571428571,0.46413502109704646,gemini-1.5-pro,LLM_PARSE1752192832
medical_invoice_sample1,0.6664298401420959,0.9969468443298555,0.9945054945054945,1.0,0.9945054945054945,0.9972451790633609,gemini-2.0-flash,LLM_PARSE1752191577
medical_invoice_sample1,0.681592039800995,1.0000000000000004,1.0,1.0,1.0,1.0,gemini-2.5-flash,LLM_PARSE1752189695
medical_invoice_sample1,0.584140653031934,0.9969468443298555,0.9285714285714286,0.9337016574585635,0.9941176470588236,0.962962962962963,gemini-2.5-pro,LLM_PARSE1752190462
medical_travel_request_OWCP_957,0.9393939393939394,0.9918327708656376,0.9514563106796117,0.9751243781094527,0.9751243781094527,0.9751243781094527,gemini-1.5-flash,LLM_PARSE1752192682
medical_travel_request_OWCP_957,0.9836212699124972,0.9928651210459996,0.9655172413793104,0.9751243781094527,0.98989898989899,0.9824561403508771,gemini-1.5-pro,LLM_PARSE1752192915
medical_travel_request_OWCP_957,0.6677108972924591,0.9928651210459996,0.9514563106796117,0.9751243781094527,0.9751243781094527,0.9751243781094527,gemini-2.0-flash,LLM_PARSE1752191590
medical_travel_request_OWCP_957,0.6695086380973749,0.9928651210459996,0.9655172413793104,0.9751243781094527,0.98989898989899,0.9824561403508771,gemini-2.5-flash,LLM_PARSE1752189778
medical_travel_request_OWCP_957,0.9809800850302081,0.9928651210459996,0.9514563106796117,0.9751243781094527,0.9751243781094527,0.9751243781094527,gemini-2.5-pro,LLM_PARSE1752190495
test_1,0.9879154078549849,0.9859861817544773,0.9772727272727273,0.9772727272727273,1.0,0.9885057471264368,gemini-1.5-flash,LLM_PARSE1752192697
test_1,1.0,1.0000000000000009,1.0,1.0,1.0,1.0,gemini-1.5-pro,LLM_PARSE1752192932
test_1,0.9701492537313433,1.0000000000000009,1.0,1.0,1.0,1.0,gemini-2.0-flash,LLM_PARSE1752191603
test_1,1.0,1.0000000000000009,1.0,1.0,1.0,1.0,gemini-2.5-flash,LLM_PARSE1752189806
test_1,1.0,1.0000000000000009,1.0,1.0,1.0,1.0,gemini-2.5-pro,LLM_PARSE1752190516
test_2,0.8776978417266187,0.9333128812745526,0.8955223880597015,0.9375,0.9523809523809523,0.9448818897637795,gemini-1.5-flash,LLM_PARSE1752192706
test_2,0.09340659340659341,0.36201693721224343,0.24770642201834864,0.421875,0.375,0.39705882352941174,gemini-1.5-pro,LLM_PARSE1752192940
test_2,0.8158192090395481,0.956789666727399,1.0,1.0,1.0,1.0,gemini-2.0-flash,LLM_PARSE1752191606
test_2,0.6518636003172086,0.8971698769455133,0.735632183908046,1.0,0.735632183908046,0.847682119205298,gemini-2.5-flash,LLM_PARSE1752189834
test_2,0.6294339622641509,0.8215120986648818,0.6808510638297872,1.0,0.6808510638297872,0.810126582278481,gemini-2.5-pro,LLM_PARSE1752190529
test_3,0.9952516619183286,0.9972565576469495,1.0,1.0,1.0,1.0,gemini-1.5-flash,LLM_PARSE1752192715
test_3,0.3889315910837817,0.523970690224494,0.3287671232876712,0.3287671232876712,1.0,0.49484536082474223,gemini-1.5-pro,LLM_PARSE1752193025
test_3,0.9976201808662541,0.9993101121396254,1.0,1.0,1.0,1.0,gemini-2.0-flash,LLM_PARSE1752191632
test_3,0.9976201808662541,0.9993101121396254,1.0,1.0,1.0,1.0,gemini-2.5-flash,LLM_PARSE1752189870
test_3,0.9976201808662541,0.9993101121396254,1.0,1.0,1.0,1.0,gemini-2.5-pro,LLM_PARSE1752190545
test_4,0.5350140056022409,0.9383854946113873,0.9523809523809523,0.967741935483871,0.9836065573770492,0.975609756097561,gemini-1.5-flash,LLM_PARSE1752192733
test_4,0.9415204678362573,0.8866095469254991,0.8888888888888888,0.9032258064516129,0.9824561403508771,0.9411764705882352,gemini-1.5-pro,LLM_PARSE1752193042
test_4,0.9789621318373072,0.882281367849817,0.8636363636363636,0.9193548387096774,0.9344262295081968,0.9268292682926829,gemini-2.0-flash,LLM_PARSE1752191644
test_4,0.9846582984658299,0.9736442407200391,0.9682539682539683,0.9838709677419355,0.9838709677419355,0.9838709677419355,gemini-2.5-flash,LLM_PARSE1752189940
test_4,0.9874125874125874,0.9736442407200391,0.9682539682539683,0.9838709677419355,0.9838709677419355,0.9838709677419355,gemini-2.5-pro,LLM_PARSE1752190585
test_5,0.9666666666666667,0.98240551387501,0.9523809523809523,1.0,0.9523809523809523,0.975609756097561,gemini-1.5-flash,LLM_PARSE1752192748
test_5,1.0,1.0,1.0,1.0,1.0,1.0,gemini-1.5-pro,LLM_PARSE1752193050
test_5,0.8923076923076924,0.9122764299654935,0.8,1.0,0.8,0.888888888888889,gemini-2.0-flash,LLM_PARSE1752191650
test_5,1.0,1.0,1.0,1.0,1.0,1.0,gemini-2.5-flash,LLM_PARSE1752189960
test_5,1.0,1.0,1.0,1.0,1.0,1.0,gemini-2.5-pro,LLM_PARSE1752190596
benchmark,0.9008844159804819,0.9369832924227369,0.7980769230769231,0.8097560975609757,0.9822485207100592,0.8877005347593584,gpt-4.1,LLM_PARSE1752192631
benchmark,0.9289386150589658,0.9471961007993572,0.8454106280193237,0.8536585365853658,0.9887005649717514,0.9162303664921466,gpt-4.1-mini,LLM_PARSE1752193012
benchmark,0.9965850882185544,0.9976483450234231,0.9757281553398058,0.9804878048780488,0.995049504950495,0.9877149877149877,gpt-4o,LLM_PARSE1752193261
benchmark,0.9931662870159453,0.9941282806901699,0.9565217391304348,0.9658536585365853,0.99,0.9777777777777777,gpt-4o-mini,LLM_PARSE1752193572
costco_bill,0.788550323176362,0.7932010681164532,0.6451612903225806,0.6818181818181818,0.9230769230769231,0.7843137254901961,gpt-4.1,LLM_PARSE1752192743
costco_bill,0.9126378286683631,0.9097971354270784,0.8426966292134831,0.8522727272727273,0.9868421052631579,0.9146341463414636,gpt-4.1-mini,LLM_PARSE1752193080
costco_bill,0.9147410358565737,0.9137892432215373,0.8297872340425532,0.8863636363636364,0.9285714285714286,0.9069767441860465,gpt-4o,LLM_PARSE1752193339
costco_bill,0.7649687220732797,0.843704268935445,0.673469387755102,0.75,0.868421052631579,0.8048780487804879,gpt-4o-mini,LLM_PARSE1752193613
cvs_coupon,0.9623059866962306,0.9118604542100428,0.8717948717948718,0.918918918918919,0.9444444444444444,0.9315068493150684,gpt-4.1,LLM_PARSE1752192766
cvs_coupon,0.9715536105032823,0.9293902157051029,0.8974358974358975,0.9459459459459459,0.9459459459459459,0.9459459459459459,gpt-4.1-mini,LLM_PARSE1752193103
cvs_coupon,0.9668874172185431,0.9462438241241992,0.9210526315789473,0.9459459459459459,0.9722222222222222,0.9589041095890412,gpt-4o,LLM_PARSE1752193375
cvs_coupon,0.9490022172949002,0.8561732638838288,0.7560975609756098,0.8378378378378378,0.8857142857142857,0.8611111111111112,gpt-4o-mini,LLM_PARSE1752193631
grocery_bill,0.364741641337386,0.5646872951556197,0.3333333333333333,0.3430232558139535,0.921875,0.5,gpt-4.1,LLM_PARSE1752192788
grocery_bill,0.9288835915772745,0.9584150871707698,0.8518518518518519,0.936046511627907,0.9044943820224719,0.9199999999999999,gpt-4.1-mini,LLM_PARSE1752193112
grocery_bill,0.9075144508670521,0.972007104683558,0.8681318681318682,0.9186046511627907,0.9404761904761905,0.9294117647058823,gpt-4o,LLM_PARSE1752193388
grocery_bill,0.31939605110336816,0.653408018573298,0.3743016759776536,0.38953488372093026,0.9054054054054054,0.5447154471544715,gpt-4o-mini,LLM_PARSE1752193642
medical_invoice_sample1,0.27628865979381445,0.6525948468218983,0.49171270718232046,0.49171270718232046,1.0,0.6592592592592593,gpt-4.1,LLM_PARSE1752192825
medical_invoice_sample1,0.39835306227483275,0.5365263411641703,0.4308510638297872,0.44751381215469616,0.9204545454545454,0.6022304832713755,gpt-4.1-mini,LLM_PARSE1752193133
medical_invoice_sample1,0.3889409559512652,0.7301804605852915,0.580110497237569,0.580110497237569,1.0,0.7342657342657343,gpt-4o,LLM_PARSE1752193418
medical_invoice_sample1,0.5210384959713519,0.7731409228849677,0.5837837837837838,0.5966850828729282,0.9642857142857143,0.7372013651877134,gpt-4o-mini,LLM_PARSE1752193669
medical_travel_request_OWCP_957,0.7609702916325974,0.887973703662565,0.7205882352941176,0.7313432835820896,0.98,0.8376068376068375,gpt-4.1,LLM_PARSE1752192856
medical_travel_request_OWCP_957,0.7388641425389755,0.8658437531963121,0.6650246305418719,0.6716417910447762,0.9854014598540146,0.7988165680473374,gpt-4.1-mini,LLM_PARSE1752193149
medical_travel_request_OWCP_957,0.7442988840368753,0.8485150292937816,0.803921568627451,0.8159203980099502,0.9820359281437125,0.8913043478260868,gpt-4o,LLM_PARSE1752193453
medical_travel_request_OWCP_957,0.7607785168760779,0.8874992931050839,0.8177339901477833,0.8258706467661692,0.9880952380952381,0.8997289972899729,gpt-4o-mini,LLM_PARSE1752193685
test_1,0.8538587848932676,0.8499829522942023,0.8085106382978723,0.8636363636363636,0.926829268292683,0.8941176470588236,gpt-4.1,LLM_PARSE1752192879
test_1,0.7073170731707317,0.5692872229094355,0.5576923076923077,0.6590909090909091,0.7837837837837838,0.7160493827160493,gpt-4.1-mini,LLM_PARSE1752193171
test_1,1.0,1.0000000000000009,1.0,1.0,1.0,1.0,gpt-4o,LLM_PARSE1752193475
test_1,0.9879154078549849,0.9859861817544773,0.9772727272727273,0.9772727272727273,1.0,0.9885057471264368,gpt-4o-mini,LLM_PARSE1752193702
test_2,0.10275229357798166,0.5885438108408162,0.4019607843137255,0.640625,0.5189873417721519,0.5734265734265733,gpt-4.1,LLM_PARSE1752192906
test_2,0.768361581920904,0.9016179598417589,0.7878787878787878,0.8125,0.9629629629629629,0.8813559322033898,gpt-4.1-mini,LLM_PARSE1752193187
test_2,0.3020408163265306,0.8289104158435472,0.61,0.953125,0.6288659793814433,0.7577639751552796,gpt-4o,LLM_PARSE1752193500
test_2,0.45464725643896975,0.8527165377904645,0.8507462686567164,0.890625,0.95,0.9193548387096774,gpt-4o-mini,LLM_PARSE1752193711
test_3,0.3261848027535081,0.4359934422925263,0.29692832764505117,0.2979452054794521,0.9886363636363636,0.45789473684210524,gpt-4.1,LLM_PARSE1752192933
test_3,0.24040066777963273,0.4232113544305079,0.24232081911262798,0.24315068493150685,0.9861111111111112,0.3901098901098901,gpt-4.1-mini,LLM_PARSE1752193207
test_3,0.5386939344643272,0.6623639230615441,0.47440273037542663,0.476027397260274,0.9928571428571429,0.6435185185185186,gpt-4o,LLM_PARSE1752193512
test_3,0.49667300380228135,0.6304139746314001,0.46598639455782315,0.4691780821917808,0.9856115107913669,0.6357308584686774,gpt-4o-mini,LLM_PARSE1752193726
test_4,0.994413407821229,0.9792016292549367,0.9682539682539683,0.9838709677419355,0.9838709677419355,0.9838709677419355,gpt-4.1,LLM_PARSE1752192971
test_4,0.994413407821229,0.9792016292549367,0.9682539682539683,0.9838709677419355,0.9838709677419355,0.9838709677419355,gpt-4.1-mini,LLM_PARSE1752193234
test_4,0.994413407821229,0.9792016292549367,0.9682539682539683,0.9838709677419355,0.9838709677419355,0.9838709677419355,gpt-4o,LLM_PARSE1752193544
test_4,0.7440559440559441,0.6695411051373749,0.8507462686567164,0.9193548387096774,0.9193548387096774,0.9193548387096774,gpt-4o-mini,LLM_PARSE1752193744
test_5,0.5103448275862069,1.0,0.5769230769230769,0.75,0.7142857142857143,0.7317073170731706,gpt-4.1,LLM_PARSE1752192990
test_5,0.847457627118644,0.8542113981255111,0.68,0.85,0.7727272727272727,0.8095238095238095,gpt-4.1-mini,LLM_PARSE1752193247
test_5,1.0,1.0,1.0,1.0,1.0,1.0,gpt-4o,LLM_PARSE1752193555
test_5,1.0,1.0,1.0,1.0,1.0,1.0,gpt-4o-mini,LLM_PARSE1752193752
benchmark,0.8646162690428284,0.9833706893475125,0.9182692307692307,0.9317073170731708,0.9845360824742269,0.9573934837092734,meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo,LLM_PARSE1752193557
benchmark,0.8039322774440196,0.9381793603264821,0.8591549295774648,0.8926829268292683,0.9581151832460733,0.9242424242424242,meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo,LLM_PARSE1752193636
benchmark,0.8646162690428284,0.9833706893475125,0.9182692307692307,0.9317073170731708,0.9845360824742269,0.9573934837092734,meta-llama/Llama-Vision-Free,LLM_PARSE1752193941
costco_bill,0.38760806916426516,0.6662878518816022,0.456,0.6477272727272727,0.6063829787234043,0.6263736263736264,meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo,LLM_PARSE1752193573
costco_bill,0.3740685543964232,0.5988965870697539,0.4596774193548387,0.6477272727272727,0.6129032258064516,0.6298342541436464,meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo,LLM_PARSE1752193716
costco_bill,0.38760806916426516,0.6662878518816022,0.456,0.6477272727272727,0.6063829787234043,0.6263736263736264,meta-llama/Llama-Vision-Free,LLM_PARSE1752193976
cvs_coupon,0.8583509513742071,0.8493550473732521,0.6888888888888889,0.8378378378378378,0.7948717948717948,0.8157894736842105,meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo,LLM_PARSE1752193580
cvs_coupon,0.598079561042524,0.8734566254000021,0.75,0.8918918918918919,0.825,0.8571428571428571,meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo,LLM_PARSE1752193733
cvs_coupon,0.9094736842105263,0.8759877585881911,0.7674418604651163,0.8918918918918919,0.8461538461538461,0.868421052631579,meta-llama/Llama-Vision-Free,LLM_PARSE1752193982
grocery_bill,0.5517522412387939,0.81470846566113,0.6097560975609756,0.7267441860465116,0.7911392405063291,0.7575757575757576,meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo,LLM_PARSE1752193585
grocery_bill,0.4329576369996114,0.6752012250444275,0.5107296137339056,0.6918604651162791,0.6611111111111111,0.6761363636363636,meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo,LLM_PARSE1752193747
grocery_bill,0.6,0.8182133341234487,0.6116504854368932,0.7325581395348837,0.7875,0.7590361445783131,meta-llama/Llama-Vision-Free,LLM_PARSE1752193987
medical_invoice_sample1,0.681783243658724,0.9472935340823311,0.7845303867403315,0.7845303867403315,1.0,0.8792569659442725,meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo,LLM_PARSE1752193596
medical_invoice_sample1,0.3680649526387009,0.7954352821899798,0.5469613259668509,0.5469613259668509,1.0,0.7071428571428572,meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo,LLM_PARSE1752193771
medical_invoice_sample1,0.6357254290171607,0.8977921331555682,0.7417582417582418,0.7458563535911602,0.9926470588235294,0.8517350157728707,meta-llama/Llama-Vision-Free,LLM_PARSE1752194006
medical_travel_request_OWCP_957,0.949358541525996,0.9914563670839204,0.9320388349514563,0.9552238805970149,0.9746192893401016,0.9648241206030151,meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo,LLM_PARSE1752193601
medical_travel_request_OWCP_957,0.9331532748143146,0.9914563670839204,0.9320388349514563,0.9552238805970149,0.9746192893401016,0.9648241206030151,meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo,LLM_PARSE1752193799
medical_travel_request_OWCP_957,0.949358541525996,0.9914563670839204,0.9320388349514563,0.9552238805970149,0.9746192893401016,0.9648241206030151,meta-llama/Llama-Vision-Free,LLM_PARSE1752194013
test_1,0.8388214904679376,0.5907352142521325,0.7727272727272727,0.7727272727272727,1.0,0.8717948717948718,meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo,LLM_PARSE1752193607
test_1,0.9196141479099679,0.9134550238575389,0.8409090909090909,0.8409090909090909,1.0,0.9135802469135803,meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo,LLM_PARSE1752193827
test_1,0.8099467140319716,0.5752160041929604,0.75,0.75,1.0,0.8571428571428571,meta-llama/Llama-Vision-Free,LLM_PARSE1752194021
test_2,0.4387755102040816,0.7944661768602863,0.8461538461538461,0.859375,0.9821428571428571,0.9166666666666665,meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo,LLM_PARSE1752193610
test_2,0.33691164327002476,0.7878664683463091,0.5125,0.640625,0.7192982456140351,0.6776859504132231,meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo,LLM_PARSE1752193844
test_2,0.4387755102040816,0.7944661768602863,0.8461538461538461,0.859375,0.9821428571428571,0.9166666666666665,meta-llama/Llama-Vision-Free,LLM_PARSE1752194027
test_3,0.3956175298804781,0.7941857105413662,0.6462585034013606,0.6506849315068494,0.9895833333333334,0.7851239669421488,meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo,LLM_PARSE1752193616
test_3,0.6038095238095238,0.8853783887562813,0.7781569965870307,0.7808219178082192,0.9956331877729258,0.8752399232245681,meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo,LLM_PARSE1752193858
test_3,0.42868525896414345,0.7954742905549371,0.6518771331058021,0.6541095890410958,0.9947916666666666,0.7892561983471074,meta-llama/Llama-Vision-Free,LLM_PARSE1752194033
test_4,0.9597701149425287,0.9428738047322169,0.9206349206349206,0.9354838709677419,0.9830508474576272,0.9586776859504132,meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo,LLM_PARSE1752193627
test_4,0.4896907216494845,0.8594297517138777,0.9104477611940298,0.9838709677419355,0.9242424242424242,0.9531249999999999,meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo,LLM_PARSE1752193905
test_4,0.9597701149425287,0.9428738047322169,0.9206349206349206,0.9354838709677419,0.9830508474576272,0.9586776859504132,meta-llama/Llama-Vision-Free,LLM_PARSE1752194065
test_5,0.5168067226890757,0.9785654006271267,1.0,1.0,1.0,1.0,meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo,LLM_PARSE1752193630
test_5,0.29411764705882354,0.7184721906586644,0.43478260869565216,1.0,0.43478260869565216,0.6060606060606061,meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo,LLM_PARSE1752193917
test_5,0.5168067226890757,0.9785654006271267,1.0,1.0,1.0,1.0,meta-llama/Llama-Vision-Free,LLM_PARSE1752194071
benchmark,0.9696622781911849,0.9847441394559939,0.9375,0.9512195121951219,0.9848484848484849,0.967741935483871,mistral-ocr-latest,LLM_PARSE1752338656
costco_bill,0.9398907103825137,0.9688803177143563,0.9130434782608695,0.9545454545454546,0.9545454545454546,0.9545454545454546,mistral-ocr-latest,LLM_PARSE1752338675
cvs_coupon,0.8979591836734694,0.8140511424435039,0.7560975609756098,0.8378378378378378,0.8857142857142857,0.8611111111111112,mistral-ocr-latest,LLM_PARSE1752338681
grocery_bill,0.7866388308977036,0.9479507228644266,0.8260869565217391,0.8837209302325582,0.926829268292683,0.9047619047619047,mistral-ocr-latest,LLM_PARSE1752338687
medical_invoice_sample1,0.6483983018139714,0.93463637677369,0.7692307692307693,0.7734806629834254,0.9929078014184397,0.8695652173913044,mistral-ocr-latest,LLM_PARSE1752338694
medical_travel_request_OWCP_957,0.9250785105428443,0.9916745936489192,0.9420289855072463,0.9701492537313433,0.9701492537313433,0.9701492537313433,mistral-ocr-latest,LLM_PARSE1752338697
test_1,0.89937106918239,0.6904920269308475,0.9772727272727273,0.9772727272727273,1.0,0.9885057471264368,mistral-ocr-latest,LLM_PARSE1752338700
test_2,0.8858757062146893,0.956789666727399,1.0,1.0,1.0,1.0,mistral-ocr-latest,LLM_PARSE1752338703
test_3,0.9852185089974294,0.9961134622761473,0.9863013698630136,0.9863013698630136,1.0,0.993103448275862,mistral-ocr-latest,LLM_PARSE1752338708
test_4,0.9670014347202296,0.9484458356080679,0.9206349206349206,0.9354838709677419,0.9830508474576272,0.9586776859504132,mistral-ocr-latest,LLM_PARSE1752338715
test_5,0.8896551724137931,1.0,1.0,1.0,1.0,1.0,mistral-ocr-latest,LLM_PARSE1752338717
benchmark,0.937950937950938,0.9931595734254032,0.975609756097561,0.975609756097561,1.0,0.9876543209876543,google/gemma-3-27b-it,LLM_PARSE1752193189
benchmark,0.8642487046632125,0.89213958158887,0.9245283018867925,0.9560975609756097,0.9655172413793104,0.9607843137254901,microsoft/phi-4-multimodal-instruct,LLM_PARSE1752193459
benchmark,0.21546134663341646,0.29284676308689694,0.15458937198067632,0.15609756097560976,0.9411764705882353,0.2677824267782427,qwen/qwen-2.5-vl-7b-instruct,LLM_PARSE1752193044
costco_bill,0.852589641434263,0.9561488518737534,0.9333333333333333,0.9545454545454546,0.9767441860465116,0.9655172413793104,google/gemma-3-27b-it,LLM_PARSE1752193225
costco_bill,0.7669753086419753,0.9422227301386843,0.8817204301075269,0.9318181818181818,0.9425287356321839,0.9371428571428572,microsoft/phi-4-multimodal-instruct,LLM_PARSE1752193495
costco_bill,0.0,0.0,0.0,0.0,0.0,0.0,qwen/qwen-2.5-vl-7b-instruct,LLM_PARSE1752193072
cvs_coupon,0.12922173274596183,0.4246115008733564,0.3333333333333333,0.40540540540540543,0.6521739130434783,0.5,google/gemma-3-27b-it,LLM_PARSE1752193239
cvs_coupon,0.676595744680851,0.825943861885887,0.7857142857142857,0.8918918918918919,0.868421052631579,0.88,microsoft/phi-4-multimodal-instruct,LLM_PARSE1752193505
cvs_coupon,0.5125,0.915390834704615,0.8,0.8648648648648649,0.9142857142857143,0.888888888888889,qwen/qwen-2.5-vl-7b-instruct,LLM_PARSE1752193081
grocery_bill,0.8597014925373134,0.968767180260828,0.88268156424581,0.9186046511627907,0.9575757575757575,0.9376854599406528,google/gemma-3-27b-it,LLM_PARSE1752193279
grocery_bill,0.5961538461538461,0.895542566523866,0.7055837563451777,0.8081395348837209,0.8475609756097561,0.8273809523809523,microsoft/phi-4-multimodal-instruct,LLM_PARSE1752193510
grocery_bill,0.0,0.0,0.0,0.0,0.0,0.0,qwen/qwen-2.5-vl-7b-instruct,LLM_PARSE1752193095
medical_invoice_sample1,0.5514157973174366,0.9438375682019277,0.8369565217391305,0.850828729281768,0.9808917197452229,0.9112426035502958,google/gemma-3-27b-it,LLM_PARSE1752193296
medical_invoice_sample1,0.5238095238095238,0.8654821601615998,0.6885245901639344,0.6961325966850829,0.984375,0.8155339805825242,microsoft/phi-4-multimodal-instruct,LLM_PARSE1752193526
medical_invoice_sample1,0.5923984272608126,0.9454713915672728,0.8115183246073299,0.856353591160221,0.9393939393939394,0.8959537572254335,qwen/qwen-2.5-vl-7b-instruct,LLM_PARSE1752193104
medical_travel_request_OWCP_957,0.8744313011828936,0.9823788973625543,0.893719806763285,0.9203980099502488,0.9685863874345549,0.9438775510204082,google/gemma-3-27b-it,LLM_PARSE1752193319
medical_travel_request_OWCP_957,0.9333633498424133,0.9904869530574749,0.9227053140096618,0.9502487562189055,0.9695431472081218,0.9597989949748743,microsoft/phi-4-multimodal-instruct,LLM_PARSE1752193533
medical_travel_request_OWCP_957,0.921727395411606,0.9650691379973317,0.8701923076923077,0.900497512437811,0.9627659574468085,0.9305912596401028,qwen/qwen-2.5-vl-7b-instruct,LLM_PARSE1752193125
test_1,0.9577039274924471,0.9859861817544773,0.9772727272727273,0.9772727272727273,1.0,0.9885057471264368,google/gemma-3-27b-it,LLM_PARSE1752193363
test_1,0.8682432432432432,0.8513135168957464,0.7272727272727273,0.7272727272727273,1.0,0.8421052631578948,microsoft/phi-4-multimodal-instruct,LLM_PARSE1752193544
test_1,0.7553551296505073,0.9299846825779932,0.9565217391304348,1.0,0.9565217391304348,0.9777777777777777,qwen/qwen-2.5-vl-7b-instruct,LLM_PARSE1752193132
test_2,0.09429280397022333,0.2303955584794412,0.1527777777777778,0.171875,0.5789473684210527,0.2650602409638554,google/gemma-3-27b-it,LLM_PARSE1752193372
test_2,0.09375,0.2358455734776353,0.2153846153846154,0.21875,0.9333333333333333,0.35443037974683544,microsoft/phi-4-multimodal-instruct,LLM_PARSE1752193551
test_2,0.0,0.0,0.0,0.0,0.0,0.0,qwen/qwen-2.5-vl-7b-instruct,LLM_PARSE1752193148
test_3,0.5540571428571428,0.6965109155704817,0.5136986301369864,0.5136986301369864,1.0,0.6787330316742082,google/gemma-3-27b-it,LLM_PARSE1752193402
test_3,0.5178280121183874,0.6601984018137722,0.4828767123287671,0.4828767123287671,1.0,0.6512702078521939,microsoft/phi-4-multimodal-instruct,LLM_PARSE1752193561
test_3,0.6811800610376398,0.8244587258416712,0.7064846416382252,0.708904109589041,0.9951923076923077,0.8279999999999998,qwen/qwen-2.5-vl-7b-instruct,LLM_PARSE1752193154
test_4,0.9404934687953556,0.9209651369162134,0.9047619047619048,0.9193548387096774,0.9827586206896551,0.95,google/gemma-3-27b-it,LLM_PARSE1752193426
test_4,0.49577464788732395,0.6577918775316766,0.835820895522388,0.9032258064516129,0.9180327868852459,0.9105691056910569,microsoft/phi-4-multimodal-instruct,LLM_PARSE1752193570
test_4,0.509915014164306,0.9384222336078193,0.90625,0.9354838709677419,0.9666666666666667,0.9508196721311476,qwen/qwen-2.5-vl-7b-instruct,LLM_PARSE1752193179
test_5,0.11688311688311688,0.145217121515546,0.1,0.1,1.0,0.18181818181818182,google/gemma-3-27b-it,LLM_PARSE1752193431
test_5,0.9830508474576272,0.9804331969030868,0.9523809523809523,1.0,0.9523809523809523,0.975609756097561,microsoft/phi-4-multimodal-instruct,LLM_PARSE1752193575
test_5,0.9698996655518395,0.9804331969030868,0.9523809523809523,1.0,0.9523809523809523,0.975609756097561,qwen/qwen-2.5-vl-7b-instruct,LLM_PARSE1752193187
benchmark,0.3131591013296653,0.6959250370277419,0.3510204081632653,0.8390243902439024,0.37636761487964987,0.5196374622356495,ds4sd/SmolDocling-256M-preview,LLM_PARSE1752327183
costco_bill,0.20055710306406685,0.20747572194890912,0.1590909090909091,0.1590909090909091,1.0,0.2745098039215686,ds4sd/SmolDocling-256M-preview,LLM_PARSE1752327423
cvs_coupon,0.9384965831435079,0.9380089897463492,0.8048780487804879,0.8918918918918919,0.8918918918918919,0.8918918918918919,ds4sd/SmolDocling-256M-preview,LLM_PARSE1752327533
grocery_bill,0.05804416403785489,0.068531396277478,0.06857142857142857,0.06976744186046512,0.8,0.1283422459893048,ds4sd/SmolDocling-256M-preview,LLM_PARSE1752327568
medical_invoice_sample1,0.4715447154471545,0.5420295004635525,0.3879781420765027,0.39226519337016574,0.9726027397260274,0.5590551181102362,ds4sd/SmolDocling-256M-preview,LLM_PARSE1752327679
medical_travel_request_OWCP_957,0.9116704805491991,0.9846151720521983,0.9,0.9402985074626866,0.9545454545454546,0.9473684210526316,ds4sd/SmolDocling-256M-preview,LLM_PARSE1752327801
test_1,0.9142857142857143,0.9045941094942251,0.8409090909090909,0.8409090909090909,1.0,0.9135802469135803,ds4sd/SmolDocling-256M-preview,LLM_PARSE1752327887
test_2,0.07761828814460393,0.07491099737364178,0.07368421052631578,0.21875,0.1,0.13725490196078433,ds4sd/SmolDocling-256M-preview,LLM_PARSE1752327917
test_3,0.9631433674297776,0.9852545986269438,0.9692832764505119,0.9726027397260274,0.9964912280701754,0.9844020797227038,ds4sd/SmolDocling-256M-preview,LLM_PARSE1752328039
test_4,0.42105263157894735,0.41443971976512034,0.30158730158730157,0.3064516129032258,0.95,0.4634146341463415,ds4sd/SmolDocling-256M-preview,LLM_PARSE1752328144
test_5,0.08142116950407106,0.595844223293273,0.75,0.75,1.0,0.8571428571428571,ds4sd/SmolDocling-256M-preview,LLM_PARSE1752328266
|