amirali1985 commited on
Commit
5166f80
·
verified ·
1 Parent(s): 5b51410

Upload add_sub_sorl_v1_abs10_K1_100K_2L1H128d/metrics.json with huggingface_hub

Browse files
add_sub_sorl_v1_abs10_K1_100K_2L1H128d/metrics.json CHANGED
@@ -5030,502 +5030,567 @@
5030
  "K": null,
5031
  "mode": "sft",
5032
  "n_digits": 6,
5033
- "n_per_split": 100
5034
  },
5035
  "splits": {
5036
  "add_S0": {
5037
- "full_accuracy": 0.03,
5038
- "n_examples": 100,
 
5039
  "per_subtask": {
5040
  "SA": {
5041
- "accuracy": 0.5933884297520661,
5042
- "count": 605
5043
  },
5044
  "SS": {
5045
- "accuracy": 0.8526315789473684,
5046
- "count": 95
5047
  }
5048
  }
5049
  },
5050
  "add_S1": {
5051
  "full_accuracy": 0.0,
5052
- "n_examples": 100,
 
5053
  "per_subtask": {
5054
  "SA": {
5055
- "accuracy": 0.5833333333333334,
5056
- "count": 204
5057
  },
5058
  "SC": {
5059
- "accuracy": 0.41420118343195267,
5060
- "count": 169
5061
  },
5062
  "SS": {
5063
- "accuracy": 0.9032258064516129,
5064
- "count": 31
5065
  },
5066
  "UC": {
5067
- "accuracy": 0.5202702702702703,
5068
- "count": 296
5069
  }
5070
  }
5071
  },
5072
  "add_S2": {
5073
- "full_accuracy": 0.01,
5074
- "n_examples": 100,
 
5075
  "per_subtask": {
5076
  "SA": {
5077
- "accuracy": 0.588957055214724,
5078
- "count": 163
5079
  },
5080
  "SC": {
5081
- "accuracy": 0.3384615384615385,
5082
- "count": 130
5083
  },
5084
  "SS": {
5085
- "accuracy": 0.6781609195402298,
5086
- "count": 87
5087
  },
5088
  "UC": {
5089
- "accuracy": 0.5911330049261084,
5090
- "count": 203
5091
  },
5092
  "US": {
5093
- "accuracy": 0.49572649572649574,
5094
- "count": 117
5095
  }
5096
  }
5097
  },
5098
  "add_S3": {
5099
- "full_accuracy": 0.0,
5100
- "n_examples": 100,
 
5101
  "per_subtask": {
5102
  "SA": {
5103
- "accuracy": 0.6033057851239669,
5104
- "count": 121
5105
  },
5106
  "SC": {
5107
- "accuracy": 0.2809917355371901,
5108
- "count": 121
5109
  },
5110
  "SS": {
5111
- "accuracy": 0.7959183673469388,
5112
- "count": 49
5113
  },
5114
  "UC": {
5115
- "accuracy": 0.510752688172043,
5116
- "count": 186
5117
  },
5118
  "US": {
5119
- "accuracy": 0.4080717488789238,
5120
- "count": 223
5121
  }
5122
  }
5123
  },
5124
  "add_S4": {
5125
- "full_accuracy": 0.03,
5126
- "n_examples": 100,
 
5127
  "per_subtask": {
5128
  "SA": {
5129
- "accuracy": 0.7115384615384616,
5130
- "count": 104
5131
  },
5132
  "SC": {
5133
- "accuracy": 0.27358490566037735,
5134
- "count": 106
5135
  },
5136
  "SS": {
5137
- "accuracy": 0.7391304347826086,
5138
- "count": 23
5139
  },
5140
  "UC": {
5141
- "accuracy": 0.56875,
5142
- "count": 160
5143
  },
5144
  "US": {
5145
- "accuracy": 0.31596091205211724,
5146
- "count": 307
5147
  }
5148
  }
5149
  },
5150
  "add_S5": {
5151
- "full_accuracy": 0.01,
5152
- "n_examples": 100,
 
5153
  "per_subtask": {
5154
  "SA": {
5155
- "accuracy": 0.6,
5156
- "count": 100
5157
  },
5158
  "SC": {
5159
- "accuracy": 0.25,
5160
- "count": 100
5161
  },
5162
  "UC": {
5163
- "accuracy": 0.34,
5164
- "count": 100
5165
  },
5166
  "US": {
5167
- "accuracy": 0.115,
5168
- "count": 400
5169
  }
5170
  }
5171
  },
5172
  "add_S6": {
5173
- "full_accuracy": 0.17,
5174
- "n_examples": 100,
 
5175
  "per_subtask": {
5176
  "SC": {
5177
- "accuracy": 0.26,
5178
- "count": 100
5179
  },
5180
  "UC": {
5181
- "accuracy": 0.84,
5182
- "count": 100
5183
  },
5184
  "US": {
5185
- "accuracy": 0.672,
5186
- "count": 500
5187
  }
5188
  }
5189
  },
5190
  "add_random": {
5191
- "full_accuracy": 0.02,
 
5192
  "n_examples": 200,
5193
  "per_subtask": {
5194
  "SA": {
5195
- "accuracy": 0.5704697986577181,
5196
- "count": 447
5197
  },
5198
  "SC": {
5199
- "accuracy": 0.459375,
5200
- "count": 320
5201
  },
5202
  "SS": {
5203
- "accuracy": 0.8214285714285714,
5204
- "count": 56
5205
  },
5206
  "UC": {
5207
- "accuracy": 0.5671077504725898,
5208
- "count": 529
5209
  },
5210
  "US": {
5211
- "accuracy": 0.4375,
5212
- "count": 48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5213
  }
5214
  }
5215
  },
5216
  "add_C3": {
5217
  "full_accuracy": 0.0,
5218
- "n_examples": 100,
 
5219
  "per_subtask": {
5220
  "SA": {
5221
- "accuracy": 0.6366666666666667,
5222
- "count": 300
5223
  },
5224
  "SC": {
5225
- "accuracy": 0.28,
5226
- "count": 100
5227
  },
5228
  "UC": {
5229
- "accuracy": 0.38860103626943004,
5230
- "count": 193
5231
  },
5232
  "US": {
5233
- "accuracy": 0.205607476635514,
5234
- "count": 107
5235
  }
5236
  }
5237
  },
5238
  "add_C4": {
5239
  "full_accuracy": 0.0,
5240
- "n_examples": 100,
 
5241
  "per_subtask": {
5242
  "SA": {
5243
- "accuracy": 0.68,
5244
- "count": 200
5245
  },
5246
  "SC": {
5247
- "accuracy": 0.27,
5248
- "count": 100
5249
  },
5250
  "UC": {
5251
- "accuracy": 0.33203125,
5252
- "count": 256
5253
  },
5254
  "US": {
5255
- "accuracy": 0.2916666666666667,
5256
- "count": 144
5257
  }
5258
  }
5259
  },
5260
  "add_C5": {
5261
- "full_accuracy": 0.0,
5262
- "n_examples": 100,
 
5263
  "per_subtask": {
5264
  "SA": {
5265
- "accuracy": 0.63,
5266
- "count": 100
5267
  },
5268
  "SC": {
5269
- "accuracy": 0.21,
5270
- "count": 100
5271
  },
5272
  "UC": {
5273
- "accuracy": 0.4150326797385621,
5274
- "count": 306
5275
  },
5276
  "US": {
5277
- "accuracy": 0.2268041237113402,
5278
- "count": 194
5279
  }
5280
  }
5281
  },
5282
  "add_C6": {
5283
- "full_accuracy": 0.01,
5284
- "n_examples": 100,
 
5285
  "per_subtask": {
5286
  "SC": {
5287
- "accuracy": 0.3,
5288
- "count": 100
5289
  },
5290
  "UC": {
5291
- "accuracy": 0.44808743169398907,
5292
- "count": 366
5293
  },
5294
  "US": {
5295
- "accuracy": 0.5854700854700855,
5296
- "count": 234
5297
  }
5298
  }
5299
  },
5300
  "sub_M0": {
5301
- "full_accuracy": 0.21,
5302
- "n_examples": 100,
 
5303
  "per_subtask": {
5304
  "MD": {
5305
- "accuracy": 0.7903494176372712,
5306
- "count": 601
5307
  },
5308
  "ME": {
5309
- "accuracy": 0.9292929292929293,
5310
- "count": 99
5311
  }
5312
  }
5313
  },
5314
  "sub_M1": {
5315
- "full_accuracy": 0.04,
5316
- "n_examples": 100,
 
5317
  "per_subtask": {
5318
  "MD": {
5319
- "accuracy": 0.8172043010752689,
5320
- "count": 279
5321
  },
5322
  "MB": {
5323
- "accuracy": 0.6758620689655173,
5324
- "count": 145
5325
  },
5326
  "ME": {
5327
- "accuracy": 0.9583333333333334,
5328
- "count": 24
5329
  },
5330
  "UB": {
5331
- "accuracy": 0.5753968253968254,
5332
- "count": 252
5333
  }
5334
  }
5335
  },
5336
  "sub_M2": {
5337
- "full_accuracy": 0.04,
5338
- "n_examples": 100,
 
5339
  "per_subtask": {
5340
  "MD": {
5341
- "accuracy": 0.863849765258216,
5342
- "count": 213
5343
  },
5344
  "MB": {
5345
- "accuracy": 0.672566371681416,
5346
- "count": 113
5347
  },
5348
  "ME": {
5349
- "accuracy": 0.9411764705882353,
5350
- "count": 85
5351
  },
5352
  "UB": {
5353
- "accuracy": 0.4696132596685083,
5354
- "count": 181
5355
  },
5356
  "UD": {
5357
- "accuracy": 0.4444444444444444,
5358
- "count": 108
5359
  }
5360
  }
5361
  },
5362
  "sub_M3": {
5363
  "full_accuracy": 0.04,
5364
- "n_examples": 100,
 
5365
  "per_subtask": {
5366
  "MD": {
5367
- "accuracy": 0.9162011173184358,
5368
- "count": 179
5369
  },
5370
  "MB": {
5371
- "accuracy": 0.5922330097087378,
5372
- "count": 103
5373
  },
5374
  "ME": {
5375
- "accuracy": 0.9285714285714286,
5376
- "count": 56
5377
  },
5378
  "UB": {
5379
- "accuracy": 0.5033557046979866,
5380
- "count": 149
5381
  },
5382
  "UD": {
5383
- "accuracy": 0.22065727699530516,
5384
- "count": 213
5385
  }
5386
  }
5387
  },
5388
  "sub_M4": {
5389
- "full_accuracy": 0.01,
5390
- "n_examples": 100,
 
5391
  "per_subtask": {
5392
  "MD": {
5393
- "accuracy": 0.79,
5394
- "count": 200
5395
  },
5396
  "MB": {
5397
- "accuracy": 0.44,
5398
- "count": 100
5399
  },
5400
  "UB": {
5401
- "accuracy": 0.57,
5402
- "count": 100
5403
  },
5404
  "UD": {
5405
- "accuracy": 0.11,
5406
- "count": 300
5407
  }
5408
  }
5409
  },
5410
  "sub_M5": {
5411
- "full_accuracy": 0.05,
5412
- "n_examples": 100,
 
5413
  "per_subtask": {
5414
  "MD": {
5415
  "accuracy": 1.0,
5416
- "count": 100
5417
  },
5418
  "MB": {
5419
- "accuracy": 0.42,
5420
- "count": 100
5421
  },
5422
  "UB": {
5423
- "accuracy": 0.61,
5424
- "count": 100
5425
  },
5426
  "UD": {
5427
- "accuracy": 0.11,
5428
- "count": 400
5429
  }
5430
  }
5431
  },
5432
  "sub_random": {
5433
- "full_accuracy": 0.095,
 
5434
  "n_examples": 200,
5435
  "per_subtask": {
5436
  "MD": {
5437
- "accuracy": 0.7883333333333333,
5438
- "count": 600
5439
  },
5440
  "MB": {
5441
- "accuracy": 0.6853932584269663,
5442
- "count": 267
5443
  },
5444
  "ME": {
5445
- "accuracy": 0.9433962264150944,
5446
  "count": 53
5447
  },
5448
  "UB": {
5449
- "accuracy": 0.6264236902050114,
5450
- "count": 439
5451
  },
5452
  "UD": {
5453
- "accuracy": 0.43902439024390244,
5454
- "count": 41
5455
  }
5456
  }
5457
  },
5458
  "sub_B3": {
5459
  "full_accuracy": 0.02,
5460
- "n_examples": 100,
 
5461
  "per_subtask": {
5462
  "MD": {
5463
- "accuracy": 0.7133333333333334,
5464
- "count": 300
5465
  },
5466
  "MB": {
5467
- "accuracy": 0.67,
5468
- "count": 100
5469
  },
5470
  "UB": {
5471
- "accuracy": 0.5279187817258884,
5472
- "count": 197
5473
  },
5474
  "UD": {
5475
- "accuracy": 0.3300970873786408,
5476
- "count": 103
5477
  }
5478
  }
5479
  },
5480
  "sub_B4": {
5481
- "full_accuracy": 0.01,
5482
- "n_examples": 100,
 
5483
  "per_subtask": {
5484
  "MD": {
5485
- "accuracy": 0.765,
5486
- "count": 200
5487
  },
5488
  "MB": {
5489
- "accuracy": 0.59,
5490
- "count": 100
5491
  },
5492
  "UB": {
5493
- "accuracy": 0.38866396761133604,
5494
- "count": 247
5495
  },
5496
  "UD": {
5497
- "accuracy": 0.3333333333333333,
5498
- "count": 153
5499
  }
5500
  }
5501
  },
5502
  "sub_B5": {
5503
- "full_accuracy": 0.0,
5504
- "n_examples": 100,
 
5505
  "per_subtask": {
5506
  "MD": {
5507
  "accuracy": 1.0,
5508
- "count": 100
5509
  },
5510
  "MB": {
5511
- "accuracy": 0.53,
5512
- "count": 100
5513
  },
5514
  "UB": {
5515
- "accuracy": 0.46308724832214765,
5516
- "count": 298
5517
  },
5518
  "UD": {
5519
- "accuracy": 0.31683168316831684,
5520
- "count": 202
5521
  }
5522
  }
5523
  }
5524
  },
5525
  "summary": {
5526
- "overall_accuracy": 0.03791666666666667,
5527
- "total_examples": 2400,
5528
- "n_splits": 22
 
5529
  }
5530
  },
5531
  "sorl_eval": {
@@ -5534,502 +5599,567 @@
5534
  "K": 1,
5535
  "mode": "sorl",
5536
  "n_digits": 6,
5537
- "n_per_split": 100
5538
  },
5539
  "splits": {
5540
  "add_S0": {
5541
  "full_accuracy": 0.98,
5542
- "n_examples": 100,
 
5543
  "per_subtask": {
5544
  "SA": {
5545
- "accuracy": 0.996694214876033,
5546
- "count": 605
5547
  },
5548
  "SS": {
5549
  "accuracy": 1.0,
5550
- "count": 95
5551
  }
5552
  }
5553
  },
5554
  "add_S1": {
5555
- "full_accuracy": 0.93,
5556
- "n_examples": 100,
 
5557
  "per_subtask": {
5558
  "SA": {
5559
- "accuracy": 0.9852941176470589,
5560
- "count": 204
5561
  },
5562
  "SC": {
5563
- "accuracy": 0.9822485207100592,
5564
- "count": 169
5565
  },
5566
  "SS": {
5567
- "accuracy": 0.967741935483871,
5568
- "count": 31
5569
  },
5570
  "UC": {
5571
- "accuracy": 0.9932432432432432,
5572
- "count": 296
5573
  }
5574
  }
5575
  },
5576
  "add_S2": {
5577
- "full_accuracy": 0.81,
5578
- "n_examples": 100,
 
5579
  "per_subtask": {
5580
  "SA": {
5581
- "accuracy": 0.9877300613496932,
5582
- "count": 163
5583
  },
5584
  "SC": {
5585
- "accuracy": 0.9923076923076923,
5586
- "count": 130
5587
  },
5588
  "SS": {
5589
- "accuracy": 1.0,
5590
- "count": 87
5591
  },
5592
  "UC": {
5593
- "accuracy": 0.916256157635468,
5594
- "count": 203
5595
  },
5596
  "US": {
5597
  "accuracy": 1.0,
5598
- "count": 117
5599
  }
5600
  }
5601
  },
5602
  "add_S3": {
5603
- "full_accuracy": 0.63,
5604
- "n_examples": 100,
 
5605
  "per_subtask": {
5606
  "SA": {
5607
  "accuracy": 1.0,
5608
- "count": 121
5609
  },
5610
  "SC": {
5611
- "accuracy": 0.9752066115702479,
5612
- "count": 121
5613
  },
5614
  "SS": {
5615
  "accuracy": 1.0,
5616
- "count": 49
5617
  },
5618
  "UC": {
5619
- "accuracy": 0.8118279569892473,
5620
- "count": 186
5621
  },
5622
  "US": {
5623
  "accuracy": 1.0,
5624
- "count": 223
5625
  }
5626
  }
5627
  },
5628
  "add_S4": {
5629
- "full_accuracy": 0.59,
5630
- "n_examples": 100,
 
5631
  "per_subtask": {
5632
  "SA": {
5633
  "accuracy": 1.0,
5634
- "count": 104
5635
  },
5636
  "SC": {
5637
  "accuracy": 1.0,
5638
- "count": 106
5639
  },
5640
  "SS": {
5641
  "accuracy": 1.0,
5642
- "count": 23
5643
  },
5644
  "UC": {
5645
- "accuracy": 0.75,
5646
- "count": 160
5647
  },
5648
  "US": {
5649
- "accuracy": 0.9739413680781759,
5650
- "count": 307
5651
  }
5652
  }
5653
  },
5654
  "add_S5": {
5655
- "full_accuracy": 0.34,
5656
- "n_examples": 100,
 
5657
  "per_subtask": {
5658
  "SA": {
5659
  "accuracy": 1.0,
5660
- "count": 100
5661
  },
5662
  "SC": {
5663
  "accuracy": 1.0,
5664
- "count": 100
5665
  },
5666
  "UC": {
5667
- "accuracy": 0.39,
5668
- "count": 100
5669
  },
5670
  "US": {
5671
- "accuracy": 0.85,
5672
- "count": 400
5673
  }
5674
  }
5675
  },
5676
  "add_S6": {
5677
- "full_accuracy": 0.45,
5678
- "n_examples": 100,
 
5679
  "per_subtask": {
5680
  "SC": {
5681
  "accuracy": 1.0,
5682
- "count": 100
5683
  },
5684
  "UC": {
5685
- "accuracy": 0.53,
5686
- "count": 100
5687
  },
5688
  "US": {
5689
- "accuracy": 0.82,
5690
- "count": 500
5691
  }
5692
  }
5693
  },
5694
  "add_random": {
5695
- "full_accuracy": 0.945,
 
5696
  "n_examples": 200,
5697
  "per_subtask": {
5698
  "SA": {
5699
- "accuracy": 0.9932885906040269,
5700
- "count": 447
5701
  },
5702
  "SC": {
5703
- "accuracy": 0.996875,
5704
- "count": 320
5705
  },
5706
  "SS": {
5707
  "accuracy": 1.0,
5708
- "count": 56
5709
  },
5710
  "UC": {
5711
- "accuracy": 0.9867674858223062,
5712
- "count": 529
5713
  },
5714
  "US": {
5715
  "accuracy": 1.0,
5716
- "count": 48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5717
  }
5718
  }
5719
  },
5720
  "add_C3": {
5721
- "full_accuracy": 0.78,
5722
- "n_examples": 100,
 
5723
  "per_subtask": {
5724
  "SA": {
5725
  "accuracy": 1.0,
5726
- "count": 300
5727
  },
5728
  "SC": {
5729
  "accuracy": 1.0,
5730
- "count": 100
5731
  },
5732
  "UC": {
5733
- "accuracy": 0.8860103626943006,
5734
- "count": 193
5735
  },
5736
  "US": {
5737
  "accuracy": 1.0,
5738
- "count": 107
5739
  }
5740
  }
5741
  },
5742
  "add_C4": {
5743
- "full_accuracy": 0.77,
5744
- "n_examples": 100,
 
5745
  "per_subtask": {
5746
  "SA": {
5747
  "accuracy": 1.0,
5748
- "count": 200
5749
  },
5750
  "SC": {
5751
  "accuracy": 1.0,
5752
- "count": 100
5753
  },
5754
  "UC": {
5755
- "accuracy": 0.90625,
5756
- "count": 256
5757
  },
5758
  "US": {
5759
- "accuracy": 0.9722222222222222,
5760
- "count": 144
5761
  }
5762
  }
5763
  },
5764
  "add_C5": {
5765
- "full_accuracy": 0.75,
5766
- "n_examples": 100,
 
5767
  "per_subtask": {
5768
  "SA": {
5769
  "accuracy": 1.0,
5770
- "count": 100
5771
  },
5772
  "SC": {
5773
- "accuracy": 0.99,
5774
- "count": 100
5775
  },
5776
  "UC": {
5777
- "accuracy": 0.9281045751633987,
5778
- "count": 306
5779
  },
5780
  "US": {
5781
- "accuracy": 0.9587628865979382,
5782
- "count": 194
5783
  }
5784
  }
5785
  },
5786
  "add_C6": {
5787
- "full_accuracy": 0.69,
5788
- "n_examples": 100,
 
5789
  "per_subtask": {
5790
  "SC": {
5791
  "accuracy": 1.0,
5792
- "count": 100
5793
  },
5794
  "UC": {
5795
- "accuracy": 0.912568306010929,
5796
- "count": 366
5797
  },
5798
  "US": {
5799
- "accuracy": 0.9829059829059829,
5800
- "count": 234
5801
  }
5802
  }
5803
  },
5804
  "sub_M0": {
5805
- "full_accuracy": 0.87,
5806
- "n_examples": 100,
 
5807
  "per_subtask": {
5808
  "MD": {
5809
- "accuracy": 0.978369384359401,
5810
- "count": 601
5811
  },
5812
  "ME": {
5813
  "accuracy": 1.0,
5814
- "count": 99
5815
  }
5816
  }
5817
  },
5818
  "sub_M1": {
5819
- "full_accuracy": 0.88,
5820
- "n_examples": 100,
 
5821
  "per_subtask": {
5822
  "MD": {
5823
- "accuracy": 0.982078853046595,
5824
- "count": 279
5825
  },
5826
  "MB": {
5827
- "accuracy": 0.993103448275862,
5828
- "count": 145
5829
  },
5830
  "ME": {
5831
  "accuracy": 1.0,
5832
- "count": 24
5833
  },
5834
  "UB": {
5835
- "accuracy": 0.9761904761904762,
5836
- "count": 252
5837
  }
5838
  }
5839
  },
5840
  "sub_M2": {
5841
- "full_accuracy": 0.68,
5842
- "n_examples": 100,
 
5843
  "per_subtask": {
5844
  "MD": {
5845
- "accuracy": 0.9859154929577465,
5846
- "count": 213
5847
  },
5848
  "MB": {
5849
- "accuracy": 0.9823008849557522,
5850
- "count": 113
5851
  },
5852
  "ME": {
5853
  "accuracy": 1.0,
5854
- "count": 85
5855
  },
5856
  "UB": {
5857
- "accuracy": 0.8342541436464088,
5858
- "count": 181
5859
  },
5860
  "UD": {
5861
  "accuracy": 1.0,
5862
- "count": 108
5863
  }
5864
  }
5865
  },
5866
  "sub_M3": {
5867
- "full_accuracy": 0.5,
5868
- "n_examples": 100,
 
5869
  "per_subtask": {
5870
  "MD": {
5871
- "accuracy": 0.9888268156424581,
5872
- "count": 179
5873
  },
5874
  "MB": {
5875
- "accuracy": 0.9514563106796117,
5876
- "count": 103
5877
  },
5878
  "ME": {
5879
  "accuracy": 1.0,
5880
- "count": 56
5881
  },
5882
  "UB": {
5883
- "accuracy": 0.6778523489932886,
5884
- "count": 149
5885
  },
5886
  "UD": {
5887
  "accuracy": 1.0,
5888
- "count": 213
5889
  }
5890
  }
5891
  },
5892
  "sub_M4": {
5893
- "full_accuracy": 0.33,
5894
- "n_examples": 100,
 
5895
  "per_subtask": {
5896
  "MD": {
5897
  "accuracy": 1.0,
5898
- "count": 200
5899
  },
5900
  "MB": {
5901
  "accuracy": 1.0,
5902
- "count": 100
5903
  },
5904
  "UB": {
5905
- "accuracy": 0.43,
5906
- "count": 100
5907
  },
5908
  "UD": {
5909
- "accuracy": 0.9333333333333333,
5910
- "count": 300
5911
  }
5912
  }
5913
  },
5914
  "sub_M5": {
5915
  "full_accuracy": 0.08,
5916
- "n_examples": 100,
 
5917
  "per_subtask": {
5918
  "MD": {
5919
  "accuracy": 1.0,
5920
- "count": 100
5921
  },
5922
  "MB": {
5923
  "accuracy": 1.0,
5924
- "count": 100
5925
  },
5926
  "UB": {
5927
- "accuracy": 0.36,
5928
- "count": 100
5929
  },
5930
  "UD": {
5931
- "accuracy": 0.725,
5932
- "count": 400
5933
  }
5934
  }
5935
  },
5936
  "sub_random": {
5937
- "full_accuracy": 0.85,
 
5938
  "n_examples": 200,
5939
  "per_subtask": {
5940
  "MD": {
5941
- "accuracy": 0.99,
5942
- "count": 600
5943
  },
5944
  "MB": {
5945
- "accuracy": 0.9812734082397003,
5946
- "count": 267
5947
  },
5948
  "ME": {
5949
  "accuracy": 1.0,
5950
  "count": 53
5951
  },
5952
  "UB": {
5953
- "accuracy": 0.9567198177676538,
5954
- "count": 439
5955
  },
5956
  "UD": {
5957
  "accuracy": 1.0,
5958
- "count": 41
5959
  }
5960
  }
5961
  },
5962
  "sub_B3": {
5963
- "full_accuracy": 0.69,
5964
- "n_examples": 100,
 
5965
  "per_subtask": {
5966
  "MD": {
5967
- "accuracy": 0.9833333333333333,
5968
- "count": 300
5969
  },
5970
  "MB": {
5971
- "accuracy": 0.99,
5972
- "count": 100
5973
  },
5974
  "UB": {
5975
- "accuracy": 0.8730964467005076,
5976
- "count": 197
5977
  },
5978
  "UD": {
5979
- "accuracy": 0.9805825242718447,
5980
- "count": 103
5981
  }
5982
  }
5983
  },
5984
  "sub_B4": {
5985
- "full_accuracy": 0.69,
5986
- "n_examples": 100,
 
5987
  "per_subtask": {
5988
  "MD": {
5989
  "accuracy": 1.0,
5990
- "count": 200
5991
  },
5992
  "MB": {
5993
  "accuracy": 1.0,
5994
- "count": 100
5995
  },
5996
  "UB": {
5997
- "accuracy": 0.8785425101214575,
5998
- "count": 247
5999
  },
6000
  "UD": {
6001
- "accuracy": 0.9738562091503268,
6002
- "count": 153
6003
  }
6004
  }
6005
  },
6006
  "sub_B5": {
6007
  "full_accuracy": 0.46,
6008
- "n_examples": 100,
 
6009
  "per_subtask": {
6010
  "MD": {
6011
  "accuracy": 1.0,
6012
- "count": 100
6013
  },
6014
  "MB": {
6015
  "accuracy": 1.0,
6016
- "count": 100
6017
  },
6018
  "UB": {
6019
- "accuracy": 0.8288590604026845,
6020
- "count": 298
6021
  },
6022
  "UD": {
6023
- "accuracy": 0.9603960396039604,
6024
- "count": 202
6025
  }
6026
  }
6027
  }
6028
  },
6029
  "summary": {
6030
- "overall_accuracy": 0.6870833333333334,
6031
- "total_examples": 2400,
6032
- "n_splits": 22
 
6033
  }
6034
  },
6035
  "sorl_overall_accuracy": 0.6870833333333334,
 
5030
  "K": null,
5031
  "mode": "sft",
5032
  "n_digits": 6,
5033
+ "n_per_split": 50
5034
  },
5035
  "splits": {
5036
  "add_S0": {
5037
+ "full_accuracy": 0.04,
5038
+ "digit_accuracy": 0.6428571428571429,
5039
+ "n_examples": 50,
5040
  "per_subtask": {
5041
  "SA": {
5042
+ "accuracy": 0.6033898305084746,
5043
+ "count": 295
5044
  },
5045
  "SS": {
5046
+ "accuracy": 0.8545454545454545,
5047
+ "count": 55
5048
  }
5049
  }
5050
  },
5051
  "add_S1": {
5052
  "full_accuracy": 0.0,
5053
+ "digit_accuracy": 0.5542857142857143,
5054
+ "n_examples": 50,
5055
  "per_subtask": {
5056
  "SA": {
5057
+ "accuracy": 0.5952380952380952,
5058
+ "count": 126
5059
  },
5060
  "SC": {
5061
+ "accuracy": 0.4050632911392405,
5062
+ "count": 79
5063
  },
5064
  "SS": {
5065
+ "accuracy": 0.8571428571428571,
5066
+ "count": 21
5067
  },
5068
  "UC": {
5069
+ "accuracy": 0.5564516129032258,
5070
+ "count": 124
5071
  }
5072
  }
5073
  },
5074
  "add_S2": {
5075
+ "full_accuracy": 0.0,
5076
+ "digit_accuracy": 0.54,
5077
+ "n_examples": 50,
5078
  "per_subtask": {
5079
  "SA": {
5080
+ "accuracy": 0.6666666666666666,
5081
+ "count": 75
5082
  },
5083
  "SC": {
5084
+ "accuracy": 0.3225806451612903,
5085
+ "count": 62
5086
  },
5087
  "SS": {
5088
+ "accuracy": 0.6923076923076923,
5089
+ "count": 39
5090
  },
5091
  "UC": {
5092
+ "accuracy": 0.5405405405405406,
5093
+ "count": 111
5094
  },
5095
  "US": {
5096
+ "accuracy": 0.5079365079365079,
5097
+ "count": 63
5098
  }
5099
  }
5100
  },
5101
  "add_S3": {
5102
+ "full_accuracy": 0.02,
5103
+ "digit_accuracy": 0.4742857142857143,
5104
+ "n_examples": 50,
5105
  "per_subtask": {
5106
  "SA": {
5107
+ "accuracy": 0.6166666666666667,
5108
+ "count": 60
5109
  },
5110
  "SC": {
5111
+ "accuracy": 0.49122807017543857,
5112
+ "count": 57
5113
  },
5114
  "SS": {
5115
+ "accuracy": 0.631578947368421,
5116
+ "count": 19
5117
  },
5118
  "UC": {
5119
+ "accuracy": 0.47115384615384615,
5120
+ "count": 104
5121
  },
5122
  "US": {
5123
+ "accuracy": 0.36363636363636365,
5124
+ "count": 110
5125
  }
5126
  }
5127
  },
5128
  "add_S4": {
5129
+ "full_accuracy": 0.02,
5130
+ "digit_accuracy": 0.4057142857142857,
5131
+ "n_examples": 50,
5132
  "per_subtask": {
5133
  "SA": {
5134
+ "accuracy": 0.6666666666666666,
5135
+ "count": 48
5136
  },
5137
  "SC": {
5138
+ "accuracy": 0.23076923076923078,
5139
+ "count": 52
5140
  },
5141
  "SS": {
5142
+ "accuracy": 0.8571428571428571,
5143
+ "count": 7
5144
  },
5145
  "UC": {
5146
+ "accuracy": 0.5280898876404494,
5147
+ "count": 89
5148
  },
5149
  "US": {
5150
+ "accuracy": 0.2922077922077922,
5151
+ "count": 154
5152
  }
5153
  }
5154
  },
5155
  "add_S5": {
5156
+ "full_accuracy": 0.0,
5157
+ "digit_accuracy": 0.18571428571428572,
5158
+ "n_examples": 50,
5159
  "per_subtask": {
5160
  "SA": {
5161
+ "accuracy": 0.66,
5162
+ "count": 50
5163
  },
5164
  "SC": {
5165
+ "accuracy": 0.28,
5166
+ "count": 50
5167
  },
5168
  "UC": {
5169
+ "accuracy": 0.26,
5170
+ "count": 50
5171
  },
5172
  "US": {
5173
+ "accuracy": 0.025,
5174
+ "count": 200
5175
  }
5176
  }
5177
  },
5178
  "add_S6": {
5179
+ "full_accuracy": 0.14,
5180
+ "digit_accuracy": 0.5857142857142857,
5181
+ "n_examples": 50,
5182
  "per_subtask": {
5183
  "SC": {
5184
+ "accuracy": 0.22,
5185
+ "count": 50
5186
  },
5187
  "UC": {
5188
+ "accuracy": 0.78,
5189
+ "count": 50
5190
  },
5191
  "US": {
5192
+ "accuracy": 0.62,
5193
+ "count": 250
5194
  }
5195
  }
5196
  },
5197
  "add_random": {
5198
+ "full_accuracy": 0.015,
5199
+ "digit_accuracy": 0.5328571428571428,
5200
  "n_examples": 200,
5201
  "per_subtask": {
5202
  "SA": {
5203
+ "accuracy": 0.5777262180974478,
5204
+ "count": 431
5205
  },
5206
  "SC": {
5207
+ "accuracy": 0.4462025316455696,
5208
+ "count": 316
5209
  },
5210
  "SS": {
5211
+ "accuracy": 0.8205128205128205,
5212
+ "count": 39
5213
  },
5214
  "UC": {
5215
+ "accuracy": 0.5285714285714286,
5216
+ "count": 560
5217
  },
5218
  "US": {
5219
+ "accuracy": 0.5185185185185185,
5220
+ "count": 54
5221
+ }
5222
+ }
5223
+ },
5224
+ "add_C1": {
5225
+ "full_accuracy": 0.0,
5226
+ "digit_accuracy": 0.5342857142857143,
5227
+ "n_examples": 50,
5228
+ "per_subtask": {
5229
+ "SA": {
5230
+ "accuracy": 0.592,
5231
+ "count": 250
5232
+ },
5233
+ "SC": {
5234
+ "accuracy": 0.32,
5235
+ "count": 50
5236
+ },
5237
+ "UC": {
5238
+ "accuracy": 0.46,
5239
+ "count": 50
5240
+ }
5241
+ }
5242
+ },
5243
+ "add_C2": {
5244
+ "full_accuracy": 0.0,
5245
+ "digit_accuracy": 0.4542857142857143,
5246
+ "n_examples": 50,
5247
+ "per_subtask": {
5248
+ "SA": {
5249
+ "accuracy": 0.55,
5250
+ "count": 200
5251
+ },
5252
+ "SC": {
5253
+ "accuracy": 0.18,
5254
+ "count": 50
5255
+ },
5256
+ "UC": {
5257
+ "accuracy": 0.4457831325301205,
5258
+ "count": 83
5259
+ },
5260
+ "US": {
5261
+ "accuracy": 0.17647058823529413,
5262
+ "count": 17
5263
  }
5264
  }
5265
  },
5266
  "add_C3": {
5267
  "full_accuracy": 0.0,
5268
+ "digit_accuracy": 0.4342857142857143,
5269
+ "n_examples": 50,
5270
  "per_subtask": {
5271
  "SA": {
5272
+ "accuracy": 0.5866666666666667,
5273
+ "count": 150
5274
  },
5275
  "SC": {
5276
+ "accuracy": 0.22,
5277
+ "count": 50
5278
  },
5279
  "UC": {
5280
+ "accuracy": 0.42,
5281
+ "count": 100
5282
  },
5283
  "US": {
5284
+ "accuracy": 0.22,
5285
+ "count": 50
5286
  }
5287
  }
5288
  },
5289
  "add_C4": {
5290
  "full_accuracy": 0.0,
5291
+ "digit_accuracy": 0.43714285714285717,
5292
+ "n_examples": 50,
5293
  "per_subtask": {
5294
  "SA": {
5295
+ "accuracy": 0.7,
5296
+ "count": 100
5297
  },
5298
  "SC": {
5299
+ "accuracy": 0.34,
5300
+ "count": 50
5301
  },
5302
  "UC": {
5303
+ "accuracy": 0.3181818181818182,
5304
+ "count": 132
5305
  },
5306
  "US": {
5307
+ "accuracy": 0.35294117647058826,
5308
+ "count": 68
5309
  }
5310
  }
5311
  },
5312
  "add_C5": {
5313
+ "full_accuracy": 0.02,
5314
+ "digit_accuracy": 0.3485714285714286,
5315
+ "n_examples": 50,
5316
  "per_subtask": {
5317
  "SA": {
5318
+ "accuracy": 0.62,
5319
+ "count": 50
5320
  },
5321
  "SC": {
5322
+ "accuracy": 0.28,
5323
+ "count": 50
5324
  },
5325
  "UC": {
5326
+ "accuracy": 0.3972602739726027,
5327
+ "count": 146
5328
  },
5329
  "US": {
5330
+ "accuracy": 0.18269230769230768,
5331
+ "count": 104
5332
  }
5333
  }
5334
  },
5335
  "add_C6": {
5336
+ "full_accuracy": 0.0,
5337
+ "digit_accuracy": 0.4857142857142857,
5338
+ "n_examples": 50,
5339
  "per_subtask": {
5340
  "SC": {
5341
+ "accuracy": 0.24,
5342
+ "count": 50
5343
  },
5344
  "UC": {
5345
+ "accuracy": 0.49206349206349204,
5346
+ "count": 189
5347
  },
5348
  "US": {
5349
+ "accuracy": 0.5855855855855856,
5350
+ "count": 111
5351
  }
5352
  }
5353
  },
5354
  "sub_M0": {
5355
+ "full_accuracy": 0.16,
5356
+ "digit_accuracy": 0.7828571428571428,
5357
+ "n_examples": 50,
5358
  "per_subtask": {
5359
  "MD": {
5360
+ "accuracy": 0.7623762376237624,
5361
+ "count": 303
5362
  },
5363
  "ME": {
5364
+ "accuracy": 0.9148936170212766,
5365
+ "count": 47
5366
  }
5367
  }
5368
  },
5369
  "sub_M1": {
5370
+ "full_accuracy": 0.1,
5371
+ "digit_accuracy": 0.74,
5372
+ "n_examples": 50,
5373
  "per_subtask": {
5374
  "MD": {
5375
+ "accuracy": 0.8439716312056738,
5376
+ "count": 141
5377
  },
5378
  "MB": {
5379
+ "accuracy": 0.7222222222222222,
5380
+ "count": 72
5381
  },
5382
  "ME": {
5383
+ "accuracy": 0.7777777777777778,
5384
+ "count": 18
5385
  },
5386
  "UB": {
5387
+ "accuracy": 0.6218487394957983,
5388
+ "count": 119
5389
  }
5390
  }
5391
  },
5392
  "sub_M2": {
5393
+ "full_accuracy": 0.06,
5394
+ "digit_accuracy": 0.6457142857142857,
5395
+ "n_examples": 50,
5396
  "per_subtask": {
5397
  "MD": {
5398
+ "accuracy": 0.8482142857142857,
5399
+ "count": 112
5400
  },
5401
  "MB": {
5402
+ "accuracy": 0.5094339622641509,
5403
+ "count": 53
5404
  },
5405
  "ME": {
5406
+ "accuracy": 0.851063829787234,
5407
+ "count": 47
5408
  },
5409
  "UB": {
5410
+ "accuracy": 0.4823529411764706,
5411
+ "count": 85
5412
  },
5413
  "UD": {
5414
+ "accuracy": 0.4339622641509434,
5415
+ "count": 53
5416
  }
5417
  }
5418
  },
5419
  "sub_M3": {
5420
  "full_accuracy": 0.04,
5421
+ "digit_accuracy": 0.5914285714285714,
5422
+ "n_examples": 50,
5423
  "per_subtask": {
5424
  "MD": {
5425
+ "accuracy": 0.8762886597938144,
5426
+ "count": 97
5427
  },
5428
  "MB": {
5429
+ "accuracy": 0.6862745098039216,
5430
+ "count": 51
5431
  },
5432
  "ME": {
5433
+ "accuracy": 0.9629629629629629,
5434
+ "count": 27
5435
  },
5436
  "UB": {
5437
+ "accuracy": 0.5,
5438
+ "count": 74
5439
  },
5440
  "UD": {
5441
+ "accuracy": 0.2376237623762376,
5442
+ "count": 101
5443
  }
5444
  }
5445
  },
5446
  "sub_M4": {
5447
+ "full_accuracy": 0.02,
5448
+ "digit_accuracy": 0.4514285714285714,
5449
+ "n_examples": 50,
5450
  "per_subtask": {
5451
  "MD": {
5452
+ "accuracy": 0.73,
5453
+ "count": 100
5454
  },
5455
  "MB": {
5456
+ "accuracy": 0.6,
5457
+ "count": 50
5458
  },
5459
  "UB": {
5460
+ "accuracy": 0.54,
5461
+ "count": 50
5462
  },
5463
  "UD": {
5464
+ "accuracy": 0.18666666666666668,
5465
+ "count": 150
5466
  }
5467
  }
5468
  },
5469
  "sub_M5": {
5470
+ "full_accuracy": 0.02,
5471
+ "digit_accuracy": 0.3514285714285714,
5472
+ "n_examples": 50,
5473
  "per_subtask": {
5474
  "MD": {
5475
  "accuracy": 1.0,
5476
+ "count": 50
5477
  },
5478
  "MB": {
5479
+ "accuracy": 0.46,
5480
+ "count": 50
5481
  },
5482
  "UB": {
5483
+ "accuracy": 0.54,
5484
+ "count": 50
5485
  },
5486
  "UD": {
5487
+ "accuracy": 0.115,
5488
+ "count": 200
5489
  }
5490
  }
5491
  },
5492
  "sub_random": {
5493
+ "full_accuracy": 0.055,
5494
+ "digit_accuracy": 0.6971428571428572,
5495
  "n_examples": 200,
5496
  "per_subtask": {
5497
  "MD": {
5498
+ "accuracy": 0.8105263157894737,
5499
+ "count": 570
5500
  },
5501
  "MB": {
5502
+ "accuracy": 0.6462093862815884,
5503
+ "count": 277
5504
  },
5505
  "ME": {
5506
+ "accuracy": 0.9245283018867925,
5507
  "count": 53
5508
  },
5509
  "UB": {
5510
+ "accuracy": 0.583864118895966,
5511
+ "count": 471
5512
  },
5513
  "UD": {
5514
+ "accuracy": 0.3793103448275862,
5515
+ "count": 29
5516
  }
5517
  }
5518
  },
5519
  "sub_B3": {
5520
  "full_accuracy": 0.02,
5521
+ "digit_accuracy": 0.5914285714285714,
5522
+ "n_examples": 50,
5523
  "per_subtask": {
5524
  "MD": {
5525
+ "accuracy": 0.7333333333333333,
5526
+ "count": 150
5527
  },
5528
  "MB": {
5529
+ "accuracy": 0.64,
5530
+ "count": 50
5531
  },
5532
  "UB": {
5533
+ "accuracy": 0.45544554455445546,
5534
+ "count": 101
5535
  },
5536
  "UD": {
5537
+ "accuracy": 0.3877551020408163,
5538
+ "count": 49
5539
  }
5540
  }
5541
  },
5542
  "sub_B4": {
5543
+ "full_accuracy": 0.0,
5544
+ "digit_accuracy": 0.52,
5545
+ "n_examples": 50,
5546
  "per_subtask": {
5547
  "MD": {
5548
+ "accuracy": 0.77,
5549
+ "count": 100
5550
  },
5551
  "MB": {
5552
+ "accuracy": 0.66,
5553
+ "count": 50
5554
  },
5555
  "UB": {
5556
+ "accuracy": 0.4297520661157025,
5557
+ "count": 121
5558
  },
5559
  "UD": {
5560
+ "accuracy": 0.25316455696202533,
5561
+ "count": 79
5562
  }
5563
  }
5564
  },
5565
  "sub_B5": {
5566
+ "full_accuracy": 0.02,
5567
+ "digit_accuracy": 0.5314285714285715,
5568
+ "n_examples": 50,
5569
  "per_subtask": {
5570
  "MD": {
5571
  "accuracy": 1.0,
5572
+ "count": 50
5573
  },
5574
  "MB": {
5575
+ "accuracy": 0.56,
5576
+ "count": 50
5577
  },
5578
  "UB": {
5579
+ "accuracy": 0.48026315789473684,
5580
+ "count": 152
5581
  },
5582
  "UD": {
5583
+ "accuracy": 0.35714285714285715,
5584
+ "count": 98
5585
  }
5586
  }
5587
  }
5588
  },
5589
  "summary": {
5590
+ "overall_accuracy": 0.032,
5591
+ "digit_accuracy": 0.54,
5592
+ "total_examples": 1500,
5593
+ "n_splits": 24
5594
  }
5595
  },
5596
  "sorl_eval": {
 
5599
  "K": 1,
5600
  "mode": "sorl",
5601
  "n_digits": 6,
5602
+ "n_per_split": 50
5603
  },
5604
  "splits": {
5605
  "add_S0": {
5606
  "full_accuracy": 0.98,
5607
+ "digit_accuracy": 0.9971428571428571,
5608
+ "n_examples": 50,
5609
  "per_subtask": {
5610
  "SA": {
5611
+ "accuracy": 0.9966101694915255,
5612
+ "count": 295
5613
  },
5614
  "SS": {
5615
  "accuracy": 1.0,
5616
+ "count": 55
5617
  }
5618
  }
5619
  },
5620
  "add_S1": {
5621
+ "full_accuracy": 0.98,
5622
+ "digit_accuracy": 0.9971428571428571,
5623
+ "n_examples": 50,
5624
  "per_subtask": {
5625
  "SA": {
5626
+ "accuracy": 1.0,
5627
+ "count": 126
5628
  },
5629
  "SC": {
5630
+ "accuracy": 1.0,
5631
+ "count": 79
5632
  },
5633
  "SS": {
5634
+ "accuracy": 1.0,
5635
+ "count": 21
5636
  },
5637
  "UC": {
5638
+ "accuracy": 0.9919354838709677,
5639
+ "count": 124
5640
  }
5641
  }
5642
  },
5643
  "add_S2": {
5644
+ "full_accuracy": 0.66,
5645
+ "digit_accuracy": 0.9514285714285714,
5646
+ "n_examples": 50,
5647
  "per_subtask": {
5648
  "SA": {
5649
+ "accuracy": 1.0,
5650
+ "count": 75
5651
  },
5652
  "SC": {
5653
+ "accuracy": 0.967741935483871,
5654
+ "count": 62
5655
  },
5656
  "SS": {
5657
+ "accuracy": 0.9743589743589743,
5658
+ "count": 39
5659
  },
5660
  "UC": {
5661
+ "accuracy": 0.8738738738738738,
5662
+ "count": 111
5663
  },
5664
  "US": {
5665
  "accuracy": 1.0,
5666
+ "count": 63
5667
  }
5668
  }
5669
  },
5670
  "add_S3": {
5671
+ "full_accuracy": 0.7,
5672
+ "digit_accuracy": 0.9514285714285714,
5673
+ "n_examples": 50,
5674
  "per_subtask": {
5675
  "SA": {
5676
  "accuracy": 1.0,
5677
+ "count": 60
5678
  },
5679
  "SC": {
5680
+ "accuracy": 0.9649122807017544,
5681
+ "count": 57
5682
  },
5683
  "SS": {
5684
  "accuracy": 1.0,
5685
+ "count": 19
5686
  },
5687
  "UC": {
5688
+ "accuracy": 0.8557692307692307,
5689
+ "count": 104
5690
  },
5691
  "US": {
5692
  "accuracy": 1.0,
5693
+ "count": 110
5694
  }
5695
  }
5696
  },
5697
  "add_S4": {
5698
+ "full_accuracy": 0.76,
5699
+ "digit_accuracy": 0.9542857142857143,
5700
+ "n_examples": 50,
5701
  "per_subtask": {
5702
  "SA": {
5703
  "accuracy": 1.0,
5704
+ "count": 48
5705
  },
5706
  "SC": {
5707
  "accuracy": 1.0,
5708
+ "count": 52
5709
  },
5710
  "SS": {
5711
  "accuracy": 1.0,
5712
+ "count": 7
5713
  },
5714
  "UC": {
5715
+ "accuracy": 0.8651685393258427,
5716
+ "count": 89
5717
  },
5718
  "US": {
5719
+ "accuracy": 0.974025974025974,
5720
+ "count": 154
5721
  }
5722
  }
5723
  },
5724
  "add_S5": {
5725
+ "full_accuracy": 0.2,
5726
+ "digit_accuracy": 0.7742857142857142,
5727
+ "n_examples": 50,
5728
  "per_subtask": {
5729
  "SA": {
5730
  "accuracy": 1.0,
5731
+ "count": 50
5732
  },
5733
  "SC": {
5734
  "accuracy": 1.0,
5735
+ "count": 50
5736
  },
5737
  "UC": {
5738
+ "accuracy": 0.22,
5739
+ "count": 50
5740
  },
5741
  "US": {
5742
+ "accuracy": 0.8,
5743
+ "count": 200
5744
  }
5745
  }
5746
  },
5747
  "add_S6": {
5748
+ "full_accuracy": 0.42,
5749
+ "digit_accuracy": 0.7914285714285715,
5750
+ "n_examples": 50,
5751
  "per_subtask": {
5752
  "SC": {
5753
  "accuracy": 1.0,
5754
+ "count": 50
5755
  },
5756
  "UC": {
5757
+ "accuracy": 0.54,
5758
+ "count": 50
5759
  },
5760
  "US": {
5761
+ "accuracy": 0.8,
5762
+ "count": 250
5763
  }
5764
  }
5765
  },
5766
  "add_random": {
5767
+ "full_accuracy": 0.935,
5768
+ "digit_accuracy": 0.9907142857142858,
5769
  "n_examples": 200,
5770
  "per_subtask": {
5771
  "SA": {
5772
+ "accuracy": 0.9976798143851509,
5773
+ "count": 431
5774
  },
5775
  "SC": {
5776
+ "accuracy": 1.0,
5777
+ "count": 316
5778
  },
5779
  "SS": {
5780
  "accuracy": 1.0,
5781
+ "count": 39
5782
  },
5783
  "UC": {
5784
+ "accuracy": 0.9785714285714285,
5785
+ "count": 560
5786
  },
5787
  "US": {
5788
  "accuracy": 1.0,
5789
+ "count": 54
5790
+ }
5791
+ }
5792
+ },
5793
+ "add_C1": {
5794
+ "full_accuracy": 1.0,
5795
+ "digit_accuracy": 1.0,
5796
+ "n_examples": 50,
5797
+ "per_subtask": {
5798
+ "SA": {
5799
+ "accuracy": 1.0,
5800
+ "count": 250
5801
+ },
5802
+ "SC": {
5803
+ "accuracy": 1.0,
5804
+ "count": 50
5805
+ },
5806
+ "UC": {
5807
+ "accuracy": 1.0,
5808
+ "count": 50
5809
+ }
5810
+ }
5811
+ },
5812
+ "add_C2": {
5813
+ "full_accuracy": 0.82,
5814
+ "digit_accuracy": 0.9742857142857143,
5815
+ "n_examples": 50,
5816
+ "per_subtask": {
5817
+ "SA": {
5818
+ "accuracy": 0.995,
5819
+ "count": 200
5820
+ },
5821
+ "SC": {
5822
+ "accuracy": 1.0,
5823
+ "count": 50
5824
+ },
5825
+ "UC": {
5826
+ "accuracy": 0.9036144578313253,
5827
+ "count": 83
5828
+ },
5829
+ "US": {
5830
+ "accuracy": 1.0,
5831
+ "count": 17
5832
  }
5833
  }
5834
  },
5835
  "add_C3": {
5836
+ "full_accuracy": 0.64,
5837
+ "digit_accuracy": 0.9485714285714286,
5838
+ "n_examples": 50,
5839
  "per_subtask": {
5840
  "SA": {
5841
  "accuracy": 1.0,
5842
+ "count": 150
5843
  },
5844
  "SC": {
5845
  "accuracy": 1.0,
5846
+ "count": 50
5847
  },
5848
  "UC": {
5849
+ "accuracy": 0.82,
5850
+ "count": 100
5851
  },
5852
  "US": {
5853
  "accuracy": 1.0,
5854
+ "count": 50
5855
  }
5856
  }
5857
  },
5858
  "add_C4": {
5859
+ "full_accuracy": 0.84,
5860
+ "digit_accuracy": 0.9771428571428571,
5861
+ "n_examples": 50,
5862
  "per_subtask": {
5863
  "SA": {
5864
  "accuracy": 1.0,
5865
+ "count": 100
5866
  },
5867
  "SC": {
5868
  "accuracy": 1.0,
5869
+ "count": 50
5870
  },
5871
  "UC": {
5872
+ "accuracy": 0.9393939393939394,
5873
+ "count": 132
5874
  },
5875
  "US": {
5876
+ "accuracy": 1.0,
5877
+ "count": 68
5878
  }
5879
  }
5880
  },
5881
  "add_C5": {
5882
+ "full_accuracy": 0.7,
5883
+ "digit_accuracy": 0.9457142857142857,
5884
+ "n_examples": 50,
5885
  "per_subtask": {
5886
  "SA": {
5887
  "accuracy": 1.0,
5888
+ "count": 50
5889
  },
5890
  "SC": {
5891
+ "accuracy": 1.0,
5892
+ "count": 50
5893
  },
5894
  "UC": {
5895
+ "accuracy": 0.8972602739726028,
5896
+ "count": 146
5897
  },
5898
  "US": {
5899
+ "accuracy": 0.9615384615384616,
5900
+ "count": 104
5901
  }
5902
  }
5903
  },
5904
  "add_C6": {
5905
+ "full_accuracy": 0.8,
5906
+ "digit_accuracy": 0.9628571428571429,
5907
+ "n_examples": 50,
5908
  "per_subtask": {
5909
  "SC": {
5910
  "accuracy": 1.0,
5911
+ "count": 50
5912
  },
5913
  "UC": {
5914
+ "accuracy": 0.9417989417989417,
5915
+ "count": 189
5916
  },
5917
  "US": {
5918
+ "accuracy": 0.9819819819819819,
5919
+ "count": 111
5920
  }
5921
  }
5922
  },
5923
  "sub_M0": {
5924
+ "full_accuracy": 0.86,
5925
+ "digit_accuracy": 0.98,
5926
+ "n_examples": 50,
5927
  "per_subtask": {
5928
  "MD": {
5929
+ "accuracy": 0.976897689768977,
5930
+ "count": 303
5931
  },
5932
  "ME": {
5933
  "accuracy": 1.0,
5934
+ "count": 47
5935
  }
5936
  }
5937
  },
5938
  "sub_M1": {
5939
+ "full_accuracy": 0.84,
5940
+ "digit_accuracy": 0.9771428571428571,
5941
+ "n_examples": 50,
5942
  "per_subtask": {
5943
  "MD": {
5944
+ "accuracy": 0.9645390070921985,
5945
+ "count": 141
5946
  },
5947
  "MB": {
5948
+ "accuracy": 0.9861111111111112,
5949
+ "count": 72
5950
  },
5951
  "ME": {
5952
  "accuracy": 1.0,
5953
+ "count": 18
5954
  },
5955
  "UB": {
5956
+ "accuracy": 0.9831932773109243,
5957
+ "count": 119
5958
  }
5959
  }
5960
  },
5961
  "sub_M2": {
5962
+ "full_accuracy": 0.66,
5963
+ "digit_accuracy": 0.9485714285714286,
5964
+ "n_examples": 50,
5965
  "per_subtask": {
5966
  "MD": {
5967
+ "accuracy": 1.0,
5968
+ "count": 112
5969
  },
5970
  "MB": {
5971
+ "accuracy": 0.9811320754716981,
5972
+ "count": 53
5973
  },
5974
  "ME": {
5975
  "accuracy": 1.0,
5976
+ "count": 47
5977
  },
5978
  "UB": {
5979
+ "accuracy": 0.8,
5980
+ "count": 85
5981
  },
5982
  "UD": {
5983
  "accuracy": 1.0,
5984
+ "count": 53
5985
  }
5986
  }
5987
  },
5988
  "sub_M3": {
5989
+ "full_accuracy": 0.46,
5990
+ "digit_accuracy": 0.9142857142857143,
5991
+ "n_examples": 50,
5992
  "per_subtask": {
5993
  "MD": {
5994
+ "accuracy": 1.0,
5995
+ "count": 97
5996
  },
5997
  "MB": {
5998
+ "accuracy": 0.9411764705882353,
5999
+ "count": 51
6000
  },
6001
  "ME": {
6002
  "accuracy": 1.0,
6003
+ "count": 27
6004
  },
6005
  "UB": {
6006
+ "accuracy": 0.6351351351351351,
6007
+ "count": 74
6008
  },
6009
  "UD": {
6010
  "accuracy": 1.0,
6011
+ "count": 101
6012
  }
6013
  }
6014
  },
6015
  "sub_M4": {
6016
+ "full_accuracy": 0.32,
6017
+ "digit_accuracy": 0.8942857142857142,
6018
+ "n_examples": 50,
6019
  "per_subtask": {
6020
  "MD": {
6021
  "accuracy": 1.0,
6022
+ "count": 100
6023
  },
6024
  "MB": {
6025
  "accuracy": 1.0,
6026
+ "count": 50
6027
  },
6028
  "UB": {
6029
+ "accuracy": 0.4,
6030
+ "count": 50
6031
  },
6032
  "UD": {
6033
+ "accuracy": 0.9533333333333334,
6034
+ "count": 150
6035
  }
6036
  }
6037
  },
6038
  "sub_M5": {
6039
  "full_accuracy": 0.08,
6040
+ "digit_accuracy": 0.7657142857142857,
6041
+ "n_examples": 50,
6042
  "per_subtask": {
6043
  "MD": {
6044
  "accuracy": 1.0,
6045
+ "count": 50
6046
  },
6047
  "MB": {
6048
  "accuracy": 1.0,
6049
+ "count": 50
6050
  },
6051
  "UB": {
6052
+ "accuracy": 0.5,
6053
+ "count": 50
6054
  },
6055
  "UD": {
6056
+ "accuracy": 0.715,
6057
+ "count": 200
6058
  }
6059
  }
6060
  },
6061
  "sub_random": {
6062
+ "full_accuracy": 0.82,
6063
+ "digit_accuracy": 0.9735714285714285,
6064
  "n_examples": 200,
6065
  "per_subtask": {
6066
  "MD": {
6067
+ "accuracy": 0.987719298245614,
6068
+ "count": 570
6069
  },
6070
  "MB": {
6071
+ "accuracy": 0.9711191335740073,
6072
+ "count": 277
6073
  },
6074
  "ME": {
6075
  "accuracy": 1.0,
6076
  "count": 53
6077
  },
6078
  "UB": {
6079
+ "accuracy": 0.9532908704883227,
6080
+ "count": 471
6081
  },
6082
  "UD": {
6083
  "accuracy": 1.0,
6084
+ "count": 29
6085
  }
6086
  }
6087
  },
6088
  "sub_B3": {
6089
+ "full_accuracy": 0.74,
6090
+ "digit_accuracy": 0.9628571428571429,
6091
+ "n_examples": 50,
6092
  "per_subtask": {
6093
  "MD": {
6094
+ "accuracy": 0.9933333333333333,
6095
+ "count": 150
6096
  },
6097
  "MB": {
6098
+ "accuracy": 0.98,
6099
+ "count": 50
6100
  },
6101
  "UB": {
6102
+ "accuracy": 0.900990099009901,
6103
+ "count": 101
6104
  },
6105
  "UD": {
6106
+ "accuracy": 0.9795918367346939,
6107
+ "count": 49
6108
  }
6109
  }
6110
  },
6111
  "sub_B4": {
6112
+ "full_accuracy": 0.64,
6113
+ "digit_accuracy": 0.9428571428571428,
6114
+ "n_examples": 50,
6115
  "per_subtask": {
6116
  "MD": {
6117
  "accuracy": 1.0,
6118
+ "count": 100
6119
  },
6120
  "MB": {
6121
  "accuracy": 1.0,
6122
+ "count": 50
6123
  },
6124
  "UB": {
6125
+ "accuracy": 0.8347107438016529,
6126
+ "count": 121
6127
  },
6128
  "UD": {
6129
+ "accuracy": 1.0,
6130
+ "count": 79
6131
  }
6132
  }
6133
  },
6134
  "sub_B5": {
6135
  "full_accuracy": 0.46,
6136
+ "digit_accuracy": 0.9114285714285715,
6137
+ "n_examples": 50,
6138
  "per_subtask": {
6139
  "MD": {
6140
  "accuracy": 1.0,
6141
+ "count": 50
6142
  },
6143
  "MB": {
6144
  "accuracy": 1.0,
6145
+ "count": 50
6146
  },
6147
  "UB": {
6148
+ "accuracy": 0.8223684210526315,
6149
+ "count": 152
6150
  },
6151
  "UD": {
6152
+ "accuracy": 0.9591836734693877,
6153
+ "count": 98
6154
  }
6155
  }
6156
  }
6157
  },
6158
  "summary": {
6159
+ "overall_accuracy": 0.7193333333333334,
6160
+ "digit_accuracy": 0.9459047619047619,
6161
+ "total_examples": 1500,
6162
+ "n_splits": 24
6163
  }
6164
  },
6165
  "sorl_overall_accuracy": 0.6870833333333334,