| { | |
| "args": { | |
| "bundle_cache": ".cache/probe_10s_all_memory_skipfusion.json", | |
| "model_name": "prajjwal1/bert-tiny", | |
| "top_k": 10, | |
| "diagnose_top_k": 50, | |
| "split": "interleave", | |
| "train_fraction": 0.5, | |
| "seed": 99, | |
| "epochs": 3, | |
| "batch_size": 16, | |
| "score_batch_size": 64, | |
| "lr": 2e-05, | |
| "weight_decay": 0.01, | |
| "max_len": 192, | |
| "negatives_per_case": 16, | |
| "max_pos_weight": 8.0, | |
| "max_replacements": 1, | |
| "margin_grid": [ | |
| 0.0 | |
| ], | |
| "device": "cuda", | |
| "save_model": ".cache/locomo_bert_tiny_reranker_10sall_seed99", | |
| "progress_every": 125, | |
| "print_margin_sweep": false, | |
| "outcome_report": ".cache/transformer_outcomes_10sall_full_memory_seed99_saved.json", | |
| "print_outcomes": 0 | |
| }, | |
| "metrics": { | |
| "chosen_margin": 0.0, | |
| "train": { | |
| "cases": 766, | |
| "baseline_hit": 465, | |
| "wide_hit": 615, | |
| "rank_or_topk_miss": 150, | |
| "learned_top_hit": 537, | |
| "learned_rank_or_topk_added": 98, | |
| "merge_hit": 504, | |
| "merge_added": 40, | |
| "merge_lost": 1, | |
| "merge_rank_or_topk_added": 40 | |
| }, | |
| "held": { | |
| "cases": 765, | |
| "baseline_hit": 466, | |
| "wide_hit": 609, | |
| "rank_or_topk_miss": 143, | |
| "learned_top_hit": 507, | |
| "learned_rank_or_topk_added": 90, | |
| "merge_hit": 493, | |
| "merge_added": 28, | |
| "merge_lost": 1, | |
| "merge_rank_or_topk_added": 28 | |
| } | |
| } | |
| } |