Blockwise-OAT / benchmarks /verify_policy_bs8.json
hackhackhack66666's picture
Add Blockwise-OAT baseline artifacts (benchmarks/verify_policy_bs8.json)
d1076d2 verified
Raw
History Blame Contribute Delete
1.73 kB
{
"ar_shapes": {
"action": [
8,
16,
7
],
"action_pred": [
8,
32,
7
]
},
"batch_size": 8,
"benchmark": {
"ar_mean_sec": 0.03135459750890732,
"blockwise_mean_sec": 0.026641907282173635,
"prefix_len": 4,
"speedup": 1.1768901218978038,
"total_tokens": 8
},
"blockwise_shapes": {
"action": [
8,
16,
7
],
"action_pred": [
8,
32,
7
],
"action_tokens": [
8,
8
]
},
"checkpoint": "output/baselines/original_oat/hf/policy_ep-0250_sr-0.596.ckpt",
"checks": {
"blockwise_numerics_finite": true,
"blockwise_shapes_match_ar": true,
"fallback_shapes_match_ar": true,
"speedup_gt_1": true,
"tail_param_ratio_ok": true
},
"device": "cuda:0",
"fallback_shapes": {
"action": [
8,
16,
7
],
"action_pred": [
8,
32,
7
]
},
"param_counts": {
"ar_model": 5023488,
"min_ratio_required": 0.35,
"tail_decoder": 4525568,
"tail_to_ar_ratio": 0.9008816185088926
},
"passed": true,
"prefix_len": 4,
"refine_iters": 1,
"sim_eval_commands": {
"ar": "python scripts/eval_policy_sim.py --checkpoint output/baselines/original_oat/hf/policy_ep-0250_sr-0.596.ckpt --output_dir output/eval_blockwise_verify/ar --n-test-per-task 5 --overwrite",
"blockwise": "python scripts/eval_policy_sim.py --checkpoint output/baselines/original_oat/hf/policy_ep-0250_sr-0.596.ckpt --output_dir output/eval_blockwise_verify/blockwise --n-test-per-task 5 --overwrite --use-blockwise --blockwise-prefix-len 4 --blockwise-refine-iters 1"
},
"tail_checkpoint": "output/blockwise/original_oat_tail_p4_r1.pt"
}