| {"ts": "2025-10-23T16:37:07Z", "run": "631b665f33e446ad84f5b71484e6366b", "impl": "torch_layer_norm", "tags": {"family": "torch", "op": "layer_norm"}, "wl": {"name": "llama_S512_D4096", "batch": 1, "seq_len": 512, "hidden_dim": 4096, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L4", "sm": "8.9", "py": "3.12.7", "plat": "Linux-5.15.0-1084-aws-x86_64-with-glibc2.31"}, "lat_ms": {"p10": 0.028261012630537152, "p50": 0.028820009902119637, "p90": 0.029031012672930956, "mean": 0.05126321339048445, "iqr": 0.0007199996616691351, "raw_times": [0.1418930187355727, 0.029031012672930956, 0.02831101301126182, 0.028820009902119637, 0.028261012630537152], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 2.3812620202079415, "peak_bytes": 37765120, "ok": false, "absmax": 0.03125, "corr": {"ok": false, "rtol": 0.001, "atol": 0.001, "absmax": 0.03125, "mae": 0.0015869140625, "mse": 1.1563301086425781e-05, "ref": "layer_norm_fp32"}, "err": null} | |
| {"ts": "2025-10-23T16:37:07Z", "run": "631b665f33e446ad84f5b71484e6366b", "impl": "torch_layer_norm", "tags": {"family": "torch", "op": "layer_norm"}, "wl": {"name": "llama_S512_D8192", "batch": 1, "seq_len": 512, "hidden_dim": 8192, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L4", "sm": "8.9", "py": "3.12.7", "plat": "Linux-5.15.0-1084-aws-x86_64-with-glibc2.31"}, "lat_ms": {"p10": 0.035909994039684534, "p50": 0.036250014090910554, "p90": 0.037470977986231446, "mean": 0.03669860307127237, "iqr": 0.00132995774038136, "raw_times": [0.036250014090910554, 0.037470977986231446, 0.036141020245850086, 0.035909994039684534, 0.037721008993685246], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 2.0535940129775554, "peak_bytes": 75530240, "ok": false, "absmax": 0.03125, "corr": {"ok": false, "rtol": 0.001, "atol": 0.001, "absmax": 0.03125, "mae": 0.0014801025390625, "mse": 1.0848045349121094e-05, "ref": "layer_norm_fp32"}, "err": null} | |
| {"ts": "2025-10-23T16:37:07Z", "run": "631b665f33e446ad84f5b71484e6366b", "impl": "torch_layer_norm", "tags": {"family": "torch", "op": "layer_norm"}, "wl": {"name": "llama_S1024_D4096", "batch": 1, "seq_len": 1024, "hidden_dim": 4096, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L4", "sm": "8.9", "py": "3.12.7", "plat": "Linux-5.15.0-1084-aws-x86_64-with-glibc2.31"}, "lat_ms": {"p10": 0.03473099786788225, "p50": 0.03543100319802761, "p90": 0.035711011150851846, "mean": 0.03537900047376752, "iqr": 0.0005500041879713535, "raw_times": [0.03516100696288049, 0.035860983189195395, 0.03473099786788225, 0.035711011150851846, 0.03543100319802761], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.037071004044264555, "peak_bytes": 75513856, "ok": false, "absmax": 0.03125, "corr": {"ok": false, "rtol": 0.001, "atol": 0.001, "absmax": 0.03125, "mae": 0.0015411376953125, "mse": 1.1205673217773438e-05, "ref": "layer_norm_fp32"}, "err": null} | |
| {"ts": "2025-10-23T16:37:07Z", "run": "631b665f33e446ad84f5b71484e6366b", "impl": "torch_layer_norm", "tags": {"family": "torch", "op": "layer_norm"}, "wl": {"name": "llama_S1024_D8192", "batch": 1, "seq_len": 1024, "hidden_dim": 8192, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L4", "sm": "8.9", "py": "3.12.7", "plat": "Linux-5.15.0-1084-aws-x86_64-with-glibc2.31"}, "lat_ms": {"p10": 0.04888101830147207, "p50": 0.049672002205625176, "p90": 0.04987101419828832, "mean": 0.04960519727319479, "iqr": 0.0004300381988286972, "raw_times": [0.049440975999459624, 0.04888101830147207, 0.05016097566112876, 0.049672002205625176, 0.04987101419828832], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.05065201548859477, "peak_bytes": 151027712, "ok": false, "absmax": 0.03125, "corr": {"ok": false, "rtol": 0.001, "atol": 0.001, "absmax": 0.03125, "mae": 0.001495361328125, "mse": 1.0967254638671875e-05, "ref": "layer_norm_fp32"}, "err": null} | |
| {"ts": "2025-10-23T16:37:07Z", "run": "631b665f33e446ad84f5b71484e6366b", "impl": "torch_layer_norm", "tags": {"family": "torch", "op": "layer_norm"}, "wl": {"name": "llama_S2048_D4096", "batch": 1, "seq_len": 2048, "hidden_dim": 4096, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L4", "sm": "8.9", "py": "3.12.7", "plat": "Linux-5.15.0-1084-aws-x86_64-with-glibc2.31"}, "lat_ms": {"p10": 0.21544500486925244, "p50": 0.21836499217897654, "p90": 0.2194250118918717, "mean": 0.2186850004363805, "iqr": 0.0022300228010863066, "raw_times": [0.21719498909078538, 0.2194250118918717, 0.22299500415101647, 0.21544500486925244, 0.21836499217897654], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.23716501891613007, "peak_bytes": 151011328, "ok": false, "absmax": 0.03125, "corr": {"ok": false, "rtol": 0.001, "atol": 0.001, "absmax": 0.03125, "mae": 0.0015411376953125, "mse": 1.1205673217773438e-05, "ref": "layer_norm_fp32"}, "err": null} | |
| {"ts": "2025-10-23T16:37:07Z", "run": "631b665f33e446ad84f5b71484e6366b", "impl": "torch_layer_norm", "tags": {"family": "torch", "op": "layer_norm"}, "wl": {"name": "llama_S2048_D8192", "batch": 1, "seq_len": 2048, "hidden_dim": 8192, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L4", "sm": "8.9", "py": "3.12.7", "plat": "Linux-5.15.0-1084-aws-x86_64-with-glibc2.31"}, "lat_ms": {"p10": 0.5248420056886971, "p50": 0.5344109958969057, "p90": 0.5452120094560087, "mean": 0.5406358046457171, "iqr": 0.01667998731136322, "raw_times": [0.5701819900423288, 0.5344109958969057, 0.5248420056886971, 0.5452120094560087, 0.5285320221446455], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 0.5195809935685247, "peak_bytes": 302022656, "ok": false, "absmax": 0.03125, "corr": {"ok": false, "rtol": 0.001, "atol": 0.001, "absmax": 0.03125, "mae": 0.0015106201171875, "mse": 1.1086463928222656e-05, "ref": "layer_norm_fp32"}, "err": null} | |
| {"ts": "2025-10-23T16:37:07Z", "run": "631b665f33e446ad84f5b71484e6366b", "impl": "torch_layer_norm", "tags": {"family": "torch", "op": "layer_norm"}, "wl": {"name": "llama_S4096_D4096", "batch": 1, "seq_len": 4096, "hidden_dim": 4096, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L4", "sm": "8.9", "py": "3.12.7", "plat": "Linux-5.15.0-1084-aws-x86_64-with-glibc2.31"}, "lat_ms": {"p10": 2.4987450160551816, "p50": 2.611225994769484, "p90": 2.619047008920461, "mean": 2.587354398565367, "iqr": 0.04660102422349155, "raw_times": [2.4987450160551816, 2.5724459846969694, 2.611225994769484, 2.6353079883847386, 2.619047008920461], "has_warnings": false, "reps": 5, "warmup": 2}, "compile_ms": 2.529365010559559, "peak_bytes": 302006272, "ok": false, "absmax": 0.03125, "corr": {"ok": false, "rtol": 0.001, "atol": 0.001, "absmax": 0.03125, "mae": 0.00154876708984375, "mse": 1.1265277862548828e-05, "ref": "layer_norm_fp32"}, "err": null} | |
| {"ts": "2025-10-23T16:37:07Z", "run": "631b665f33e446ad84f5b71484e6366b", "impl": "torch_layer_norm", "tags": {"family": "torch", "op": "layer_norm"}, "wl": {"name": "llama_S4096_D8192", "batch": 1, "seq_len": 4096, "hidden_dim": 8192, "dtype": "bfloat16", "device": "cuda", "seed": 0}, "env": {"torch": "2.8.0+cu128", "cuda": "12.8", "gpu": "NVIDIA L4", "sm": "8.9", "py": "3.12.7", "plat": "Linux-5.15.0-1084-aws-x86_64-with-glibc2.31"}, "lat_ms": {"p10": 0.5760230123996735, "p50": 1.025342004140839, "p90": 1.0687129979487509, "mean": 0.911241804715246, "iqr": 0.3739079984370619, "raw_times": [0.5760230123996735, 0.694804999511689, 1.025342004140839, 1.0687129979487509, 1.1913260095752776], "has_warnings": true, "reps": 5, "warmup": 2}, "compile_ms": 0.5854929913766682, "peak_bytes": 604012544, "ok": false, "absmax": 0.03125, "corr": {"ok": false, "rtol": 0.001, "atol": 0.001, "absmax": 0.03125, "mae": 0.0015106201171875, "mse": 1.1026859283447266e-05, "ref": "layer_norm_fp32"}, "err": null} | |