feat: use latest benchmark format
benchmark.py (CHANGED, +58 -20)
@@ -2,27 +2,65 @@ import torch
 from kernels.benchmark import Benchmark
 
 
-
-
+def setup_silu_tensors(self, num_tokens: int, hidden_dim: int, dtype=torch.float16):
+    self.x = torch.randn(num_tokens, 2 * hidden_dim, device="cuda", dtype=dtype)
+    self.out = torch.empty(num_tokens, hidden_dim, device="cuda", dtype=dtype)
 
+
+def verify_silu(self):
+    d = self.x.shape[-1] // 2
+    ref = torch.nn.functional.silu(self.x[..., :d]) * self.x[..., d:]
+    return torch.allclose(self.out, ref, atol=1e-3, rtol=1e-3)
+
+
+class SiluWorkloads(Benchmark):
+    kernel_id = "kernels-community/activation"
+    seed = 42
+    x: torch.Tensor  # kernel specific input var
+    out: torch.Tensor  # kernel specific output var
+
+    # Workload 1
+    def setup_small(self):
+        setup_silu_tensors(self, num_tokens=32, hidden_dim=256)
+
+    def benchmark_small(self):
+        self.kernel.silu_and_mul(self.out, self.x)  # type: ignore
+
+    def verify_small(self):
+        return verify_silu(self)
+
+    # Workload 2
+    def setup_medium(self):
+        setup_silu_tensors(self, num_tokens=1024, hidden_dim=2048)
+
+    def benchmark_medium(self):
+        self.kernel.silu_and_mul(self.out, self.x)  # type: ignore
+
+    def verify_medium(self):
+        return verify_silu(self)
+
+
+class SiluWorkloads2(Benchmark):
     kernel_id = "kernels-community/activation"
     seed = 42
+    x: torch.Tensor  # kernel specific input var
+    out: torch.Tensor  # kernel specific output var
+
+    # Workload 1
+    def setup_small(self):
+        setup_silu_tensors(self, num_tokens=32, hidden_dim=256)
+
+    def benchmark_small(self):
+        self.kernel.silu_and_mul(self.out, self.x)  # type: ignore
+
+    def verify_small(self):
+        return verify_silu(self)
+
+    # Workload 2
+    def setup_medium(self):
+        setup_silu_tensors(self, num_tokens=1024, hidden_dim=2048)
+
+    def benchmark_medium(self):
+        self.kernel.silu_and_mul(self.out, self.x)  # type: ignore
 
-
-        # Input has shape (num_tokens, 2 * hidden_dim)
-        num_tokens, hidden_dim = 128, 512
-        self.x = torch.randn(
-            num_tokens, 2 * hidden_dim, device="cuda", dtype=torch.float16
-        )
-        self.out = torch.empty(
-            num_tokens, hidden_dim, device="cuda", dtype=torch.float16
-        )
-
-    def benchmark_silu_and_mul(self):
-        self.kernel.silu_and_mul(self.out, self.x)
-
-    def verify_silu_and_mul(self):
-        # Reference: SwiGLU computes silu(x[:d]) * x[d:]
-        d = self.x.shape[-1] // 2
-        ref = torch.nn.functional.silu(self.x[..., :d]) * self.x[..., d:]
-        return torch.allclose(self.out, ref, atol=1e-3, rtol=1e-3)
+    # Note: show case without a verify
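Independent of any harness, the kernel under test can also be exercised directly, which is handy when debugging a failing verify_*. A minimal sketch, assuming the kernels library's get_kernel loader and a CUDA device; the silu_and_mul(out, x) signature and the SwiGLU reference are taken from the diff above.

import torch
from kernels import get_kernel

# Load the compiled activation kernels from the Hub repo used by the benchmark.
activation = get_kernel("kernels-community/activation")

num_tokens, hidden_dim = 32, 256
x = torch.randn(num_tokens, 2 * hidden_dim, device="cuda", dtype=torch.float16)
out = torch.empty(num_tokens, hidden_dim, device="cuda", dtype=torch.float16)

activation.silu_and_mul(out, x)

# Same SwiGLU reference as verify_silu: silu(x[..., :d]) * x[..., d:]
d = x.shape[-1] // 2
ref = torch.nn.functional.silu(x[..., :d]) * x[..., d:]
assert torch.allclose(out, ref, atol=1e-3, rtol=1e-3)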