drbh HF Staff committed on
Commit
6ff8e1a
·
verified ·
1 Parent(s): bc4c477

feat: use latest benchmark format

Browse files
Files changed (1) hide show
  1. benchmark.py +58 -20
benchmark.py CHANGED
@@ -2,27 +2,65 @@ import torch
2
  from kernels.benchmark import Benchmark
3
 
4
 
5
- class SiluBenchmark(Benchmark):
6
- """Benchmark for SwiGLU activation kernel."""
 
7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  kernel_id = "kernels-community/activation"
9
  seed = 42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
- def setup(self):
12
- # Input has shape (num_tokens, 2 * hidden_dim)
13
- num_tokens, hidden_dim = 128, 512
14
- self.x = torch.randn(
15
- num_tokens, 2 * hidden_dim, device="cuda", dtype=torch.float16
16
- )
17
- self.out = torch.empty(
18
- num_tokens, hidden_dim, device="cuda", dtype=torch.float16
19
- )
20
-
21
- def benchmark_silu_and_mul(self):
22
- self.kernel.silu_and_mul(self.out, self.x)
23
-
24
- def verify_silu_and_mul(self):
25
- # Reference: SwiGLU computes silu(x[:d]) * x[d:]
26
- d = self.x.shape[-1] // 2
27
- ref = torch.nn.functional.silu(self.x[..., :d]) * self.x[..., d:]
28
- return torch.allclose(self.out, ref, atol=1e-3, rtol=1e-3)
 
2
  from kernels.benchmark import Benchmark
3
 
4
 
5
+ def setup_silu_tensors(self, num_tokens: int, hidden_dim: int, dtype=torch.float16):
6
+ self.x = torch.randn(num_tokens, 2 * hidden_dim, device="cuda", dtype=dtype)
7
+ self.out = torch.empty(num_tokens, hidden_dim, device="cuda", dtype=dtype)
8
 
9
+
10
+ def verify_silu(self):
11
+ d = self.x.shape[-1] // 2
12
+ ref = torch.nn.functional.silu(self.x[..., :d]) * self.x[..., d:]
13
+ return torch.allclose(self.out, ref, atol=1e-3, rtol=1e-3)
14
+
15
+
16
+ class SiluWorkloads(Benchmark):
17
+ kernel_id = "kernels-community/activation"
18
+ seed = 42
19
+ x: torch.Tensor # kernel specific input var
20
+ out: torch.Tensor # kernel specific output var
21
+
22
+ # Workload 1
23
+ def setup_small(self):
24
+ setup_silu_tensors(self, num_tokens=32, hidden_dim=256)
25
+
26
+ def benchmark_small(self):
27
+ self.kernel.silu_and_mul(self.out, self.x) # type: ignore
28
+
29
+ def verify_small(self):
30
+ return verify_silu(self)
31
+
32
+ # Workload 2
33
+ def setup_medium(self):
34
+ setup_silu_tensors(self, num_tokens=1024, hidden_dim=2048)
35
+
36
+ def benchmark_medium(self):
37
+ self.kernel.silu_and_mul(self.out, self.x) # type: ignore
38
+
39
+ def verify_medium(self):
40
+ return verify_silu(self)
41
+
42
+
43
+ class SiluWorkloads2(Benchmark):
44
  kernel_id = "kernels-community/activation"
45
  seed = 42
46
+ x: torch.Tensor # kernel specific input var
47
+ out: torch.Tensor # kernel specific output var
48
+
49
+ # Workload 1
50
+ def setup_small(self):
51
+ setup_silu_tensors(self, num_tokens=32, hidden_dim=256)
52
+
53
+ def benchmark_small(self):
54
+ self.kernel.silu_and_mul(self.out, self.x) # type: ignore
55
+
56
+ def verify_small(self):
57
+ return verify_silu(self)
58
+
59
+ # Workload 2
60
+ def setup_medium(self):
61
+ setup_silu_tensors(self, num_tokens=1024, hidden_dim=2048)
62
+
63
+ def benchmark_medium(self):
64
+ self.kernel.silu_and_mul(self.out, self.x) # type: ignore
65
 
66
+ # Note: show case without a verify