Upload folder using huggingface_hub
Browse files
- flash_attn/impls/flash_attention.html +2 -0
- flash_attn/impls/hf_kernels_flash_attn3.html +2 -0
- flash_attn/impls/mem_efficient_attention.html +1 -0
- flash_attn/impls/sage_attention.html +1 -0
- flash_attn/impls/xformers.html +1 -0
- flash_attn/results/artifacts/combine/latency.svg +1 -1
- flash_attn/results/combined_results.html +21 -21
flash_attn/impls/flash_attention.html
CHANGED
|
@@ -3847,6 +3847,7 @@ Cell: nv | 4.06s
|
|
| 3847 |
| <button class="run-btn" onclick="runCell('nv')">▶ run</button>
|
| 3848 |
<button class="copy-btn" onclick="copyCell('nv')">Copy</button>
|
| 3849 |
<a href="cells/nv.py" target="_blank" class="raw-btn">Raw</a>
|
|
|
|
| 3850 |
</div>
|
| 3851 |
<div id="code-nv" class="cell-code" data-lines="3">
|
| 3852 |
<div class="code-wrap">
|
|
@@ -3908,6 +3909,7 @@ Cell: benchmark | 38.14s
|
|
| 3908 |
| <button class="run-btn" onclick="runCell('benchmark')">▶ run</button>
|
| 3909 |
<button class="copy-btn" onclick="copyCell('benchmark')">Copy</button>
|
| 3910 |
<a href="cells/benchmark.py" target="_blank" class="raw-btn">Raw</a>
|
|
|
|
| 3911 |
</div>
|
| 3912 |
<div id="code-benchmark" class="cell-code" data-lines="66">
|
| 3913 |
<div class="code-wrap">
|
|
|
|
| 3847 |
| <button class="run-btn" onclick="runCell('nv')">▶ run</button>
|
| 3848 |
<button class="copy-btn" onclick="copyCell('nv')">Copy</button>
|
| 3849 |
<a href="cells/nv.py" target="_blank" class="raw-btn">Raw</a>
|
| 3850 |
+
<a href="https://github.com/huggingface/kernels-uvnotes/blob/main/flash_attn/impls/flash_attention.md" target="_blank" class="github-btn">GitHub</a>
|
| 3851 |
</div>
|
| 3852 |
<div id="code-nv" class="cell-code" data-lines="3">
|
| 3853 |
<div class="code-wrap">
|
|
|
|
| 3909 |
| <button class="run-btn" onclick="runCell('benchmark')">▶ run</button>
|
| 3910 |
<button class="copy-btn" onclick="copyCell('benchmark')">Copy</button>
|
| 3911 |
<a href="cells/benchmark.py" target="_blank" class="raw-btn">Raw</a>
|
| 3912 |
+
<a href="https://github.com/huggingface/kernels-uvnotes/blob/main/flash_attn/impls/flash_attention.md" target="_blank" class="github-btn">GitHub</a>
|
| 3913 |
</div>
|
| 3914 |
<div id="code-benchmark" class="cell-code" data-lines="66">
|
| 3915 |
<div class="code-wrap">
|
flash_attn/impls/hf_kernels_flash_attn3.html
CHANGED
|
@@ -3847,6 +3847,8 @@ Cell: benchmark | 40.68s
|
|
| 3847 |
| <button class="run-btn" onclick="runCell('benchmark')">▶ run</button>
|
| 3848 |
<button class="copy-btn" onclick="copyCell('benchmark')">Copy</button>
|
| 3849 |
<a href="cells/benchmark.py" target="_blank" class="raw-btn">Raw</a>
|
|
|
|
|
|
|
| 3850 |
</div>
|
| 3851 |
<div id="code-benchmark" class="cell-code" data-lines="71">
|
| 3852 |
<div class="code-wrap">
|
|
|
|
| 3847 |
| <button class="run-btn" onclick="runCell('benchmark')">▶ run</button>
|
| 3848 |
<button class="copy-btn" onclick="copyCell('benchmark')">Copy</button>
|
| 3849 |
<a href="cells/benchmark.py" target="_blank" class="raw-btn">Raw</a>
|
| 3850 |
+
<a href="https://github.com/huggingface/kernels-uvnotes/blob/main/flash_attn/impls/hf_kernels_flash_attn3.md" target="_blank" class="github-btn">GitHub</a>
|
| 3851 |
+
<a href="https://huggingface.co/kernels-community/flash-attn3" target="_blank" class="hf-btn">🤗 HF</a>
|
| 3852 |
</div>
|
| 3853 |
<div id="code-benchmark" class="cell-code" data-lines="71">
|
| 3854 |
<div class="code-wrap">
|
flash_attn/impls/mem_efficient_attention.html
CHANGED
|
@@ -3847,6 +3847,7 @@ Cell: benchmark | 39.23s
|
|
| 3847 |
| <button class="run-btn" onclick="runCell('benchmark')">▶ run</button>
|
| 3848 |
<button class="copy-btn" onclick="copyCell('benchmark')">Copy</button>
|
| 3849 |
<a href="cells/benchmark.py" target="_blank" class="raw-btn">Raw</a>
|
|
|
|
| 3850 |
</div>
|
| 3851 |
<div id="code-benchmark" class="cell-code" data-lines="68">
|
| 3852 |
<div class="code-wrap">
|
|
|
|
| 3847 |
| <button class="run-btn" onclick="runCell('benchmark')">▶ run</button>
|
| 3848 |
<button class="copy-btn" onclick="copyCell('benchmark')">Copy</button>
|
| 3849 |
<a href="cells/benchmark.py" target="_blank" class="raw-btn">Raw</a>
|
| 3850 |
+
<a href="https://github.com/huggingface/kernels-uvnotes/blob/main/flash_attn/impls/mem_efficient_attention.md" target="_blank" class="github-btn">GitHub</a>
|
| 3851 |
</div>
|
| 3852 |
<div id="code-benchmark" class="cell-code" data-lines="68">
|
| 3853 |
<div class="code-wrap">
|
flash_attn/impls/sage_attention.html
CHANGED
|
@@ -3847,6 +3847,7 @@ Cell: benchmark | 41.27s
|
|
| 3847 |
| <button class="run-btn" onclick="runCell('benchmark')">▶ run</button>
|
| 3848 |
<button class="copy-btn" onclick="copyCell('benchmark')">Copy</button>
|
| 3849 |
<a href="cells/benchmark.py" target="_blank" class="raw-btn">Raw</a>
|
|
|
|
| 3850 |
</div>
|
| 3851 |
<div id="code-benchmark" class="cell-code" data-lines="79">
|
| 3852 |
<div class="code-wrap">
|
|
|
|
| 3847 |
| <button class="run-btn" onclick="runCell('benchmark')">▶ run</button>
|
| 3848 |
<button class="copy-btn" onclick="copyCell('benchmark')">Copy</button>
|
| 3849 |
<a href="cells/benchmark.py" target="_blank" class="raw-btn">Raw</a>
|
| 3850 |
+
<a href="https://github.com/huggingface/kernels-uvnotes/blob/main/flash_attn/impls/sage_attention.md" target="_blank" class="github-btn">GitHub</a>
|
| 3851 |
</div>
|
| 3852 |
<div id="code-benchmark" class="cell-code" data-lines="79">
|
| 3853 |
<div class="code-wrap">
|
flash_attn/impls/xformers.html
CHANGED
|
@@ -3847,6 +3847,7 @@ Cell: benchmark | 41.87s
|
|
| 3847 |
| <button class="run-btn" onclick="runCell('benchmark')">▶ run</button>
|
| 3848 |
<button class="copy-btn" onclick="copyCell('benchmark')">Copy</button>
|
| 3849 |
<a href="cells/benchmark.py" target="_blank" class="raw-btn">Raw</a>
|
|
|
|
| 3850 |
</div>
|
| 3851 |
<div id="code-benchmark" class="cell-code" data-lines="68">
|
| 3852 |
<div class="code-wrap">
|
|
|
|
| 3847 |
| <button class="run-btn" onclick="runCell('benchmark')">▶ run</button>
|
| 3848 |
<button class="copy-btn" onclick="copyCell('benchmark')">Copy</button>
|
| 3849 |
<a href="cells/benchmark.py" target="_blank" class="raw-btn">Raw</a>
|
| 3850 |
+
<a href="https://github.com/huggingface/kernels-uvnotes/blob/main/flash_attn/impls/xformers.md" target="_blank" class="github-btn">GitHub</a>
|
| 3851 |
</div>
|
| 3852 |
<div id="code-benchmark" class="cell-code" data-lines="68">
|
| 3853 |
<div class="code-wrap">
|
flash_attn/results/artifacts/combine/latency.svg
CHANGED
|
|
Git LFS Details
|
|
|
Git LFS Details
|
flash_attn/results/combined_results.html
CHANGED
|
@@ -4012,7 +4012,7 @@ using cross-file dependencies.</p>
|
|
| 4012 |
<rdf:RDF>
|
| 4013 |
<ns2:Work>
|
| 4014 |
<dc:type rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
|
| 4015 |
-
<dc:date>2025-10-14T20:
|
| 4016 |
<dc:format>image/svg+xml</dc:format>
|
| 4017 |
<dc:creator>
|
| 4018 |
<ns2:Agent>
|
|
@@ -5210,7 +5210,7 @@ using cross-file dependencies.</p>
|
|
| 5210 |
<span onclick="toggleOutput('combine')" style="cursor: pointer;">▶ output</span>
|
| 5211 |
<span id="uv-indicator-combine" onclick="toggleUvLogsFromHeader('combine')" style="cursor: pointer;">▶ uv-logs</span>
|
| 5212 |
</span> |
|
| 5213 |
-
Cell: combine |
|
| 5214 |
| <button class="run-btn" onclick="runCell('combine')">▶ run</button>
|
| 5215 |
<button class="copy-btn" onclick="copyCell('combine')">Copy</button>
|
| 5216 |
<a href="cells/combine.py" target="_blank" class="raw-btn">Raw</a>
|
|
@@ -5959,28 +5959,28 @@ Total records: 42
|
|
| 5959 |
<div class="uv-logs-header" onclick="toggleUvLogs(this)">▶ UV Install Logs</div>
|
| 5960 |
<div class="uv-logs-content" style="display: none;">
|
| 5961 |
Updating https://github.com/drbh/kernels-benchmark-tools.git (main)
|
| 5962 |
-
Downloading nvidia-
|
| 5963 |
-
Downloading nvidia-cufft-cu12 (184.2MiB)
|
| 5964 |
-
Downloading nvidia-cublas-cu12 (566.8MiB)
|
| 5965 |
-
Downloading networkx (1.9MiB)
|
| 5966 |
Downloading sympy (6.0MiB)
|
| 5967 |
-
Downloading nvidia-
|
| 5968 |
Downloading nvidia-cusparselt-cu12 (273.9MiB)
|
| 5969 |
-
Downloading nvidia-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5970 |
Downloading nvidia-nvjitlink-cu12 (37.4MiB)
|
| 5971 |
-
Downloading numpy (15.9MiB)
|
| 5972 |
-
Downloading nvidia-cusolver-cu12 (255.1MiB)
|
| 5973 |
-
Downloading nvidia-curand-cu12 (60.7MiB)
|
| 5974 |
-
Downloading setuptools (1.1MiB)
|
| 5975 |
Downloading matplotlib (8.3MiB)
|
| 5976 |
-
Downloading fonttools (4.7MiB)
|
| 5977 |
Downloading nvidia-cusparse-cu12 (274.9MiB)
|
| 5978 |
-
Downloading pillow (6.3MiB)
|
| 5979 |
-
Downloading nvidia-nccl-cu12 (307.4MiB)
|
| 5980 |
Downloading nvidia-cuda-cupti-cu12 (9.8MiB)
|
| 5981 |
-
Downloading
|
| 5982 |
-
Downloading
|
|
|
|
|
|
|
|
|
|
| 5983 |
Downloading torch (846.8MiB)
|
|
|
|
|
|
|
| 5984 |
Updated https://github.com/drbh/kernels-benchmark-tools.git (f457279bca6573cd2fa54a74e67118f5e6b7a31c)
|
| 5985 |
Building kernels-benchmark-tools @ git+https://github.com/drbh/kernels-benchmark-tools.git@f457279bca6573cd2fa54a74e67118f5e6b7a31c
|
| 5986 |
Downloading nvidia-cufile-cu12
|
|
@@ -5993,20 +5993,20 @@ Downloading torch (846.8MiB)
|
|
| 5993 |
Downloading matplotlib
|
| 5994 |
Downloading nvidia-cuda-cupti-cu12
|
| 5995 |
Downloading numpy
|
| 5996 |
-
Downloading sympy
|
| 5997 |
Downloading nvidia-nvjitlink-cu12
|
|
|
|
| 5998 |
Downloading nvidia-curand-cu12
|
| 5999 |
Downloading nvidia-cuda-nvrtc-cu12
|
| 6000 |
Downloading triton
|
| 6001 |
Downloading nvidia-cufft-cu12
|
| 6002 |
Downloading nvidia-cusolver-cu12
|
| 6003 |
-
Downloading nvidia-cusparselt-cu12
|
| 6004 |
Downloading nvidia-cusparse-cu12
|
|
|
|
| 6005 |
Downloading nvidia-nccl-cu12
|
| 6006 |
Downloading nvidia-cublas-cu12
|
| 6007 |
Downloading nvidia-cudnn-cu12
|
| 6008 |
Downloading torch
|
| 6009 |
-
Installed 37 packages in 254ms
|
| 6010 |
</div>
|
| 6011 |
</div>
|
| 6012 |
<div class="cell-artifacts">
|
|
@@ -6800,7 +6800,7 @@ Installed 37 packages in 254ms
|
|
| 6800 |
<rdf:RDF>
|
| 6801 |
<ns2:Work>
|
| 6802 |
<dc:type rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
|
| 6803 |
-
<dc:date>2025-10-14T20:
|
| 6804 |
<dc:format>image/svg+xml</dc:format>
|
| 6805 |
<dc:creator>
|
| 6806 |
<ns2:Agent>
|
|
|
|
| 4012 |
<rdf:RDF>
|
| 4013 |
<ns2:Work>
|
| 4014 |
<dc:type rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
|
| 4015 |
+
<dc:date>2025-10-14T20:47:15.973248</dc:date>
|
| 4016 |
<dc:format>image/svg+xml</dc:format>
|
| 4017 |
<dc:creator>
|
| 4018 |
<ns2:Agent>
|
|
|
|
| 5210 |
<span onclick="toggleOutput('combine')" style="cursor: pointer;">▶ output</span>
|
| 5211 |
<span id="uv-indicator-combine" onclick="toggleUvLogsFromHeader('combine')" style="cursor: pointer;">▶ uv-logs</span>
|
| 5212 |
</span> |
|
| 5213 |
+
Cell: combine | 34.17s
|
| 5214 |
| <button class="run-btn" onclick="runCell('combine')">▶ run</button>
|
| 5215 |
<button class="copy-btn" onclick="copyCell('combine')">Copy</button>
|
| 5216 |
<a href="cells/combine.py" target="_blank" class="raw-btn">Raw</a>
|
|
|
|
| 5959 |
<div class="uv-logs-header" onclick="toggleUvLogs(this)">▶ UV Install Logs</div>
|
| 5960 |
<div class="uv-logs-content" style="display: none;">
|
| 5961 |
Updating https://github.com/drbh/kernels-benchmark-tools.git (main)
|
| 5962 |
+
Downloading nvidia-curand-cu12 (60.7MiB)
|
|
|
|
|
|
|
|
|
|
| 5963 |
Downloading sympy (6.0MiB)
|
| 5964 |
+
Downloading nvidia-cusolver-cu12 (255.1MiB)
|
| 5965 |
Downloading nvidia-cusparselt-cu12 (273.9MiB)
|
| 5966 |
+
Downloading nvidia-nccl-cu12 (307.4MiB)
|
| 5967 |
+
Downloading nvidia-cuda-nvrtc-cu12 (84.0MiB)
|
| 5968 |
+
Downloading nvidia-cufft-cu12 (184.2MiB)
|
| 5969 |
+
Downloading fonttools (4.7MiB)
|
| 5970 |
+
Downloading kiwisolver (1.4MiB)
|
| 5971 |
+
Downloading nvidia-cublas-cu12 (566.8MiB)
|
| 5972 |
Downloading nvidia-nvjitlink-cu12 (37.4MiB)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5973 |
Downloading matplotlib (8.3MiB)
|
|
|
|
| 5974 |
Downloading nvidia-cusparse-cu12 (274.9MiB)
|
|
|
|
|
|
|
| 5975 |
Downloading nvidia-cuda-cupti-cu12 (9.8MiB)
|
| 5976 |
+
Downloading setuptools (1.1MiB)
|
| 5977 |
+
Downloading networkx (1.9MiB)
|
| 5978 |
+
Downloading numpy (15.9MiB)
|
| 5979 |
+
Downloading pillow (6.3MiB)
|
| 5980 |
+
Downloading nvidia-cufile-cu12 (1.1MiB)
|
| 5981 |
Downloading torch (846.8MiB)
|
| 5982 |
+
Downloading triton (148.4MiB)
|
| 5983 |
+
Downloading nvidia-cudnn-cu12 (674.0MiB)
|
| 5984 |
Updated https://github.com/drbh/kernels-benchmark-tools.git (f457279bca6573cd2fa54a74e67118f5e6b7a31c)
|
| 5985 |
Building kernels-benchmark-tools @ git+https://github.com/drbh/kernels-benchmark-tools.git@f457279bca6573cd2fa54a74e67118f5e6b7a31c
|
| 5986 |
Downloading nvidia-cufile-cu12
|
|
|
|
| 5993 |
Downloading matplotlib
|
| 5994 |
Downloading nvidia-cuda-cupti-cu12
|
| 5995 |
Downloading numpy
|
|
|
|
| 5996 |
Downloading nvidia-nvjitlink-cu12
|
| 5997 |
+
Downloading sympy
|
| 5998 |
Downloading nvidia-curand-cu12
|
| 5999 |
Downloading nvidia-cuda-nvrtc-cu12
|
| 6000 |
Downloading triton
|
| 6001 |
Downloading nvidia-cufft-cu12
|
| 6002 |
Downloading nvidia-cusolver-cu12
|
|
|
|
| 6003 |
Downloading nvidia-cusparse-cu12
|
| 6004 |
+
Downloading nvidia-cusparselt-cu12
|
| 6005 |
Downloading nvidia-nccl-cu12
|
| 6006 |
Downloading nvidia-cublas-cu12
|
| 6007 |
Downloading nvidia-cudnn-cu12
|
| 6008 |
Downloading torch
|
| 6009 |
+
Installed 37 packages in 256ms
|
| 6010 |
</div>
|
| 6011 |
</div>
|
| 6012 |
<div class="cell-artifacts">
|
|
|
|
| 6800 |
<rdf:RDF>
|
| 6801 |
<ns2:Work>
|
| 6802 |
<dc:type rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
|
| 6803 |
+
<dc:date>2025-10-14T20:47:15.973248</dc:date>
|
| 6804 |
<dc:format>image/svg+xml</dc:format>
|
| 6805 |
<dc:creator>
|
| 6806 |
<ns2:Agent>
|