Spaces: Running on Zero

The script below compiles the project's CUDA kernels ahead of time on a local CUDA machine, so the prebuilt binaries can be added to the repo and shipped with the Space:
```python
#!/usr/bin/env python3
"""
Compile CUDA kernels locally for deployment to Hugging Face Spaces.
"""
import shutil
import sys
import warnings
from pathlib import Path

import torch

# Suppress warnings for cleaner output
warnings.filterwarnings('ignore')
print("=" * 60)
print("StyleForge CUDA Kernel Compiler")
print("=" * 60)
print()

# Check CUDA availability
if not torch.cuda.is_available():
    print("ERROR: CUDA is not available on this system.")
    print("This script requires a CUDA-capable GPU.")
    sys.exit(1)

print(f"CUDA Version: {torch.version.cuda}")
print(f"PyTorch Version: {torch.__version__}")
print(f"GPU: {torch.cuda.get_device_name(0)}")

# Get the compute capability of the local GPU
major, minor = torch.cuda.get_device_capability(0)
compute_capability = f"{major}.{minor}"
print(f"Compute Capability: {compute_capability}")
print()
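
# Note: the compute capability detected above is informational only; the
# build below targets a fixed list of architectures so the binary runs on
# Spaces hardware regardless of which GPU this script happens to compile on.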

# Create prebuilt directory
prebuilt_dir = Path("kernels/prebuilt")
prebuilt_dir.mkdir(exist_ok=True, parents=True)

print("Compiling CUDA kernels...")
print("-" * 60)

try:
    # Import PyTorch CUDA extension utilities
    from torch.utils.cpp_extension import load_inline, CUDA_HOME

    if CUDA_HOME is None:
        print("ERROR: CUDA_HOME is not set. CUDA toolkit may not be installed.")
        sys.exit(1)
    print(f"CUDA Home: {CUDA_HOME}")

    # Read CUDA source
    kernel_path = Path("kernels/instance_norm.cu")
    if not kernel_path.exists():
        print(f"ERROR: Kernel source not found at {kernel_path}")
        sys.exit(1)
    cuda_source = kernel_path.read_text()
    print(f"Loaded CUDA source: {len(cuda_source)} bytes")

    # Architecture-specific flags for Hugging Face GPUs
    extra_cuda_cflags = ['-O3', '--use_fast_math']
    hf_arch_flags = [
        '-gencode=arch=compute_70,code=sm_70',  # V100
        '-gencode=arch=compute_75,code=sm_75',  # T4
        '-gencode=arch=compute_80,code=sm_80',  # A100
    ]
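    # Assumption: if the Space may be scheduled on a newer GPU (e.g. an A10G,
    # sm_86), appending a PTX target such as
    # '-gencode=arch=compute_80,code=compute_80' lets the driver JIT-compile
    # for architectures not listed above.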
    extra_cuda_cflags.extend(hf_arch_flags)
    print("Build flags:", ' '.join(extra_cuda_cflags))
    print()
    print("Compiling... (this may take 1-2 minutes)")

    # Compile the kernel.
    # Note: load_inline requires the cpp_sources argument even when, as here,
    # the pybind11 bindings live in the .cu file, so an empty list is passed.
    module = load_inline(
        name='fused_instance_norm',
        cpp_sources=[],  # empty: bindings are in the .cu file
        cuda_sources=[cuda_source],
        extra_cuda_cflags=extra_cuda_cflags,
        verbose=False,
    )
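    # load_inline both compiles (via ninja) and imports the extension, so
    # reaching this point means the module built and loaded successfully.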

    print()
    print("-" * 60)
    print("Compilation successful!")
    print()

    # Find the compiled library. _get_build_directory is a private PyTorch
    # helper that returns the JIT build directory used by load_inline.
    import torch.utils.cpp_extension
    build_dir = Path(
        torch.utils.cpp_extension._get_build_directory('fused_instance_norm', verbose=False)
    )
    print(f"Build directory: {build_dir}")

    so_files = list(build_dir.rglob("*.so")) + list(build_dir.rglob("*.pyd"))
    if not so_files:
        print("ERROR: No compiled .so/.pyd file found")
        sys.exit(1)

    # Copy to prebuilt directory
    for src_file in so_files:
        dst_file = prebuilt_dir / src_file.name
        shutil.copy2(src_file, dst_file)
        size_kb = dst_file.stat().st_size / 1024
        print(f"Copied: {dst_file.name} ({size_kb:.1f} KB)")

    print()
    print("=" * 60)
    print("Kernel compilation complete!")
    print(f"Pre-compiled kernels saved to: {prebuilt_dir}")
    print()

    prebuilt_so = sorted(prebuilt_dir.glob("*.so"))
    if prebuilt_so:
        print("Download the .so file and add it to your local repo:")
        print(f"  kernels/prebuilt/{prebuilt_so[0].name}")
    print("=" * 60)

except Exception as e:
    print()
    print("-" * 60)
    print("ERROR: Compilation failed!")
    print(f"Details: {e}")
    import traceback
    traceback.print_exc()
    sys.exit(1)
```
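
On the Space itself, the prebuilt binary makes startup compilation unnecessary: `load_inline` produces an ordinary Python extension module, so the shipped `.so` can be imported directly once its directory is on `sys.path`. Below is a minimal loader sketch under that assumption; the `load_instance_norm_kernel` helper and the JIT fallback are illustrative, not part of the script above:

```python
# Illustrative loader for app startup on the Space.
# Assumption: the build script above produced kernels/prebuilt/fused_instance_norm.so.
import sys
from pathlib import Path

import torch  # torch must load first so the extension can resolve libtorch symbols


def load_instance_norm_kernel():
    prebuilt_dir = Path("kernels/prebuilt")
    if any(prebuilt_dir.glob("fused_instance_norm*.so")):
        # The prebuilt extension is an ordinary Python module once its
        # directory is on sys.path.
        sys.path.insert(0, str(prebuilt_dir))
        import fused_instance_norm
        return fused_instance_norm

    # Fallback: JIT-compile from source (requires a CUDA toolkit on the host).
    from torch.utils.cpp_extension import load
    return load(
        name='fused_instance_norm',
        sources=['kernels/instance_norm.cu'],
        extra_cuda_cflags=['-O3', '--use_fast_math'],
    )


kernel = load_instance_norm_kernel()
```

If the running GPU is not covered by the embedded sm_70/sm_75/sm_80 targets, the import itself still succeeds but kernel launches typically fail with "no kernel image is available", which is why keeping the JIT fallback is worthwhile.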