Spaces: Running on Zero

The script below compiles the project's CUDA kernels ahead of time on a local CUDA machine, so the prebuilt binaries can be added to the repo and shipped with the Space:
```python
#!/usr/bin/env python3
"""
Compile CUDA kernels locally for deployment to Hugging Face Spaces.
"""
import shutil
import sys
import warnings
from pathlib import Path

import torch

# Suppress warnings for cleaner output
warnings.filterwarnings('ignore')
print("=" * 60)
print("StyleForge CUDA Kernel Compiler")
print("=" * 60)
print()

# Check CUDA availability
if not torch.cuda.is_available():
    print("ERROR: CUDA is not available on this system.")
    print("This script requires a CUDA-capable GPU.")
    sys.exit(1)

print(f"CUDA Version: {torch.version.cuda}")
print(f"PyTorch Version: {torch.__version__}")
print(f"GPU: {torch.cuda.get_device_name(0)}")

# Get the compute capability of the local GPU
major, minor = torch.cuda.get_device_capability(0)
compute_capability = f"{major}.{minor}"
print(f"Compute Capability: {compute_capability}")
print()
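
# Note: the compute capability detected above is informational only; the
# build below targets a fixed list of architectures so the binary runs on
# Spaces hardware regardless of which GPU this script happens to compile on.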

# Create prebuilt directory
prebuilt_dir = Path("kernels/prebuilt")
prebuilt_dir.mkdir(exist_ok=True, parents=True)

print("Compiling CUDA kernels...")
print("-" * 60)

try:
    # Import PyTorch CUDA extension utilities
    from torch.utils.cpp_extension import load_inline, CUDA_HOME

    if CUDA_HOME is None:
        print("ERROR: CUDA_HOME is not set. CUDA toolkit may not be installed.")
        sys.exit(1)
    print(f"CUDA Home: {CUDA_HOME}")

    # Read CUDA source
    kernel_path = Path("kernels/instance_norm.cu")
    if not kernel_path.exists():
        print(f"ERROR: Kernel source not found at {kernel_path}")
        sys.exit(1)
    cuda_source = kernel_path.read_text()
    print(f"Loaded CUDA source: {len(cuda_source)} bytes")

    # Architecture-specific flags for Hugging Face GPUs
    extra_cuda_cflags = ['-O3', '--use_fast_math']
    hf_arch_flags = [
        '-gencode=arch=compute_70,code=sm_70',  # V100
        '-gencode=arch=compute_75,code=sm_75',  # T4
        '-gencode=arch=compute_80,code=sm_80',  # A100
    ]
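    # Assumption: if the Space may be scheduled on a newer GPU (e.g. an A10G,
    # sm_86), appending a PTX target such as
    # '-gencode=arch=compute_80,code=compute_80' lets the driver JIT-compile
    # for architectures not listed above.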
    extra_cuda_cflags.extend(hf_arch_flags)
    print("Build flags:", ' '.join(extra_cuda_cflags))
    print()
    print("Compiling... (this may take 1-2 minutes)")

    # Compile the kernel.
    # Note: load_inline requires the cpp_sources argument even when, as here,
    # the pybind11 bindings live in the .cu file, so an empty list is passed.
    module = load_inline(
        name='fused_instance_norm',
        cpp_sources=[],  # empty: bindings are in the .cu file
        cuda_sources=[cuda_source],
        extra_cuda_cflags=extra_cuda_cflags,
        verbose=False,
    )
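    # load_inline both compiles (via ninja) and imports the extension, so
    # reaching this point means the module built and loaded successfully.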

    print()
    print("-" * 60)
    print("Compilation successful!")
    print()

    # Find the compiled library. _get_build_directory is a private PyTorch
    # helper that returns the JIT build directory used by load_inline.
    import torch.utils.cpp_extension
    build_dir = Path(
        torch.utils.cpp_extension._get_build_directory('fused_instance_norm', verbose=False)
    )
    print(f"Build directory: {build_dir}")

    so_files = list(build_dir.rglob("*.so")) + list(build_dir.rglob("*.pyd"))
    if not so_files:
        print("ERROR: No compiled .so/.pyd file found")
        sys.exit(1)

    # Copy to prebuilt directory
    for src_file in so_files:
        dst_file = prebuilt_dir / src_file.name
        shutil.copy2(src_file, dst_file)
        size_kb = dst_file.stat().st_size / 1024
        print(f"Copied: {dst_file.name} ({size_kb:.1f} KB)")

    print()
    print("=" * 60)
    print("Kernel compilation complete!")
    print(f"Pre-compiled kernels saved to: {prebuilt_dir}")
    print()

    prebuilt_so = sorted(prebuilt_dir.glob("*.so"))
    if prebuilt_so:
        print("Download the .so file and add it to your local repo:")
        print(f"  kernels/prebuilt/{prebuilt_so[0].name}")
    print("=" * 60)

except Exception as e:
    print()
    print("-" * 60)
    print("ERROR: Compilation failed!")
    print(f"Details: {e}")
    import traceback
    traceback.print_exc()
    sys.exit(1)
```
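
On the Space itself, the prebuilt binary makes startup compilation unnecessary: `load_inline` produces an ordinary Python extension module, so the shipped `.so` can be imported directly once its directory is on `sys.path`. Below is a minimal loader sketch under that assumption; the `load_instance_norm_kernel` helper and the JIT fallback are illustrative, not part of the script above:

```python
# Illustrative loader for app startup on the Space.
# Assumption: the build script above produced kernels/prebuilt/fused_instance_norm.so.
import sys
from pathlib import Path

import torch  # torch must load first so the extension can resolve libtorch symbols


def load_instance_norm_kernel():
    prebuilt_dir = Path("kernels/prebuilt")
    if any(prebuilt_dir.glob("fused_instance_norm*.so")):
        # The prebuilt extension is an ordinary Python module once its
        # directory is on sys.path.
        sys.path.insert(0, str(prebuilt_dir))
        import fused_instance_norm
        return fused_instance_norm

    # Fallback: JIT-compile from source (requires a CUDA toolkit on the host).
    from torch.utils.cpp_extension import load
    return load(
        name='fused_instance_norm',
        sources=['kernels/instance_norm.cu'],
        extra_cuda_cflags=['-O3', '--use_fast_math'],
    )


kernel = load_instance_norm_kernel()
```

If the running GPU is not covered by the embedded sm_70/sm_75/sm_80 targets, the import itself still succeeds but kernel launches typically fail with "no kernel image is available", which is why keeping the JIT fallback is worthwhile.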