Spaces:
Running
on
Zero
Running
on
Zero
| """ | |
| ZeroGPU-compatible startup script to handle NVMe offloading directory setup | |
| """ | |
| import os | |
| import sys | |
| import subprocess | |
def setup_zerogpu_environment(zerogpu_offload_dir='/data-nvme/zerogpu-offload'):
    """
    Setup ZeroGPU environment with proper offload directory handling

    Makes sure ``zerogpu_offload_dir`` exists and is writable: the directory
    is created if missing, then a small probe file is written and removed.
    If any of these steps fails, ``setup_fallback_offload()`` is called to
    choose another location. This function only verifies the directory; it
    does not export it through an environment variable.

    Args:
        zerogpu_offload_dir: Directory to prepare. Defaults to the primary
            NVMe offload path.
    """
    print("Setting up ZeroGPU environment...")

    # Recorded before makedirs so the success message can still distinguish
    # "already there" from "just created".
    already_existed = os.path.isdir(zerogpu_offload_dir)

    try:
        # exist_ok=True makes this a no-op for an existing directory and
        # raises FileExistsError when the path is a regular file.
        os.makedirs(zerogpu_offload_dir, exist_ok=True)

        # Probe write permissions even when the directory already existed:
        # an existing but read-only directory must trigger the fallback too.
        test_file = os.path.join(zerogpu_offload_dir, 'test_write.tmp')
        with open(test_file, 'w') as f:
            f.write('test')
        os.remove(test_file)
    except OSError as e:  # PermissionError is a subclass of OSError
        print(f"⚠️ Cannot use NVMe directory ({e}), setting up fallback...")
        setup_fallback_offload()
        return

    if already_existed:
        print(f"✅ ZeroGPU offload directory exists: {zerogpu_offload_dir}")
    else:
        print(f"✅ Created ZeroGPU offload directory: {zerogpu_offload_dir}")
def setup_fallback_offload(fallback_options=None):
    """
    Setup fallback offload directory when NVMe is not available

    Tries each candidate directory in order: creates it if needed, then
    writes and removes a small probe file. The first candidate that passes is
    stored, as an absolute path, in the ``ZEROGPU_OFFLOAD_DIR`` environment
    variable. If every candidate fails, ``ZEROGPU_DISABLE_OFFLOAD`` is set to
    ``'1'`` instead.

    Args:
        fallback_options: Optional iterable of directory paths to try, in
            order. Defaults to a temp location, a home-directory location
            and a directory under the current working directory.
    """
    if fallback_options is None:
        # Try different fallback locations
        fallback_options = [
            '/tmp/zerogpu-offload',
            '/home/user/zerogpu-offload',
            './zerogpu-offload',
        ]

    for candidate in fallback_options:
        # Resolve relative entries now so the exported path stays valid even
        # if the process changes its working directory later.
        fallback_dir = os.path.abspath(candidate)
        test_file = os.path.join(fallback_dir, 'test_write.tmp')
        try:
            os.makedirs(fallback_dir, exist_ok=True)
            # Test write permissions
            with open(test_file, 'w') as f:
                f.write('test')
            os.remove(test_file)
        except (OSError, ValueError) as e:
            # Filesystem failure or malformed path: move on to the next one.
            print(f"❌ Failed to setup {fallback_dir}: {e}")
            continue

        # Set environment variables for ZeroGPU to use this directory
        os.environ['ZEROGPU_OFFLOAD_DIR'] = fallback_dir
        print(f"✅ Using fallback offload directory: {fallback_dir}")
        return

    # If all fallbacks fail, disable offloading
    print("⚠️ All offload directories failed, disabling ZeroGPU offloading")
    os.environ['ZEROGPU_DISABLE_OFFLOAD'] = '1'
def setup_memory_optimization():
    """
    Setup PyTorch memory optimization for ZeroGPU

    Overwrites ``PYTORCH_CUDA_ALLOC_CONF`` and ``CUDA_LAUNCH_BLOCKING`` in the
    process environment and makes sure ``PYTORCH_NO_CUDA_MEMORY_CACHING`` is
    not set, printing each change. Only environment variables are touched
    here.
    """
    memory_config = {
        'PYTORCH_CUDA_ALLOC_CONF': 'expandable_segments:True',
        'CUDA_LAUNCH_BLOCKING': '0',  # Allow async operations
    }
    for key, value in memory_config.items():
        os.environ[key] = value
        print(f"Set {key}={value}")

    # Enable memory caching by removing the opt-out variable rather than
    # setting it to '0': PyTorch releases that only test whether the variable
    # is present would treat '0' as "caching disabled".
    if os.environ.pop('PYTORCH_NO_CUDA_MEMORY_CACHING', None) is not None:
        print("Unset PYTORCH_NO_CUDA_MEMORY_CACHING")
def check_disk_space(directory, required_gb=50):
    """
    Report whether ``directory`` has at least ``required_gb`` GiB free

    Prints the free space that was found. Returns True when the free space
    meets the requirement, False when it does not, and True as well when the
    check itself fails for any reason (the failure is printed).
    """
    try:
        import shutil

        usage = shutil.disk_usage(directory)
        available_gb = usage.free / (1024**3)
        print(f"Available disk space in {directory}: {available_gb:.1f}GB")
        has_enough = available_gb >= required_gb
    except Exception as exc:
        print(f"Could not check disk space: {exc}")
        # Best effort: an unverifiable location is treated as acceptable.
        has_enough = True
    return has_enough
if __name__ == "__main__":
    # Script entry point: prepare the offload directory and the memory-related
    # environment variables, then warn if the chosen directory is short on
    # disk space. Any failure is reported and startup carries on.
    try:
        setup_zerogpu_environment()
        setup_memory_optimization()

        # A fallback location, if one was exported, takes precedence over the
        # primary NVMe path.
        target_dir = os.environ.get(
            'ZEROGPU_OFFLOAD_DIR', '/data-nvme/zerogpu-offload'
        )
        if os.path.exists(target_dir) and not check_disk_space(target_dir):
            print("⚠️ Low disk space, consider cleaning up or using smaller models")

        print("🚀 ZeroGPU environment setup complete!")
    except Exception as err:
        print(f"❌ Error setting up ZeroGPU environment: {err}")
        print("Continuing with default configuration...")