from modules_forge.initialization import initialize_forge

initialize_forge()

import sys
import types
import os
import torch
import inspect
import functools
import gradio.oauth
import gradio.routes
import contextlib

from backend import memory_management
from backend.operations import DynamicSwapInstaller
from diffusers.models import modeling_utils as diffusers_modeling_utils
from transformers import modeling_utils as transformers_modeling_utils
from backend.attention import AttentionProcessorForge
from starlette.requests import Request


_original_init = Request.__init__


def patched_init(self, scope, receive=None, send=None):
    # Ensure a 'session' key exists in the ASGI scope so routes that expect
    # session data do not fail when no session middleware is installed.
    if 'session' not in scope:
        scope['session'] = dict()
    _original_init(self, scope, receive, send)
    return


Request.__init__ = patched_init

# Disable gradio's OAuth route attachment; it is not used here.
gradio.oauth.attach_oauth = lambda x: None
gradio.routes.attach_oauth = lambda x: None


ALWAYS_SWAP = False

module_in_gpu: torch.nn.Module = None
gpu = memory_management.get_torch_device()
cpu = torch.device('cpu')

# Make diffusers/transformers report the GPU as the parameter device even while
# the actual weights may live on CPU because of swapping.
diffusers_modeling_utils.get_parameter_device = lambda *args, **kwargs: gpu
transformers_modeling_utils.get_parameter_device = lambda *args, **kwargs: gpu


def unload_module():
    global module_in_gpu

    if module_in_gpu is None:
        return

    DynamicSwapInstaller.uninstall_model(module_in_gpu)
    module_in_gpu.to(cpu)
    print(f'Move module to CPU: {type(module_in_gpu).__name__}')

    module_in_gpu = None
    memory_management.soft_empty_cache()
    return


def greedy_move_to_gpu(model, model_gpu_memory_when_using_cpu_swap):
    mem_counter = 0
    memory_in_swap = 0

    for m in model.modules():
        if hasattr(m, "weight"):
            module_mem = memory_management.module_size(m)
            if mem_counter + module_mem < model_gpu_memory_when_using_cpu_swap:
                m.to(gpu)
                mem_counter += module_mem
            else:
                m.to(cpu)
                memory_in_swap += module_mem

    print(f"[Memory Management] Loaded to CPU Swap: {memory_in_swap / (1024 * 1024):.2f} MB")
    print(f"[Memory Management] Loaded to GPU: {mem_counter / (1024 * 1024):.2f} MB")
    return


def load_module(m):
    global module_in_gpu

    if module_in_gpu == m:
        return

    unload_module()

    model_memory = memory_management.module_size(m)
    current_free_mem = memory_management.get_free_memory(gpu)
    inference_memory = 1.5 * 1024 * 1024 * 1024  # memory_management.minimum_inference_memory()  # TODO: connect to main memory system
    estimated_remaining_memory = current_free_mem - model_memory - inference_memory

    print(f"[Memory Management] Current Free GPU Memory: {current_free_mem / (1024 * 1024):.2f} MB")
    print(f"[Memory Management] Required Model Memory: {model_memory / (1024 * 1024):.2f} MB")
    print(f"[Memory Management] Required Inference Memory: {inference_memory / (1024 * 1024):.2f} MB")
    print(f"[Memory Management] Estimated Remaining GPU Memory: {estimated_remaining_memory / (1024 * 1024):.2f} MB")

    is_torch_jit = 'ScriptModule' in type(m).__name__

    if is_torch_jit:
        print(f'Detected torch jit module: {type(m).__name__}')

    if (ALWAYS_SWAP or estimated_remaining_memory < 0) and not is_torch_jit:
        print(f'Move module to SWAP: {type(m).__name__}')
        DynamicSwapInstaller.install_model(m, target_device=gpu)
        model_gpu_memory_when_using_cpu_swap = memory_management.compute_model_gpu_memory_when_using_cpu_swap(current_free_mem, inference_memory)
        greedy_move_to_gpu(m, model_gpu_memory_when_using_cpu_swap)
    else:
        print(f'Move module to GPU: {type(m).__name__}')
        m.to(gpu)

    module_in_gpu = m
    return
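
# Usage sketch (not part of the original file; 'pipe' is a hypothetical pipeline):
# load_module() can also be called manually before invoking a sub-module, e.g.
#
#     load_module(pipe.vae)
#     image = pipe.vae.decode(latents)
#
# although normally the forward hooks installed further below call it automatically.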


class GPUObject:
    def __init__(self):
        self.module_list = []

    def __enter__(self):
        self.original_init = torch.nn.Module.__init__

        def patched_init(module, *args, **kwargs):
            self.module_list.append(module)
            return self.original_init(module, *args, **kwargs)

        torch.nn.Module.__init__ = patched_init
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        torch.nn.Module.__init__ = self.original_init
        self.module_list = list(set(self.module_list))
        self.to(device=torch.device('cpu'))
        memory_management.soft_empty_cache()
        return

    def to(self, device):
        for module in self.module_list:
            module.to(device)
        print(f'Forge Space: Moved {len(self.module_list)} Modules to {device}')
        return self

    def gpu(self):
        self.to(device=gpu)
        return self


def capture_gpu_object(capture=True):
    if capture:
        return GPUObject()
    else:
        return contextlib.nullcontext()


def GPU(gpu_objects=None, manual_load=False, **kwargs):
    gpu_objects = gpu_objects or []

    if not isinstance(gpu_objects, (list, tuple)):
        gpu_objects = [gpu_objects]

    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            print("Entering Forge Space GPU ...")
            memory_management.unload_all_models()

            if not manual_load:
                for o in gpu_objects:
                    o.gpu()

            result = func(*args, **kwargs)

            print("Cleaning Forge Space GPU ...")
            unload_module()

            for o in gpu_objects:
                o.to(device=torch.device('cpu'))

            memory_management.soft_empty_cache()
            return result

        return wrapper

    return decorator
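
# Usage sketch (illustrative only; the pipeline name and prompt handling are
# assumptions, not part of this file). Models are built inside capture_gpu_object()
# so every nn.Module created there is registered, then the heavy function is
# decorated with GPU() so those modules move to the GPU for the call and back
# to the CPU afterwards:
#
#     with capture_gpu_object() as gpu_obj:
#         pipe = SomeDiffusersPipeline.from_pretrained('some/repo')  # hypothetical
#
#     @GPU(gpu_objects=gpu_obj, manual_load=False)
#     def run(prompt):
#         return pipe(prompt).images[0]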


def convert_root_path():
    frame = inspect.currentframe().f_back
    caller_file = frame.f_code.co_filename
    caller_file = os.path.abspath(caller_file)
    result = os.path.join(os.path.dirname(caller_file), 'huggingface_space_mirror')
    return result + '/'
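
# Usage sketch (assumption): a Space script can resolve assets relative to its own
# 'huggingface_space_mirror' folder, e.g.
#
#     model_dir = convert_root_path() + 'checkpoints'  # hypothetical subfolder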


def download_single_file(
        url: str,
        *,
        model_dir: str,
        progress: bool = True,
        file_name: str | None = None,
        hash_prefix: str | None = None,
) -> str:
    os.makedirs(model_dir, exist_ok=True)

    if not file_name:
        from urllib.parse import urlparse
        parts = urlparse(url)
        file_name = os.path.basename(parts.path)

    cached_file = os.path.abspath(os.path.join(model_dir, file_name))

    if not os.path.exists(cached_file):
        tmp_filename = cached_file + '.tmp'
        print(f'Downloading: "{url}" to {cached_file} using temp file {tmp_filename}\n')
        from torch.hub import download_url_to_file
        download_url_to_file(url, tmp_filename, progress=progress, hash_prefix=hash_prefix)
        os.replace(tmp_filename, cached_file)

    return cached_file
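
# Usage sketch (the URL and folder below are illustrative assumptions):
#
#     ckpt = download_single_file(
#         'https://example.com/model.safetensors',
#         model_dir=convert_root_path() + 'checkpoints',
#         progress=True,
#     )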


def automatically_move_to_gpu_when_forward(m: torch.nn.Module, target_model: torch.nn.Module = None):
    if target_model is None:
        target_model = m

    def patch_method(method_name):
        if not hasattr(m, method_name):
            return

        if not hasattr(m, 'forge_space_hooked_names'):
            m.forge_space_hooked_names = []

        if method_name in m.forge_space_hooked_names:
            print(f'Already hooked {type(m).__name__}.{method_name}')
            return

        print(f'Automatic hook: {type(m).__name__}.{method_name}')
        original_method = getattr(m, method_name)

        def patched_method(*args, **kwargs):
            load_module(target_model)
            return original_method(*args, **kwargs)

        setattr(m, method_name, patched_method)
        m.forge_space_hooked_names.append(method_name)
        return

    for method_name in ['forward', 'encode', 'decode']:
        patch_method(method_name)

    return


def automatically_move_pipeline_components(pipe):
    for attr_name in dir(pipe):
        attr_value = getattr(pipe, attr_name, None)
        if isinstance(attr_value, torch.nn.Module):
            automatically_move_to_gpu_when_forward(attr_value)
    return
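
# Usage sketch (assumption; 'pipe' is a pipeline built under capture_gpu_object()):
# hook every nn.Module attached to a diffusers pipeline so each component is
# swapped onto the GPU only when its forward/encode/decode is actually called:
#
#     automatically_move_pipeline_components(pipe)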


def change_attention_from_diffusers_to_forge(m):
    m.set_attn_processor(AttentionProcessorForge())
    return


# diffusers version fix: restore the old PositionNet name and re-expose transformer
# modules at their pre-refactor import paths so code written against older diffusers
# keeps working.
import diffusers.models

diffusers.models.embeddings.PositionNet = diffusers.models.embeddings.GLIGENTextBoundingboxProjection

import diffusers.models.transformers.dual_transformer_2d

dual_transformer_2d = types.ModuleType('diffusers.models.dual_transformer_2d')
dual_transformer_2d.__dict__.update(diffusers.models.transformers.dual_transformer_2d.__dict__)
sys.modules['diffusers.models.dual_transformer_2d'] = dual_transformer_2d

import diffusers.models.transformers.transformer_2d

transformer_2d = types.ModuleType('diffusers.models.transformer_2d')
transformer_2d.__dict__.update(diffusers.models.transformers.transformer_2d.__dict__)
sys.modules['diffusers.models.transformer_2d'] = transformer_2d