from modules_forge.initialization import initialize_forge
initialize_forge()
import os
import torch
import inspect
import functools
import gradio.oauth
import gradio.routes
from backend import memory_management
from backend.operations import DynamicSwapInstaller
from diffusers.models import modeling_utils as diffusers_modeling_utils
from transformers import modeling_utils as transformers_modeling_utils
from backend.attention import AttentionProcessorForge
from starlette.requests import Request

# Ensure every Starlette request scope carries a 'session' dict so downstream
# session lookups do not fail when no session middleware is installed.
_original_init = Request.__init__


def patched_init(self, scope, receive=None, send=None):
    if 'session' not in scope:
        scope['session'] = dict()
    _original_init(self, scope, receive, send)
    return


Request.__init__ = patched_init

# Replace Gradio's OAuth attachment with no-ops; OAuth is not used here.
gradio.oauth.attach_oauth = lambda x: None
gradio.routes.attach_oauth = lambda x: None

ALWAYS_SWAP = False

module_in_gpu: torch.nn.Module = None
gpu = memory_management.get_torch_device()
cpu = torch.device('cpu')

# Always report the GPU as the parameter device so that diffusers/transformers
# plan computation on the GPU even while weights are temporarily swapped to CPU.
diffusers_modeling_utils.get_parameter_device = lambda *args, **kwargs: gpu
transformers_modeling_utils.get_parameter_device = lambda *args, **kwargs: gpu


def unload_module():
    # Move whichever module currently owns the GPU back to CPU and free the cache.
    global module_in_gpu

    if module_in_gpu is None:
        return

    DynamicSwapInstaller.uninstall_model(module_in_gpu)
    module_in_gpu.to(cpu)
    print(f'Move module to CPU: {type(module_in_gpu).__name__}')

    module_in_gpu = None
    memory_management.soft_empty_cache()
    return


def load_module(m):
    # Bring module m onto the GPU, first evicting whatever module is currently loaded.
    global module_in_gpu

    if module_in_gpu == m:
        return

    unload_module()

    model_memory = memory_management.module_size(m)
    current_free_mem = memory_management.get_free_memory(gpu)
    inference_memory = 1.5 * 1024 * 1024 * 1024  # memory_management.minimum_inference_memory() # TODO: connect to main memory system
    estimated_remaining_memory = current_free_mem - model_memory - inference_memory

    print(f"[Memory Management] Current Free GPU Memory: {current_free_mem / (1024 * 1024):.2f} MB")
    print(f"[Memory Management] Required Model Memory: {model_memory / (1024 * 1024):.2f} MB")
    print(f"[Memory Management] Required Inference Memory: {inference_memory / (1024 * 1024):.2f} MB")
    print(f"[Memory Management] Estimated Remaining GPU Memory: {estimated_remaining_memory / (1024 * 1024):.2f} MB")

    # If the model plus the inference reserve does not fit, install dynamic swapping
    # instead of moving the whole module onto the GPU.
    if ALWAYS_SWAP or estimated_remaining_memory < 0:
        print(f'Move module to SWAP: {type(m).__name__}')
        DynamicSwapInstaller.install_model(m, target_device=gpu)
    else:
        print(f'Move module to GPU: {type(m).__name__}')
        m.to(gpu)

    module_in_gpu = m
    return
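
# Illustrative arithmetic for the check above (hypothetical numbers, not measurements):
# with 10 GB free, a 6 GB model plus the 1.5 GB inference reserve leaves ~2.5 GB, so the
# module is moved to the GPU outright; a 9 GB model would leave a negative remainder and
# be installed through DynamicSwapInstaller instead.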


class GPUObject:
    # Context manager that records every torch.nn.Module constructed or .to()-moved
    # inside its block, so the whole group can later be shuttled between CPU and GPU.
    def __init__(self):
        self.module_list = []

    def __enter__(self):
        self.original_init = torch.nn.Module.__init__
        self.original_to = torch.nn.Module.to

        def patched_init(module, *args, **kwargs):
            self.module_list.append(module)
            return self.original_init(module, *args, **kwargs)

        def patched_to(module, *args, **kwargs):
            self.module_list.append(module)
            return self.original_to(module, *args, **kwargs)

        torch.nn.Module.__init__ = patched_init
        torch.nn.Module.to = patched_to
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        torch.nn.Module.__init__ = self.original_init
        torch.nn.Module.to = self.original_to
        self.module_list = set(self.module_list)
        self.to(device=torch.device('cpu'))
        memory_management.soft_empty_cache()
        return

    def to(self, device):
        for module in self.module_list:
            module.to(device)
        print(f'Forge Space: Moved {len(self.module_list)} Modules to {device}')
        return self

    def gpu(self):
        self.to(device=gpu)
        return self


def capture_gpu_object():
    return GPUObject()
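
# Minimal usage sketch (illustrative only; the pipeline class and checkpoint name below
# are placeholders, not part of this module):
#
#     with capture_gpu_object() as gpu_object:
#         pipe = SomePipeline.from_pretrained('some/checkpoint')
#
# Every module built or moved inside the `with` block is recorded in `gpu_object` and
# parked on CPU when the block exits.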


def GPU(gpu_objects=None, manual_load=False):
    # Decorator for GPU-bound entry points: unloads other models, optionally moves the
    # captured GPUObjects to the GPU before the call, and returns everything to CPU after.
    gpu_objects = gpu_objects or []

    if not isinstance(gpu_objects, (list, tuple)):
        gpu_objects = [gpu_objects]

    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            print("Entering Forge Space GPU ...")
            memory_management.unload_all_models()

            if not manual_load:
                for o in gpu_objects:
                    o.gpu()

            result = func(*args, **kwargs)

            print("Cleaning Forge Space GPU ...")
            unload_module()

            for o in gpu_objects:
                o.to(device=torch.device('cpu'))

            memory_management.soft_empty_cache()
            return result

        return wrapper

    return decorator
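
# Typical use together with capture_gpu_object (a sketch; `gpu_object`, `pipe` and the
# prompt argument come from the example above and are not defined in this module):
#
#     @GPU(gpu_objects=[gpu_object])
#     def generate(prompt):
#         return pipe(prompt).images
#
# With manual_load=True the captured modules stay on CPU until the wrapped function
# loads them itself, for example via GPUObject.gpu() or load_module().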


def convert_root_path():
    # Resolve the 'huggingface_space_mirror' directory that sits next to the caller's file.
    frame = inspect.currentframe().f_back
    caller_file = frame.f_code.co_filename
    caller_file = os.path.abspath(caller_file)
    result = os.path.join(os.path.dirname(caller_file), 'huggingface_space_mirror')
    return result + '/'
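
# Example (hypothetical file name; the returned path always ends with a slash):
#
#     model_path = convert_root_path() + 'model.safetensors'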


def automatically_move_to_gpu_when_forward(m: torch.nn.Module, target_model: torch.nn.Module = None):
    # Hook m's forward/encode/decode so that target_model is loaded onto the GPU
    # (via load_module) right before any of those methods run.
    if target_model is None:
        target_model = m

    def patch_method(method_name):
        if not hasattr(m, method_name):
            return

        if not hasattr(m, 'forge_space_hooked_names'):
            m.forge_space_hooked_names = []

        if method_name in m.forge_space_hooked_names:
            return

        print(f'Automatic hook: {type(m).__name__}.{method_name}')
        original_method = getattr(m, method_name)

        def patched_method(*args, **kwargs):
            load_module(target_model)
            return original_method(*args, **kwargs)

        setattr(m, method_name, patched_method)
        m.forge_space_hooked_names.append(method_name)
        return

    for method_name in ['forward', 'encode', 'decode']:
        patch_method(method_name)

    return


def automatically_move_pipeline_components(pipe):
    # Install the on-demand GPU hook on every nn.Module attribute of a pipeline.
    for attr_name in dir(pipe):
        attr_value = getattr(pipe, attr_name, None)
        if isinstance(attr_value, torch.nn.Module):
            automatically_move_to_gpu_when_forward(attr_value)
    return


def change_attention_from_diffusers_to_forge(m):
    m.set_attn_processor(AttentionProcessorForge())
    return
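
# Putting the helpers together for a diffusers-style pipeline (a sketch; `pipe` and its
# `unet` attribute are assumptions about the caller's object, not guarantees):
#
#     automatically_move_pipeline_components(pipe)
#     change_attention_from_diffusers_to_forge(pipe.unet)
#
# Each component is then pulled onto the GPU only when its forward/encode/decode is
# actually called, and the previously loaded component is evicted first.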