Spaces:

crash10155
/

Switcher

Paused

File size: 13,768 Bytes

from typing import Any, List, Callable, Dict, Optional
import cv2
import threading
import numpy
from functools import lru_cache
from pathlib import Path

import SwitcherAI.processors.frame.core as frame_processors
from SwitcherAI.typing import Frame, Face
from SwitcherAI.utilities import conditional_download, resolve_relative_path

# Module-level state shared by the functions in this file.
# Lazily-created enhancer instance; get_frame_processor() fills it in and
# clear_frame_processor() resets it to None.
FRAME_PROCESSOR = None
# Single-permit semaphore held around processor.enhance() calls.
THREAD_SEMAPHORE = threading.Semaphore(1)
# Held by get_frame_processor() while it checks/creates FRAME_PROCESSOR.
THREAD_LOCK = threading.Lock()
# Identifier string for this processor; not read anywhere in this file.
NAME = 'FACEFUSION.FRAME_PROCESSOR.FRAME_ENHANCER'

# Enhanced model configuration inspired by FaceFusion
@lru_cache(maxsize=None)
def get_model_config() -> Dict[str, Any]:
    """Return the settings of every supported enhancer model, keyed by model name.

    The result is memoised by ``lru_cache``, so every caller receives the
    same dictionary object until ``get_model_config.cache_clear()`` is called.

    Returns:
        A mapping with a single ``'real_esrgan_x4'`` entry holding the weight
        file location, the scale factor, the tiling settings and the network
        dimensions.
    """
    models_dir = resolve_relative_path('../.assets/models')
    # The helper may hand back a plain string; "/" joining needs a Path.
    if isinstance(models_dir, str):
        models_dir = Path(models_dir)

    real_esrgan_x4 = dict(
        model_path=models_dir / 'RealESRGAN_x4plus.pth',
        scale=4,
        tile_size=256,
        tile_pad=16,
        num_feat=64,
        num_block=23,
        num_grow_ch=32,
    )
    return {'real_esrgan_x4': real_esrgan_x4}


def get_frame_processor() -> Any:
    """Return the shared Real-ESRGAN enhancer, creating it on first use.

    Creation happens while holding ``THREAD_LOCK``. If the weight file is
    missing, ``pre_check()`` is asked to download it first.

    Returns:
        The cached ``RealESRGANer`` instance, or ``None`` when the required
        packages cannot be imported, the model cannot be downloaded, or the
        initialisation raises.
    """
    global FRAME_PROCESSOR

    with THREAD_LOCK:
        # Fast path: an instance was already built by an earlier call.
        if FRAME_PROCESSOR is not None:
            return FRAME_PROCESSOR

        try:
            # Imported lazily so the module still loads without Real-ESRGAN.
            from basicsr.archs.rrdbnet_arch import RRDBNet
            from realesrgan import RealESRGANer
            import torch

            settings = get_model_config()['real_esrgan_x4']
            weights_path = settings['model_path']

            if not weights_path.exists():
                print(f"⚠️ Real-ESRGAN model not found at: {weights_path}")
                print("🔄 Attempting to download model...")
                if not pre_check():
                    print("❌ Failed to download Real-ESRGAN model")
                    return None

            network = RRDBNet(
                num_in_ch=3,
                num_out_ch=3,
                num_feat=settings['num_feat'],
                num_block=settings['num_block'],
                num_grow_ch=settings['num_grow_ch'],
                scale=settings['scale'],
            )
            FRAME_PROCESSOR = RealESRGANer(
                model_path=str(weights_path),
                model=network,
                device=frame_processors.get_device(),
                tile=settings['tile_size'],
                tile_pad=settings['tile_pad'],
                pre_pad=0,
                scale=settings['scale'],
            )

            # Select the first CUDA device when one is available.
            if torch.cuda.is_available():
                torch.cuda.set_device(0)

            print("✅ Real-ESRGAN frame processor initialized")

        except ImportError as error:
            print(f"⚠️ Real-ESRGAN not available: {error}")
            print("💡 Install with: pip install realesrgan basicsr")
            FRAME_PROCESSOR = None
        except Exception as error:
            # Also discards an instance that was built before a later step failed.
            print(f"⚠️ Failed to initialize Real-ESRGAN: {error}")
            FRAME_PROCESSOR = None

    return FRAME_PROCESSOR


def clear_frame_processor() -> None:
    """Drop the cached processor so the next get_frame_processor() call rebuilds it."""
    global FRAME_PROCESSOR
    # NOTE(review): unlike get_frame_processor(), this write does not take
    # THREAD_LOCK — confirm it is never called while frames are being processed.
    FRAME_PROCESSOR = None


def pre_check() -> bool:
    """Make sure the Real-ESRGAN weight file is present in the models directory.

    Creates the directory when needed, hands the model URL to
    ``conditional_download`` and then checks the file on disk.

    Returns:
        ``True`` when ``RealESRGAN_x4plus.pth`` exists and is non-empty,
        ``False`` otherwise (including when any step raises).
    """
    try:
        models_dir = resolve_relative_path('../.assets/models')
        if isinstance(models_dir, str):
            models_dir = Path(models_dir)
        models_dir.mkdir(parents=True, exist_ok=True)

        conditional_download(
            str(models_dir),
            ['https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth'],
        )

        # An empty file counts as a failed download.
        weights_file = models_dir / 'RealESRGAN_x4plus.pth'
        if not (weights_file.exists() and weights_file.stat().st_size > 0):
            print("❌ Real-ESRGAN model download failed or file is empty")
            return False

        size_mb = weights_file.stat().st_size / (1024*1024)
        print(f"✅ Real-ESRGAN model verified: {size_mb:.1f}MB")
        return True

    except Exception as error:
        print(f"❌ Real-ESRGAN pre-check failed: {error}")
        return False


def pre_process() -> bool:
    """Report whether frame enhancement can run.

    Returns:
        ``True`` when ``get_frame_processor()`` yields a processor, ``False``
        when it yields ``None`` or raises.
    """
    try:
        if get_frame_processor() is not None:
            return True

        print("⚠️ Real-ESRGAN not available, frame enhancement will be skipped")
        return False

    except Exception as error:
        print(f"⚠️ Frame enhancement pre-process failed: {error}")
        return False


def post_process() -> None:
    """Release the cached processor and the memoised model configuration."""
    clear_frame_processor()
    # Clear cache as in FaceFusion version
    get_model_config.cache_clear()


def create_tile_frames(temp_vision_frame: Frame, tile_size: tuple = (256, 256)) -> tuple:
    """Split a frame into equally sized, non-overlapping tiles.

    The frame is reflect-padded on its bottom and right edges until both
    spatial dimensions are multiples of the tile size, then cut row by row.
    Frames with any number of trailing axes are accepted: 2-D grayscale,
    H x W x 3, H x W x 4 and so on.

    Args:
        temp_vision_frame: Array whose first two axes are height and width.
        tile_size: ``(tile_height, tile_width)`` of every tile.

    Returns:
        ``(tiles, pad_width, pad_height)`` where ``tiles`` is a list of array
        views in row-major order and the pads are the number of columns/rows
        that were appended.
    """
    height, width = temp_vision_frame.shape[:2]
    tile_height, tile_width = tile_size[0], tile_size[1]

    # Amount needed to reach the next multiple of the tile size (0 if aligned).
    pad_height = (tile_height - height % tile_height) % tile_height
    pad_width = (tile_width - width % tile_width) % tile_width

    if pad_height > 0 or pad_width > 0:
        # Pad only the two spatial axes; every trailing axis gets (0, 0) so
        # the call works regardless of how many channel axes the frame has.
        trailing_axes = temp_vision_frame.ndim - 2
        pad_spec = ((0, pad_height), (0, pad_width)) + ((0, 0),) * trailing_axes
        temp_vision_frame = numpy.pad(temp_vision_frame, pad_spec, mode='reflect')

    padded_height, padded_width = temp_vision_frame.shape[:2]
    tiles = [
        temp_vision_frame[y:y + tile_height, x:x + tile_width]
        for y in range(0, padded_height, tile_height)
        for x in range(0, padded_width, tile_width)
    ]

    return tiles, pad_width, pad_height


def merge_tile_frames(tiles: List[Frame], original_width: int, original_height: int, 
                     pad_width: int, pad_height: int, tile_size: tuple) -> Frame:
    """Reassemble row-major tiles into one frame and strip the padding.

    The number of channels is taken from the first tile, so grayscale (2-D)
    and 4-channel tiles are supported as well as 3-channel ones.

    Args:
        tiles: Tiles in row-major order, each ``tile_size`` large.
        original_width: Width of the frame without padding.
        original_height: Height of the frame without padding.
        pad_width: Columns of padding present on the right edge.
        pad_height: Rows of padding present on the bottom edge.
        tile_size: ``(tile_height, tile_width)`` of every tile.

    Returns:
        A ``uint8`` array of ``original_height`` x ``original_width``. Slots
        for which no tile was supplied stay zero; an empty ``tiles`` list
        yields an all-zero 3-channel frame.
    """
    tile_height, tile_width = tile_size[0], tile_size[1]
    padded_height = original_height + pad_height
    padded_width = original_width + pad_width

    # Mirror the trailing (channel) axes of the tiles instead of assuming 3.
    channel_shape = tuple(tiles[0].shape[2:]) if len(tiles) > 0 else (3,)
    result = numpy.zeros((padded_height, padded_width) + channel_shape, dtype=numpy.uint8)

    # Top-left corner of each slot, in the same order the tiles were cut.
    slot_origins = (
        (y, x)
        for y in range(0, padded_height, tile_height)
        for x in range(0, padded_width, tile_width)
    )
    for (y, x), tile in zip(slot_origins, tiles):
        result[y:y + tile_height, x:x + tile_width] = tile

    # Cropping is a no-op when no padding was added.
    return result[:original_height, :original_width]


def enhance_frame_with_tiling(temp_frame: Frame) -> Frame:
    """Enhance a frame tile by tile and stitch the results together.

    The frame is cut into ``tile_size`` squares, each square is passed to the
    processor with ``outscale`` equal to the model scale, and the enhanced
    tiles are merged. The returned frame is therefore ``scale`` times larger
    than the input in both dimensions.

    A tile whose enhancement fails is replaced by a plainly resized copy of
    the original tile, so one bad tile does not discard the whole frame.

    Args:
        temp_frame: Frame to enhance.

    Returns:
        The enhanced frame, or ``temp_frame`` itself (same object) when no
        processor is available or any step outside the per-tile loop raises.
    """
    try:
        processor = get_frame_processor()
        if processor is None:
            print("⚠️ Real-ESRGAN processor not available, returning original frame")
            return temp_frame
        
        config = get_model_config()['real_esrgan_x4']
        tile_size = (config['tile_size'], config['tile_size'])
        scale = config['scale']
        
        # Create tiles for processing
        tiles, pad_width, pad_height = create_tile_frames(temp_frame, tile_size)
        enhanced_tiles = []
        
        with THREAD_SEMAPHORE:
            for tile in tiles:
                try:
                    # Process each tile individually to manage memory
                    enhanced_tile, _ = processor.enhance(tile, outscale=scale)
                    enhanced_tiles.append(enhanced_tile)
                except Exception as e:
                    print(f"⚠️ Tile enhancement failed: {e}")
                    # The merge step expects every tile at the scaled size, so
                    # the untouched tile is upscaled to fit its slot.
                    # cv2.resize takes the target size as (width, height).
                    fallback_size = (tile.shape[1] * scale, tile.shape[0] * scale)
                    enhanced_tiles.append(
                        cv2.resize(tile, fallback_size, interpolation=cv2.INTER_CUBIC)
                    )
        
        # Merge tiles back together; every dimension is expressed in output pixels
        original_height, original_width = temp_frame.shape[:2]
        enhanced_frame = merge_tile_frames(
            enhanced_tiles, 
            original_width * scale, 
            original_height * scale,
            pad_width * scale, 
            pad_height * scale,
            (tile_size[0] * scale, tile_size[1] * scale)
        )
        
        return enhanced_frame
        
    except Exception as e:
        print(f"⚠️ Enhanced tiling failed: {e}")
        return temp_frame


def enhance_frame(temp_frame: Frame) -> Frame:
    """Enhance a frame, preferring the tiled method and falling back to a single pass.

    ``enhance_frame_with_tiling`` reports failure by returning the very frame
    object it was given rather than by raising, so failure is detected by
    identity. Without that check the single-pass fallback could never run.

    Args:
        temp_frame: Frame to enhance.

    Returns:
        The enhanced frame, or ``temp_frame`` unchanged when no processor is
        available or both methods fail.
    """
    try:
        processor = get_frame_processor()
        if processor is None:
            print("⚠️ Frame enhancer not available, returning original frame")
            return temp_frame
        
        # Try enhanced tiling method first
        try:
            tiled_frame = enhance_frame_with_tiling(temp_frame)
            if tiled_frame is not temp_frame:
                return tiled_frame
            # Same object back means the tiled path gave up (it already logged why).
            print("⚠️ Tiling method failed, trying simple enhancement")
        except Exception as e:
            print(f"⚠️ Tiling method failed: {e}, trying simple enhancement")
        
        # Fallback to original method
        with THREAD_SEMAPHORE:
            enhanced_frame, _ = processor.enhance(temp_frame, outscale=1)
            return enhanced_frame
                
    except Exception as e:
        print(f"⚠️ Frame enhancement failed completely: {e}")
        return temp_frame


def blend_frame(original_frame: Frame, enhanced_frame: Frame, blend_ratio: float = 0.8) -> Frame:
    """Mix the original frame into the enhanced one.

    Args:
        original_frame: Frame before enhancement; resized to the enhanced
            frame's width and height when the shapes differ.
        enhanced_frame: Frame after enhancement.
        blend_ratio: Weight of the enhanced frame; the original frame gets
            ``1 - blend_ratio``.

    Returns:
        The weighted sum of both frames, or ``enhanced_frame`` unchanged when
        blending raises.
    """
    try:
        shapes_match = original_frame.shape == enhanced_frame.shape
        if not shapes_match:
            # cv2.resize expects the target size as (width, height).
            target_size = (enhanced_frame.shape[1], enhanced_frame.shape[0])
            original_frame = cv2.resize(original_frame, target_size)

        original_weight = 1 - blend_ratio
        blended = cv2.addWeighted(original_frame, original_weight, enhanced_frame, blend_ratio, 0)
        return blended
    except Exception as error:
        print(f"⚠️ Frame blending failed: {error}")
        return enhanced_frame


def process_frame(source_face: Face, reference_face: Face, temp_frame: Frame) -> Frame:
    """Enhance one frame through the common frame-processor interface.

    Args:
        source_face: Unused by this processor.
        reference_face: Unused by this processor.
        temp_frame: Frame to enhance.

    Returns:
        The result of ``enhance_frame``, or ``temp_frame`` unchanged when
        that call raises.
    """
    try:
        enhanced = enhance_frame(temp_frame)
    except Exception as error:
        print(f"⚠️ Error in process_frame: {error}")
        return temp_frame
    return enhanced


def process_frames(source_path: str, temp_frame_paths: List[str], update: Callable[[], None]) -> None:
    """Enhance every frame file in place and report progress after each one.

    ``update`` is invoked exactly once per entry of ``temp_frame_paths``,
    whether the frame was enhanced, failed, or was skipped because no
    processor is available.

    Args:
        source_path: Unused by this processor.
        temp_frame_paths: Image files to read, enhance and overwrite.
        update: Progress callback taking no arguments; may be falsy to
            disable progress reporting.
    """
    try:
        processor = get_frame_processor()
        if processor is None:
            print("⚠️ Frame enhancer not available, skipping frame enhancement")
            if update:
                # Tick once per skipped frame, matching the per-frame ticks
                # of the normal path below.
                for _ in temp_frame_paths or []:
                    update()
            return
        
        for temp_frame_path in temp_frame_paths:
            try:
                temp_frame = cv2.imread(temp_frame_path)
                if temp_frame is not None:
                    result_frame = process_frame(None, None, temp_frame)
                    # cv2.imwrite signals failure through its return value.
                    if not cv2.imwrite(temp_frame_path, result_frame):
                        print(f"⚠️ Failed to write frame: {temp_frame_path}")
                else:
                    print(f"⚠️ Failed to read frame: {temp_frame_path}")
                    
            except Exception as e:
                print(f"⚠️ Error processing frame {temp_frame_path}: {e}")
                
            if update:
                update()
                
    except Exception as e:
        print(f"⚠️ Error in process_frames: {e}")


def process_image(source_path: str, target_path: str, output_path: str) -> None:
    """Enhance a single image file and write the result to ``output_path``.

    When no processor is available the target file is copied to the output
    path unchanged.

    Args:
        source_path: Unused by this processor.
        target_path: Image file to read.
        output_path: Destination of the enhanced (or copied) image.
    """
    try:
        if get_frame_processor() is None:
            print("⚠️ Frame enhancer not available, copying original image")
            import shutil
            shutil.copy2(target_path, output_path)
            return

        target_frame = cv2.imread(target_path)
        # cv2.imread yields None for unreadable files.
        if target_frame is None:
            print(f"⚠️ Failed to read image: {target_path}")
            return

        enhanced = process_frame(None, None, target_frame)
        cv2.imwrite(output_path, enhanced)

    except Exception as error:
        print(f"⚠️ Error in process_image: {error}")


def process_video(source_path: str, temp_frame_paths: List[str]) -> None:
    """Hand the extracted video frames to the shared frame-processor driver.

    Args:
        source_path: Unused; ``None`` is forwarded to the driver instead.
        temp_frame_paths: Frame image files to enhance via ``process_frames``.
    """
    try:
        frame_processors.process_video(None, temp_frame_paths, process_frames)
    except Exception as error:
        print(f"⚠️ Error in process_video: {error}")


# Additional utility functions inspired by FaceFusion
def get_model_scale() -> int:
    """Return the scale factor of the Real-ESRGAN model.

    Returns:
        The ``'scale'`` entry of the ``'real_esrgan_x4'`` configuration, or
        ``1`` when the configuration cannot be read.
    """
    try:
        return get_model_config()['real_esrgan_x4']['scale']
    except Exception:
        # Narrower than a bare except: KeyboardInterrupt and SystemExit
        # still propagate instead of being turned into a scale of 1.
        return 1


def prepare_frame(frame: Frame) -> Frame:
    """Convert a frame to ``uint8`` before processing.

    Values are converted with ``astype``; no clipping is applied.

    Args:
        frame: Array to convert.

    Returns:
        ``frame`` itself when it is already ``uint8`` or when the conversion
        fails (for example because the object has no ``dtype``), otherwise a
        ``uint8`` copy.
    """
    try:
        if frame.dtype != numpy.uint8:
            frame = frame.astype(numpy.uint8)
        return frame
    except Exception:
        # Best-effort: hand back the input untouched. Unlike a bare except,
        # this lets KeyboardInterrupt and SystemExit propagate.
        return frame


def normalize_frame(frame: Frame) -> Frame:
    """Clamp a frame to the 0-255 range and convert it to ``uint8``.

    Args:
        frame: Array to normalise.

    Returns:
        A clipped ``uint8`` array, or ``frame`` unchanged when clipping or
        conversion fails.
    """
    try:
        return numpy.clip(frame, 0, 255).astype(numpy.uint8)
    except Exception:
        # Best-effort: hand back the input untouched. Unlike a bare except,
        # this lets KeyboardInterrupt and SystemExit propagate.
        return frame