| |
| import torch |
| from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, AutoencoderKL |
| from PIL import Image, ImageFilter, ImageEnhance |
| import time |
| import cv2 |
| import numpy as np |
| from torchvision import transforms |
|
|
# Import-time banner announcing which face-fix variant this module implements.
print("="*60)
print("FACE-FIX: REINE QUALITÄTSVERBESSERUNG - MINIMALE ÄNDERUNG")
print("="*60)


# Module-level lazy-loading state. Heavy components are loaded at most once by
# _initialize_components() / apply_facefix() and cached here across calls.
_components_loaded = False  # True once _initialize_components() has succeeded
_controlnet_depth = None  # deliberately kept disabled (set to None on init)
_controlnet_pose = None  # ControlNetModel (OpenPose), set on first init
_pipeline = None  # StableDiffusionControlNetPipeline, lazily built in apply_facefix
|
|
def _initialize_components():
    """Load only the components that are actually needed (OpenPose ControlNet).

    Idempotent: returns True immediately when a previous call already
    succeeded. Returns False (without raising) when loading fails, so the
    caller can fall back to the non-AI path.
    """
    global _components_loaded, _controlnet_depth, _controlnet_pose

    # Fast path: a previous call already loaded everything.
    if _components_loaded:
        return True

    print("⚠️ Lade nur OpenPose (Depth wird deaktiviert)...")

    try:
        _controlnet_pose = ControlNetModel.from_pretrained(
            "lllyasviel/sd-controlnet-openpose",
            torch_dtype=torch.float16,
        )
    except Exception as err:
        # Download/instantiation failed; state stays unset so a retry is possible.
        print(f"❌ Fehler: {err}")
        return False

    print("✅ OpenPose geladen")

    # Depth ControlNet is deliberately disabled — per the banner above it
    # alters the image too much for a "minimal change" face fix.
    _controlnet_depth = None

    _components_loaded = True
    return True
|
|
def _extract_precise_pose(image):
    """Build a face-focused edge map to use as ControlNet conditioning.

    Detects faces with a Haar cascade and keeps only the strongest Canny
    edges inside each face rectangle (red channel of an otherwise black RGB
    image). When no face is found, faint global edges are used instead.

    Args:
        image: PIL image (any mode; converted to RGB internally).

    Returns:
        PIL RGB image the same size as `image`.
    """
    try:
        img_array = np.array(image.convert("RGB"))
        gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)

        # Low thresholds on purpose: capture even weak facial edges, then
        # filter down to the strongest ones per face region below.
        edges = cv2.Canny(gray, 15, 45)

        face_cascade = cv2.CascadeClassifier(
            cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
        )
        faces = face_cascade.detectMultiScale(gray, 1.1, 4)

        # Conditioning map: black everywhere except the red channel.
        pose_map = np.zeros_like(img_array)

        if len(faces) > 0:
            for (x, y, w, h) in faces:
                face_region = edges[y:y+h, x:x+w]
                nonzero = face_region[face_region > 0]
                # Guard: np.percentile raises on an empty array, which
                # previously aborted the whole extraction via the bare
                # except. If the face crop has no edges, keep it as-is.
                if nonzero.size > 0:
                    threshold = np.percentile(nonzero, 90)
                    face_region[face_region < threshold] = 0
                pose_map[y:y+h, x:x+w, 0] = face_region
        else:
            # No face detected: fall back to very faint global edges.
            pose_map[..., 0] = (edges * 0.3).astype(np.uint8)

        return Image.fromarray(pose_map)
    except Exception:
        # Last-resort fallback: faint global Canny edges as RGB. Cast back to
        # uint8 so PIL receives a proper 8-bit image (a float array would
        # become a mode-"F" image).
        gray = cv2.cvtColor(np.array(image.convert("RGB")), cv2.COLOR_RGB2GRAY)
        edges = (cv2.Canny(gray, 10, 30) * 0.2).astype(np.uint8)
        return Image.fromarray(edges).convert("RGB")
|
|
def _apply_face_enhancement(image):
    """Simple non-AI enhancement: mild sharpen, light denoise, CLAHE contrast.

    Args:
        image: PIL image (any mode; converted to RGB internally).

    Returns:
        Enhanced PIL RGB image, or the input unchanged if any step fails.
    """
    try:
        img_array = np.array(image.convert("RGB"))

        # Mild sharpening kernel. The kernel sums to 1.0 (eight * -0.5 + 5.0)
        # so overall brightness is preserved — the previous "/ 3.0" scaled the
        # whole image down to a third of its brightness, a normalization bug.
        kernel = np.array([[-0.5, -0.5, -0.5],
                           [-0.5,  5.0, -0.5],
                           [-0.5, -0.5, -0.5]])
        sharpened = cv2.filter2D(img_array, -1, kernel)

        # Light denoising to suppress sharpening artifacts (small h values
        # keep the effect subtle).
        denoised = cv2.fastNlMeansDenoisingColored(sharpened, None, 3, 3, 7, 21)

        # Local contrast enhancement (CLAHE) on the lightness channel only,
        # leaving color (a/b) untouched.
        lab = cv2.cvtColor(denoised, cv2.COLOR_RGB2LAB)
        l, a, b = cv2.split(lab)
        clahe = cv2.createCLAHE(clipLimit=1.0, tileGridSize=(8, 8))
        l = clahe.apply(l)
        enhanced = cv2.merge([l, a, b])
        enhanced = cv2.cvtColor(enhanced, cv2.COLOR_LAB2RGB)

        return Image.fromarray(enhanced)
    except Exception:
        # Best-effort by design: any failure returns the original image.
        return image
|
|
def apply_facefix(image: Image.Image, prompt: str, negative_prompt: str, seed: int, model_id: str):
    """
    Super-subtle quality improvement of a generated image.

    Strategy:
    1. ONLY OpenPose (no Depth — that alters the image too much)
    2. VERY low ControlNet strength
    3. Almost no CFG scale
    4. Identical prompt

    NOTE(review): with use_ai_enhancement hard-coded to False below, only the
    fast non-AI enhancement runs and everything after the first return is
    dead code kept for experimentation.

    Returns a PIL image (either the non-AI-enhanced input or, with the AI
    path enabled, a 70/30 blend of original and AI result).
    """
    print("\n" + "🎯"*50)
    print("SUBTILE QUALITÄTSVERBESSERUNG")
    print(f" Größe: {image.size}")
    print("🎯"*50)

    start_time = time.time()

    # Option 1: cheap, deterministic non-AI enhancement (sharpen/denoise/CLAHE).
    print("\n⚡ OPTION 1: Einfache non-AI Verbesserung...")
    enhanced = _apply_face_enhancement(image)

    # Feature flag: AI path is disabled — flip to True to enable the
    # ControlNet branch below.
    use_ai_enhancement = False

    if not use_ai_enhancement:
        duration = time.time() - start_time
        print(f"✅ Non-AI Verbesserung in {duration:.1f}s")
        return enhanced

    # ---- AI path (currently unreachable, see flag above) ----
    print("⚠️ Starte MINIMALE AI-Verbesserung...")

    if not _initialize_components():
        # ControlNet could not be loaded — fall back to the non-AI result.
        return enhanced

    # The pipeline runs at a fixed 512x512; remember the original size so the
    # result can be scaled back afterwards.
    original_size = image.size
    control_size = (512, 512)
    resized_image = image.resize(control_size, Image.Resampling.LANCZOS)

    # Build the face-focused conditioning map and dump it for debugging.
    pose_img = _extract_precise_pose(resized_image)
    pose_img.save("debug_minimal_pose.png")

    # Lazily build and cache the ControlNet pipeline on first use.
    global _pipeline
    if _pipeline is None:
        try:
            print("🔄 Lade Pipeline...")
            _pipeline = StableDiffusionControlNetPipeline.from_pretrained(
                model_id,
                controlnet=[_controlnet_pose],
                torch_dtype=torch.float16,
                safety_checker=None,
                requires_safety_checker=False,
            )

            # Memory savers for smaller GPUs.
            _pipeline.enable_attention_slicing()
            _pipeline.enable_vae_slicing()

            print("✅ Pipeline geladen")
        except Exception as e:
            print(f"❌ Pipeline Fehler: {e}")
            return enhanced

    try:
        device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f" Device: {device}")
        pipeline = _pipeline.to(device)

        print("\n⚙️ EXTREM SUBTILE PARAMETER:")
        print(" • OpenPose Strength: 0.3 (SEHR NIEDRIG)")
        print(" • Steps: 15 (wenig)")
        print(" • CFG: 2.0 (fast kein Guidance)")
        print(" • Gleicher Seed")

        # Deliberately conservative parameters: low conditioning scale, few
        # steps, near-zero guidance. seed + 100 offsets the generator so this
        # pass does not exactly reproduce the original generation.
        result = pipeline(
            prompt=prompt,
            negative_prompt=f"{negative_prompt}, deformed, blurry",
            image=[pose_img],
            controlnet_conditioning_scale=[0.3],
            num_inference_steps=15,
            guidance_scale=2.0,
            generator=torch.Generator(device).manual_seed(seed + 100),
            height=512,
            width=512,
        ).images[0]

        # Scale back to the caller's original resolution.
        if original_size != (512, 512):
            result = result.resize(original_size, Image.Resampling.LANCZOS)

        # Blend 70% original / 30% AI so the change stays subtle.
        result_array = np.array(result).astype(float)
        original_array = np.array(image).astype(float)

        blended = (original_array * 0.7 + result_array * 0.3).astype(np.uint8)
        final_result = Image.fromarray(blended)

        duration = time.time() - start_time
        print(f"\n✅ SUBTILE VERBESSERUNG in {duration:.1f}s")
        print(f" • 70% Original, 30% AI")
        print(f" • OpenPose: 0.3")
        print(f" • CFG: 2.0")

        return final_result

    except Exception as e:
        # Any failure in the AI path falls back to the non-AI result.
        print(f"\n❌ AI-Verbesserung fehlgeschlagen: {e}")
        return enhanced
|
|
# Import-time footer banner: confirms the module finished loading.
print("="*60)
print("FACE-FIX: REINE QUALITÄTSVERBESSERUNG")
print("="*60)