Spaces:

Astridkraft
/

Stable-ControlNet-GPU

Paused

App Files Files Community

Astridkraft committed 23 days ago

Commit

eb80db4

verified ·

1 Parent(s): fa3db93

Update controlnet_module.py

Browse files

Files changed (1) hide show

controlnet_module.py +53 -78

controlnet_module.py CHANGED Viewed

@@ -103,11 +103,10 @@ class ControlNetProcessor:
             print(f"⚠️ Fehler beim Glätten der Maske: {e}")
             return mask_array
     def create_sam_mask(self, image, bbox_coords, mode):
         """
         ERWEITERTE Funktion: Erstellt präzise Maske mit SAM 2
-        Sonderbehandlung für face_only_change: Arbeitet auf Bildausschnitt
         """
         try:
             print("#" * 80)
@@ -170,7 +169,7 @@ class ControlNetProcessor:
                 print(f"   📏 BBox Dimensionen: {bbox_width} × {bbox_height} px")
                 print(f"   📐 Maximale BBox-Dimension: {bbox_max_dim} px")
-                # ERHÖHT: Crop-Größe berechnen (BBox × 2.5 für mehr Kontext)
                 crop_size = int(bbox_max_dim * 2.5)
                 print(f"   🎯 Ziel-Crop-Größe: {crop_size} × {crop_size} px (BBox × 2.5)")
@@ -265,41 +264,18 @@ class ControlNetProcessor:
             print(f"   BBox Koordinaten: [{x1}, {y1}, {x2}, {y2}]")
             print(f"   BBox Dimensionen: {x2-x1}px × {y2-y1}px")
-            # 3. Vorbereitung für SAM2
             print("-" * 60)
             print("🖼️  BILDAUFBEREITUNG FÜR SAM 2")
             image_np = np.array(image.convert("RGB"))
-            # ============================================================
-            # NEU: ERWEITERTE SAM-EINGABE FÜR GESICHTSMODUS
-            # ============================================================
-            print("🎯 SCHRITT 4-5: ERWEITERTE SAM-PROMPTING")
-            bbox_width = x2 - x1
-            bbox_height = y2 - y1
-            # Für Gesichtsmodus: Verstärkte BBox-Prompts
-            if mode == "face_only_change":
-                # 1. Haupt-BBox (ursprüngliche Koordinaten)
-                input_boxes = [[[x1, y1, x2, y2]]]
-                # 2. ERWEITERTE BBox für Gesichtskontext (15% größer)
-                expand_factor = 0.15
-                expanded_x1 = max(0, int(x1 - bbox_width * expand_factor))
-                expanded_y1 = max(0, int(y1 - bbox_height * expand_factor))
-                expanded_x2 = min(image.width, int(x2 + bbox_width * expand_factor))
-                expanded_y2 = min(image.height, int(y2 + bbox_height * expand_factor))
-                input_boxes.append([[expanded_x1, expanded_y1, expanded_x2, expanded_y2]])
-                print(f"   Haupt-BBox: [{x1}, {y1}, {x2}, {y2}]")
-                print(f"   Erweiterte BBox: [{expanded_x1}, {expanded_y1}, {expanded_x2}, {expanded_y2}]")
-                print(f"   Anzahl BBox-Prompts: {len(input_boxes)}")
-            else:
-                # Standard für andere Modi
-                input_boxes = [[[x1, y1, x2, y2]]]
-                print(f"   Standard-BBox: [{x1}, {y1}, {x2}, {y2}]")
             print("   Verarbeite Bild mit SAM 2 Processor...")
             inputs = self.sam_processor(
                 image_np,
@@ -555,7 +531,7 @@ class ControlNetProcessor:
             if mode == "face_only_change":
                 print("👤 GESICHTS-SPEZIFISCHES POSTPROCESSING")
-                # 1. Größte zusammenhängende Komponente finden (sollte der Kopf sein)
                 labeled_array, num_features = ndimage.label(mask_array)
                 if num_features > 0:
@@ -569,37 +545,7 @@ class ControlNetProcessor:
                     # NUR die größte Komponente behalten (der Kopf)
                     mask_array = np.where(labeled_array == largest_component_idx, mask_array, 0)
-                    # 2. FORMBASIERTE OPTIMIERUNG FÜR KOPF
-                    print("   🎯 Formbasierte Optimierung für Kopf")
-                    # Hole die Region-Eigenschaften für die größte Komponente
-                    labeled_single = np.where(labeled_array == largest_component_idx, 1, 0).astype(np.uint8)
-                    regions = measure.regionprops(labeled_single)
-                    if regions:
-                        region = regions[0]
-                        # Erweiterte Bounding Box für Kopf (etwas größer)
-                        minr, minc, maxr, maxc = region.bbox
-                        head_bbox_height = maxr - minr
-                        head_bbox_width = maxc - minc
-                        # Kopf sollte etwa 1.2-1.5 mal höher als breit sein
-                        aspect_ratio = head_bbox_height / head_bbox_width if head_bbox_width > 0 else 1.0
-                        print(f"   📏 Kopf-BBox: {head_bbox_width}×{head_bbox_height} (Ratio: {aspect_ratio:.2f})")
-                        # Wenn der Kopf zu "flach" ist (z.B. nur Haare), vertikal erweitern
-                        if aspect_ratio < 1.0 and head_bbox_height < bbox_height * 0.8:
-                            print(f"   ⬇️  Kopf zu flach, vertikal erweitern")
-                            expand_y = int((bbox_height * 0.8 - head_bbox_height) / 2)
-                            minr = max(0, minr - expand_y)
-                            maxr = min(mask_array.shape[0], maxr + expand_y)
-                            # Fülle den erweiterten Bereich
-                            mask_array[minr:maxr, minc:maxc] = 255
-                    # 3. MORPHOLOGISCHE OPERATIONEN FÜR SAUBEREN KOPF
                     print("   ⚙️  Morphologische Operationen für sauberen Kopf")
                     # Zuerst CLOSE, um kleine Löcher im Kopf zu füllen
@@ -611,33 +557,49 @@ class ControlNetProcessor:
                     kernel_open = np.ones((5, 5), np.uint8)
                     mask_array = cv2.morphologyEx(mask_array, cv2.MORPH_OPEN, kernel_open, iterations=1)
                     print("     • MORPH_OPEN (5x5) - Rauschen entfernen")
-                    # Sanfte Glättung der Kanten
-                    mask_array = cv2.GaussianBlur(mask_array, (5, 5), 1.0)
-                    mask_array = (mask_array > 127).astype(np.uint8) * 255
-                    print("     • GaussianBlur + Re-Threshold - Glatte Kanten")
-                # 4. MASKE ZURÜCK AUF ORIGINALGRÖSSE (nur für face_only_change)
                 print("-" * 60)
-                print("🔄 MASKE VOM AUSSCHNITT ZURÜCK AUF ORIGINALGRÖSSE")
                 temp_mask = Image.fromarray(mask_array).convert("L")
                 print(f"   Maskengröße auf Ausschnitt: {temp_mask.size}")
                 final_mask = Image.new("L", original_image.size, 0)
                 print(f"   Leere Maske in Originalgröße: {final_mask.size}")
-                final_mask.paste(temp_mask, (crop_x1, crop_y1))
-                print(f"   Maskenposition im Original: ({crop_x1}, {crop_y1})")
                 mask_array = np.array(final_mask)
                 print(f"   ✅ Maske zurück auf Originalgröße skaliert: {mask_array.shape}")
                 image = original_image
                 print(f"   🔄 Bild-Referenz wieder auf Original gesetzt: {image.size}")
             elif mode == "focus_change":
                 print("🎯 FOCUS-CHANGE POSTPROCESSING")
                 mask_array = mask_array.copy()
                 # Größte weiße Komponente behalten (Person)
@@ -655,6 +617,13 @@ class ControlNetProcessor:
             elif mode == "environment_change":
                 print("🌳 ENVIRONMENT-CHANGE POSTPROCESSING")
                 mask_array = 255 - mask_array  # Invertiere Maske
                 print("   ✅ Maske invertiert (Person schwarz, Hintergrund weiß)")
@@ -682,10 +651,8 @@ class ControlNetProcessor:
                 # Warnungen basierend auf Abdeckung
                 if coverage_ratio < 0.7:
                     print(f"   ⚠️  WARNUNG: Geringe Gesichtsabdeckung ({coverage_ratio:.1%})")
-                    print(f"   💡 Tipp: BBox könnte zu groß sein oder SAM erkennt Gesicht nicht vollständig")
                 elif coverage_ratio > 1.3:
                     print(f"   ⚠️  WARNUNG: Sehr hohe Gesichtsabdeckung ({coverage_ratio:.1%})")
-                    print(f"   💡 Tipp: Maske könnte zu viel Hintergrund enthalten")
                 elif 0.8 <= coverage_ratio <= 1.2:
                     print(f"   ✅ OPTIMALE Gesichtsabdeckung ({coverage_ratio:.1%})")
@@ -711,9 +678,17 @@ class ControlNetProcessor:
             print(f"Fehler: {str(e)[:200]}")
             import traceback
             traceback.print_exc()
             print("ℹ️ Fallback auf rechteckige Maske")
-            return self._create_rectangular_mask(image, bbox_coords, mode)
     def _create_rectangular_mask(self, image, bbox_coords, mode):
         """Fallback: Erstellt rechteckige Maske"""

             print(f"⚠️ Fehler beim Glätten der Maske: {e}")
             return mask_array
     def create_sam_mask(self, image, bbox_coords, mode):
         """
         ERWEITERTE Funktion: Erstellt präzise Maske mit SAM 2
+        Korrigierte Version für face_only_change mit einzelner BBox
         """
         try:
             print("#" * 80)
                 print(f"   📏 BBox Dimensionen: {bbox_width} × {bbox_height} px")
                 print(f"   📐 Maximale BBox-Dimension: {bbox_max_dim} px")
+                # Crop-Größe berechnen (BBox × 2.5)
                 crop_size = int(bbox_max_dim * 2.5)
                 print(f"   🎯 Ziel-Crop-Größe: {crop_size} × {crop_size} px (BBox × 2.5)")
             print(f"   BBox Koordinaten: [{x1}, {y1}, {x2}, {y2}]")
             print(f"   BBox Dimensionen: {x2-x1}px × {y2-y1}px")
+            # 3. Vorbereitung für SAM2 - WICHTIG: NUR EINE BBOX
             print("-" * 60)
             print("🖼️  BILDAUFBEREITUNG FÜR SAM 2")
             image_np = np.array(image.convert("RGB"))
+            # KORREKTUR: Immer nur eine BBox verwenden (SAM 2 erwartet genau 1)
+            input_boxes = [[[x1, y1, x2, y2]]]
+            print(f"   Konvertiere Bild zu NumPy Array: {image_np.shape}")
+            print(f"   Erstelle EINZIGE Input Box: {input_boxes}")
+            print("   ℹ️  SAM 2 erwartet genau eine BBox pro Vorhersage")
+            print("🎯 SCHRITT 4-5: SAM MIT BOX-PROMPT")
             print("   Verarbeite Bild mit SAM 2 Processor...")
             inputs = self.sam_processor(
                 image_np,
             if mode == "face_only_change":
                 print("👤 GESICHTS-SPEZIFISCHES POSTPROCESSING")
+                # 1. Größte zusammenhängende Komponente finden
                 labeled_array, num_features = ndimage.label(mask_array)
                 if num_features > 0:
                     # NUR die größte Komponente behalten (der Kopf)
                     mask_array = np.where(labeled_array == largest_component_idx, mask_array, 0)
+                    # 2. MORPHOLOGISCHE OPERATIONEN FÜR SAUBEREN KOPF
                     print("   ⚙️  Morphologische Operationen für sauberen Kopf")
                     # Zuerst CLOSE, um kleine Löcher im Kopf zu füllen
                     kernel_open = np.ones((5, 5), np.uint8)
                     mask_array = cv2.morphologyEx(mask_array, cv2.MORPH_OPEN, kernel_open, iterations=1)
                     print("     • MORPH_OPEN (5x5) - Rauschen entfernen")
+                # ============================================================
+                # KRITISCH: MASKE IMMER ZURÜCK AUF ORIGINALGRÖSSE (auch bei Fallback!)
+                # ============================================================
                 print("-" * 60)
+                print("🔄 MASKE IMMER ZURÜCK AUF ORIGINALGRÖSSE TRANSFORMIEREN")
+                # WICHTIG: Immer die richtigen Crop-Koordinaten verwenden
                 temp_mask = Image.fromarray(mask_array).convert("L")
                 print(f"   Maskengröße auf Ausschnitt: {temp_mask.size}")
+                # Maske auf ORIGINALBILDGRÖSSE bringen
                 final_mask = Image.new("L", original_image.size, 0)
                 print(f"   Leere Maske in Originalgröße: {final_mask.size}")
+                # Immer die gespeicherten Crop-Koordinaten verwenden
+                if crop_x1 is not None and crop_y1 is not None:
+                    final_mask.paste(temp_mask, (crop_x1, crop_y1))
+                    print(f"   Maskenposition im Original: ({crop_x1}, {crop_y1})")
+                else:
+                    # Fallback: Zentrieren
+                    x_offset = (original_image.width - temp_mask.width) // 2
+                    y_offset = (original_image.height - temp_mask.height) // 2
+                    final_mask.paste(temp_mask, (x_offset, y_offset))
+                    print(f"   ⚠️  Keine Crop-Koordinaten, zentriert: ({x_offset}, {y_offset})")
                 mask_array = np.array(final_mask)
                 print(f"   ✅ Maske zurück auf Originalgröße skaliert: {mask_array.shape}")
+                # Bild-Referenz zurücksetzen
                 image = original_image
                 print(f"   🔄 Bild-Referenz wieder auf Original gesetzt: {image.size}")
             elif mode == "focus_change":
                 print("🎯 FOCUS-CHANGE POSTPROCESSING")
+                # Für focus_change: Originalbildgröße beibehalten
+                if image.size != original_image.size:
+                    print(f"   ⚠️  Bildgröße angepasst: {image.size} → {original_image.size}")
+                    temp_mask = Image.fromarray(mask_array).convert("L")
+                    temp_mask = temp_mask.resize(original_image.size, Image.Resampling.NEAREST)
+                    mask_array = np.array(temp_mask)
                 mask_array = mask_array.copy()
                 # Größte weiße Komponente behalten (Person)
             elif mode == "environment_change":
                 print("🌳 ENVIRONMENT-CHANGE POSTPROCESSING")
+                # Für environment_change: Originalbildgröße beibehalten
+                if image.size != original_image.size:
+                    temp_mask = Image.fromarray(mask_array).convert("L")
+                    temp_mask = temp_mask.resize(original_image.size, Image.Resampling.NEAREST)
+                    mask_array = np.array(temp_mask)
                 mask_array = 255 - mask_array  # Invertiere Maske
                 print("   ✅ Maske invertiert (Person schwarz, Hintergrund weiß)")
                 # Warnungen basierend auf Abdeckung
                 if coverage_ratio < 0.7:
                     print(f"   ⚠️  WARNUNG: Geringe Gesichtsabdeckung ({coverage_ratio:.1%})")
                 elif coverage_ratio > 1.3:
                     print(f"   ⚠️  WARNUNG: Sehr hohe Gesichtsabdeckung ({coverage_ratio:.1%})")
                 elif 0.8 <= coverage_ratio <= 1.2:
                     print(f"   ✅ OPTIMALE Gesichtsabdeckung ({coverage_ratio:.1%})")
             print(f"Fehler: {str(e)[:200]}")
             import traceback
             traceback.print_exc()
+            # WICHTIG: Im Fallback immer die richtige Größe zurückgeben
             print("ℹ️ Fallback auf rechteckige Maske")
+            fallback_mask = self._create_rectangular_mask(original_image, original_bbox, mode)
+            # Sicherstellen, dass die Maske die richtige Größe hat
+            if fallback_mask.size != original_image.size:
+                print(f"   ⚠️  Fallback-Maske angepasst: {fallback_mask.size} → {original_image.size}")
+                fallback_mask = fallback_mask.resize(original_image.size, Image.Resampling.NEAREST)
+            return fallback_mask
     def _create_rectangular_mask(self, image, bbox_coords, mode):
         """Fallback: Erstellt rechteckige Maske"""