# Source: Hugging Face Space "RollAI/ChatWithTranscriptStaging" (status: Running).
# File size: 54,011 bytes.

import ast
import base64
import json
import os
from io import BytesIO

import cv2
import gradio as gr
import numpy as np
import pyrebase
import requests
from openai import OpenAI
from PIL import Image, ImageDraw, ImageFont
from ultralytics import YOLO

from prompts import remove_unwanted_prompt

# Shared YOLO detector built once at import time from the "yolo11n.pt" weights;
# the person-detection helpers below call it as `model(image, classes=[0], ...)`.
model = YOLO("yolo11n.pt")


def get_middle_thumbnail(
    input_image: "Image.Image", grid_size=(10, 10), padding=3
) -> "Image.Image":
    """
    Extract the middle thumbnail from a sprite sheet and strip its padding.

    Cells are counted in reading order (left to right, then top to bottom) and
    the cell with index ``total_cells // 2`` is returned. For a grid with an
    even number of columns (e.g. the default 10x10) that is the first cell of
    the middle row, i.e. the middle frame of the sequence rather than the
    geometric centre of the sheet.

    Args:
        input_image: PIL image containing the whole sprite sheet.
        grid_size: Tuple of (columns, rows).
        padding: Number of padding pixels on each side of every cell (default 3).

    Returns:
        PIL.Image: The middle thumbnail image with padding removed.
    """
    columns, rows = grid_size[0], grid_size[1]

    # Cell size is derived from the actual sheet size, so any aspect ratio works.
    sheet_width, sheet_height = input_image.size
    cell_width = sheet_width // columns
    cell_height = sheet_height // rows

    # Usable picture area inside one cell once the padding is removed.
    thumb_width = cell_width - 2 * padding
    thumb_height = cell_height - 2 * padding

    # Locate the middle cell in reading order.
    middle_row, middle_col = divmod((columns * rows) // 2, columns)

    # Skip the leading padding of the cell; the trailing padding is excluded
    # because the crop is only thumb_width x thumb_height.
    left = middle_col * cell_width + padding
    top = middle_row * cell_height + padding

    return input_image.crop((left, top, left + thumb_width, top + thumb_height))


def encode_image_to_base64(image: "Image.Image", format: str = "JPEG") -> str:
    """
    Convert a PIL image to a base64 string.

    Args:
        image: PIL Image object
        format: Image format to use for encoding (default: JPEG)

    Returns:
        Base64 encoded string of the image
    """
    # The JPEG writer rejects modes such as RGBA, LA or P, so images that carry
    # an alpha channel or a palette are flattened to RGB before saving.
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")
    if str(format).upper() == "JPEG" and image.mode not in jpeg_modes:
        image = image.convert("RGB")

    buffered = BytesIO()
    image.save(buffered, format=format)
    return base64.b64encode(buffered.getvalue()).decode("utf-8")


def add_top_numbers(
    input_image,
    num_divisions=20,
    margin=90,
    font_size=70,
    dot_spacing=20,
):
    """
    Add numbered divisions across the top and bottom of any image with dotted vertical lines.

    The input image is pasted onto a new white RGB canvas that is ``2 * margin``
    pixels taller; the input image itself is not modified.

    Args:
        input_image (Image): PIL Image
        num_divisions (int): Number of divisions to create
        margin (int): Size of margin in pixels for numbers
        font_size (int): Font size for numbers
        dot_spacing (int): Spacing between dots in pixels

    Returns:
        Image: New PIL image with numbers in both margins and one dotted
        guide line through the centre of every division.

    Raises:
        ValueError: If ``num_divisions`` is less than 1 or ``dot_spacing`` is
            not positive.
    """
    if num_divisions < 1:
        raise ValueError("num_divisions must be at least 1")
    if dot_spacing <= 0:
        # A non-positive step would never reach the bottom margin.
        raise ValueError("dot_spacing must be positive")

    # White canvas with room for the numbers above and below the picture.
    canvas_height = input_image.height + 2 * margin
    canvas = Image.new("RGB", (input_image.width, canvas_height), "white")
    canvas.paste(input_image, (0, margin))

    draw = ImageDraw.Draw(canvas)

    try:
        font = ImageFont.truetype("arial.ttf", font_size)
    except OSError:
        print("Using default font")
        font = ImageFont.load_default(size=font_size)

    division_width = input_image.width / num_divisions

    # Positions that are identical for every division.
    top_label_y = margin // 2
    bottom_label_y = canvas_height - (margin // 2)
    line_top = margin
    line_bottom = canvas_height - margin

    for index in range(num_divisions):
        # Horizontal centre of this division.
        x = (index * division_width) + (division_width / 2)
        label = str(index + 1)

        draw.text((x, top_label_y), label, fill="black", font=font, anchor="mm")
        draw.text((x, bottom_label_y), label, fill="black", font=font, anchor="mm")

        # Dotted guide line over the picture area. ImageDraw.circle takes the
        # centre point, so each dot is centred on the guide line itself.
        dot_y = line_top
        while dot_y < line_bottom:
            draw.circle((x, dot_y), radius=3, fill="black")
            dot_y += dot_spacing

    return canvas


def _parse_division_reply(reply_text):
    """Return ``(left_row, right_row, num_of_speakers)`` parsed from the model reply, or ``None``."""
    reply_text = reply_text or ""
    start = reply_text.find("{")
    end = reply_text.rfind("}")
    if start == -1 or end < start:
        return None

    snippet = reply_text[start : end + 1]
    print(snippet)

    # The reply is untrusted text, so it is parsed as data and never executed.
    # literal_eval is a fallback for Python-style dicts (e.g. single quotes).
    try:
        payload = json.loads(snippet)
    except json.JSONDecodeError:
        try:
            payload = ast.literal_eval(snippet)
        except (ValueError, SyntaxError, TypeError) as error:
            print(error)
            return None

    try:
        return (
            int(payload["left_row"]),
            int(payload["right_row"]),
            int(payload["num_of_speakers"]),
        )
    except (KeyError, TypeError, ValueError) as error:
        print(error)
        return None


def analyze_image(numbered_input_image: "Image.Image", prompt, input_image, ct):
    """
    Ask GPT-4o which numbered divisions bound the speakers in an image.

    Two chat completions are made: the first sends ``prompt`` together with the
    numbered image, the second asks the model to restate its answer as JSON
    with the keys ``left_row``, ``right_row`` and ``num_of_speakers``.

    Args:
        numbered_input_image (Image): PIL Image with division numbers drawn on it
        prompt (str): The prompt/question about the image
        input_image (Image): input image without numbers (currently unused,
            kept for interface compatibility)
        ct: currently unused, kept for interface compatibility

    Returns:
        tuple: ``(left_row, right_row, num_of_speakers)`` as integers. If the
        reply cannot be parsed, ``(0, 20, 1)`` is returned so callers can always
        unpack three values.
    """
    client = OpenAI()
    base64_image = encode_image_to_base64(numbered_input_image, format="JPEG")

    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
                },
            ],
        }
    ]

    first_reply = client.chat.completions.create(
        model="gpt-4o", messages=messages, max_tokens=300
    )

    # Feed the free-form answer back and request a machine-readable version.
    messages.extend(
        [
            {"role": "assistant", "content": first_reply.choices[0].message.content},
            {
                "role": "user",
                "content": "please return the response in the json with keys left_row, right_row, and num_of_speakers",
            },
        ],
    )

    json_reply = (
        client.chat.completions.create(model="gpt-4o", messages=messages)
        .choices[0]
        .message.content
    )

    parsed = _parse_division_reply(json_reply)
    if parsed is None:
        # Same fallback divisions as before, plus a single speaker so the
        # tuple has the same shape as the success path.
        return 0, 20, 1
    return parsed


def get_sprite_firebase(cid, rsid, uid):
    """
    Read the sprite entry stored in the Firebase realtime database.

    The Firebase client is configured from ``FIREBASE_*`` environment variables
    and the node ``<ROLL_ACCOUNT>/collab_sprite_link_handler/<uid>/<cid>/<rsid>``
    is fetched.

    Args:
        cid: path component placed after ``uid``
        rsid: last path component
        uid: path component placed after the handler name

    Returns:
        Whatever ``.val()`` yields for that database node.
    """
    setting_to_env = (
        ("apiKey", "FIREBASE_API_KEY"),
        ("authDomain", "FIREBASE_AUTH_DOMAIN"),
        ("databaseURL", "FIREBASE_DATABASE_URL"),
        ("projectId", "FIREBASE_PROJECT_ID"),
        ("storageBucket", "FIREBASE_STORAGE_BUCKET"),
        ("messagingSenderId", "FIREBASE_MESSAGING_SENDER_ID"),
        ("appId", "FIREBASE_APP_ID"),
        ("measurementId", "FIREBASE_MEASUREMENT_ID"),
    )
    # str() keeps the existing behaviour of formatting an unset variable as "None".
    settings = {
        setting: str(os.getenv(env_name)) for setting, env_name in setting_to_env
    }

    database = pyrebase.initialize_app(settings).database()

    account = os.getenv("ROLL_ACCOUNT")
    handler = "collab_sprite_link_handler"
    node = f"{account}/{handler}/{uid}/{cid}/{rsid}"

    return database.child(node).get().val()


def find_persons_center(image, num_of_speakers=1):
    """
    Find the center point of the largest num_of_speakers persons in the image.
    If multiple persons are detected, merge the bounding boxes of only the largest ones.

    Args:
        image: CV2/numpy array image
        num_of_speakers: Number of speakers to consider (default: 1). Values
            below 1, or values that cannot be read as an integer, are treated
            as 1 so at least one detection is always used.

    Returns:
        int: x-coordinate of the center point of all considered persons, or the
        horizontal center of the image when no person is detected
    """
    # Detect persons (class 0 in COCO dataset)
    results = model(image, classes=[0], conf=0.6)

    if not results or len(results[0].boxes) == 0:
        # If no persons detected, return center of image
        return image.shape[1] // 2

    # Rows are indexed below as (x1, y1, x2, y2).
    boxes = results[0].boxes.xyxy.cpu().numpy()

    # Print the number of persons detected (for debugging)
    print(f"Detected {len(boxes)} persons in the image")

    if len(boxes) == 1:
        x1, _, x2, _ = boxes[0]
        center_x = int((x1 + x2) // 2)
        print(f"Single person detected at center x: {center_x}")
        return center_x

    # Multiple persons: keep only the largest boxes. The count is clamped to
    # [1, number of detections] so min()/max() below never see an empty list.
    try:
        requested = int(num_of_speakers)
    except (TypeError, ValueError):
        requested = 1
    num_boxes_to_use = max(1, min(requested, len(boxes)))

    selected_boxes = sorted(
        boxes,
        key=lambda box: (box[2] - box[0]) * (box[3] - box[1]),
        reverse=True,
    )[:num_boxes_to_use]

    # Horizontal extent of the merged bounding box.
    left_x = min(box[0] for box in selected_boxes)
    right_x = max(box[2] for box in selected_boxes)
    merged_center_x = int((left_x + right_x) // 2)

    print(
        f"{num_boxes_to_use} largest persons merged bounding box center x: {merged_center_x}"
    )
    print(f"Merged bounds: left={left_x}, right={right_x}")

    return merged_center_x


def _layout_span(limit, size, anchor):
    """Return ``(start, end)`` of a ``size``-long window starting at ``anchor``, kept inside ``[0, limit]``."""
    start = int(max(0, min(limit - size, anchor)))
    end = int(min(limit, start + size))
    return start, end


def _layout_columns(img, size, center):
    """Copy a strip of ``size`` columns centred on ``center`` (shifted to stay inside ``img``)."""
    start, end = _layout_span(img.shape[1], size, center - size // 2)
    return img[:, start:end].copy()


def _layout_rows(img, size, anchor):
    """Copy a band of ``size`` rows whose top edge is ``anchor`` (shifted to stay inside ``img``)."""
    start, end = _layout_span(img.shape[0], size, anchor)
    return img[start:end, :].copy()


def _layout_tag(segment, text):
    """Write a small white caption near the top-left corner of ``segment`` (in place) and return it."""
    cv2.putText(
        segment,
        text,
        (10, 30),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.8,
        (255, 255, 255),
        2,
        cv2.LINE_AA,
    )
    return segment


def _layout_join(segments, horizontal=True):
    """Place segments side by side (or stacked) on a black canvas; return ``None`` for no segments."""
    if not segments:
        return None

    heights = [segment.shape[0] for segment in segments]
    widths = [segment.shape[1] for segment in segments]

    if horizontal:
        canvas = np.zeros((max(heights), sum(widths), 3), dtype=np.uint8)
        x_offset = 0
        for segment, h, w in zip(segments, heights, widths):
            canvas[:h, x_offset : x_offset + w] = segment
            x_offset += w
    else:
        canvas = np.zeros((sum(heights), max(widths), 3), dtype=np.uint8)
        y_offset = 0
        for segment, h, w in zip(segments, heights, widths):
            canvas[y_offset : y_offset + h, :w] = segment
            y_offset += h

    return canvas


def _layout_banner(img, label):
    """Draw ``label`` in white on a black box in the top-left corner of ``img`` (in place)."""
    if img is None:
        return None

    font = cv2.FONT_HERSHEY_SIMPLEX
    scale = 1.0
    thickness = 2

    (text_width, text_height), _ = cv2.getTextSize(
        label, fontFace=font, fontScale=scale, thickness=thickness
    )

    # Filled black background sized to the text plus a small border.
    cv2.rectangle(
        img,
        (10, 10),
        (10 + text_width + 10, 10 + text_height + 10),
        (0, 0, 0),
        -1,
    )
    cv2.putText(
        img,
        label,
        (15, 15 + text_height),
        fontFace=font,
        fontScale=scale,
        thickness=thickness,
        color=(255, 255, 255),
        lineType=cv2.LINE_AA,
    )
    return img


def _layout_to_pil(img):
    """Convert a BGR array to a PIL image, passing ``None`` through."""
    if img is None:
        return None
    return Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))


def create_layouts(image, left_division, right_division, num_of_speakers):
    """
    Create different layout variations of the image using specific aspect ratios.
    All layout variations are positioned relative to the detected persons.

    The image is first cut to the columns between the two divisions (the frame
    is treated as 20 equal-width divisions), the YOLO model is run on that
    cutout, and every crop and segment is then taken from the cutout.

    Args:
        image: PIL Image (RGB) or an already BGR-ordered numpy array
        left_division: Left division index (1-20)
        right_division: Right division index (1-20)
        num_of_speakers: How many of the largest detected persons to merge into
            the framing box; values below 1 or unreadable values count as 1

    Returns:
        tuple: (standard_crops, threehalfs_layouts, twothirdhalfs_layouts,
        twoequalhalfs_layouts, visualization_data). The first four are dicts of
        labelled PIL images keyed by aspect ratio / variant; the last is a dict
        with the detected center, person top/height and the cutout bounds.
    """
    # Convert PIL Image to cv2 format
    if isinstance(image, Image.Image):
        image_cv = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
    else:
        image_cv = image.copy()

    width = image_cv.shape[1]

    # Division indices -> pixel columns, clamped to the frame. An index of 0
    # would otherwise give a negative slice start and a wrong cutout width.
    division_width = width / 20  # Assuming 20 divisions
    left_boundary = min(width, max(0, int((left_division - 1) * division_width)))
    right_boundary = min(width, max(0, int(right_division * division_width)))
    if right_boundary <= left_boundary:
        # Empty or inverted range: fall back to the whole frame.
        left_boundary, right_boundary = 0, width

    # 1. Cutout between the two divisions
    cutout_image = image_cv[:, left_boundary:right_boundary].copy()
    cutout_height, cutout_width = cutout_image.shape[:2]

    # 2. Person detection on the cutout. Without a detection the "person" is
    # the whole cutout, centred.
    cutout_center_x = cutout_width // 2
    cutout_center_y = cutout_height // 2
    person_top = 0.0
    person_height = float(cutout_height)

    results = model(cutout_image, classes=[0], conf=0.6)
    if results and len(results[0].boxes) > 0:
        boxes = results[0].boxes.xyxy.cpu().numpy()

        # Keep the largest boxes; clamp the count so at least one box is used.
        try:
            requested = int(num_of_speakers)
        except (TypeError, ValueError):
            requested = 1
        keep = max(1, min(requested, len(boxes)))
        selected_boxes = sorted(
            boxes,
            key=lambda box: (box[2] - box[0]) * (box[3] - box[1]),
            reverse=True,
        )[:keep]

        # Merged bounding box of the selected persons
        left_x = min(box[0] for box in selected_boxes)
        right_x = max(box[2] for box in selected_boxes)
        top_y = min(box[1] for box in selected_boxes)
        bottom_y = max(box[3] for box in selected_boxes)

        cutout_center_x = int((left_x + right_x) // 2)
        cutout_center_y = int((top_y + bottom_y) // 2)
        person_top = top_y
        person_height = bottom_y - top_y

    # 3. Standard 16:9 and 9:16 crops
    aspect_16_9 = 16 / 9
    aspect_9_16 = 9 / 16

    target_height_16_9 = int(cutout_width / aspect_16_9)
    if target_height_16_9 <= cutout_height:
        # Full width; start 5% of the person height above the person's top,
        # pushed up if the window would run past the bottom edge.
        top_margin = int(person_height * 0.05)
        cutout_16_9 = _layout_rows(
            cutout_image, target_height_16_9, person_top - top_margin
        )
    else:
        # Cutout is wider than 16:9: keep full height and trim the width.
        cutout_16_9 = _layout_columns(
            cutout_image, int(cutout_height * aspect_16_9), cutout_center_x
        )

    target_width_9_16 = int(cutout_height * aspect_9_16)
    if target_width_9_16 <= cutout_width:
        # Full height, horizontally centred on the person.
        cutout_9_16 = _layout_columns(cutout_image, target_width_9_16, cutout_center_x)
    else:
        # Cutout is narrower than 9:16: keep full width and trim the height,
        # centred 20% of the person height above the person's center.
        new_height = int(cutout_width / aspect_9_16)
        adjusted_center_y = int(cutout_center_y - (person_height * 0.2))
        cutout_9_16 = _layout_rows(
            cutout_image, new_height, adjusted_center_y - new_height // 2
        )

    # 4. Values the visualization step needs; only x is shifted back to the
    # full-image frame because the cutout spans the full height.
    visualization_data = {
        "original_center_x": left_boundary + cutout_center_x,
        "original_center_y": cutout_center_y,
        "original_person_top": person_top,
        "original_person_height": person_height,
        "cutout_bounds": (left_boundary, right_boundary),
    }

    # 5. Layout segments. Landscape segments are full-height strips of the
    # cutout centred on the person; portrait segments are full-width bands of
    # the 9:16 crop.
    # NOTE(review): person_top is measured in cutout rows but is also used as a
    # row offset inside cutout_9_16; the two agree only when the 9:16 crop kept
    # the full cutout height - confirm this is intended.
    aspect_5_3_9 = 5.3 / 9
    aspect_9_5_3 = 9 / 5.3
    aspect_10_6_9 = 10.6 / 9
    aspect_9_10_6 = 9 / 10.6
    aspect_8_9 = 8 / 9
    aspect_9_8 = 9 / 8

    portrait_height, portrait_width = cutout_9_16.shape[:2]
    adjusted_9_16_center_y = int(portrait_height // 2 - (person_height * 0.2))

    # Unlabelled base crops; every labelled segment below is a copy of one.
    strip_5_3 = _layout_columns(
        cutout_image, int(cutout_height * aspect_5_3_9), cutout_center_x
    )
    strip_10_6 = _layout_columns(
        cutout_image, int(cutout_height * aspect_10_6_9), cutout_center_x
    )
    strip_8 = _layout_columns(
        cutout_image, int(cutout_height * aspect_8_9), cutout_center_x
    )

    band_5_3 = _layout_rows(
        cutout_9_16, int(portrait_width / aspect_9_5_3), person_top
    )
    band_10_6_height = int(portrait_width / aspect_9_10_6)
    band_10_6 = _layout_rows(
        cutout_9_16,
        band_10_6_height,
        adjusted_9_16_center_y - band_10_6_height // 2,
    )
    band_8 = _layout_rows(
        cutout_9_16,
        int(portrait_width / aspect_9_8),
        max(0, person_top - person_height * 0.05),
    )

    # Three equal parts
    threehalfs_16_9_segments = [
        _layout_tag(strip_5_3.copy(), f"Part {i + 1}") for i in range(3)
    ]
    threehalfs_9_16_segments = [
        _layout_tag(band_5_3.copy(), f"Part {i + 1}") for i in range(3)
    ]

    # Two-thirds + one-third, in both orders
    wide_large = _layout_tag(strip_10_6.copy(), "10.6:9")
    wide_small = _layout_tag(strip_5_3.copy(), "5.3:9")
    twothirdhalfs_16_9_var1_segments = [wide_large, wide_small]
    twothirdhalfs_16_9_var2_segments = [wide_small.copy(), wide_large.copy()]

    tall_large = _layout_tag(band_10_6.copy(), "9:10.6")
    tall_small = _layout_tag(band_5_3.copy(), "9:5.3")
    twothirdhalfs_9_16_var1_segments = [tall_large, tall_small]
    twothirdhalfs_9_16_var2_segments = [tall_small.copy(), tall_large.copy()]

    # Two equal halves. Each half is labelled on its own fresh copy so the
    # "(2)" caption is not drawn on top of "(1)".
    twoequalhalfs_16_9_segments = [
        _layout_tag(strip_8.copy(), "8:9 (1)"),
        _layout_tag(strip_8.copy(), "8:9 (2)"),
    ]
    twoequalhalfs_9_16_segments = [
        _layout_tag(band_8.copy(), "9:8 (1)"),
        _layout_tag(band_8.copy(), "9:8 (2)"),
    ]

    # 6. Join segments into composites, add a banner label and convert to PIL.
    def composite(segments, horizontal, label):
        return _layout_to_pil(
            _layout_banner(_layout_join(segments, horizontal=horizontal), label)
        )

    standard_crops = {
        "cutout": _layout_to_pil(_layout_banner(cutout_image.copy(), "Cutout")),
        "16:9": _layout_to_pil(_layout_banner(cutout_16_9.copy(), "16:9")),
        "9:16": _layout_to_pil(_layout_banner(cutout_9_16.copy(), "9:16")),
    }

    threehalfs_layouts = {
        "16:9": composite(threehalfs_16_9_segments, True, "Three Halfs 16:9"),
        "9:16": composite(threehalfs_9_16_segments, False, "Three Halfs 9:16"),
    }

    twothirdhalfs_layouts = {
        "16:9_var1": composite(
            twothirdhalfs_16_9_var1_segments, True, "Two Thirds Var1 16:9"
        ),
        "16:9_var2": composite(
            twothirdhalfs_16_9_var2_segments, True, "Two Thirds Var2 16:9"
        ),
        "9:16_var1": composite(
            twothirdhalfs_9_16_var1_segments, False, "Two Thirds Var1 9:16"
        ),
        "9:16_var2": composite(
            twothirdhalfs_9_16_var2_segments, False, "Two Thirds Var2 9:16"
        ),
    }

    twoequalhalfs_layouts = {
        "16:9": composite(twoequalhalfs_16_9_segments, True, "Two Equal Halfs 16:9"),
        "9:16": composite(twoequalhalfs_9_16_segments, False, "Two Equal Halfs 9:16"),
    }

    return (
        standard_crops,
        threehalfs_layouts,
        twothirdhalfs_layouts,
        twoequalhalfs_layouts,
        visualization_data,
    )


# BGR colors used by draw_layout_regions, keyed by layout type then aspect ratio.
_LAYOUT_COLORS = {
    "standard": {"16:9": (0, 255, 0), "9:16": (255, 0, 0)},  # Green, Blue
    "threehalfs": {"16:9": (0, 165, 255), "9:16": (255, 255, 0)},  # Orange, Cyan
    "twothirdhalfs_var1": {
        "16:9": (255, 0, 255),
        "9:16": (128, 0, 128),
    },  # Magenta, Purple
    "twothirdhalfs_var2": {
        "16:9": (0, 255, 255),
        "9:16": (128, 128, 0),
    },  # Yellow, Teal
    "twoequalhalfs": {
        "16:9": (0, 128, 128),
        "9:16": (255, 165, 0),
    },  # Olive, Azure
}

# Stroke and label settings shared by every layout.
_BOX_THICKNESS = 3
_LABEL_FONT_SCALE = 0.8
_LABEL_THICKNESS = 2
_DASH_LENGTH = 20


def _clamped_start(center, size, limit):
    """Return the start of a ``size``-long span centered on ``center``.

    The start is pulled back so the span does not run past ``limit`` and is
    never negative (0 wins when the span is longer than ``limit``).
    """
    return max(0, min(limit - size, center - size // 2))


def _nine_sixteen_span(width, height, center_x):
    """Return ``(x_start, x_end)`` of the full-height 9:16 column around the subject."""
    column_width = int(height * (9 / 16))
    x_start = _clamped_start(center_x, column_width, width)
    return x_start, x_start + column_width


def _draw_box(canvas, top_left, bottom_right, color):
    """Draw a region outline with the shared box thickness."""
    cv2.rectangle(canvas, top_left, bottom_right, color, _BOX_THICKNESS)


def _draw_label(canvas, text, origin, color):
    """Draw a region label with the shared font settings."""
    cv2.putText(
        canvas,
        text,
        origin,
        cv2.FONT_HERSHEY_SIMPLEX,
        _LABEL_FONT_SCALE,
        color,
        _LABEL_THICKNESS,
    )


def _draw_dashed_line(canvas, start, stop, fixed, color, vertical):
    """Draw a thin dashed line from ``start`` to ``stop`` along one axis.

    ``fixed`` is the coordinate on the other axis (x for vertical lines,
    y for horizontal ones).
    """
    for pos in range(start, stop, _DASH_LENGTH):
        # Only every other dash-length slot is painted.
        if pos % (2 * _DASH_LENGTH) >= _DASH_LENGTH:
            continue
        dash_end = min(pos + _DASH_LENGTH, stop)
        if vertical:
            cv2.line(canvas, (fixed, pos), (fixed, dash_end), color, 1)
        else:
            cv2.line(canvas, (pos, fixed), (dash_end, fixed), color, 1)


def _draw_standard_regions(
    canvas, width, height, center_x, person_top, person_height, bounds
):
    """Draw the plain 16:9 and 9:16 crops."""
    left_boundary, right_boundary = bounds
    colors = _LAYOUT_COLORS["standard"]

    # 16:9 crop: spans the cutout horizontally, height follows the aspect ratio.
    crop_height = int((right_boundary - left_boundary) / (16 / 9))
    # Leave 5% of the person height as head room above the subject.
    top_margin = int(person_height * 0.05)
    y_start = int(max(0, person_top - top_margin))
    if y_start + crop_height > height:
        # Slide the crop up so it stays inside the image.
        y_start = int(max(0, height - crop_height))
    y_end = int(min(height, y_start + crop_height))

    _draw_box(
        canvas, (left_boundary, y_start), (right_boundary, y_end), colors["16:9"]
    )
    _draw_label(canvas, "16:9", (left_boundary + 5, y_start + 30), colors["16:9"])

    # 9:16 crop: full image height, centered horizontally on the subject.
    x_start, x_end = _nine_sixteen_span(width, height, center_x)
    _draw_box(canvas, (x_start, 0), (x_end, height), colors["9:16"])
    _draw_label(canvas, "9:16", (x_start + 5, 30), colors["9:16"])


def _draw_threehalfs_regions(canvas, width, height, center_x, center_y):
    """Draw three side-by-side (16:9) and three stacked (9:16) segments.

    Solid boxes show the segments laid out next to each other; thin dashed
    lines mark where a single segment sits when centered on the subject.
    """
    colors = _LAYOUT_COLORS["threehalfs"]

    # 16:9: three 5.3:9 segments side by side.
    color = colors["16:9"]
    segment_width = int(height * (5.3 / 9))
    row_start_x = max(0, center_x - (segment_width * 3) // 2)
    # The subject-centered placement is the same for every segment.
    centered_x_start = _clamped_start(center_x, segment_width, width)
    centered_x_end = min(width, centered_x_start + segment_width)

    for i in range(3):
        segment_x = row_start_x + i * segment_width
        _draw_box(canvas, (segment_x, 0), (segment_x + segment_width, height), color)
        if i > 0:  # Only draw centered versions for parts 2 and 3
            _draw_dashed_line(canvas, 0, height, centered_x_start, color, True)
            _draw_dashed_line(canvas, 0, height, centered_x_end, color, True)
        _draw_label(canvas, f"16:9 Part {i+1}", (segment_x + 5, 30 + i * 30), color)

    # 9:16: three 9:5.3 segments stacked inside the 9:16 column.
    color = colors["9:16"]
    x_start, x_end = _nine_sixteen_span(width, height, center_x)
    segment_height = int((x_end - x_start) / (9 / 5.3))
    stack_start_y = max(0, height // 2 - (segment_height * 3) // 2)
    centered_y_start = _clamped_start(center_y, segment_height, height)
    centered_y_end = min(height, centered_y_start + segment_height)

    for i in range(3):
        segment_y = stack_start_y + i * segment_height
        segment_y_end = min(height, segment_y + segment_height)
        _draw_box(canvas, (x_start, segment_y), (x_end, segment_y_end), color)
        if i > 0:  # Only draw centered versions for parts 2 and 3
            _draw_dashed_line(canvas, x_start, x_end, centered_y_start, color, False)
            _draw_dashed_line(canvas, x_start, x_end, centered_y_end, color, False)
        _draw_label(canvas, f"9:16 Part {i+1}", (x_start + 5, segment_y + 30), color)


def _draw_twothirdhalfs_regions(canvas, width, height, center_x, layout_type):
    """Draw the two-thirds/one-third split for both aspect ratios.

    ``var1`` puts the large segment first (left / top); ``var2`` puts the
    small segment first.
    """
    colors = _LAYOUT_COLORS[layout_type]
    large_first = layout_type.endswith("var1")

    # 16:9: one 10.6:9 and one 5.3:9 segment, both full height.
    color = colors["16:9"]
    large_width = int(height * (10.6 / 9))
    small_width = int(height * (5.3 / 9))
    if large_first:
        first_width, second_width = large_width, small_width
    else:
        first_width, second_width = small_width, large_width

    first_x = _clamped_start(center_x - second_width // 4, first_width, width)
    first_x_end = min(width, first_x + first_width)
    _draw_box(canvas, (first_x, 0), (first_x_end, height), color)
    _draw_label(canvas, "16:9 Part 1", (first_x + 5, 30), color)

    second_x = _clamped_start(center_x + first_width // 4, second_width, width)
    second_x_end = min(width, second_x + second_width)
    _draw_box(canvas, (second_x, 0), (second_x_end, height), color)
    _draw_label(canvas, "16:9 Part 2", (second_x + 5, 60), color)

    # 9:16: one 9:10.6 and one 9:5.3 segment stacked in the 9:16 column.
    color = colors["9:16"]
    x_start, x_end = _nine_sixteen_span(width, height, center_x)
    column_width = x_end - x_start
    large_height = int(column_width / (9 / 10.6))
    small_height = int(column_width / (9 / 5.3))
    if large_first:
        first_height, second_height = large_height, small_height
    else:
        first_height, second_height = small_height, large_height

    top_end = min(height, first_height)
    _draw_box(canvas, (x_start, 0), (x_end, top_end), color)
    _draw_label(canvas, "9:16 Part 1", (x_start + 5, 30), color)

    bottom_end = min(height, top_end + second_height)
    _draw_box(canvas, (x_start, top_end), (x_end, bottom_end), color)
    _draw_label(canvas, "9:16 Part 2", (x_start + 5, top_end + 30), color)


def _draw_twoequalhalfs_regions(canvas, width, height, center_x):
    """Draw two equal segments side by side (16:9) and stacked (9:16)."""
    colors = _LAYOUT_COLORS["twoequalhalfs"]

    # 16:9: two 8:9 segments whose centers sit half a segment left/right
    # of the subject.
    color = colors["16:9"]
    segment_width = int(height * (8 / 9))
    half_segment = segment_width // 2
    placements = (
        ("16:9 Equal 1", center_x - half_segment, 30),
        ("16:9 Equal 2", center_x + half_segment, 60),
    )
    for label, segment_center, label_y in placements:
        segment_x = _clamped_start(segment_center, segment_width, width)
        segment_x_end = min(width, segment_x + segment_width)
        _draw_box(canvas, (segment_x, 0), (segment_x_end, height), color)
        _draw_label(canvas, label, (segment_x + 5, label_y), color)

    # 9:16: two 9:8 segments stacked from the top of the 9:16 column.
    color = colors["9:16"]
    x_start, x_end = _nine_sixteen_span(width, height, center_x)
    segment_height = int((x_end - x_start) / (9 / 8))

    top_end = min(height, segment_height)
    _draw_box(canvas, (x_start, 0), (x_end, top_end), color)
    _draw_label(canvas, "9:16 Equal 1", (x_start + 5, 30), color)

    bottom_end = min(height, top_end + segment_height)
    _draw_box(canvas, (x_start, top_end), (x_end, bottom_end), color)
    _draw_label(canvas, "9:16 Equal 2", (x_start + 5, top_end + 30), color)


def draw_layout_regions(
    image, left_division, right_division, visualization_data, layout_type
):
    """
    Create a visualization showing the layout regions overlaid on the original image.
    Each region is independently centered on the subject, as in practice different videos
    would be stacked in these layouts.

    Args:
        image: PIL Image, or an array in OpenCV BGR layout (it is copied, not modified)
        left_division: Left division index (1-20); currently unused, kept for
            interface compatibility
        right_division: Right division index (1-20); currently unused, kept for
            interface compatibility
        visualization_data: Dictionary with visualization data from create_layouts.
            The keys read here are "original_center_x", "original_center_y",
            "original_person_top", "original_person_height" and "cutout_bounds"
            (a ``(left, right)`` pair).
        layout_type: One of "standard", "threehalfs", "twothirdhalfs_var1",
            "twothirdhalfs_var2" or "twoequalhalfs". Any other value draws only
            the subject center marker.

    Returns:
        PIL Image: Original image with layout regions visualized

    Raises:
        KeyError: If ``visualization_data`` lacks one of the keys listed above.
    """
    # Work on a private BGR canvas so the caller's image is left untouched.
    if isinstance(image, Image.Image):
        # Normalise the mode first so grayscale/palette images convert cleanly.
        canvas = cv2.cvtColor(np.array(image.convert("RGB")), cv2.COLOR_RGB2BGR)
    else:
        canvas = image.copy()

    height, width = canvas.shape[:2]

    # cv2 drawing calls need integer pixel coordinates, so coerce the values
    # that are passed to them directly.
    center_x = int(visualization_data["original_center_x"])
    center_y = int(visualization_data["original_center_y"])
    person_top = visualization_data["original_person_top"]
    person_height = visualization_data["original_person_height"]
    left_boundary, right_boundary = (
        int(bound) for bound in visualization_data["cutout_bounds"]
    )

    if layout_type == "standard":
        _draw_standard_regions(
            canvas,
            width,
            height,
            center_x,
            person_top,
            person_height,
            (left_boundary, right_boundary),
        )
    elif layout_type == "threehalfs":
        _draw_threehalfs_regions(canvas, width, height, center_x, center_y)
    elif layout_type in ("twothirdhalfs_var1", "twothirdhalfs_var2"):
        _draw_twothirdhalfs_regions(canvas, width, height, center_x, layout_type)
    elif layout_type == "twoequalhalfs":
        _draw_twoequalhalfs_regions(canvas, width, height, center_x)

    # Mark the subject center: white filled dot with a black outline.
    center_radius = 8
    cv2.circle(canvas, (center_x, center_y), center_radius, (255, 255, 255), -1)
    cv2.circle(canvas, (center_x, center_y), center_radius, (0, 0, 0), 2)

    # Convert back to PIL format
    return Image.fromarray(cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB))


def get_image_crop(cid=None, rsid=None, uid=None, ct=None):
    """
    Function that returns both standard and layout variations for visualization.

    Sprite sheet URLs are looked up with ``get_sprite_firebase(cid, rsid, uid)``;
    if that lookup fails, a fixed set of local sample images is used instead.
    For every sprite sheet that loads, the middle thumbnail is analyzed and
    cropped, and the resulting images are collected with captions.

    Args:
        cid: Passed through to ``get_sprite_firebase``.
        rsid: Passed through to ``get_sprite_firebase``.
        uid: Passed through to ``get_sprite_firebase``.
        ct: Passed through to ``analyze_image``.

    Returns:
        gr.Gallery: Gallery of all generated images
    """
    # Number of numbered divisions drawn on the thumbnail; also the upper
    # bound for the crop indices returned by the analysis.
    num_divisions = 20

    try:
        sprites_data = get_sprite_firebase(cid, rsid, uid)
        image_paths = [sprite_data["url"] for sprite_data in sprites_data]
    except Exception as e:
        # Best-effort fallback to bundled samples, but leave a trace of why.
        print(f"Could not load sprites from Firebase, using sample images: {e}")
        image_paths = [
            # "data/C2-Roll3D-i2x-Take2-Nov19.24-PST02.31.31pm.jpg",
            # "data/E2-HamzaA-i2x-Take2-Nov19.24-PST02.31.31pm.jpg",
            "data/F2-Roll4D-i2x-Take2-Nov19.24-PST02.31.31pm.jpg",
            "data/G2-Roll5D-i2x-Take2-Nov19.24-PST02.31.31pm.jpg",
            "data/C1-Roll10D-i1x-Take2-Mar20.25-PST12.14.56pm.jpg",
            "data/C2-Roll10D-i2x-Take2-Mar20.25-PST12.14.56pm.jpg",
        ]

    # (image, caption) pairs in display order
    gallery_items = []

    for image_path in image_paths:
        # Load image (from local file or URL)
        try:
            if image_path.startswith(("http://", "https://")):
                # A timeout keeps a stalled download from hanging the request;
                # raise_for_status reports HTTP errors instead of trying to
                # decode an error page as an image.
                response = requests.get(image_path, timeout=30)
                response.raise_for_status()
                input_image = Image.open(BytesIO(response.content))
            else:
                input_image = Image.open(image_path)
        except Exception as e:
            print(f"Error loading image {image_path}: {e}")
            continue

        # Get the middle thumbnail
        mid_image = get_middle_thumbnail(input_image)

        # Add numbered divisions so the analysis can refer to crop positions
        numbered_mid_image = add_top_numbers(
            input_image=mid_image,
            num_divisions=num_divisions,
            margin=50,
            font_size=30,
            dot_spacing=20,
        )

        # Analyze the image to get optimal crop divisions
        (left_division, right_division, num_of_speakers) = analyze_image(
            numbered_mid_image, remove_unwanted_prompt(1), mid_image, ct
        )

        # Safety check for divisions: clamp to the valid range and fall back
        # to the full width if the pair is inverted or empty.
        if left_division <= 0:
            left_division = 1
        if right_division > num_divisions:
            right_division = num_divisions
        if left_division >= right_division:
            left_division, right_division = 1, num_divisions

        print(f"Using divisions: left={left_division}, right={right_division}")

        # Create layouts and cutouts
        (
            standard_crops,
            threehalfs_layouts,
            twothirdhalfs_layouts,
            twoequalhalfs_layouts,
            visualization_data,
        ) = create_layouts(mid_image, left_division, right_division, num_of_speakers)

        # Standard aspect ratio visualization (16:9 and 9:16) goes first
        standard_visualization = draw_layout_regions(
            mid_image, left_division, right_division, visualization_data, "standard"
        )
        gallery_items.append(
            (
                standard_visualization,
                f"Standard Aspect Ratios (16:9 & 9:16) {standard_visualization.size}",
            )
        )

        # Add input and middle image to gallery
        gallery_items.append((input_image, f"Input Image {input_image.size}"))
        gallery_items.append((mid_image, f"Middle Thumbnail {mid_image.size}"))

        # Add every crop/layout group with its caption prefix
        layout_groups = (
            ("", standard_crops),
            ("Three Halfs ", threehalfs_layouts),
            ("Two-Thirds Halfs ", twothirdhalfs_layouts),
            ("Two Equal Halfs ", twoequalhalfs_layouts),
        )
        for caption_prefix, layouts in layout_groups:
            for key, layout in layouts.items():
                # A layout that could not be produced is stored as None;
                # skip it rather than fail on ``.size``.
                if layout is None:
                    continue
                gallery_items.append((layout, f"{caption_prefix}{key} {layout.size}"))

    # Return gallery with all images
    return gr.Gallery(value=gallery_items)