# Spaces: Sleeping — Hugging Face Spaces status banner captured when this
# file was scraped from the web viewer; not part of the program.
import base64 | |
import os | |
from io import BytesIO | |
import cv2 | |
import gradio as gr | |
import numpy as np | |
import pyrebase | |
import requests | |
from openai import OpenAI | |
from PIL import Image, ImageDraw, ImageFont | |
from ultralytics import YOLO | |
from prompts import remove_unwanted_prompt | |
# Module-level YOLO11-nano detector, loaded once and reused by
# find_persons_center and create_layouts for person detection (COCO class 0).
model = YOLO("yolo11n.pt")
def get_middle_thumbnail(input_image: Image, grid_size=(10, 10), padding=3):
    """
    Crop the center cell out of a sprite-sheet image.

    The sheet is treated as a grid_size[0] x grid_size[1] grid of equally
    sized thumbnails, each surrounded by `padding` pixels on every side.
    The cell at linear index (cols * rows) // 2 is cropped with its
    padding stripped.

    Args:
        input_image: PIL Image containing the sprite sheet
        grid_size: Tuple of (columns, rows)
        padding: Number of padding pixels on each side of a cell (default 3)

    Returns:
        PIL.Image: The middle thumbnail image with padding removed
    """
    cols, rows = grid_size
    sheet_w, sheet_h = input_image.size

    # Full cell size (thumbnail plus padding on both sides)
    cell_w = sheet_w // cols
    cell_h = sheet_h // rows

    # Net thumbnail size once the padding is stripped
    net_w = cell_w - 2 * padding
    net_h = cell_h - 2 * padding

    # Linear index of the middle cell, converted to (row, col)
    middle_index = (cols * rows) // 2
    row, col = divmod(middle_index, cols)

    # Pixel origin of the padded cell, shifted inward past the padding
    x0 = col * cell_w + padding
    y0 = row * cell_h + padding

    return input_image.crop((x0, y0, x0 + net_w, y0 + net_h))
def encode_image_to_base64(image: Image.Image, format: str = "JPEG") -> str:
    """
    Serialize a PIL image and return it as a base64 text string.

    Args:
        image: PIL Image object to encode
        format: Image format passed to PIL when saving (default: JPEG)

    Returns:
        Base64 encoded string of the serialized image bytes
    """
    with BytesIO() as buffer:
        image.save(buffer, format=format)
        raw_bytes = buffer.getvalue()
    return base64.b64encode(raw_bytes).decode("utf-8")
def add_top_numbers(
    input_image,
    num_divisions=20,
    margin=90,
    font_size=70,
    dot_spacing=20,
):
    """
    Add numbered divisions across the top and bottom of any image with
    dotted vertical lines marking each division boundary center.

    Args:
        input_image (Image): PIL Image
        num_divisions (int): Number of divisions to create
        margin (int): Size of margin in pixels for numbers
        font_size (int): Font size for numbers
        dot_spacing (int): Spacing between dots in pixels

    Returns:
        Image: new RGB image of height original + 2*margin, with the
        numbers 1..num_divisions centered in the top and bottom margins
        and a dotted guide line under each number.
    """
    original_image = input_image

    # New canvas with a numbered strip above and below the original image
    new_width = original_image.width
    new_height = original_image.height + (
        2 * margin
    )  # Add margin to both top and bottom
    new_image = Image.new("RGB", (new_width, new_height), "white")
    new_image.paste(original_image, (0, margin))

    draw = ImageDraw.Draw(new_image)
    try:
        font = ImageFont.truetype("arial.ttf", font_size)
    except OSError:
        # Arial is not installed (e.g. on Linux servers); fall back to
        # Pillow's bundled default font at the requested size.
        print("Using default font")
        font = ImageFont.load_default(size=font_size)

    division_width = original_image.width / num_divisions
    dot_radius = 3

    for i in range(num_divisions):
        # Horizontal center of this division
        x = (i * division_width) + (division_width / 2)

        # Draw number at top
        draw.text((x, margin // 2), str(i + 1), fill="black", font=font, anchor="mm")
        # Draw number at bottom
        draw.text(
            (x, new_height - (margin // 2)),
            str(i + 1),
            fill="black",
            font=font,
            anchor="mm",
        )

        # Dotted vertical line spanning the original image area.
        # Bug fix: the previous code called draw.circle() with a 4-element
        # bounding box as `xy`, but ImageDraw.circle(xy, radius) expects the
        # *center point* (and only exists in Pillow >= 10.4), so dots landed
        # at (x-1, y-1). Draw each dot with ellipse() over a bbox centered
        # on the point instead — correct and compatible with older Pillow.
        y_start = margin
        y_end = new_height - margin
        current_y = y_start
        while current_y < y_end:
            draw.ellipse(
                [
                    x - dot_radius,
                    current_y - dot_radius,
                    x + dot_radius,
                    current_y + dot_radius,
                ],
                fill="black",
            )
            current_y += dot_spacing

    return new_image
def analyze_image(numbered_input_image: Image, prompt, input_image, ct):
    """
    Perform inference on an image using GPT-4o and parse the framing answer.

    Args:
        numbered_input_image (Image): PIL Image with numbered divisions
        prompt (str): The prompt/question about the image
        input_image (Image): input image without numbers (currently unused;
            kept for interface compatibility)
        ct: currently unused; kept for interface compatibility

    Returns:
        tuple: (left_row, right_row, num_of_speakers) parsed from the
        model's JSON reply, or the safe fallback (0, 20, 1) — full frame,
        one speaker — when the reply cannot be parsed.
    """
    import json  # local import: only needed here to parse the model reply

    client = OpenAI()
    base64_image = encode_image_to_base64(numbered_input_image, format="JPEG")
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
                },
            ],
        }
    ]
    response = client.chat.completions.create(
        model="gpt-4o", messages=messages, max_tokens=300
    )
    # Second turn: ask the model to restate its answer as machine-readable JSON.
    messages.extend(
        [
            {"role": "assistant", "content": response.choices[0].message.content},
            {
                "role": "user",
                "content": "please return the response in the json with keys left_row, right_row, and num_of_speakers",
            },
        ],
    )
    response = (
        client.chat.completions.create(model="gpt-4o", messages=messages)
        .choices[0]
        .message.content
    )
    # Extract the JSON object embedded in the (possibly chatty) reply.
    left_index = response.find("{")
    right_index = response.rfind("}")
    try:
        if left_index == -1 or right_index == -1:
            raise ValueError("no JSON object found in model response")
        payload = response[left_index : right_index + 1]
        print(payload)
        # Bug fix: parse with json.loads instead of eval() — model output is
        # untrusted text and must never be executed.
        response_json = json.loads(payload)
        return (
            response_json["left_row"],
            response_json["right_row"],
            response_json["num_of_speakers"],
        )
    except Exception as e:
        print(e)
        # Bug fix: the fallback now matches the success arity (the original
        # returned a 2-tuple here, and raised NameError when no braces were
        # found). Default to full frame with a single speaker.
        return 0, 20, 1
def get_sprite_firebase(cid, rsid, uid):
    """
    Fetch the stored sprite-sheet link for a session from Firebase.

    Args:
        cid: content id
        rsid: roll session id
        uid: user id

    Returns:
        The value stored at
        {ROLL_ACCOUNT}/collab_sprite_link_handler/{uid}/{cid}/{rsid}.
    """
    # All Firebase credentials come from environment variables. The f-string
    # wrapper intentionally matches the original behavior (an unset variable
    # becomes the string "None" rather than a null value).
    env_vars = {
        "apiKey": "FIREBASE_API_KEY",
        "authDomain": "FIREBASE_AUTH_DOMAIN",
        "databaseURL": "FIREBASE_DATABASE_URL",
        "projectId": "FIREBASE_PROJECT_ID",
        "storageBucket": "FIREBASE_STORAGE_BUCKET",
        "messagingSenderId": "FIREBASE_MESSAGING_SENDER_ID",
        "appId": "FIREBASE_APP_ID",
        "measurementId": "FIREBASE_MEASUREMENT_ID",
    }
    config = {key: f"{os.getenv(name)}" for key, name in env_vars.items()}

    app = pyrebase.initialize_app(config)
    database = app.database()

    account_id = os.getenv("ROLL_ACCOUNT")
    COLLAB_EDIT_LINK = "collab_sprite_link_handler"
    path = f"{account_id}/{COLLAB_EDIT_LINK}/{uid}/{cid}/{rsid}"
    return database.child(path).get().val()
def find_persons_center(image, num_of_speakers=1):
    """
    Locate the horizontal center of the main person(s) in an image.

    Runs the person detector (COCO class 0, confidence 0.6) and, when
    several people are found, merges the bounding boxes of only the
    `num_of_speakers` largest detections before taking the center.

    Args:
        image: CV2/numpy array image
        num_of_speakers: Number of speakers to consider (default: 1)

    Returns:
        int: x-coordinate of the center point of the considered persons;
        the geometric image center if nobody is detected.
    """
    results = model(image, classes=[0], conf=0.6)

    # Nobody detected: fall back to the center of the frame.
    if not results or len(results[0].boxes) == 0:
        return image.shape[1] // 2

    boxes = results[0].boxes.xyxy.cpu().numpy()
    print(f"Detected {len(boxes)} persons in the image")

    if len(boxes) == 1:
        # Single detection: center of its bounding box.
        x1, _, x2, _ = boxes[0]
        center_x = int((x1 + x2) // 2)
        print(f"Single person detected at center x: {center_x}")
        return center_x

    # Several detections: rank by bounding-box area (largest first) and
    # keep at most num_of_speakers of them.
    ranked = sorted(
        range(len(boxes)),
        key=lambda idx: (boxes[idx][2] - boxes[idx][0])
        * (boxes[idx][3] - boxes[idx][1]),
        reverse=True,
    )
    num_boxes_to_use = min(num_of_speakers, len(boxes))
    chosen = [boxes[idx] for idx in ranked[:num_boxes_to_use]]

    # Merge the kept boxes into one horizontal span and take its center.
    left_x = min(box[0] for box in chosen)
    right_x = max(box[2] for box in chosen)
    merged_center_x = int((left_x + right_x) // 2)

    print(
        f"{num_boxes_to_use} largest persons merged bounding box center x: {merged_center_x}"
    )
    print(f"Merged bounds: left={left_x}, right={right_x}")
    return merged_center_x
def create_layouts(image, left_division, right_division, num_of_speakers):
    """
    Create different layout variations of the image using specific aspect ratios.
    All layout variations will be centered on detected persons.
    Args:
        image: PIL Image (or an already-converted cv2/numpy BGR array)
        left_division: Left division index (1-20)
        right_division: Right division index (1-20)
        num_of_speakers: number of largest person detections to merge when
            framing the crops
    Returns:
        tuple: (standard_crops, threehalfs_layouts, twothirdhalfs_layouts,
        twoequalhalfs_layouts, visualization_data) — the first four are dicts
        of labeled PIL images keyed by aspect-ratio label; visualization_data
        is a dict of the detected person geometry consumed by
        draw_layout_regions.
    """
    # Convert PIL Image to cv2 format
    if isinstance(image, Image.Image):
        image_cv = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
    else:
        image_cv = image.copy()
    # Get image dimensions
    height, width = image_cv.shape[:2]
    # Calculate division width and crop boundaries
    division_width = width / 20  # Assuming 20 divisions
    left_boundary = int((left_division - 1) * division_width)
    right_boundary = int(right_division * division_width)
    # 1. Create cutout image based on divisions
    cutout_image = image_cv[:, left_boundary:right_boundary].copy()
    cutout_width = right_boundary - left_boundary
    cutout_height = cutout_image.shape[0]
    # 2. Run YOLO on cutout to get person bounding box and center
    results = model(cutout_image, classes=[0], conf=0.6)
    # Default center if no detection
    cutout_center_x = cutout_image.shape[1] // 2
    cutout_center_y = cutout_height // 2
    # Default values for bounding box
    person_top = 0.0
    person_height = float(cutout_height)
    if results and len(results[0].boxes) > 0:
        # Get person detection
        boxes = results[0].boxes.xyxy.cpu().numpy()
        if len(boxes) == 1:
            # Single person
            x1, y1, x2, y2 = boxes[0]
            cutout_center_x = int((x1 + x2) // 2)
            cutout_center_y = int((y1 + y2) // 2)
            person_top = y1
            person_height = y2 - y1
        else:
            # Multiple persons - consider only the largest num_of_speakers boxes
            # Calculate area for each box
            box_areas = [(box[2] - box[0]) * (box[3] - box[1]) for box in boxes]
            # Sort boxes by area (largest first) and take top num_of_speakers
            sorted_indices = sorted(
                range(len(box_areas)), key=lambda i: box_areas[i], reverse=True
            )
            # Use all available boxes if fewer detected than requested
            num_boxes_to_use = min(num_of_speakers, len(boxes))
            selected_indices = sorted_indices[:num_boxes_to_use]
            selected_boxes = [boxes[i] for i in selected_indices]
            # Merge bounding boxes of selected boxes
            left_x = min(box[0] for box in selected_boxes)
            right_x = max(box[2] for box in selected_boxes)
            top_y = min(box[1] for box in selected_boxes)  # Top of highest person
            bottom_y = max(box[3] for box in selected_boxes)  # Bottom of lowest person
            cutout_center_x = int((left_x + right_x) // 2)
            cutout_center_y = int((top_y + bottom_y) // 2)
            person_top = top_y
            person_height = bottom_y - top_y
    # 3. Create 16:9 and 9:16 versions with person properly framed
    aspect_16_9 = 16 / 9
    aspect_9_16 = 9 / 16
    # For 16:9 version (with 5% margin above person)
    target_height_16_9 = int(cutout_width / aspect_16_9)
    if target_height_16_9 <= cutout_height:
        # Calculate 5% of person height for top margin
        top_margin = int(person_height * 0.05)
        # Start 5% above the person's top
        y_start = int(max(0, person_top - top_margin))
        # If this would make the crop exceed the bottom, adjust y_start
        if y_start + target_height_16_9 > cutout_height:
            y_start = int(max(0, cutout_height - target_height_16_9))
        y_end = int(min(cutout_height, y_start + target_height_16_9))
        cutout_16_9 = cutout_image[y_start:y_end, :].copy()
    else:
        # Handle rare case where we need to adjust width (not expected with normal images)
        new_width = int(cutout_height * aspect_16_9)
        x_start = max(
            0, min(cutout_width - new_width, cutout_center_x - new_width // 2)
        )
        x_end = min(cutout_width, x_start + new_width)
        cutout_16_9 = cutout_image[:, x_start:x_end].copy()
    # For 9:16 version (centered on person, adjusted upward for face visibility)
    target_width_9_16 = int(cutout_height * aspect_9_16)
    # Adjust center point upward by 20% of person height to ensure face is visible
    adjusted_center_y = int(cutout_center_y - (person_height * 0.2))
    if target_width_9_16 <= cutout_width:
        # Center horizontally around person
        x_start = int(
            max(
                0,
                min(
                    cutout_width - target_width_9_16,
                    cutout_center_x - target_width_9_16 // 2,
                ),
            )
        )
        x_end = int(min(cutout_width, x_start + target_width_9_16))
        # Use adjusted center point for vertical positioning
        # NOTE(review): `cutout_height - cutout_height` is always 0, so the
        # max/min below pins y_start to 0 and the slice keeps the full
        # height — this looks like it was meant to clamp against a target
        # crop height; confirm intent before changing.
        y_start = int(
            max(
                0,
                min(
                    cutout_height - cutout_height,
                    adjusted_center_y - cutout_height // 2,
                ),
            )
        )
        cutout_9_16 = cutout_image[y_start:, x_start:x_end].copy()
    else:
        # Handle rare case where we need to adjust height
        new_height = int(cutout_width / aspect_9_16)
        # Use adjusted center point for vertical positioning
        y_start = int(
            max(0, min(cutout_height - new_height, adjusted_center_y - new_height // 2))
        )
        y_end = int(min(cutout_height, y_start + new_height))
        cutout_9_16 = cutout_image[y_start:y_end, :].copy()
    # 4. Scale the center back to original image coordinates
    original_center_x = left_boundary + cutout_center_x
    original_center_y = cutout_center_y
    original_person_top = person_top
    # Store visualization data for drawing
    visualization_data = {
        "original_center_x": original_center_x,
        "original_center_y": original_center_y,
        "original_person_top": original_person_top,
        "original_person_height": person_height,
        "cutout_bounds": (left_boundary, right_boundary),
    }
    # 5. Create new layout variations - each segment is independently centered on the subject
    # ----- Create crops for threehalfs layout -----
    # For 16:9 (three 5.3:9 segments, each independently centered)
    aspect_5_3_9 = 5.3 / 9
    # Calculate dimensions for each segment
    segment_height_16_9 = cutout_height  # Use full height
    segment_width_16_9 = int(segment_height_16_9 * aspect_5_3_9)
    # Create three segments for 16:9 threehalfs - all centered on the person
    # NOTE(review): all three segments use the same crop window, so they are
    # identical images here; in production each slot would hold a different
    # video, as the draw_layout_regions docstring suggests.
    threehalfs_16_9_segments = []
    for i in range(3):
        # Each segment is centered on the person
        segment_x_start = int(
            max(
                0,
                min(
                    cutout_width - segment_width_16_9,
                    cutout_center_x - segment_width_16_9 // 2,
                ),
            )
        )
        segment_x_end = int(min(cutout_width, segment_x_start + segment_width_16_9))
        # Create the segment
        segment = cutout_image[:, segment_x_start:segment_x_end].copy()
        # Add a label for visualization
        label = f"Part {i+1}"
        cv2.putText(
            segment,
            label,
            (10, 30),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.8,
            (255, 255, 255),
            2,
            cv2.LINE_AA,
        )
        threehalfs_16_9_segments.append(segment)
    # For 9:16 (three 9:5.3 segments, each independently centered)
    aspect_9_5_3 = 9 / 5.3
    # Calculate dimensions for each segment
    segment_width_9_16 = cutout_9_16.shape[1]  # Use full width of 9:16 crop
    segment_height_9_16 = int(segment_width_9_16 / aspect_9_5_3)
    # Get adjusted center for 9:16 segments (move up by 20% of person height)
    cutout_9_16_center_y = cutout_9_16.shape[0] // 2
    adjusted_9_16_center_y = int(cutout_9_16_center_y - (person_height * 0.2))
    cutout_9_16_height = cutout_9_16.shape[0]
    # Create three segments for 9:16 threehalfs - all centered on the person
    threehalfs_9_16_segments = []
    for i in range(3):
        # Each segment is centered on the person with adjusted center point
        # NOTE(review): person_top is expressed in cutout coordinates, while
        # this clamp operates on cutout_9_16; the frames only coincide
        # because the 9:16 crop above starts at y = 0 — verify if that
        # changes.
        segment_y_start = int(
            max(
                0,
                min(
                    cutout_9_16_height - segment_height_9_16,
                    person_top,
                ),
            )
        )
        segment_y_end = int(
            min(cutout_9_16_height, segment_y_start + segment_height_9_16)
        )
        # Create the segment
        segment = cutout_9_16[segment_y_start:segment_y_end, :].copy()
        # Add a label for visualization
        label = f"Part {i+1}"
        cv2.putText(
            segment,
            label,
            (10, 30),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.8,
            (255, 255, 255),
            2,
            cv2.LINE_AA,
        )
        threehalfs_9_16_segments.append(segment)
    # ----- Create crops for twothirdhalfs layout -----
    # For 16:9 (two segments: 10.6:9 and 5.3:9 OR 5.3:9 and 10.6:9)
    aspect_10_6_9 = 10.6 / 9
    # Calculate dimensions for segments
    segment1_height_16_9 = cutout_height  # Use full height
    segment1_width_16_9 = int(segment1_height_16_9 * aspect_10_6_9)
    segment2_height_16_9 = cutout_height  # Use full height
    segment2_width_16_9 = int(segment2_height_16_9 * aspect_5_3_9)
    # Create segments for 16:9 twothirdhalfs var1 (10.6:9 then 5.3:9)
    # Both segments independently centered on the person
    # First segment (10.6:9)
    segment_x_start = int(
        max(
            0,
            min(
                cutout_width - segment1_width_16_9,
                cutout_center_x - segment1_width_16_9 // 2,
            ),
        )
    )
    segment_x_end = int(min(cutout_width, segment_x_start + segment1_width_16_9))
    segment1 = cutout_image[:, segment_x_start:segment_x_end].copy()
    # Add label
    cv2.putText(
        segment1,
        "10.6:9",
        (10, 30),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.8,
        (255, 255, 255),
        2,
        cv2.LINE_AA,
    )
    # Second segment (5.3:9)
    segment_x_start = int(
        max(
            0,
            min(
                cutout_width - segment2_width_16_9,
                cutout_center_x - segment2_width_16_9 // 2,
            ),
        )
    )
    segment_x_end = int(min(cutout_width, segment_x_start + segment2_width_16_9))
    segment2 = cutout_image[:, segment_x_start:segment_x_end].copy()
    # Add label
    cv2.putText(
        segment2,
        "5.3:9",
        (10, 30),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.8,
        (255, 255, 255),
        2,
        cv2.LINE_AA,
    )
    twothirdhalfs_16_9_var1_segments = [segment1, segment2]
    # Create segments for 16:9 twothirdhalfs var2 (5.3:9 then 10.6:9)
    # First segment (5.3:9) - reuse segment2 from var1
    # Second segment (10.6:9) - reuse segment1 from var1
    twothirdhalfs_16_9_var2_segments = [segment2.copy(), segment1.copy()]
    # For 9:16 (two segments stacked: 9:10.6 and 9:5.3 OR 9:5.3 and 9:10.6)
    aspect_9_10_6 = 9 / 10.6
    aspect_9_5_3 = 9 / 5.3
    # Calculate dimensions for segments
    segment1_width_9_16 = cutout_9_16.shape[1]  # Use full width of 9:16 crop
    segment1_height_9_16 = int(segment1_width_9_16 / aspect_9_10_6)
    segment2_width_9_16 = cutout_9_16.shape[1]  # Use full width of 9:16 crop
    segment2_height_9_16 = int(segment2_width_9_16 / aspect_9_5_3)
    # Create segments for 9:16 twothirdhalfs var1 (9:10.6 then 9:5.3)
    # Both segments independently centered on the person with adjusted center point
    # First segment (9:10.6)
    segment_y_start = int(
        max(
            0,
            min(
                cutout_9_16_height - segment1_height_9_16,
                adjusted_9_16_center_y - segment1_height_9_16 // 2,
            ),
        )
    )
    segment_y_end = int(min(cutout_9_16_height, segment_y_start + segment1_height_9_16))
    segment1 = cutout_9_16[segment_y_start:segment_y_end, :].copy()
    # Add label
    cv2.putText(
        segment1,
        "9:10.6",
        (10, 30),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.8,
        (255, 255, 255),
        2,
        cv2.LINE_AA,
    )
    # Second segment (9:5.3)
    # NOTE(review): this clamp uses person_top while the first segment uses
    # adjusted_9_16_center_y; confirm the asymmetry is intentional.
    segment_y_start = int(
        max(
            0,
            min(
                cutout_9_16_height - segment2_height_9_16,
                person_top,
            ),
        )
    )
    segment_y_end = int(min(cutout_9_16_height, segment_y_start + segment2_height_9_16))
    segment2 = cutout_9_16[segment_y_start:segment_y_end, :].copy()
    # Add label
    cv2.putText(
        segment2,
        "9:5.3",
        (10, 30),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.8,
        (255, 255, 255),
        2,
        cv2.LINE_AA,
    )
    twothirdhalfs_9_16_var1_segments = [segment1, segment2]
    # Create segments for 9:16 twothirdhalfs var2 (9:5.3 then 9:10.6)
    # First segment (9:5.3) - reuse segment2 from var1
    # Second segment (9:10.6) - reuse segment1 from var1
    twothirdhalfs_9_16_var2_segments = [segment2.copy(), segment1.copy()]
    # ----- Create crops for twoequalhalfs layout -----
    # For 16:9 (two 8:9 segments side by side)
    aspect_8_9 = 8 / 9
    # Calculate dimensions for segments
    segment_height_16_9_equal = cutout_height  # Use full height
    segment_width_16_9_equal = int(segment_height_16_9_equal * aspect_8_9)
    # Create segments for 16:9 twoequalhalfs - both centered on the person
    # First segment (8:9)
    segment_x_start = int(
        max(
            0,
            min(
                cutout_width - segment_width_16_9_equal,
                cutout_center_x - segment_width_16_9_equal // 2,
            ),
        )
    )
    segment_x_end = int(min(cutout_width, segment_x_start + segment_width_16_9_equal))
    segment1 = cutout_image[:, segment_x_start:segment_x_end].copy()
    # Add label
    cv2.putText(
        segment1,
        "8:9 (1)",
        (10, 30),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.8,
        (255, 255, 255),
        2,
        cv2.LINE_AA,
    )
    # Second segment (identical to first for equal halfs)
    segment2 = segment1.copy()
    # Update label for segment 2
    # NOTE(review): segment2 is a copy of the already-labeled segment1, so
    # "8:9 (2)" is drawn over "8:9 (1)" at the same position.
    cv2.putText(
        segment2,
        "8:9 (2)",
        (10, 30),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.8,
        (255, 255, 255),
        2,
        cv2.LINE_AA,
    )
    twoequalhalfs_16_9_segments = [segment1, segment2]
    # For 9:16 (two 9:8 segments stacked)
    aspect_9_8 = 9 / 8
    # Calculate dimensions for segments
    segment_width_9_16_equal = cutout_9_16.shape[1]  # Use full width of 9:16 crop
    segment_height_9_16_equal = int(segment_width_9_16_equal / aspect_9_8)
    # Create segments for 9:16 twoequalhalfs - both centered on the person with adjusted center point
    # First segment (9:8)
    segment_y_start = int(
        max(
            0,
            min(
                cutout_9_16_height - segment_height_9_16_equal,
                max(0, person_top - person_height * 0.05),
            ),
        )
    )
    segment_y_end = int(
        min(cutout_9_16_height, segment_y_start + segment_height_9_16_equal)
    )
    segment1 = cutout_9_16[segment_y_start:segment_y_end, :].copy()
    # Add label
    cv2.putText(
        segment1,
        "9:8 (1)",
        (10, 30),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.8,
        (255, 255, 255),
        2,
        cv2.LINE_AA,
    )
    # Second segment (identical to first for equal halfs)
    segment2 = segment1.copy()
    # Update label for segment 2
    cv2.putText(
        segment2,
        "9:8 (2)",
        (10, 30),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.8,
        (255, 255, 255),
        2,
        cv2.LINE_AA,
    )
    twoequalhalfs_9_16_segments = [segment1, segment2]

    # 6. Create composite layouts by joining segments
    # Function to create a composite image
    def create_composite(segments, horizontal=True):
        """Join segments side by side (horizontal=True) or stacked
        top-to-bottom on a black canvas sized to fit them all."""
        if not segments:
            return None
        if horizontal:
            # Calculate the total width and max height
            total_width = sum(segment.shape[1] for segment in segments)
            max_height = max(segment.shape[0] for segment in segments)
            # Create a canvas
            composite = np.zeros((max_height, total_width, 3), dtype=np.uint8)
            # Place segments side by side
            x_offset = 0
            for segment in segments:
                h, w = segment.shape[:2]
                composite[:h, x_offset : x_offset + w] = segment
                x_offset += w
        else:  # vertical stacking
            # Calculate the max width and total height
            max_width = max(segment.shape[1] for segment in segments)
            total_height = sum(segment.shape[0] for segment in segments)
            # Create a canvas
            composite = np.zeros((total_height, max_width, 3), dtype=np.uint8)
            # Place segments top to bottom
            y_offset = 0
            for segment in segments:
                h, w = segment.shape[:2]
                composite[y_offset : y_offset + h, :w] = segment
                y_offset += h
        return composite

    # Create composite layouts
    threehalfs_16_9_composite = create_composite(
        threehalfs_16_9_segments, horizontal=True
    )
    threehalfs_9_16_composite = create_composite(
        threehalfs_9_16_segments, horizontal=False
    )
    twothirdhalfs_16_9_var1_composite = create_composite(
        twothirdhalfs_16_9_var1_segments, horizontal=True
    )
    twothirdhalfs_16_9_var2_composite = create_composite(
        twothirdhalfs_16_9_var2_segments, horizontal=True
    )
    twothirdhalfs_9_16_var1_composite = create_composite(
        twothirdhalfs_9_16_var1_segments, horizontal=False
    )
    twothirdhalfs_9_16_var2_composite = create_composite(
        twothirdhalfs_9_16_var2_segments, horizontal=False
    )
    twoequalhalfs_16_9_composite = create_composite(
        twoequalhalfs_16_9_segments, horizontal=True
    )
    twoequalhalfs_9_16_composite = create_composite(
        twoequalhalfs_9_16_segments, horizontal=False
    )

    # Add labels to all composites
    def add_label(img, label):
        """Draw `label` in white on a black rectangle in the top-left
        corner of img (mutates img in place; passes None through)."""
        if img is None:
            return None
        font = cv2.FONT_HERSHEY_SIMPLEX
        label_settings = {
            "fontScale": 1.0,
            "fontFace": font,
            "thickness": 2,
        }
        # Draw background for text
        text_size = cv2.getTextSize(
            label,
            fontFace=label_settings["fontFace"],
            fontScale=label_settings["fontScale"],
            thickness=label_settings["thickness"],
        )
        cv2.rectangle(
            img,
            (10, 10),
            (10 + text_size[0][0] + 10, 10 + text_size[0][1] + 10),
            (0, 0, 0),
            -1,
        )  # Black background
        # Draw text
        cv2.putText(
            img,
            label,
            (15, 15 + text_size[0][1]),
            fontFace=label_settings["fontFace"],
            fontScale=label_settings["fontScale"],
            thickness=label_settings["thickness"],
            color=(255, 255, 255),
            lineType=cv2.LINE_AA,
        )
        return img

    # Label the basic crops
    cutout_image_labeled = add_label(cutout_image.copy(), "Cutout")
    cutout_16_9_labeled = add_label(cutout_16_9.copy(), "16:9")
    cutout_9_16_labeled = add_label(cutout_9_16.copy(), "9:16")
    # Label the composite layouts
    threehalfs_16_9_labeled = add_label(threehalfs_16_9_composite, "Three Halfs 16:9")
    threehalfs_9_16_labeled = add_label(threehalfs_9_16_composite, "Three Halfs 9:16")
    twothirdhalfs_16_9_var1_labeled = add_label(
        twothirdhalfs_16_9_var1_composite, "Two Thirds Var1 16:9"
    )
    twothirdhalfs_16_9_var2_labeled = add_label(
        twothirdhalfs_16_9_var2_composite, "Two Thirds Var2 16:9"
    )
    twothirdhalfs_9_16_var1_labeled = add_label(
        twothirdhalfs_9_16_var1_composite, "Two Thirds Var1 9:16"
    )
    twothirdhalfs_9_16_var2_labeled = add_label(
        twothirdhalfs_9_16_var2_composite, "Two Thirds Var2 9:16"
    )
    twoequalhalfs_16_9_labeled = add_label(
        twoequalhalfs_16_9_composite, "Two Equal Halfs 16:9"
    )
    twoequalhalfs_9_16_labeled = add_label(
        twoequalhalfs_9_16_composite, "Two Equal Halfs 9:16"
    )

    # Convert all output images to PIL format
    def cv2_to_pil(img):
        """Convert a BGR cv2 array to a PIL RGB image (None passes through)."""
        if img is None:
            return None
        return Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

    # Convert standard crops
    standard_crops = {
        "cutout": cv2_to_pil(cutout_image_labeled),
        "16:9": cv2_to_pil(cutout_16_9_labeled),
        "9:16": cv2_to_pil(cutout_9_16_labeled),
    }
    # Convert threehalfs layouts
    threehalfs_layouts = {
        "16:9": cv2_to_pil(threehalfs_16_9_labeled),
        "9:16": cv2_to_pil(threehalfs_9_16_labeled),
    }
    # Convert twothirdhalfs layouts
    twothirdhalfs_layouts = {
        "16:9_var1": cv2_to_pil(twothirdhalfs_16_9_var1_labeled),
        "16:9_var2": cv2_to_pil(twothirdhalfs_16_9_var2_labeled),
        "9:16_var1": cv2_to_pil(twothirdhalfs_9_16_var1_labeled),
        "9:16_var2": cv2_to_pil(twothirdhalfs_9_16_var2_labeled),
    }
    # Convert twoequalhalfs layouts
    twoequalhalfs_layouts = {
        "16:9": cv2_to_pil(twoequalhalfs_16_9_labeled),
        "9:16": cv2_to_pil(twoequalhalfs_9_16_labeled),
    }
    return (
        standard_crops,
        threehalfs_layouts,
        twothirdhalfs_layouts,
        twoequalhalfs_layouts,
        visualization_data,
    )
def draw_layout_regions(
    image, left_division, right_division, visualization_data, layout_type
):
    """
    Create a visualization showing the layout regions overlaid on the original image.

    Each region is independently centered on the subject, as in practice different
    videos would be stacked in these layouts.

    Args:
        image: PIL Image (or an already-BGR numpy array, which is used as-is)
        left_division: Left division index (1-20). NOTE: currently unused in this
            function; kept so the signature matches the analysis pipeline.
        right_division: Right division index (1-20). NOTE: currently unused in this
            function; kept so the signature matches the analysis pipeline.
        visualization_data: Dictionary with visualization data from create_layouts.
            Required keys: "original_center_x", "original_center_y",
            "original_person_top", "original_person_height", "cutout_bounds".
        layout_type: Type of layout to visualize ("standard", "threehalfs",
            "twothirdhalfs_var1", "twothirdhalfs_var2" or "twoequalhalfs")

    Returns:
        PIL Image: Original image with layout regions visualized
    """
    # Convert PIL Image to cv2 format (anything else is assumed to be BGR already)
    if isinstance(image, Image.Image):
        image_cv = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
    else:
        image_cv = image.copy()

    # Get a clean copy for drawing
    visualization = image_cv.copy()

    # Get image dimensions
    height, width = image_cv.shape[:2]

    # Extract the subject/crop geometry computed by create_layouts
    original_center_x = visualization_data["original_center_x"]
    original_center_y = visualization_data["original_center_y"]
    original_person_top = visualization_data["original_person_top"]
    original_person_height = visualization_data["original_person_height"]
    left_boundary, right_boundary = visualization_data["cutout_bounds"]
    cutout_width = right_boundary - left_boundary

    # Define colors for different layouts (BGR format)
    colors = {
        "standard": {"16:9": (0, 255, 0), "9:16": (255, 0, 0)},  # Green, Blue
        "threehalfs": {"16:9": (0, 165, 255), "9:16": (255, 255, 0)},  # Orange, Cyan
        "twothirdhalfs_var1": {
            "16:9": (255, 0, 255),
            "9:16": (128, 0, 128),
        },  # Magenta, Purple
        "twothirdhalfs_var2": {
            "16:9": (0, 255, 255),
            "9:16": (128, 128, 0),
        },  # Yellow, Teal
        "twoequalhalfs": {
            "16:9": (0, 128, 128),
            "9:16": (255, 165, 0),
        },  # Dark Cyan, Blue-Green
    }

    # Define line thickness and font
    thickness = 3
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.8
    font_thickness = 2

    # Draw standard layouts (16:9 and 9:16)
    if layout_type == "standard":
        # Draw 16:9 crop: full cutout width, height derived from the aspect ratio
        aspect_16_9 = 16 / 9
        target_height_16_9 = int(cutout_width / aspect_16_9)

        # Use 5% of person height as headroom above the subject
        # (comment previously claimed 20%, but the factor is 0.05)
        top_margin = int(original_person_height * 0.05)
        y_start = int(max(0, original_person_top - top_margin))
        # Shift the crop up if it would run off the bottom of the frame
        if y_start + target_height_16_9 > height:
            y_start = int(max(0, height - target_height_16_9))
        y_end = int(min(height, y_start + target_height_16_9))

        cv2.rectangle(
            visualization,
            (left_boundary, y_start),
            (right_boundary, y_end),
            colors["standard"]["16:9"],
            thickness,
        )
        cv2.putText(
            visualization,
            "16:9",
            (left_boundary + 5, y_start + 30),
            font,
            font_scale,
            colors["standard"]["16:9"],
            font_thickness,
        )

        # Draw 9:16 crop: full frame height, width centered on the subject
        aspect_9_16 = 9 / 16
        target_width_9_16 = int(height * aspect_9_16)
        x_start = max(
            0,
            min(width - target_width_9_16, original_center_x - target_width_9_16 // 2),
        )
        x_end = x_start + target_width_9_16

        cv2.rectangle(
            visualization,
            (x_start, 0),
            (x_end, height),
            colors["standard"]["9:16"],
            thickness,
        )
        cv2.putText(
            visualization,
            "9:16",
            (x_start + 5, 30),
            font,
            font_scale,
            colors["standard"]["9:16"],
            font_thickness,
        )

    # Draw threehalfs layouts - each segment is centered on the subject
    elif layout_type == "threehalfs":
        # For 16:9 (three 5.3:9 segments side by side - visually only)
        aspect_5_3_9 = 5.3 / 9
        segment_height = height
        segment_width = int(segment_height * aspect_5_3_9)

        # Calculate total width for visualization purposes
        total_width = segment_width * 3
        start_x = max(0, original_center_x - total_width // 2)

        for i in range(3):
            # For visualization, we'll place them side by side
            vis_segment_x_start = start_x + i * segment_width
            vis_segment_x_end = vis_segment_x_start + segment_width

            # But each segment would actually be centered on the subject independently
            # Here we also draw the centered version more faintly
            actual_segment_x_start = max(
                0, min(width - segment_width, original_center_x - segment_width // 2)
            )
            actual_segment_x_end = min(width, actual_segment_x_start + segment_width)

            # Draw the visualization placement (side by side)
            cv2.rectangle(
                visualization,
                (vis_segment_x_start, 0),
                (vis_segment_x_end, segment_height),
                colors["threehalfs"]["16:9"],
                thickness,
            )

            # Draw the actual centered placement with dashed lines
            if i > 0:  # Only draw centered versions for parts 2 and 3
                for j in range(0, segment_height, 20):  # Dashed line effect
                    if j % 40 < 20:  # Skip every other segment
                        cv2.line(
                            visualization,
                            (actual_segment_x_start, j),
                            (actual_segment_x_start, min(j + 20, segment_height)),
                            colors["threehalfs"]["16:9"],
                            1,
                        )
                        cv2.line(
                            visualization,
                            (actual_segment_x_end, j),
                            (actual_segment_x_end, min(j + 20, segment_height)),
                            colors["threehalfs"]["16:9"],
                            1,
                        )

            cv2.putText(
                visualization,
                f"16:9 Part {i+1}",
                (vis_segment_x_start + 5, 30 + i * 30),
                font,
                font_scale,
                colors["threehalfs"]["16:9"],
                font_thickness,
            )

        # For 9:16 (three 9:5.3 segments stacked top to bottom - visually only)
        aspect_9_16 = 9 / 16
        target_width_9_16 = int(height * aspect_9_16)
        x_start = max(
            0,
            min(width - target_width_9_16, original_center_x - target_width_9_16 // 2),
        )
        x_end = x_start + target_width_9_16

        aspect_9_5_3 = 9 / 5.3
        segment_width_9_16 = target_width_9_16
        segment_height_9_16 = int(segment_width_9_16 / aspect_9_5_3)

        # Calculate total height for visualization purposes
        total_height = segment_height_9_16 * 3
        start_y = max(0, height // 2 - total_height // 2)

        for i in range(3):
            # For visualization, we'll place them stacked
            vis_segment_y_start = start_y + i * segment_height_9_16
            vis_segment_y_end = min(height, vis_segment_y_start + segment_height_9_16)

            # But each segment would actually be centered on the subject independently
            # Here we also draw the centered version more faintly
            actual_segment_y_start = max(
                0,
                min(
                    height - segment_height_9_16,
                    original_center_y - segment_height_9_16 // 2,
                ),
            )
            actual_segment_y_end = min(
                height, actual_segment_y_start + segment_height_9_16
            )

            # Draw the visualization placement (stacked)
            cv2.rectangle(
                visualization,
                (x_start, vis_segment_y_start),
                (x_end, vis_segment_y_end),
                colors["threehalfs"]["9:16"],
                thickness,
            )

            # Draw the actual centered placement with dashed lines
            if i > 0:  # Only draw centered versions for parts 2 and 3
                for j in range(x_start, x_end, 20):  # Dashed line effect
                    if j % 40 < 20:  # Skip every other segment
                        cv2.line(
                            visualization,
                            (j, actual_segment_y_start),
                            (min(j + 20, x_end), actual_segment_y_start),
                            colors["threehalfs"]["9:16"],
                            1,
                        )
                        cv2.line(
                            visualization,
                            (j, actual_segment_y_end),
                            (min(j + 20, x_end), actual_segment_y_end),
                            colors["threehalfs"]["9:16"],
                            1,
                        )

            cv2.putText(
                visualization,
                f"9:16 Part {i+1}",
                (x_start + 5, vis_segment_y_start + 30),
                font,
                font_scale,
                colors["threehalfs"]["9:16"],
                font_thickness,
            )

    # Draw twothirdhalfs layouts
    # NOTE: var1 only renders the 16:9 (side-by-side) version and var2 only the
    # 9:16 (stacked) version, because aspect_key is derived from the variant.
    elif layout_type == "twothirdhalfs_var1" or layout_type == "twothirdhalfs_var2":
        aspect_key = "16:9" if layout_type.endswith("var1") else "9:16"
        layout_color = colors[
            (
                "twothirdhalfs_var1"
                if layout_type.endswith("var1")
                else "twothirdhalfs_var2"
            )
        ][aspect_key]

        if aspect_key == "16:9":
            # For 16:9 (two segments side by side)
            aspect_10_6_9 = 10.6 / 9
            aspect_5_3_9 = 5.3 / 9
            segment1_height = height
            segment1_width = int(
                segment1_height
                * (aspect_10_6_9 if layout_type.endswith("var1") else aspect_5_3_9)
            )
            segment2_height = height
            segment2_width = int(
                segment2_height
                * (aspect_5_3_9 if layout_type.endswith("var1") else aspect_10_6_9)
            )

            # First segment: nudge its center left to leave room for the second
            segment_center_x = original_center_x - segment2_width // 4
            segment_x_start = int(
                max(
                    0,
                    min(width - segment1_width, segment_center_x - segment1_width // 2),
                )
            )
            segment_x_end = int(min(width, segment_x_start + segment1_width))
            cv2.rectangle(
                visualization,
                (segment_x_start, 0),
                (segment_x_end, segment1_height),
                layout_color,
                thickness,
            )
            cv2.putText(
                visualization,
                "16:9 Part 1",
                (segment_x_start + 5, 30),
                font,
                font_scale,
                layout_color,
                font_thickness,
            )

            # Second segment: nudged right symmetrically
            segment_center_x = original_center_x + segment1_width // 4
            segment_x_start = int(
                max(
                    0,
                    min(width - segment2_width, segment_center_x - segment2_width // 2),
                )
            )
            segment_x_end = int(min(width, segment_x_start + segment2_width))
            cv2.rectangle(
                visualization,
                (segment_x_start, 0),
                (segment_x_end, segment2_height),
                layout_color,
                thickness,
            )
            cv2.putText(
                visualization,
                "16:9 Part 2",
                (segment_x_start + 5, 60),
                font,
                font_scale,
                layout_color,
                font_thickness,
            )
        else:  # aspect_key == "9:16"
            # For 9:16 (two segments stacked)
            aspect_9_16 = 9 / 16
            target_width_9_16 = int(height * aspect_9_16)
            x_start = max(
                0,
                min(
                    width - target_width_9_16,
                    original_center_x - target_width_9_16 // 2,
                ),
            )
            x_end = x_start + target_width_9_16

            aspect_9_10_6 = 9 / 10.6
            aspect_9_5_3 = 9 / 5.3
            segment1_width = target_width_9_16
            segment1_height = int(
                segment1_width
                / (aspect_9_10_6 if layout_type.endswith("var1") else aspect_9_5_3)
            )
            segment2_width = target_width_9_16
            segment2_height = int(
                segment2_width
                / (aspect_9_5_3 if layout_type.endswith("var1") else aspect_9_10_6)
            )

            # First segment (top)
            segment_y_start = 0
            segment_y_end = min(height, segment_y_start + segment1_height)
            cv2.rectangle(
                visualization,
                (x_start, segment_y_start),
                (x_end, segment_y_end),
                layout_color,
                thickness,
            )
            cv2.putText(
                visualization,
                "9:16 Part 1",
                (x_start + 5, segment_y_start + 30),
                font,
                font_scale,
                layout_color,
                font_thickness,
            )

            # Second segment (bottom)
            segment_y_start = segment_y_end
            segment_y_end = min(height, segment_y_start + segment2_height)
            cv2.rectangle(
                visualization,
                (x_start, segment_y_start),
                (x_end, segment_y_end),
                layout_color,
                thickness,
            )
            cv2.putText(
                visualization,
                "9:16 Part 2",
                (x_start + 5, segment_y_start + 30),
                font,
                font_scale,
                layout_color,
                font_thickness,
            )

    # Draw twoequalhalfs layouts
    elif layout_type == "twoequalhalfs":
        # For 16:9 (two 8:9 segments side by side)
        aspect_8_9 = 8 / 9
        segment_height = height
        segment_width = int(segment_height * aspect_8_9)

        # First segment (left): centered half a segment left of the subject
        segment_center_x = original_center_x - segment_width // 2
        segment_x_start = int(
            max(0, min(width - segment_width, segment_center_x - segment_width // 2))
        )
        segment_x_end = int(min(width, segment_x_start + segment_width))
        cv2.rectangle(
            visualization,
            (segment_x_start, 0),
            (segment_x_end, segment_height),
            colors["twoequalhalfs"]["16:9"],
            thickness,
        )
        cv2.putText(
            visualization,
            "16:9 Equal 1",
            (segment_x_start + 5, 30),
            font,
            font_scale,
            colors["twoequalhalfs"]["16:9"],
            font_thickness,
        )

        # Second segment (right): centered half a segment right of the subject
        segment_center_x = original_center_x + segment_width // 2
        segment_x_start = int(
            max(0, min(width - segment_width, segment_center_x - segment_width // 2))
        )
        segment_x_end = int(min(width, segment_x_start + segment_width))
        cv2.rectangle(
            visualization,
            (segment_x_start, 0),
            (segment_x_end, segment_height),
            colors["twoequalhalfs"]["16:9"],
            thickness,
        )
        cv2.putText(
            visualization,
            "16:9 Equal 2",
            (segment_x_start + 5, 60),
            font,
            font_scale,
            colors["twoequalhalfs"]["16:9"],
            font_thickness,
        )

        # For 9:16 (two 9:8 segments stacked)
        aspect_9_16 = 9 / 16
        target_width_9_16 = int(height * aspect_9_16)
        x_start = max(
            0,
            min(width - target_width_9_16, original_center_x - target_width_9_16 // 2),
        )
        x_end = x_start + target_width_9_16

        aspect_9_8 = 9 / 8
        segment_width_9_16 = target_width_9_16
        segment_height_9_16 = int(segment_width_9_16 / aspect_9_8)

        # First segment (top)
        segment_y_start = 0
        segment_y_end = min(height, segment_y_start + segment_height_9_16)
        cv2.rectangle(
            visualization,
            (x_start, segment_y_start),
            (x_end, segment_y_end),
            colors["twoequalhalfs"]["9:16"],
            thickness,
        )
        cv2.putText(
            visualization,
            "9:16 Equal 1",
            (x_start + 5, segment_y_start + 30),
            font,
            font_scale,
            colors["twoequalhalfs"]["9:16"],
            font_thickness,
        )

        # Second segment (bottom)
        segment_y_start = segment_y_end
        segment_y_end = min(height, segment_y_start + segment_height_9_16)
        cv2.rectangle(
            visualization,
            (x_start, segment_y_start),
            (x_end, segment_y_end),
            colors["twoequalhalfs"]["9:16"],
            thickness,
        )
        cv2.putText(
            visualization,
            "9:16 Equal 2",
            (x_start + 5, segment_y_start + 30),
            font,
            font_scale,
            colors["twoequalhalfs"]["9:16"],
            font_thickness,
        )

    # Draw center point of person(s): white filled dot with a black outline
    center_radius = 8
    cv2.circle(
        visualization,
        (original_center_x, original_center_y),
        center_radius,
        (255, 255, 255),
        -1,
    )
    cv2.circle(
        visualization,
        (original_center_x, original_center_y),
        center_radius,
        (0, 0, 0),
        2,
    )

    # Convert back to PIL format (BGR -> RGB)
    visualization_pil = Image.fromarray(cv2.cvtColor(visualization, cv2.COLOR_BGR2RGB))

    return visualization_pil
def get_image_crop(cid=None, rsid=None, uid=None, ct=None):
    """
    Function that returns both standard and layout variations for visualization.

    Fetches sprite sheets from Firebase (falling back to bundled sample images on
    any failure), extracts each sheet's middle thumbnail, asks the vision model
    for crop divisions, builds every layout variant, and collects all results
    into one gallery.

    Args:
        cid: Client id forwarded to get_sprite_firebase.
        rsid: Recording session id forwarded to get_sprite_firebase.
        uid: User id forwarded to get_sprite_firebase.
        ct: Content-type hint forwarded to analyze_image.

    Returns:
        gr.Gallery: Gallery of all generated images
    """
    try:
        sprites_data = get_sprite_firebase(cid, rsid, uid)
        image_paths = [sprite_data["url"] for sprite_data in sprites_data]
        # NOTE(review): durations is never used below; it also means a missing
        # "duration" key triggers the local fallback — confirm whether that is
        # intentional before removing it.
        durations = [sprite_data["duration"] for sprite_data in sprites_data]
    except Exception:
        # Best-effort fallback to bundled sample sprite sheets when Firebase is
        # unavailable or returns unexpected data.
        image_paths = [
            # "data/C2-Roll3D-i2x-Take2-Nov19.24-PST02.31.31pm.jpg",
            # "data/E2-HamzaA-i2x-Take2-Nov19.24-PST02.31.31pm.jpg",
            "data/F2-Roll4D-i2x-Take2-Nov19.24-PST02.31.31pm.jpg",
            "data/G2-Roll5D-i2x-Take2-Nov19.24-PST02.31.31pm.jpg",
            "data/C1-Roll10D-i1x-Take2-Mar20.25-PST12.14.56pm.jpg",
            "data/C2-Roll10D-i2x-Take2-Mar20.25-PST12.14.56pm.jpg",
        ]

    # Lists to store all images and their gallery captions (kept in lockstep)
    all_images = []
    all_captions = []

    for image_path in image_paths:
        # Load image (from local file or URL)
        try:
            if image_path.startswith(("http://", "https://")):
                # NOTE(review): no timeout — a hung URL blocks this loop; consider
                # requests.get(image_path, timeout=...) here.
                response = requests.get(image_path)
                input_image = Image.open(BytesIO(response.content))
            else:
                input_image = Image.open(image_path)
        except Exception as e:
            # Skip unreadable images rather than failing the whole gallery
            print(f"Error loading image {image_path}: {e}")
            continue

        # Get the middle thumbnail of the sprite sheet
        mid_image = get_middle_thumbnail(input_image)

        # Add numbered divisions for GPT-4V analysis
        numbered_mid_image = add_top_numbers(
            input_image=mid_image,
            num_divisions=20,
            margin=50,
            font_size=30,
            dot_spacing=20,
        )

        # Analyze the image to get optimal crop divisions
        # This uses GPT-4V to identify the optimal crop points
        (left_division, right_division, num_of_speakers) = analyze_image(
            numbered_mid_image, remove_unwanted_prompt(1), mid_image, ct
        )

        # Safety check for divisions: clamp out-of-range values and reset to the
        # full 1..20 span when the model returns an inverted/empty interval
        if left_division <= 0:
            left_division = 1
        if right_division > 20:
            right_division = 20
        if left_division >= right_division:
            left_division = 1
            right_division = 20

        print(f"Using divisions: left={left_division}, right={right_division}")

        # Create layouts and cutouts using the new function
        (
            standard_crops,
            threehalfs_layouts,
            twothirdhalfs_layouts,
            twoequalhalfs_layouts,
            visualization_data,
        ) = create_layouts(mid_image, left_division, right_division, num_of_speakers)

        # Create all the required visualizations
        # 1. Standard aspect ratio visualization (16:9 and 9:16)
        standard_visualization = draw_layout_regions(
            mid_image, left_division, right_division, visualization_data, "standard"
        )
        all_images.append(standard_visualization)
        all_captions.append(
            f"Standard Aspect Ratios (16:9 & 9:16) {standard_visualization.size}"
        )

        # Add input and middle image to gallery
        all_images.append(input_image)
        all_captions.append(f"Input Image {input_image.size}")
        all_images.append(mid_image)
        all_captions.append(f"Middle Thumbnail {mid_image.size}")

        # Add standard crops
        for key, crop in standard_crops.items():
            all_images.append(crop)
            all_captions.append(f"{key} {crop.size}")

        # Add threehalfs layouts
        for key, layout in threehalfs_layouts.items():
            all_images.append(layout)
            all_captions.append(f"Three Halfs {key} {layout.size}")

        # Add twothirdhalfs layouts
        for key, layout in twothirdhalfs_layouts.items():
            all_images.append(layout)
            all_captions.append(f"Two-Thirds Halfs {key} {layout.size}")

        # Add twoequalhalfs layouts
        for key, layout in twoequalhalfs_layouts.items():
            all_images.append(layout)
            all_captions.append(f"Two Equal Halfs {key} {layout.size}")

    # Return gallery with all images
    return gr.Gallery(value=list(zip(all_images, all_captions)))