|
import base64 |
|
import os |
|
from io import BytesIO |
|
|
|
import cv2 |
|
import gradio as gr |
|
import numpy as np |
|
import pyrebase |
|
import requests |
|
from openai import OpenAI |
|
from PIL import Image, ImageDraw, ImageFont |
|
from ultralytics import YOLO |
|
|
|
from prompts import remove_unwanted_prompt |
|
|
|
# Person detector used throughout this module (COCO class 0 = person).
# Loaded once at import time; YOLO11-nano keeps inference cheap on CPU.
model = YOLO("yolo11n.pt")
|
|
|
|
|
def get_middle_thumbnail(input_image: Image, grid_size=(10, 10), padding=3):
    """
    Return the center thumbnail of a sprite sheet, with the per-thumbnail
    padding stripped off. Handles any sheet aspect ratio.

    Args:
        input_image: PIL Image containing the sprite sheet
        grid_size: tuple of (columns, rows) in the sprite grid
        padding: padding pixels on each side of every thumbnail (default 3)

    Returns:
        PIL.Image: the middle thumbnail with padding removed
    """
    cols, rows = grid_size
    sheet_width, sheet_height = input_image.size

    # Full cell size (thumbnail + padding on both sides), and the visible
    # thumbnail size inside each cell.
    cell_width = sheet_width // cols
    cell_height = sheet_height // rows
    visible_width = cell_width - 2 * padding
    visible_height = cell_height - 2 * padding

    # Middle cell in row-major order.
    middle_index = (cols * rows) // 2
    row, col = divmod(middle_index, cols)

    # Crop box of the visible area inside that cell.
    left = col * cell_width + padding
    top = row * cell_height + padding
    return input_image.crop((left, top, left + visible_width, top + visible_height))
|
|
|
|
|
def encode_image_to_base64(image: Image.Image, format: str = "JPEG") -> str:
    """
    Serialize a PIL image and return it as a base64 string.

    Args:
        image: PIL Image object
        format: format handed to ``Image.save`` (default: JPEG)

    Returns:
        Base64-encoded string of the encoded image bytes
    """
    buffer = BytesIO()
    image.save(buffer, format=format)
    raw_bytes = buffer.getvalue()
    return base64.b64encode(raw_bytes).decode("utf-8")
|
|
|
|
|
def add_top_numbers(
    input_image,
    num_divisions=20,
    margin=90,
    font_size=70,
    dot_spacing=20,
):
    """
    Add numbered divisions across the top and bottom of an image, with a
    dotted vertical guide line at the center of each division.

    Args:
        input_image (Image): PIL Image
        num_divisions (int): number of divisions to create
        margin (int): height in pixels of the top/bottom number strips
        font_size (int): font size for the numbers
        dot_spacing (int): vertical spacing between dots in pixels

    Returns:
        PIL.Image: new image with white margins, numbers, and dotted guides
    """
    original_image = input_image

    # New canvas with a white strip above and below the original image.
    new_width = original_image.width
    new_height = original_image.height + (2 * margin)
    new_image = Image.new("RGB", (new_width, new_height), "white")
    new_image.paste(original_image, (0, margin))

    draw = ImageDraw.Draw(new_image)

    try:
        font = ImageFont.truetype("arial.ttf", font_size)
    except OSError:
        # Arial is typically unavailable on Linux; use the bundled font.
        print("Using default font")
        font = ImageFont.load_default(size=font_size)

    division_width = original_image.width / num_divisions

    for i in range(num_divisions):
        # Horizontal center of this division.
        x = (i * division_width) + (division_width / 2)

        # Number labels in the top and bottom margins; anchor="mm" centers
        # the text on the given point.
        draw.text((x, margin // 2), str(i + 1), fill="black", font=font, anchor="mm")
        draw.text(
            (x, new_height - (margin // 2)),
            str(i + 1),
            fill="black",
            font=font,
            anchor="mm",
        )

        # Dotted vertical guide spanning the original image area.
        # FIX: the previous code passed a 4-element bounding box to
        # ImageDraw.circle(), which expects an (x, y) center point — the dots
        # were drawn off-center at (x-1, y-1) and the extra coordinates were
        # silently ignored; `width` is also meaningless with `fill`, and
        # ImageDraw.circle requires Pillow >= 10.4. draw.ellipse with an
        # explicit bounding box is equivalent and portable.
        y_start = margin
        y_end = new_height - margin
        dot_radius = 2
        current_y = y_start
        while current_y < y_end:
            draw.ellipse(
                [
                    x - dot_radius,
                    current_y - dot_radius,
                    x + dot_radius,
                    current_y + dot_radius,
                ],
                fill="black",
            )
            current_y += dot_spacing

    return new_image
|
|
|
|
|
def analyze_image(numbered_input_image: Image, prompt, input_image, ct):
    """
    Ask GPT-4o where the speakers sit inside a numbered image.

    Args:
        numbered_input_image (Image): PIL Image with division numbers drawn on
        prompt (str): the prompt/question about the image
        input_image (Image): input image without numbers (currently unused)
        ct: unused; kept for interface compatibility with callers

    Returns:
        tuple: (left_row, right_row, num_of_speakers). Falls back to
        (0, 20, 1) when the model response cannot be parsed.
    """
    import json

    client = OpenAI()
    base64_image = encode_image_to_base64(numbered_input_image, format="JPEG")

    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
                },
            ],
        }
    ]

    response = client.chat.completions.create(
        model="gpt-4o", messages=messages, max_tokens=300
    )

    # Second round-trip: ask the model to reformat its own answer as JSON.
    messages.extend(
        [
            {"role": "assistant", "content": response.choices[0].message.content},
            {
                "role": "user",
                "content": "please return the response in the json with keys left_row, right_row, and num_of_speakers",
            },
        ],
    )

    response = (
        client.chat.completions.create(model="gpt-4o", messages=messages)
        .choices[0]
        .message.content
    )

    # Extract the outermost JSON object — the model may wrap it in prose or
    # a markdown fence.
    left_index = response.find("{")
    right_index = response.rfind("}")

    try:
        if left_index == -1 or right_index == -1:
            # FIX: previously this fell through to a NameError on an unbound
            # response_json; raise so the except clause handles it uniformly.
            raise ValueError("no JSON object found in model response")
        snippet = response[left_index : right_index + 1]
        print(snippet)
        # FIX: json.loads instead of eval — never execute model output as
        # code. Non-JSON output now falls through to the fallback below.
        response_json = json.loads(snippet)
        return (
            response_json["left_row"],
            response_json["right_row"],
            response_json["num_of_speakers"],
        )
    except Exception as e:
        print(e)
        # FIX: the old failure path returned a 2-tuple while the success
        # path returns three values, crashing the caller's unpack. Fall back
        # to the full image width and a single speaker.
        return 0, 20, 1
|
|
|
|
|
def get_sprite_firebase(cid, rsid, uid):
    """
    Fetch sprite-sheet records for a collab edit session from Firebase.

    Args:
        cid: client id component of the database path
        rsid: roll session id component of the database path
        uid: user id component of the database path

    Returns:
        The value stored at
        ``{ROLL_ACCOUNT}/collab_sprite_link_handler/{uid}/{cid}/{rsid}``.
    """
    # FIX: each value used to be wrapped in an f-string, which converted a
    # missing environment variable into the literal string "None" instead of
    # a real null — pass os.getenv results through unchanged.
    config = {
        "apiKey": os.getenv("FIREBASE_API_KEY"),
        "authDomain": os.getenv("FIREBASE_AUTH_DOMAIN"),
        "databaseURL": os.getenv("FIREBASE_DATABASE_URL"),
        "projectId": os.getenv("FIREBASE_PROJECT_ID"),
        "storageBucket": os.getenv("FIREBASE_STORAGE_BUCKET"),
        "messagingSenderId": os.getenv("FIREBASE_MESSAGING_SENDER_ID"),
        "appId": os.getenv("FIREBASE_APP_ID"),
        "measurementId": os.getenv("FIREBASE_MEASUREMENT_ID"),
    }

    firebase = pyrebase.initialize_app(config)
    db = firebase.database()
    account_id = os.getenv("ROLL_ACCOUNT")

    COLLAB_EDIT_LINK = "collab_sprite_link_handler"

    path = f"{account_id}/{COLLAB_EDIT_LINK}/{uid}/{cid}/{rsid}"

    data = db.child(path).get()
    return data.val()
|
|
|
|
|
def find_persons_center(image, num_of_speakers=1):
    """
    Locate the horizontal center of the largest detected persons.

    Runs the YOLO person detector; when several persons are found, the
    bounding boxes of the ``num_of_speakers`` largest ones are merged and
    the midpoint of the merged span is returned.

    Args:
        image: CV2/numpy array image
        num_of_speakers: number of speakers to consider (default: 1)

    Returns:
        int: x-coordinate of the center point of all considered persons
    """
    detections = model(image, classes=[0], conf=0.6)

    # Nobody detected: fall back to the geometric center of the frame.
    if not detections or len(detections[0].boxes) == 0:
        return image.shape[1] // 2

    boxes = detections[0].boxes.xyxy.cpu().numpy()
    print(f"Detected {len(boxes)} persons in the image")

    if len(boxes) == 1:
        x1, _, x2, _ = boxes[0]
        center_x = int((x1 + x2) // 2)
        print(f"Single person detected at center x: {center_x}")
        return center_x

    # Rank detections by bounding-box area, largest first.
    areas = [(b[2] - b[0]) * (b[3] - b[1]) for b in boxes]
    ranked = sorted(range(len(areas)), key=lambda idx: areas[idx], reverse=True)

    # Keep only the largest num_of_speakers boxes.
    num_boxes_to_use = min(num_of_speakers, len(boxes))
    chosen = [boxes[idx] for idx in ranked[:num_boxes_to_use]]

    # Merge the chosen boxes horizontally and take the midpoint.
    left_x = min(b[0] for b in chosen)
    right_x = max(b[2] for b in chosen)
    merged_center_x = int((left_x + right_x) // 2)

    print(
        f"{num_boxes_to_use} largest persons merged bounding box center x: {merged_center_x}"
    )
    print(f"Merged bounds: left={left_x}, right={right_x}")

    return merged_center_x
|
|
|
|
|
def create_layouts(image, left_division, right_division, num_of_speakers):
    """
    Create different layout variations of the image using specific aspect ratios.
    All layout variations will be centered on detected persons.

    Args:
        image: PIL Image (or an already-converted BGR numpy array)
        left_division: Left division index (1-20)
        right_division: Right division index (1-20)
        num_of_speakers: how many of the largest detected persons to merge
            when computing the subject center

    Returns:
        tuple: (standard_crops, threehalfs_layouts, twothirdhalfs_layouts, twoequalhalfs_layouts, visualization_data)
    """
    # Accept either a PIL image or a cv2 BGR array.
    if isinstance(image, Image.Image):
        image_cv = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
    else:
        image_cv = image.copy()

    height, width = image_cv.shape[:2]

    # Convert the 1-based, 20-column division indices into pixel bounds and
    # slice out the region of interest.
    division_width = width / 20
    left_boundary = int((left_division - 1) * division_width)
    right_boundary = int(right_division * division_width)

    cutout_image = image_cv[:, left_boundary:right_boundary].copy()
    cutout_width = right_boundary - left_boundary
    cutout_height = cutout_image.shape[0]

    # Re-run person detection on the cutout (class 0 = person).
    results = model(cutout_image, classes=[0], conf=0.6)

    # Defaults when nothing is detected: geometric center, full height.
    cutout_center_x = cutout_image.shape[1] // 2
    cutout_center_y = cutout_height // 2

    person_top = 0.0
    person_height = float(cutout_height)

    if results and len(results[0].boxes) > 0:
        boxes = results[0].boxes.xyxy.cpu().numpy()

        if len(boxes) == 1:
            # Single person: center on their box directly.
            x1, y1, x2, y2 = boxes[0]
            cutout_center_x = int((x1 + x2) // 2)
            cutout_center_y = int((y1 + y2) // 2)
            person_top = y1
            person_height = y2 - y1
        else:
            # Multiple persons: merge the num_of_speakers largest boxes
            # (by area) and center on the merged extent.
            box_areas = [(box[2] - box[0]) * (box[3] - box[1]) for box in boxes]
            sorted_indices = sorted(
                range(len(box_areas)), key=lambda i: box_areas[i], reverse=True
            )
            num_boxes_to_use = min(num_of_speakers, len(boxes))
            selected_indices = sorted_indices[:num_boxes_to_use]
            selected_boxes = [boxes[i] for i in selected_indices]

            left_x = min(box[0] for box in selected_boxes)
            right_x = max(box[2] for box in selected_boxes)
            top_y = min(box[1] for box in selected_boxes)
            bottom_y = max(box[3] for box in selected_boxes)

            cutout_center_x = int((left_x + right_x) // 2)
            cutout_center_y = int((top_y + bottom_y) // 2)
            person_top = top_y
            person_height = bottom_y - top_y

    # ---- Standard 16:9 and 9:16 crops of the cutout -------------------
    aspect_16_9 = 16 / 9
    aspect_9_16 = 9 / 16

    # 16:9: keep full cutout width, pick the vertical band that starts just
    # above the person (5% head margin), clamped inside the cutout.
    target_height_16_9 = int(cutout_width / aspect_16_9)
    if target_height_16_9 <= cutout_height:
        top_margin = int(person_height * 0.05)
        y_start = int(max(0, person_top - top_margin))
        if y_start + target_height_16_9 > cutout_height:
            y_start = int(max(0, cutout_height - target_height_16_9))
        y_end = int(min(cutout_height, y_start + target_height_16_9))
        cutout_16_9 = cutout_image[y_start:y_end, :].copy()
    else:
        # Cutout too short for full-width 16:9: keep full height and crop
        # horizontally around the subject instead.
        new_width = int(cutout_height * aspect_16_9)
        x_start = max(
            0, min(cutout_width - new_width, cutout_center_x - new_width // 2)
        )
        x_end = min(cutout_width, x_start + new_width)
        cutout_16_9 = cutout_image[:, x_start:x_end].copy()

    # 9:16: full height, width cropped around the subject. The vertical
    # center is lifted by 20% of the person height to favor the face.
    target_width_9_16 = int(cutout_height * aspect_9_16)

    adjusted_center_y = int(cutout_center_y - (person_height * 0.2))

    if target_width_9_16 <= cutout_width:
        x_start = int(
            max(
                0,
                min(
                    cutout_width - target_width_9_16,
                    cutout_center_x - target_width_9_16 // 2,
                ),
            )
        )
        x_end = int(min(cutout_width, x_start + target_width_9_16))

        # NOTE(review): `cutout_height - cutout_height` is always 0, so
        # min(0, ...) combined with max(0, ...) forces y_start to 0 here —
        # the adjusted_center_y term never has an effect. Looks like a typo
        # for a target-height expression; confirm intended behavior.
        y_start = int(
            max(
                0,
                min(
                    cutout_height - cutout_height,
                    adjusted_center_y - cutout_height // 2,
                ),
            )
        )
        cutout_9_16 = cutout_image[y_start:, x_start:x_end].copy()
    else:
        # Cutout narrower than the 9:16 target: keep full width and crop
        # vertically around the adjusted center instead.
        new_height = int(cutout_width / aspect_9_16)
        y_start = int(
            max(0, min(cutout_height - new_height, adjusted_center_y - new_height // 2))
        )
        y_end = int(min(cutout_height, y_start + new_height))
        cutout_9_16 = cutout_image[y_start:y_end, :].copy()

    # Subject position mapped back to full-image coordinates, for the
    # overlay drawn by draw_layout_regions.
    original_center_x = left_boundary + cutout_center_x
    original_center_y = cutout_center_y
    original_person_top = person_top

    visualization_data = {
        "original_center_x": original_center_x,
        "original_center_y": original_center_y,
        "original_person_top": original_person_top,
        "original_person_height": person_height,
        "cutout_bounds": (left_boundary, right_boundary),
    }

    # ---- "Three halfs" layout: three 5.3:9 strips side by side --------
    aspect_5_3_9 = 5.3 / 9

    segment_height_16_9 = cutout_height
    segment_width_16_9 = int(segment_height_16_9 * aspect_5_3_9)

    threehalfs_16_9_segments = []
    for i in range(3):
        # NOTE(review): the slice bounds do not depend on i, so all three
        # segments are identical crops (only the label differs). Presumably
        # each part would come from a different video in production.
        segment_x_start = int(
            max(
                0,
                min(
                    cutout_width - segment_width_16_9,
                    cutout_center_x - segment_width_16_9 // 2,
                ),
            )
        )
        segment_x_end = int(min(cutout_width, segment_x_start + segment_width_16_9))

        segment = cutout_image[:, segment_x_start:segment_x_end].copy()

        label = f"Part {i+1}"
        cv2.putText(
            segment,
            label,
            (10, 30),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.8,
            (255, 255, 255),
            2,
            cv2.LINE_AA,
        )

        threehalfs_16_9_segments.append(segment)

    # Vertical variant: three 9:5.3 bands stacked inside the 9:16 crop.
    aspect_9_5_3 = 9 / 5.3

    segment_width_9_16 = cutout_9_16.shape[1]
    segment_height_9_16 = int(segment_width_9_16 / aspect_9_5_3)

    cutout_9_16_center_y = cutout_9_16.shape[0] // 2
    adjusted_9_16_center_y = int(cutout_9_16_center_y - (person_height * 0.2))
    cutout_9_16_height = cutout_9_16.shape[0]

    threehalfs_9_16_segments = []

    for i in range(3):
        # NOTE(review): same as above — bounds are loop-invariant, so the
        # three bands are identical apart from the label.
        segment_y_start = int(
            max(
                0,
                min(
                    cutout_9_16_height - segment_height_9_16,
                    person_top,
                ),
            )
        )
        segment_y_end = int(
            min(cutout_9_16_height, segment_y_start + segment_height_9_16)
        )

        segment = cutout_9_16[segment_y_start:segment_y_end, :].copy()

        label = f"Part {i+1}"
        cv2.putText(
            segment,
            label,
            (10, 30),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.8,
            (255, 255, 255),
            2,
            cv2.LINE_AA,
        )

        threehalfs_9_16_segments.append(segment)

    # ---- "Two thirds + half" layout: one 10.6:9 and one 5.3:9 strip ----
    aspect_10_6_9 = 10.6 / 9

    segment1_height_16_9 = cutout_height
    segment1_width_16_9 = int(segment1_height_16_9 * aspect_10_6_9)
    segment2_height_16_9 = cutout_height
    segment2_width_16_9 = int(segment2_height_16_9 * aspect_5_3_9)

    # Wide (10.6:9) segment, centered on the subject.
    segment_x_start = int(
        max(
            0,
            min(
                cutout_width - segment1_width_16_9,
                cutout_center_x - segment1_width_16_9 // 2,
            ),
        )
    )
    segment_x_end = int(min(cutout_width, segment_x_start + segment1_width_16_9))
    segment1 = cutout_image[:, segment_x_start:segment_x_end].copy()

    cv2.putText(
        segment1,
        "10.6:9",
        (10, 30),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.8,
        (255, 255, 255),
        2,
        cv2.LINE_AA,
    )

    # Narrow (5.3:9) segment, also centered on the subject.
    segment_x_start = int(
        max(
            0,
            min(
                cutout_width - segment2_width_16_9,
                cutout_center_x - segment2_width_16_9 // 2,
            ),
        )
    )
    segment_x_end = int(min(cutout_width, segment_x_start + segment2_width_16_9))
    segment2 = cutout_image[:, segment_x_start:segment_x_end].copy()

    cv2.putText(
        segment2,
        "5.3:9",
        (10, 30),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.8,
        (255, 255, 255),
        2,
        cv2.LINE_AA,
    )

    twothirdhalfs_16_9_var1_segments = [segment1, segment2]

    # Variant 2 simply swaps the order (narrow first, wide second).
    twothirdhalfs_16_9_var2_segments = [segment2.copy(), segment1.copy()]

    # Vertical equivalents: 9:10.6 on top of 9:5.3 (and swapped).
    aspect_9_10_6 = 9 / 10.6
    aspect_9_5_3 = 9 / 5.3

    segment1_width_9_16 = cutout_9_16.shape[1]
    segment1_height_9_16 = int(segment1_width_9_16 / aspect_9_10_6)
    segment2_width_9_16 = cutout_9_16.shape[1]
    segment2_height_9_16 = int(segment2_width_9_16 / aspect_9_5_3)

    # Tall (9:10.6) band centered on the face-biased center.
    segment_y_start = int(
        max(
            0,
            min(
                cutout_9_16_height - segment1_height_9_16,
                adjusted_9_16_center_y - segment1_height_9_16 // 2,
            ),
        )
    )
    segment_y_end = int(min(cutout_9_16_height, segment_y_start + segment1_height_9_16))
    segment1 = cutout_9_16[segment_y_start:segment_y_end, :].copy()

    cv2.putText(
        segment1,
        "9:10.6",
        (10, 30),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.8,
        (255, 255, 255),
        2,
        cv2.LINE_AA,
    )

    # Short (9:5.3) band anchored at the person's top edge.
    segment_y_start = int(
        max(
            0,
            min(
                cutout_9_16_height - segment2_height_9_16,
                person_top,
            ),
        )
    )
    segment_y_end = int(min(cutout_9_16_height, segment_y_start + segment2_height_9_16))
    segment2 = cutout_9_16[segment_y_start:segment_y_end, :].copy()

    cv2.putText(
        segment2,
        "9:5.3",
        (10, 30),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.8,
        (255, 255, 255),
        2,
        cv2.LINE_AA,
    )

    twothirdhalfs_9_16_var1_segments = [segment1, segment2]

    # Variant 2: swapped stacking order.
    twothirdhalfs_9_16_var2_segments = [segment2.copy(), segment1.copy()]

    # ---- "Two equal halfs" layout: two identical 8:9 strips -----------
    aspect_8_9 = 8 / 9

    segment_height_16_9_equal = cutout_height
    segment_width_16_9_equal = int(segment_height_16_9_equal * aspect_8_9)

    segment_x_start = int(
        max(
            0,
            min(
                cutout_width - segment_width_16_9_equal,
                cutout_center_x - segment_width_16_9_equal // 2,
            ),
        )
    )
    segment_x_end = int(min(cutout_width, segment_x_start + segment_width_16_9_equal))
    segment1 = cutout_image[:, segment_x_start:segment_x_end].copy()

    cv2.putText(
        segment1,
        "8:9 (1)",
        (10, 30),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.8,
        (255, 255, 255),
        2,
        cv2.LINE_AA,
    )

    # Second half is a copy of the first with a different label.
    segment2 = segment1.copy()

    cv2.putText(
        segment2,
        "8:9 (2)",
        (10, 30),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.8,
        (255, 255, 255),
        2,
        cv2.LINE_AA,
    )

    twoequalhalfs_16_9_segments = [segment1, segment2]

    # Vertical equivalent: two identical 9:8 bands.
    aspect_9_8 = 9 / 8

    segment_width_9_16_equal = cutout_9_16.shape[1]
    segment_height_9_16_equal = int(segment_width_9_16_equal / aspect_9_8)

    # Anchor just above the person (5% head margin), clamped to the crop.
    segment_y_start = int(
        max(
            0,
            min(
                cutout_9_16_height - segment_height_9_16_equal,
                max(0, person_top - person_height * 0.05),
            ),
        )
    )
    segment_y_end = int(
        min(cutout_9_16_height, segment_y_start + segment_height_9_16_equal)
    )
    segment1 = cutout_9_16[segment_y_start:segment_y_end, :].copy()

    cv2.putText(
        segment1,
        "9:8 (1)",
        (10, 30),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.8,
        (255, 255, 255),
        2,
        cv2.LINE_AA,
    )

    segment2 = segment1.copy()

    cv2.putText(
        segment2,
        "9:8 (2)",
        (10, 30),
        cv2.FONT_HERSHEY_SIMPLEX,
        0.8,
        (255, 255, 255),
        2,
        cv2.LINE_AA,
    )

    twoequalhalfs_9_16_segments = [segment1, segment2]

    def create_composite(segments, horizontal=True):
        # Concatenate segments into one canvas, left-to-right or
        # top-to-bottom; shorter/narrower segments are zero-padded (black).
        if not segments:
            return None

        if horizontal:
            total_width = sum(segment.shape[1] for segment in segments)
            max_height = max(segment.shape[0] for segment in segments)

            composite = np.zeros((max_height, total_width, 3), dtype=np.uint8)

            x_offset = 0
            for segment in segments:
                h, w = segment.shape[:2]
                composite[:h, x_offset : x_offset + w] = segment
                x_offset += w

        else:
            max_width = max(segment.shape[1] for segment in segments)
            total_height = sum(segment.shape[0] for segment in segments)

            composite = np.zeros((total_height, max_width, 3), dtype=np.uint8)

            y_offset = 0
            for segment in segments:
                h, w = segment.shape[:2]
                composite[y_offset : y_offset + h, :w] = segment
                y_offset += h

        return composite

    # Assemble the per-layout composites.
    threehalfs_16_9_composite = create_composite(
        threehalfs_16_9_segments, horizontal=True
    )
    threehalfs_9_16_composite = create_composite(
        threehalfs_9_16_segments, horizontal=False
    )

    twothirdhalfs_16_9_var1_composite = create_composite(
        twothirdhalfs_16_9_var1_segments, horizontal=True
    )
    twothirdhalfs_16_9_var2_composite = create_composite(
        twothirdhalfs_16_9_var2_segments, horizontal=True
    )
    twothirdhalfs_9_16_var1_composite = create_composite(
        twothirdhalfs_9_16_var1_segments, horizontal=False
    )
    twothirdhalfs_9_16_var2_composite = create_composite(
        twothirdhalfs_9_16_var2_segments, horizontal=False
    )

    twoequalhalfs_16_9_composite = create_composite(
        twoequalhalfs_16_9_segments, horizontal=True
    )
    twoequalhalfs_9_16_composite = create_composite(
        twoequalhalfs_9_16_segments, horizontal=False
    )

    def add_label(img, label):
        # Stamp a white-on-black label box in the top-left corner (in
        # place); passes None through untouched.
        if img is None:
            return None

        font = cv2.FONT_HERSHEY_SIMPLEX
        label_settings = {
            "fontScale": 1.0,
            "fontFace": font,
            "thickness": 2,
        }

        text_size = cv2.getTextSize(
            label,
            fontFace=label_settings["fontFace"],
            fontScale=label_settings["fontScale"],
            thickness=label_settings["thickness"],
        )

        # Filled black background sized to the text plus padding.
        cv2.rectangle(
            img,
            (10, 10),
            (10 + text_size[0][0] + 10, 10 + text_size[0][1] + 10),
            (0, 0, 0),
            -1,
        )

        cv2.putText(
            img,
            label,
            (15, 15 + text_size[0][1]),
            fontFace=label_settings["fontFace"],
            fontScale=label_settings["fontScale"],
            thickness=label_settings["thickness"],
            color=(255, 255, 255),
            lineType=cv2.LINE_AA,
        )
        return img

    # Label every output variant.
    cutout_image_labeled = add_label(cutout_image.copy(), "Cutout")
    cutout_16_9_labeled = add_label(cutout_16_9.copy(), "16:9")
    cutout_9_16_labeled = add_label(cutout_9_16.copy(), "9:16")

    threehalfs_16_9_labeled = add_label(threehalfs_16_9_composite, "Three Halfs 16:9")
    threehalfs_9_16_labeled = add_label(threehalfs_9_16_composite, "Three Halfs 9:16")

    twothirdhalfs_16_9_var1_labeled = add_label(
        twothirdhalfs_16_9_var1_composite, "Two Thirds Var1 16:9"
    )
    twothirdhalfs_16_9_var2_labeled = add_label(
        twothirdhalfs_16_9_var2_composite, "Two Thirds Var2 16:9"
    )
    twothirdhalfs_9_16_var1_labeled = add_label(
        twothirdhalfs_9_16_var1_composite, "Two Thirds Var1 9:16"
    )
    twothirdhalfs_9_16_var2_labeled = add_label(
        twothirdhalfs_9_16_var2_composite, "Two Thirds Var2 9:16"
    )

    twoequalhalfs_16_9_labeled = add_label(
        twoequalhalfs_16_9_composite, "Two Equal Halfs 16:9"
    )
    twoequalhalfs_9_16_labeled = add_label(
        twoequalhalfs_9_16_composite, "Two Equal Halfs 9:16"
    )

    def cv2_to_pil(img):
        # Convert a BGR cv2 array back to a PIL RGB image (None-safe).
        if img is None:
            return None
        return Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

    # Group the PIL results by layout family for the Gradio gallery.
    standard_crops = {
        "cutout": cv2_to_pil(cutout_image_labeled),
        "16:9": cv2_to_pil(cutout_16_9_labeled),
        "9:16": cv2_to_pil(cutout_9_16_labeled),
    }

    threehalfs_layouts = {
        "16:9": cv2_to_pil(threehalfs_16_9_labeled),
        "9:16": cv2_to_pil(threehalfs_9_16_labeled),
    }

    twothirdhalfs_layouts = {
        "16:9_var1": cv2_to_pil(twothirdhalfs_16_9_var1_labeled),
        "16:9_var2": cv2_to_pil(twothirdhalfs_16_9_var2_labeled),
        "9:16_var1": cv2_to_pil(twothirdhalfs_9_16_var1_labeled),
        "9:16_var2": cv2_to_pil(twothirdhalfs_9_16_var2_labeled),
    }

    twoequalhalfs_layouts = {
        "16:9": cv2_to_pil(twoequalhalfs_16_9_labeled),
        "9:16": cv2_to_pil(twoequalhalfs_9_16_labeled),
    }

    return (
        standard_crops,
        threehalfs_layouts,
        twothirdhalfs_layouts,
        twoequalhalfs_layouts,
        visualization_data,
    )
|
|
|
|
|
def draw_layout_regions(
    image, left_division, right_division, visualization_data, layout_type
):
    """
    Create a visualization showing the layout regions overlaid on the original image.
    Each region is independently centered on the subject, as in practice different videos
    would be stacked in these layouts.

    Args:
        image: PIL Image (or an already-converted BGR numpy array)
        left_division: Left division index (1-20) (currently unused; bounds
            come from visualization_data["cutout_bounds"])
        right_division: Right division index (1-20) (currently unused)
        visualization_data: Dictionary with visualization data from create_layouts
        layout_type: Type of layout to visualize (e.g., "standard", "threehalfs", "twothirdhalfs_var1", etc.)

    Returns:
        PIL Image: Original image with layout regions visualized
    """
    # Accept either a PIL image or a cv2 BGR array.
    if isinstance(image, Image.Image):
        image_cv = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
    else:
        image_cv = image.copy()

    # Draw on a copy so the caller's image is untouched.
    visualization = image_cv.copy()

    height, width = image_cv.shape[:2]

    # Subject geometry computed by create_layouts, in full-image coords.
    original_center_x = visualization_data["original_center_x"]
    original_center_y = visualization_data["original_center_y"]
    original_person_top = visualization_data["original_person_top"]
    original_person_height = visualization_data["original_person_height"]
    left_boundary, right_boundary = visualization_data["cutout_bounds"]
    cutout_width = right_boundary - left_boundary

    # BGR outline colors per layout family and orientation.
    colors = {
        "standard": {"16:9": (0, 255, 0), "9:16": (255, 0, 0)},
        "threehalfs": {"16:9": (0, 165, 255), "9:16": (255, 255, 0)},
        "twothirdhalfs_var1": {
            "16:9": (255, 0, 255),
            "9:16": (128, 0, 128),
        },
        "twothirdhalfs_var2": {
            "16:9": (0, 255, 255),
            "9:16": (128, 128, 0),
        },
        "twoequalhalfs": {
            "16:9": (0, 128, 128),
            "9:16": (255, 165, 0),
        },
    }

    # Shared drawing parameters.
    thickness = 3
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.8
    font_thickness = 2

    if layout_type == "standard":
        # 16:9 region: full cutout width, vertical band starting just above
        # the person (mirrors the crop logic in create_layouts).
        aspect_16_9 = 16 / 9
        target_height_16_9 = int(cutout_width / aspect_16_9)

        top_margin = int(original_person_height * 0.05)
        y_start = int(max(0, original_person_top - top_margin))
        if y_start + target_height_16_9 > height:
            y_start = int(max(0, height - target_height_16_9))
        y_end = int(min(height, y_start + target_height_16_9))

        cv2.rectangle(
            visualization,
            (left_boundary, y_start),
            (right_boundary, y_end),
            colors["standard"]["16:9"],
            thickness,
        )
        cv2.putText(
            visualization,
            "16:9",
            (left_boundary + 5, y_start + 30),
            font,
            font_scale,
            colors["standard"]["16:9"],
            font_thickness,
        )

        # 9:16 region: full height, centered horizontally on the subject.
        aspect_9_16 = 9 / 16
        target_width_9_16 = int(height * aspect_9_16)

        x_start = max(
            0,
            min(width - target_width_9_16, original_center_x - target_width_9_16 // 2),
        )
        x_end = x_start + target_width_9_16

        cv2.rectangle(
            visualization,
            (x_start, 0),
            (x_end, height),
            colors["standard"]["9:16"],
            thickness,
        )
        cv2.putText(
            visualization,
            "9:16",
            (x_start + 5, 30),
            font,
            font_scale,
            colors["standard"]["9:16"],
            font_thickness,
        )

    elif layout_type == "threehalfs":
        # Three 5.3:9 strips laid side by side, centered on the subject.
        aspect_5_3_9 = 5.3 / 9
        segment_height = height
        segment_width = int(segment_height * aspect_5_3_9)

        total_width = segment_width * 3
        start_x = max(0, original_center_x - total_width // 2)

        for i in range(3):
            # Where this strip sits in the side-by-side mockup.
            vis_segment_x_start = start_x + i * segment_width
            vis_segment_x_end = vis_segment_x_start + segment_width

            # Where the actual crop would be taken from (same for every
            # strip — each strip is subject-centered).
            actual_segment_x_start = max(
                0, min(width - segment_width, original_center_x - segment_width // 2)
            )
            actual_segment_x_end = min(width, actual_segment_x_start + segment_width)

            cv2.rectangle(
                visualization,
                (vis_segment_x_start, 0),
                (vis_segment_x_end, segment_height),
                colors["threehalfs"]["16:9"],
                thickness,
            )

            # Dashed vertical lines marking the shared source region.
            if i > 0:
                for j in range(0, segment_height, 20):
                    if j % 40 < 20:
                        cv2.line(
                            visualization,
                            (actual_segment_x_start, j),
                            (actual_segment_x_start, min(j + 20, segment_height)),
                            colors["threehalfs"]["16:9"],
                            1,
                        )
                        cv2.line(
                            visualization,
                            (actual_segment_x_end, j),
                            (actual_segment_x_end, min(j + 20, segment_height)),
                            colors["threehalfs"]["16:9"],
                            1,
                        )

            cv2.putText(
                visualization,
                f"16:9 Part {i+1}",
                (vis_segment_x_start + 5, 30 + i * 30),
                font,
                font_scale,
                colors["threehalfs"]["16:9"],
                font_thickness,
            )

        # Vertical variant: three 9:5.3 bands stacked within a 9:16 column.
        aspect_9_16 = 9 / 16
        target_width_9_16 = int(height * aspect_9_16)
        x_start = max(
            0,
            min(width - target_width_9_16, original_center_x - target_width_9_16 // 2),
        )
        x_end = x_start + target_width_9_16

        aspect_9_5_3 = 9 / 5.3
        segment_width_9_16 = target_width_9_16
        segment_height_9_16 = int(segment_width_9_16 / aspect_9_5_3)

        total_height = segment_height_9_16 * 3
        start_y = max(0, height // 2 - total_height // 2)

        for i in range(3):
            # Where this band sits in the stacked mockup.
            vis_segment_y_start = start_y + i * segment_height_9_16
            vis_segment_y_end = min(height, vis_segment_y_start + segment_height_9_16)

            # Where the actual crop would be taken from (subject-centered,
            # identical for every band).
            actual_segment_y_start = max(
                0,
                min(
                    height - segment_height_9_16,
                    original_center_y - segment_height_9_16 // 2,
                ),
            )
            actual_segment_y_end = min(
                height, actual_segment_y_start + segment_height_9_16
            )

            cv2.rectangle(
                visualization,
                (x_start, vis_segment_y_start),
                (x_end, vis_segment_y_end),
                colors["threehalfs"]["9:16"],
                thickness,
            )

            # Dashed horizontal lines marking the shared source region.
            if i > 0:
                for j in range(x_start, x_end, 20):
                    if j % 40 < 20:
                        cv2.line(
                            visualization,
                            (j, actual_segment_y_start),
                            (min(j + 20, x_end), actual_segment_y_start),
                            colors["threehalfs"]["9:16"],
                            1,
                        )
                        cv2.line(
                            visualization,
                            (j, actual_segment_y_end),
                            (min(j + 20, x_end), actual_segment_y_end),
                            colors["threehalfs"]["9:16"],
                            1,
                        )

            cv2.putText(
                visualization,
                f"9:16 Part {i+1}",
                (x_start + 5, vis_segment_y_start + 30),
                font,
                font_scale,
                colors["threehalfs"]["9:16"],
                font_thickness,
            )

    elif layout_type == "twothirdhalfs_var1" or layout_type == "twothirdhalfs_var2":
        # var1 is visualized in 16:9 form, var2 in 9:16 form.
        aspect_key = "16:9" if layout_type.endswith("var1") else "9:16"
        layout_color = colors[
            (
                "twothirdhalfs_var1"
                if layout_type.endswith("var1")
                else "twothirdhalfs_var2"
            )
        ][aspect_key]

        if aspect_key == "16:9":
            # Wide (10.6:9) + narrow (5.3:9) side by side; var1 puts the
            # wide strip first, var2 the narrow one.
            aspect_10_6_9 = 10.6 / 9
            aspect_5_3_9 = 5.3 / 9

            segment1_height = height
            segment1_width = int(
                segment1_height
                * (aspect_10_6_9 if layout_type.endswith("var1") else aspect_5_3_9)
            )
            segment2_height = height
            segment2_width = int(
                segment2_height
                * (aspect_5_3_9 if layout_type.endswith("var1") else aspect_10_6_9)
            )

            # First strip: shifted left so the pair straddles the subject.
            segment_center_x = original_center_x - segment2_width // 4
            segment_x_start = int(
                max(
                    0,
                    min(width - segment1_width, segment_center_x - segment1_width // 2),
                )
            )
            segment_x_end = int(min(width, segment_x_start + segment1_width))

            cv2.rectangle(
                visualization,
                (segment_x_start, 0),
                (segment_x_end, segment1_height),
                layout_color,
                thickness,
            )
            cv2.putText(
                visualization,
                f"16:9 Part 1",
                (segment_x_start + 5, 30),
                font,
                font_scale,
                layout_color,
                font_thickness,
            )

            # Second strip: shifted right by a quarter of the first width.
            segment_center_x = original_center_x + segment1_width // 4
            segment_x_start = int(
                max(
                    0,
                    min(width - segment2_width, segment_center_x - segment2_width // 2),
                )
            )
            segment_x_end = int(min(width, segment_x_start + segment2_width))

            cv2.rectangle(
                visualization,
                (segment_x_start, 0),
                (segment_x_end, segment2_height),
                layout_color,
                thickness,
            )
            cv2.putText(
                visualization,
                f"16:9 Part 2",
                (segment_x_start + 5, 60),
                font,
                font_scale,
                layout_color,
                font_thickness,
            )
        else:
            # Vertical form: a subject-centered 9:16 column split into a
            # 9:10.6 band over a 9:5.3 band (ratios swapped for var2).
            aspect_9_16 = 9 / 16
            target_width_9_16 = int(height * aspect_9_16)
            x_start = max(
                0,
                min(
                    width - target_width_9_16,
                    original_center_x - target_width_9_16 // 2,
                ),
            )
            x_end = x_start + target_width_9_16

            aspect_9_10_6 = 9 / 10.6
            aspect_9_5_3 = 9 / 5.3

            segment1_width = target_width_9_16
            segment1_height = int(
                segment1_width
                / (aspect_9_10_6 if layout_type.endswith("var1") else aspect_9_5_3)
            )
            segment2_width = target_width_9_16
            segment2_height = int(
                segment2_width
                / (aspect_9_5_3 if layout_type.endswith("var1") else aspect_9_10_6)
            )

            # Top band.
            segment_y_start = 0
            segment_y_end = min(height, segment_y_start + segment1_height)

            cv2.rectangle(
                visualization,
                (x_start, segment_y_start),
                (x_end, segment_y_end),
                layout_color,
                thickness,
            )
            cv2.putText(
                visualization,
                f"9:16 Part 1",
                (x_start + 5, segment_y_start + 30),
                font,
                font_scale,
                layout_color,
                font_thickness,
            )

            # Bottom band, stacked directly below the first.
            segment_y_start = segment_y_end
            segment_y_end = min(height, segment_y_start + segment2_height)

            cv2.rectangle(
                visualization,
                (x_start, segment_y_start),
                (x_end, segment_y_end),
                layout_color,
                thickness,
            )
            cv2.putText(
                visualization,
                f"9:16 Part 2",
                (x_start + 5, segment_y_start + 30),
                font,
                font_scale,
                layout_color,
                font_thickness,
            )

    elif layout_type == "twoequalhalfs":
        # Two identical 8:9 strips side by side, straddling the subject.
        aspect_8_9 = 8 / 9

        segment_height = height
        segment_width = int(segment_height * aspect_8_9)

        # Left strip, offset half a strip-width left of the subject.
        segment_center_x = original_center_x - segment_width // 2
        segment_x_start = int(
            max(0, min(width - segment_width, segment_center_x - segment_width // 2))
        )
        segment_x_end = int(min(width, segment_x_start + segment_width))

        cv2.rectangle(
            visualization,
            (segment_x_start, 0),
            (segment_x_end, segment_height),
            colors["twoequalhalfs"]["16:9"],
            thickness,
        )
        cv2.putText(
            visualization,
            f"16:9 Equal 1",
            (segment_x_start + 5, 30),
            font,
            font_scale,
            colors["twoequalhalfs"]["16:9"],
            font_thickness,
        )

        # Right strip, mirrored to the other side of the subject.
        segment_center_x = original_center_x + segment_width // 2
        segment_x_start = int(
            max(0, min(width - segment_width, segment_center_x - segment_width // 2))
        )
        segment_x_end = int(min(width, segment_x_start + segment_width))

        cv2.rectangle(
            visualization,
            (segment_x_start, 0),
            (segment_x_end, segment_height),
            colors["twoequalhalfs"]["16:9"],
            thickness,
        )
        cv2.putText(
            visualization,
            f"16:9 Equal 2",
            (segment_x_start + 5, 60),
            font,
            font_scale,
            colors["twoequalhalfs"]["16:9"],
            font_thickness,
        )

        # Vertical form: subject-centered 9:16 column split into two 9:8
        # bands stacked top to bottom.
        aspect_9_16 = 9 / 16
        target_width_9_16 = int(height * aspect_9_16)
        x_start = max(
            0,
            min(width - target_width_9_16, original_center_x - target_width_9_16 // 2),
        )
        x_end = x_start + target_width_9_16

        aspect_9_8 = 9 / 8
        segment_width_9_16 = target_width_9_16
        segment_height_9_16 = int(segment_width_9_16 / aspect_9_8)

        # Top band.
        segment_y_start = 0
        segment_y_end = min(height, segment_y_start + segment_height_9_16)

        cv2.rectangle(
            visualization,
            (x_start, segment_y_start),
            (x_end, segment_y_end),
            colors["twoequalhalfs"]["9:16"],
            thickness,
        )
        cv2.putText(
            visualization,
            f"9:16 Equal 1",
            (x_start + 5, segment_y_start + 30),
            font,
            font_scale,
            colors["twoequalhalfs"]["9:16"],
            font_thickness,
        )

        # Bottom band.
        segment_y_start = segment_y_end
        segment_y_end = min(height, segment_y_start + segment_height_9_16)

        cv2.rectangle(
            visualization,
            (x_start, segment_y_start),
            (x_end, segment_y_end),
            colors["twoequalhalfs"]["9:16"],
            thickness,
        )
        cv2.putText(
            visualization,
            f"9:16 Equal 2",
            (x_start + 5, segment_y_start + 30),
            font,
            font_scale,
            colors["twoequalhalfs"]["9:16"],
            font_thickness,
        )

    # Mark the detected subject center: white filled dot with black ring.
    center_radius = 8
    cv2.circle(
        visualization,
        (original_center_x, original_center_y),
        center_radius,
        (255, 255, 255),
        -1,
    )
    cv2.circle(
        visualization,
        (original_center_x, original_center_y),
        center_radius,
        (0, 0, 0),
        2,
    )

    # Back to PIL RGB for display in the Gradio gallery.
    visualization_pil = Image.fromarray(cv2.cvtColor(visualization, cv2.COLOR_BGR2RGB))

    return visualization_pil
|
|
|
|
|
def get_image_crop(cid=None, rsid=None, uid=None, ct=None):
    """
    Generate standard and layout crop variations for visualization.

    Fetches sprite-sheet URLs from Firebase (falling back to bundled sample
    images if the lookup fails), extracts the middle thumbnail of each sheet,
    asks the vision model for the left/right speaker divisions, then builds
    every crop/layout variation and collects them into one gallery.

    Args:
        cid: Client id used for the Firebase sprite lookup.
        rsid: Recording session id used for the Firebase sprite lookup.
        uid: User id used for the Firebase sprite lookup.
        ct: Content-type hint forwarded to analyze_image.

    Returns:
        gr.Gallery: Gallery of all generated images with captions.
    """
    try:
        sprites_data = get_sprite_firebase(cid, rsid, uid)
        image_paths = [sprite_data["url"] for sprite_data in sprites_data]
    except Exception:
        # Firebase unavailable or returned malformed data: fall back to
        # bundled sample sprite sheets so the UI still has something to show.
        image_paths = [
            "data/F2-Roll4D-i2x-Take2-Nov19.24-PST02.31.31pm.jpg",
            "data/G2-Roll5D-i2x-Take2-Nov19.24-PST02.31.31pm.jpg",
            "data/C1-Roll10D-i1x-Take2-Mar20.25-PST12.14.56pm.jpg",
            "data/C2-Roll10D-i2x-Take2-Mar20.25-PST12.14.56pm.jpg",
        ]

    all_images = []
    all_captions = []

    for image_path in image_paths:
        # Load the sprite sheet; skip (not abort) on any per-image failure so
        # one bad URL/file does not lose the whole gallery.
        try:
            if image_path.startswith(("http://", "https://")):
                # Bound the request so one dead URL cannot hang the UI, and
                # fail fast on HTTP errors instead of feeding an error page
                # body to Image.open.
                response = requests.get(image_path, timeout=30)
                response.raise_for_status()
                input_image = Image.open(BytesIO(response.content))
            else:
                input_image = Image.open(image_path)
        except Exception as e:
            print(f"Error loading image {image_path}: {e}")
            continue

        # Middle frame of the 10x10 sprite grid is the representative still.
        mid_image = get_middle_thumbnail(input_image)

        # Overlay a 1..20 ruler so the model can answer with division indices.
        numbered_mid_image = add_top_numbers(
            input_image=mid_image,
            num_divisions=20,
            margin=50,
            font_size=30,
            dot_spacing=20,
        )

        (left_division, right_division, num_of_speakers) = analyze_image(
            numbered_mid_image, remove_unwanted_prompt(1), mid_image, ct
        )

        # Clamp/repair model output: divisions must satisfy 1 <= left < right <= 20.
        if left_division <= 0:
            left_division = 1
        if right_division > 20:
            right_division = 20
        if left_division >= right_division:
            # Inverted or degenerate answer: fall back to the full frame.
            left_division = 1
            right_division = 20

        print(f"Using divisions: left={left_division}, right={right_division}")

        (
            standard_crops,
            threehalfs_layouts,
            twothirdhalfs_layouts,
            twoequalhalfs_layouts,
            visualization_data,
        ) = create_layouts(mid_image, left_division, right_division, num_of_speakers)

        standard_visualization = draw_layout_regions(
            mid_image, left_division, right_division, visualization_data, "standard"
        )
        all_images.append(standard_visualization)
        all_captions.append(
            f"Standard Aspect Ratios (16:9 & 9:16) {standard_visualization.size}"
        )

        all_images.append(input_image)
        all_captions.append(f"Input Image {input_image.size}")

        all_images.append(mid_image)
        all_captions.append(f"Middle Thumbnail {mid_image.size}")

        for key, crop in standard_crops.items():
            all_images.append(crop)
            all_captions.append(f"{key} {crop.size}")

        for key, layout in threehalfs_layouts.items():
            all_images.append(layout)
            all_captions.append(f"Three Halfs {key} {layout.size}")

        for key, layout in twothirdhalfs_layouts.items():
            all_images.append(layout)
            all_captions.append(f"Two-Thirds Halfs {key} {layout.size}")

        for key, layout in twoequalhalfs_layouts.items():
            all_images.append(layout)
            all_captions.append(f"Two Equal Halfs {key} {layout.size}")

    return gr.Gallery(value=list(zip(all_images, all_captions)))
|
|