Spaces:

HarshitX
/

Multi_LLM_Image_Captioning

Sleeping

App Files Files Community

Multi_LLM_Image_Captioning / caption_overlay.py

HarshitX

Upload 9 files

8a8f3ed verified 7 months ago

raw

history blame contribute delete

6.03 kB

	import os
	import cv2
	import numpy as np

	from PIL import Image, ImageDraw, ImageFont

	class ImageCaptionOverlay:
	    """Add captions to images, drawn over the pixels (OpenCV) or in a banner (Pillow)."""

	    @staticmethod
	    def _wrap_text(caption, max_width, measure_width):
	        """Greedily wrap *caption* into lines no wider than *max_width*.

	        Args:
	            caption: Text to wrap. ``None``, empty or whitespace-only text
	                yields no lines.
	            max_width: Maximum line width, in the unit *measure_width* returns.
	            measure_width: Callable mapping a string to its rendered width.

	        Returns:
	            A list of lines. Text that already fits is returned as a single,
	            unmodified line. A single word wider than *max_width* is kept
	            whole on its own line rather than being split.
	        """
	        if not caption or not caption.strip():
	            return []
	        if measure_width(caption) <= max_width:
	            return [caption]

	        lines = []
	        current = ""
	        for word in caption.split():
	            candidate = f"{current} {word}" if current else word
	            if measure_width(candidate) <= max_width:
	                current = candidate
	                continue
	            # The word does not fit: close the running line and start a new one.
	            if current:
	                lines.append(current)
	            current = word
	        if current:
	            lines.append(current)
	        return lines

	    @staticmethod
	    def _load_font(font_path, font_size):
	        """Return the first loadable font, falling back to Pillow's default.

	        Candidates are tried in order: *font_path* (if given), then
	        ``fonts/Poppins-Regular.ttf`` relative to the working directory.
	        """
	        for candidate in (font_path, "fonts/Poppins-Regular.ttf"):
	            if not candidate or not os.path.exists(candidate):
	                continue
	            try:
	                return ImageFont.truetype(candidate, font_size)
	            except (OSError, ValueError, ImportError):
	                # Unreadable font file, invalid size or missing FreeType
	                # support: best effort, move on to the next candidate.
	                continue
	        return ImageFont.load_default()

	    @staticmethod
	    def add_caption_overlay(image: np.ndarray, caption: str, position: str = "bottom",
	                            font_size: int = 1, thickness: int = 2) -> np.ndarray:
	        """Draw *caption* as white text on black boxes over a copy of *image*.

	        Args:
	            image: Source image array; it is copied, never modified.
	            caption: Text to draw. It is word-wrapped to the image width
	                minus 40 pixels. Empty or whitespace-only text draws nothing.
	            position: ``"bottom"`` or ``"top"``; any other value centers the
	                caption vertically.
	            font_size: OpenCV font scale passed to ``getTextSize``/``putText``.
	            thickness: Stroke thickness of the text.

	        Returns:
	            A new array with the caption drawn on it.
	        """
	        img_copy = image.copy()
	        # Nothing to draw: return before touching OpenCV so that no stray
	        # background box is painted for an empty caption.
	        if not caption or not caption.strip():
	            return img_copy

	        height, width = img_copy.shape[:2]
	        font = cv2.FONT_HERSHEY_SIMPLEX

	        def measure_width(text):
	            return cv2.getTextSize(text, font, font_size, thickness)[0][0]

	        lines = ImageCaptionOverlay._wrap_text(caption, width - 40, measure_width)

	        # Uniform line pitch: height of a capital letter plus 10 px spacing.
	        line_height = cv2.getTextSize("A", font, font_size, thickness)[0][1] + 10
	        total_height = len(lines) * line_height

	        if position == "bottom":
	            start_y = height - total_height - 20
	        elif position == "top":
	            start_y = 30
	        else:  # center
	            start_y = (height - total_height) // 2

	        # Compute the baseline origin and size of every line once.
	        placements = []
	        for index, line in enumerate(lines):
	            text_w, text_h = cv2.getTextSize(line, font, font_size, thickness)[0]
	            text_x = (width - text_w) // 2
	            text_y = start_y + index * line_height + text_h
	            placements.append((line, text_x, text_y, text_w, text_h))

	        # Paint every background box before any text. The boxes of adjacent
	        # lines touch, so drawing them interleaved with the text would let a
	        # line's box overwrite the descenders of the line above it.
	        for _, text_x, text_y, text_w, text_h in placements:
	            cv2.rectangle(img_copy,
	                          (text_x - 10, text_y - text_h - 5),
	                          (text_x + text_w + 10, text_y + 5),
	                          (0, 0, 0), -1)

	        for line, text_x, text_y, _, _ in placements:
	            cv2.putText(img_copy, line, (text_x, text_y), font, font_size,
	                        (255, 255, 255), thickness)

	        return img_copy

	    @staticmethod
	    def add_caption_background(image: np.ndarray, caption: str,
	                               font_path: str = None,
	                               background_color: tuple = (0, 0, 0),
	                               text_color: tuple = (255, 255, 255),
	                               margin: int = 50,
	                               font_size: int = 24) -> np.ndarray:
	        """Return *image* with a caption band added above it.

	        The result is taller than the input: a band filled with
	        *background_color* holds the centered, word-wrapped caption, and the
	        original image is pasted directly below the band.

	        Args:
	            image: Source image in OpenCV channel order. Single-channel,
	                3-channel (BGR) and 4-channel (BGRA) arrays are accepted.
	            caption: Text to render in the band.
	            font_path: Optional TrueType font file. If it is missing or cannot
	                be loaded, ``fonts/Poppins-Regular.ttf`` is tried, then
	                Pillow's default font.
	            background_color: Band fill colour. It is handed to Pillow on an
	                RGB canvas, so it is interpreted as RGB.
	            text_color: Text colour, interpreted as RGB for the same reason.
	            margin: Padding in pixels above and below the text, and the
	                horizontal inset used when wrapping.
	            font_size: Size used for TrueType fonts. It does not affect
	                Pillow's default fallback font.

	        Returns:
	            A new 3-channel BGR array containing the band and the image.
	        """
	        height, width = image.shape[:2]

	        # Choose the conversion by channel count so grayscale and BGRA inputs
	        # do not crash in cvtColor.
	        channels = 1 if image.ndim == 2 else image.shape[2]
	        if channels == 1:
	            conversion = cv2.COLOR_GRAY2RGB
	        elif channels == 4:
	            conversion = cv2.COLOR_BGRA2RGB
	        else:
	            conversion = cv2.COLOR_BGR2RGB
	        pil_image = Image.fromarray(cv2.cvtColor(image, conversion))

	        font = ImageCaptionOverlay._load_font(font_path, font_size)

	        # This drawing context is only used to measure text.
	        measurer = ImageDraw.Draw(pil_image)

	        def measure_width(text):
	            left, _, right, _ = measurer.textbbox((0, 0), text, font=font)
	            return right - left

	        lines = ImageCaptionOverlay._wrap_text(caption, width - (2 * margin), measure_width)

	        if lines:
	            # The ink box of the whole caption gives a uniform line height and
	            # the offset between the drawing origin and the top of the ink.
	            _, ink_top, _, ink_bottom = measurer.textbbox((0, 0), caption, font=font)
	            text_height = ink_bottom - ink_top
	            total_text_height = len(lines) * text_height + (len(lines) - 1) * 10
	        else:
	            # No visible text: keep only the margins instead of letting the
	            # spacing term go negative.
	            ink_top = 0
	            text_height = 0
	            total_text_height = 0

	        band_height = total_text_height + (2 * margin)
	        new_image = Image.new('RGB', (width, height + band_height), background_color)

	        # The original image sits directly below the caption band.
	        new_image.paste(pil_image, (0, band_height))

	        draw = ImageDraw.Draw(new_image)
	        y_offset = margin
	        for line in lines:
	            left, _, right, _ = draw.textbbox((0, 0), line, font=font)
	            # Subtract the ink offsets so the glyphs, not the drawing origin,
	            # are centered horizontally and start at y_offset.
	            x_position = (width - (right - left)) // 2 - left
	            draw.text((x_position, y_offset - ink_top), line, fill=text_color, font=font)
	            y_offset += text_height + 10

	        # Convert back to OpenCV format
	        return cv2.cvtColor(np.array(new_image), cv2.COLOR_RGB2BGR)