import gradio as gr
import numpy as np
from PIL import Image, ImageDraw, ImageFont
import io
import os
from typing import Optional, Tuple, List
import re


# Mock OCR function (in real implementation, you'd use pytesseract or similar)
def detect_text_in_image(image: np.ndarray) -> List[str]:
    """Mock function to detect existing text in image"""
    # In a real implementation, this would use OCR like pytesseract
    # For demo purposes, we'll return some sample detected text
    return ["Sample text detected", "Another text element"]


def add_text_to_image(
    image: np.ndarray,
    new_text: str,
    text_position: str,
    font_size: int,
    text_color: str,
    background_color: str,
    detect_existing: bool,
    accessibility_description: str
) -> Tuple[np.ndarray, str, str]:
    """
    Add text overlay to image with accessibility features
    """
    if image is None:
        return None, "Please upload an image first", ""

    # Convert numpy array to PIL Image
    pil_image = Image.fromarray(image.astype('uint8'), 'RGB')

    # Detect existing text if requested
    existing_text_info = ""
    if detect_existing:
        try:
            detected_texts = detect_text_in_image(image)
            if detected_texts:
                existing_text_info = "Detected existing text:\n" + "\n".join(f"• {text}" for text in detected_texts)
            else:
                existing_text_info = "No existing text detected in the image."
        except Exception as e:
            existing_text_info = f"Text detection failed: {str(e)}"

    # Add new text overlay
    draw = ImageDraw.Draw(pil_image)

    # Create font (you might want to use a proper font file in production)
    try:
        font = ImageFont.truetype("arial.ttf", font_size)
    except OSError:
        # Fall back to the built-in font if arial.ttf is not available
        font = ImageFont.load_default()

    # Get image dimensions
    img_width, img_height = pil_image.size

    # Calculate text position
    text_bbox = draw.textbbox((0, 0), new_text, font=font)
    text_width = text_bbox[2] - text_bbox[0]
    text_height = text_bbox[3] - text_bbox[1]

    position_map = {
        "top-left": (10, 10),
        "top-center": ((img_width - text_width) // 2, 10),
        "top-right": (img_width - text_width - 10, 10),
        "center-left": (10, (img_height - text_height) // 2),
        "center": ((img_width - text_width) // 2, (img_height - text_height) // 2),
        "center-right": (img_width - text_width - 10, (img_height - text_height) // 2),
        "bottom-left": (10, img_height - text_height - 10),
        "bottom-center": ((img_width - text_width) // 2, img_height - text_height - 10),
        "bottom-right": (img_width - text_width - 10, img_height - text_height - 10)
    }

    x, y = position_map.get(text_position, position_map["bottom-center"])

    # Add background rectangle for better readability (optional)
    if background_color != "transparent":
        # Convert hex color to RGB
        if background_color.startswith('#'):
            bg_rgb = tuple(int(background_color[i:i+2], 16) for i in (1, 3, 5))
        else:
            bg_rgb = (0, 0, 0)  # default black

        # Draw background rectangle
        padding = 5
        draw.rectangle([
            x - padding, y - padding,
            x + text_width + padding, y + text_height + padding
        ], fill=bg_rgb)

    # Convert text color to RGB
    if text_color.startswith('#'):
        text_rgb = tuple(int(text_color[i:i+2], 16) for i in (1, 3, 5))
    else:
        text_rgb = (255, 255, 255)  # default white

    # Draw text
    draw.text((x, y), new_text, fill=text_rgb, font=font)

    # Convert back to numpy array
    result_image = np.array(pil_image)

    # Generate accessibility description
    if not accessibility_description:
        accessibility_description = f"Image with text overlay: '{new_text}' positioned at {text_position}"

    # Combine all information for accessibility
    full_accessibility_info = f"Accessibility Description: {accessibility_description}\n\n"
    if existing_text_info:
        full_accessibility_info += f"{existing_text_info}\n\n"
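    # The summary assembled here is returned as a separate text output so screen-reader
    # users get a textual account of the overlay (description, any detected text, and the
    # new text) without inspecting the rendered image. A possible extension, not part of
    # the original app, would be to also record the chosen colors, e.g.:
    #     full_accessibility_info += f"Text color: {text_color}, background: {background_color}\n\n"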
full_accessibility_info += f"Added text: '{new_text}' at {text_position} with font size {font_size}" return result_image, full_accessibility_info, existing_text_info # Create the Gradio interface with gr.Blocks(title="Premium Image Text Overlay - Accessibility Edition") as demo: gr.HTML("""
Enhanced accessibility for visually impaired users with text detection