Spaces:
Sleeping
Sleeping
| """AI utilities for generating tags, captions, and embeddings""" | |
| import numpy as np | |
| from typing import List, Tuple | |
| import random | |
| def generate_tags(filename: str) -> List[str]: | |
| """ | |
| Generate tags for an image based on filename. | |
| In production, this would use CLIP or similar models. | |
| Currently using placeholder logic. | |
| """ | |
| # Extract meaningful words from filename | |
| name_parts = filename.lower().replace("_", " ").replace("-", " ").split() | |
| name_parts = [p.replace(".jpg", "").replace(".png", "").replace(".jpeg", "") | |
| for p in name_parts if p] | |
| # Common image tags for demo | |
| common_tags = [ | |
| "photo", "image", "landscape", "portrait", "nature", "architecture", | |
| "people", "animal", "food", "object", "abstract", "text", "sunset", | |
| "mountain", "beach", "forest", "urban", "indoor", "outdoor" | |
| ] | |
| # Select random subset of common tags + filename parts | |
| tags = list(set(name_parts[:2] + random.sample(common_tags, min(3, len(common_tags))))) | |
| return tags[:5] # Return up to 5 tags | |
| def generate_caption(filename: str, tags: List[str]) -> str: | |
| """ | |
| Generate a caption for an image. | |
| In production, this would use BLIP or similar models. | |
| Currently using placeholder logic. | |
| """ | |
| caption_templates = [ | |
| "A beautiful {tag} photograph", | |
| "Captured moment: {tag}", | |
| "Scenic view of {tag}", | |
| "Amazing {tag} scene", | |
| "Photography: {tag} collection", | |
| ] | |
| tag = tags[0] if tags else "image" | |
| template = random.choice(caption_templates) | |
| return template.format(tag=tag) | |
| def generate_embedding(filename: str, tags: List[str], caption: str) -> np.ndarray: | |
| """ | |
| Generate a 512-dimensional embedding for semantic search. | |
| In production, this would use CLIP or similar models. | |
| Currently using placeholder random embeddings (reproducible from filename). | |
| """ | |
| # Create a reproducible random embedding based on filename | |
| # In production: use CLIP or similar to generate real embeddings | |
| random.seed(hash(filename) % (2**32)) | |
| embedding = np.random.randn(512).astype(np.float32) | |
| # Normalize to unit vector | |
| embedding = embedding / np.linalg.norm(embedding) | |
| return embedding | |
| def generate_filename_embedding(filename: str) -> np.ndarray: | |
| """ | |
| Generate a deterministic embedding from filename for testing. | |
| Ensures same filename always gets same embedding. | |
| """ | |
| random.seed(hash(filename) % (2**32)) | |
| embedding = np.random.randn(512).astype(np.float32) | |
| embedding = embedding / np.linalg.norm(embedding) | |
| return embedding |