# ByteDream / create_test_dataset.py
# Enzo8930302's picture
# Upload folder using huggingface_hub
# 0e3999b verified
"""
Create Test Dataset

Generate synthetic sample images, each with a matching caption text file,
for testing the training pipeline.
"""
from PIL import Image, ImageDraw, ImageFont
import numpy as np
from pathlib import Path
import random
def create_test_dataset(output_dir: str = "./dataset", num_images: int = 10):
    """
    Create a test dataset of synthetic images with matching captions.

    For every image a 512x512 JPEG named ``test_image_NNN.jpg`` is written,
    together with a UTF-8 text file of the same stem (``test_image_NNN.txt``)
    holding its caption. Each image has a vertical gradient background in a
    colour picked at random from a fixed palette, with 3 to 8 randomly
    placed circles, rectangles and triangles drawn on top.

    Args:
        output_dir: Output directory; created (including parents) if missing.
        num_images: Number of test images to create.
    """
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)
    print(f"Creating {num_images} test images in {output_path}...")

    # Background palette as (name, RGB) pairs. The name is what goes into the
    # caption; interpolating a raw channel value there produced text such as
    # "on a 255 background".
    palette = [
        ("tomato", (255, 99, 71)),
        ("turquoise", (64, 224, 208)),
        ("gold", (255, 215, 0)),
        ("blue violet", (138, 43, 226)),
        ("lime green", (50, 205, 50)),
        ("orange", (255, 165, 0)),
        ("pale violet red", (219, 112, 147)),
        ("steel blue", (70, 130, 180)),
        ("pink", (255, 192, 203)),
        ("light green", (144, 238, 144)),
    ]
    shapes = ['circle', 'rectangle', 'triangle']
    img_size = 512

    for i in range(num_images):
        img = Image.new('RGB', (img_size, img_size), color=(240, 240, 240))
        draw = ImageDraw.Draw(img)

        # Random parameters (drawn in this order so seeded runs stay stable)
        num_shapes = random.randint(3, 8)
        bg_name, bg_color = random.choice(palette)

        _draw_gradient(draw, bg_color, img_size)
        for _ in range(num_shapes):
            _draw_random_shape(draw, random.choice(shapes), img_size)

        # Save image
        stem = f"test_image_{i+1:03d}"
        img_path = output_path / f"{stem}.jpg"
        img.save(img_path, quality=95)

        # Save caption next to the image, sharing its file stem
        article = "an" if bg_name[0] in "aeiou" else "a"
        caption = (
            f"A colorful abstract composition with {num_shapes} geometric "
            f"shapes on {article} {bg_name} background"
        )
        caption_path = output_path / f"{stem}.txt"
        with open(caption_path, 'w', encoding='utf-8') as f:
            f.write(caption)
        print(f" Created: {img_path.name}")

    print("\n✓ Test dataset created successfully!")
    print(f" Location: {output_path.absolute()}")
    print(f" Images: {num_images}")
    print("\nTo train with this dataset:")
    print(f" python train.py --config config.yaml --train_data {output_path}")


def _draw_gradient(draw, base_color, img_size):
    """Fill the canvas with a vertical gradient from 80% to ~100% of base_color."""
    for y in range(img_size):
        scale = 0.8 + 0.2 * y / img_size
        row_color = tuple(int(channel * scale) for channel in base_color)
        # Last pixel column is img_size - 1; the scanline stays on the canvas.
        draw.line([(0, y), (img_size - 1, y)], fill=row_color)


def _draw_random_shape(draw, shape, img_size):
    """Draw one black-outlined shape of the given kind at a random position."""
    color = tuple(random.randint(50, 255) for _ in range(3))
    x1 = random.randint(50, img_size - 50)
    y1 = random.randint(50, img_size - 50)
    size = random.randint(30, 100)
    # Anchors near the right/bottom edge can push a shape past the canvas;
    # the part outside the image is simply not drawn.
    if shape == 'circle':
        draw.ellipse([x1, y1, x1 + size, y1 + size], fill=color, outline=(0, 0, 0))
    elif shape == 'rectangle':
        draw.rectangle([x1, y1, x1 + size, y1 + size // 2], fill=color, outline=(0, 0, 0))
    elif shape == 'triangle':
        points = [
            (x1, y1),
            (x1 + size, y1),
            (x1 + size // 2, y1 + size),
        ]
        draw.polygon(points, fill=color, outline=(0, 0, 0))
if __name__ == "__main__":
    # Executed only when the file is run as a script: build the dataset
    # using the function's default arguments.
    create_test_dataset()