# Collect resources

## From GitHub

In [7]:
!git clone -q https://github.com/mrok273/Qiita ../data/raw/mrok273/Qiita

## From Kaggle

In [1]:
!kaggle datasets download -d mikoajkolman/pokemon-images-first-generation17000-files -p "../data/raw/" -q

## From Web

- Install firefox
- Open web page
- Bulk save image (See [How to Save All the Images on a Web Page in Firefox Browser](https://www.journeybytes.com/bulk-save-images-using-firefox/))

## From YouTube

### Pal

In [None]:
# Official paldeck
!yt-dlp --postprocessor-args "-ss 00:00:00 -t 00:00:05" -o "../data/video/pocketpair/%(title)s-%(id)s-5s.%(ext)s" -q https://www.youtube.com/playlist?list=PLptNv_Fxn9idzsTRulWNmLYKWgKhqKI5s

In [10]:
import os
import re

directory = "../data/video/pocketpair"
for filename in os.listdir(directory):
 match = re.search(r'[Pp]aldeck.*[Nn]o.(\d+).*.webm', filename)
 paldeck_no, = match.groups() if match else [None]
 if paldeck_no is None:
 continue
 new_filename = f"paldeck_no{paldeck_no.zfill(3)}.webm"
 os.rename(os.path.join(directory, filename), os.path.join(directory, new_filename))

In [12]:
!ffprobe -v error -select_streams v:0 -show_entries stream=width,height,r_frame_rate -of csv=s=x:p=0 "../data/video/pocketpair/paldeck_no001.webm"

1920x1080x60/1


In [21]:
import sys
sys.path.append('..')

import os
import subprocess

from datetime import datetime
from src.pipeline import *

input_dir = "../data/video/pocketpair"
output_dir = "../data/raw/pocketpair"

for root, dirs, files in os.walk(input_dir):
 for filename in files:
 full_input_path = os.path.join(root, filename)
 filename_without_ext, _ext = os.path.splitext(filename)
 output_subdir = os.path.join(output_dir, filename_without_ext)
 os.makedirs(output_subdir, exist_ok=True)

 output_pattern = os.path.join(output_subdir, "frame_%05d.png")
 command = ['ffmpeg', '-hwaccel', 'cuda', '-i', full_input_path, '-vf', 'fps=12', output_pattern]
 subprocess.run(command, check=True)
 
 for root_out, _, files_out in os.walk(output_subdir):
 for filename_out in files_out:
 full_output_path = os.path.join(root_out, filename_out)
 raw_dir = data_dir(Step.raw.value)
 metadata = Metadata(
 bucket=raw_dir,
 path=os.path.relpath(full_output_path, raw_dir),
 step=Step.raw,
 label=Label.pal,
 created_at=datetime.utcnow()
 )
 create_metadata(metadata)
 

In [None]:
# Fan video
!yt-dlp -o "../data/video/palworld-fan/%(id)s.%(ext)s" -q https://www.youtube.com/playlist?list=PLitsLuiXBQxtd0ThPaYMqsbxUMfmdxVHc

In [24]:
import sys
sys.path.append('..')

import os
import subprocess

from datetime import datetime
from src.pipeline import *

input_dir = "../data/video/palworld-fan"
output_dir = "../data/raw/palworld-fan"

for root, dirs, files in os.walk(input_dir):
 for filename in files:
 full_input_path = os.path.join(root, filename)
 filename_without_ext, _ext = os.path.splitext(filename)
 output_subdir = os.path.join(output_dir, filename_without_ext)
 os.makedirs(output_subdir, exist_ok=True)

 output_pattern = os.path.join(output_subdir, "frame_%05d.png")
 command = ['ffmpeg', '-hwaccel', 'cuda', '-i', full_input_path, '-vf', 'fps=12', output_pattern]
 subprocess.run(command, check=True)
 
 for root_out, _, files_out in os.walk(output_subdir):
 for filename_out in files_out:
 full_output_path = os.path.join(root_out, filename_out)
 raw_dir = data_dir(Step.raw.value)
 metadata = Metadata(
 bucket=raw_dir,
 path=os.path.relpath(full_output_path, raw_dir),
 step=Step.raw,
 label=Label.pal,
 created_at=datetime.utcnow()
 )
 create_metadata(metadata)
 

### Pokemon

In [None]:
!yt-dlp -o "../data/video/pokemon-games/%(id)s.%(ext)s" -q https://youtube.com/playlist?list=PLitsLuiXBQxvqH5Hv1R5ioFnCpIBMNvX3&si=nzehh3dDiU3k2Q7F

In [44]:
import os
import subprocess

def video2img(video: str, output_dir: str, fps: int):
 filename_without_ext, _ext = os.path.splitext(os.path.basename(video))
 output_subdir = os.path.join(output_dir, filename_without_ext)
 os.makedirs(output_subdir, exist_ok=True)
 output_pattern = os.path.join(output_subdir, "frame_%05d.png")
 command = ['ffmpeg', '-hwaccel', 'cuda', '-i', video, '-vf', f"fps={fps}", output_pattern]
 subprocess.run(command, check=True)

In [None]:
for video in ["0Loz61U6CuE.webm", "AObd6oPnlyg.webm", "cIi40yfs630.webm", "G9L0LK07lis.webm", "LG-LZKUUVZI.webm", "Q3-fCEL-JjE.webm"]:
 video2img(f"../data/video/pokemon-games/{video}", "../data/raw/pokemon-games", 6)

In [45]:
video2img("../data/video/pokemon-games/EEupjm0LwUQ.webm", "../data/raw/pokemon-games", 1)

In [13]:
# torchvision.dataset はフォルダ構造が`split`/`label`でないと使えない。前処理にはHuggingFace。
import os
from datasets import load_dataset
from torchvision import transforms
from typing import Tuple


def center_crop_and_save(input_dir:str, output_dir:str, crop_size: Tuple[int, int]):
 dataset = load_dataset("imagefolder", data_dir=input_dir)
 cropper = transforms.CenterCrop(crop_size)
 os.makedirs(output_dir, exist_ok=True)

 def _center_crop_and_save(example):
 cropped = cropper(example["image"])
 cropped.filename = os.path.abspath(example["image"].filename).lower().replace(
 os.path.abspath(input_dir).lower(),
 os.path.abspath(output_dir).lower(),
 )
 cropped.save(cropped.filename)
 # No need to return example, just save it.

 dataset.map(_center_crop_and_save)

In [15]:
center_crop_and_save("../data/raw/pokemon-games/0Loz61U6CuE", "../data/raw/pokemon-games/0Loz61U6CuE_cropped", (1028, 1028))
center_crop_and_save("../data/raw/pokemon-games/AObd6oPnlyg", "../data/raw/pokemon-games/AObd6oPnlyg_cropped", (1028, 1028))
center_crop_and_save("../data/raw/pokemon-games/LG-LZKUUVZI", "../data/raw/pokemon-games/LG-LZKUUVZI_cropped", (1028, 1028))
center_crop_and_save("../data/raw/pokemon-games/Q3-fCEL-JjE", "../data/raw/pokemon-games/Q3-fCEL-JjE_cropped", (1028, 1028))

Map: 100%|██████████| 11112/11112 [11:55<00:00, 15.52 examples/s]
Generating train split: 7405 examples [00:00, 10062.70 examples/s]
Map: 100%|██████████| 7405/7405 [18:05<00:00, 6.82 examples/s]
Generating train split: 9862 examples [00:00, 10031.40 examples/s]
Map: 100%|██████████| 9862/9862 [30:25<00:00, 5.40 examples/s]
Generating train split: 12420 examples [00:01, 10197.81 examples/s]
Map: 100%|██████████| 12420/12420 [30:12<00:00, 6.85 examples/s]


In [25]:
import os
from datasets import load_dataset
from torchvision.transforms.functional import crop
from typing import Tuple

def left_crop_and_save(input_dir:str, output_dir:str, crop_size: Tuple[int, int]):
 dataset = load_dataset("imagefolder", data_dir=input_dir)
 os.makedirs(output_dir, exist_ok=True)
 cropper = lambda image: crop(image, 0, 0, crop_size[0], crop_size[1])

 def _left_crop_and_save(example):
 try:
 if example["image"].size == crop_size:
 return
 cropped = cropper(example["image"])
 cropped.filename = os.path.abspath(example["image"].filename).lower().replace(
 os.path.abspath(input_dir).lower(),
 os.path.abspath(output_dir).lower(),
 )
 cropped.save(cropped.filename)
 # No need to return example, just save it.

 except Exception as e:
 print(f"Error occurred: {e}")

 dataset.map(_left_crop_and_save)

In [26]:
left_crop_and_save("../data/raw/pokemon-games/cIi40yfs630", "../data/raw/pokemon-games/cIi40yfs630", (1080, 1080))

Generating train split: 27813 examples [00:00, 47790.54 examples/s]
Map: 57%|█████▋ | 15990/27813 [03:43<02:23, 82.47 examples/s]

Error occurred: cannot identify image file 'C:\\Users\\hiroga\\Documents\\GitHub\\til\\computer-science\\machine-learning\\_src\\pokemon-palworld\\data\\raw\\pokemon-games\\cIi40yfs630\\frame_15991.png'


Map: 100%|██████████| 27813/27813 [15:52<00:00, 29.19 examples/s]


In [37]:
import sys

sys.path.append('../RMBG-1.4')

from typing import Optional

import numpy as np
import torch
from PIL.Image import Image
from briarmbg import BriaRMBG
from utilities import postprocess_image, preprocess_image

net = BriaRMBG()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
net = BriaRMBG.from_pretrained("briaai/RMBG-1.4")
net.to(device)
net.eval() 

def remove_background(image: Image) -> Optional[Image]:
 try:
 # prepare input
 orig_im = np.array(image)
 orig_im = orig_im[:,:,:3] # remove alpha channel
 orig_im_size = orig_im.shape[0:2]
 model_input_size = [1024,1024]
 preprocessed = preprocess_image(orig_im, model_input_size).to(device)

 # inference 
 result = net(preprocessed)

 # post process
 result_image = postprocess_image(result[0][0], orig_im_size)

 # save result
 pil_im = Image.fromarray(result_image)
 no_bg_image = Image.new("RGBA", pil_im.size, (0,0,0,0))
 no_bg_image.paste(image, mask=pil_im)
 return no_bg_image

 except Exception as e:
 print(f"{e, image}")
 return None

In [46]:
import os
from datasets import load_dataset

def remove_bg_and_save(input_dir:str, output_dir:str):
 dataset = load_dataset("imagefolder", data_dir=input_dir)
 os.makedirs(output_dir, exist_ok=True)

 def _remove_bg_and_save(example):
 try:
 nobg = remove_background(example["image"])
 nobg.filename = os.path.abspath(example["image"].filename).lower().replace(
 os.path.abspath(input_dir).lower(),
 os.path.abspath(output_dir).lower(),
 )
 nobg.save(nobg.filename)

 except Exception as e:
 print(f"Error occurred: {e}")

 dataset.map(_remove_bg_and_save)

In [48]:
remove_bg_and_save("../data/raw/pokemon-games/0Loz61U6CuE_cropped", "../data/nobg/pokemon-games/0Loz61U6CuE")
remove_bg_and_save("../data/raw/pokemon-games/AObd6oPnlyg_cropped", "../data/nobg/pokemon-games/AObd6oPnlyg")
remove_bg_and_save("../data/raw/pokemon-games/cIi40yfs630", "../data/nobg/pokemon-games/cIi40yfs630")
remove_bg_and_save("../data/raw/pokemon-games/EEupjm0LwUQ", "../data/nobg/pokemon-games/EEupjm0LwUQ")
remove_bg_and_save("../data/raw/pokemon-games/G9L0LK07lis", "../data/nobg/pokemon-games/G9L0LK07lis")
remove_bg_and_save("../data/raw/pokemon-games/LG-LZKUUVZI_cropped", "../data/nobg/pokemon-games/LG-LZKUUVZI")
remove_bg_and_save("../data/raw/pokemon-games/Q3-fCEL-JjE_cropped", "../data/nobg/pokemon-games/Q3-fCEL-JjE")

Generating train split: 11112 examples [00:00, 43038.25 examples/s]
Map: 100%|██████████| 11112/11112 [28:06<00:00, 6.59 examples/s]
Generating train split: 7405 examples [00:00, 46350.91 examples/s]
Map: 100%|██████████| 7405/7405 [15:08<00:00, 8.15 examples/s]
Generating train split: 27813 examples [00:00, 41096.54 examples/s]
Map: 57%|█████▋ | 15992/27813 [42:15<26:43, 7.37 examples/s] 

Error occurred: cannot identify image file 'C:\\Users\\hiroga\\Documents\\GitHub\\til\\computer-science\\machine-learning\\_src\\pokemon-palworld\\data\\raw\\pokemon-games\\cIi40yfs630\\frame_15991.png'


Map: 100%|██████████| 27813/27813 [1:13:12<00:00, 6.33 examples/s]
Generating train split: 586 examples [00:00, 45085.15 examples/s]
Map: 100%|██████████| 586/586 [02:00<00:00, 4.86 examples/s]
Generating train split: 3069 examples [00:00, 45460.81 examples/s]
Map: 100%|██████████| 3069/3069 [14:44<00:00, 3.47 examples/s]
Generating train split: 9862 examples [00:00, 45277.86 examples/s]
Map: 100%|██████████| 9862/9862 [24:11<00:00, 6.79 examples/s] 
Generating train split: 12420 examples [00:00, 48989.20 examples/s]
Map: 100%|██████████| 12420/12420 [24:59<00:00, 8.28 examples/s]


前処理後の画像のチェック結果
- 0Loz61U6CuE: タイトルのロゴが不要、ポケモン選択画面が不要、画面下部のウィンドウが邪魔
- AObd6oPnlyg: ポケモン選択画面が不要
- cIi40yfs630: ポケモン切り替え中の画面が不要
- EEupjm0LwUQ: ほぼ変わらない絵が1ポケモンあたり5枚あるのは多いかも。その割に、パルデア原産のポケモンはパルワールドとの区別にそこまで貢献しない気がする...
- G9L0LK07lis: 特になし
- LG-LZKUUVZI: 特になし。図鑑ではなくプレイ中画面から切り取ったものはノイズが少ない
- Q3-fCEL-JjE: 画面下部のウインドウが邪魔

In [87]:
# 再度クロップ。初回クロップ時に動画ごとに調整すればよかったことを反省...
# 今回はフォルダを分けず、既存の画像を上書きする
from datasets import load_dataset
from PIL.Image import Image
from torchvision.transforms import CenterCrop
from torchvision.transforms.functional import crop
from typing import Callable

def crop_and_save(data_dir: str, cropper: Callable[[Image], Image]):
 dataset = load_dataset("imagefolder", data_dir=data_dir)
 dataset = dataset.map(lambda data: {"image": cropper(data["image"]), "original_filename": data["image"].filename})
 dataset.map(lambda data: data["image"].save(data["original_filename"]))

crop_and_save("../data/nobg/pokemon-games/0Loz61U6CuE", CenterCrop((540, 540)))
crop_and_save("../data/nobg/pokemon-games/Q3-fCEL-JjE", lambda image: crop(image, 0, 0, 750, 1080)) # type: ignore

Map: 100%|██████████| 11112/11112 [14:14<00:00, 13.00 examples/s] 
Map: 100%|██████████| 11112/11112 [07:02<00:00, 26.32 examples/s]
Generating train split: 12420 examples [00:01, 9464.56 examples/s]
Map: 100%|██████████| 12420/12420 [19:25<00:00, 10.66 examples/s] 
Map: 100%|██████████| 12420/12420 [10:10<00:00, 20.34 examples/s]


オブジェクト検出を行い、一定サイズ以上のオブジェクトだけを保存

In [3]:
from datasets import load_dataset
from PIL.Image import Image
import cv2
import os
import numpy as np

def get_object_bounding_boxes(image: Image):
 individual_channels = image.split()

 alpha_channel: np.array
 if len(individual_channels) == 4:
 alpha_channel = np.array(individual_channels[3])
 else:
 raise ValueError("Image does not have an alpha channel.")

 # cv2.threshold関数を使用して、アルファチャンネルの値が1以上のピクセルを255(白)に、それ以外を0(黒)に変換します。
 # これにより、画像のオブジェクト部分を白、背景部分を黒としたバイナリマスクが作成されます。
 _, binary_mask = cv2.threshold(alpha_channel, 1, 255, cv2.THRESH_BINARY)

 contours, _ = cv2.findContours(binary_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

 return contours or []

def image_to_objects(image: Image, to_dir: str, min_height: int, min_width: int) -> list[Image]:
 contours = get_object_bounding_boxes(image)
 filtered_contours = [contour for contour in contours if cv2.contourArea(contour) > min_height * min_width]
 for index, contour in enumerate(filtered_contours):
 x, y, w, h = cv2.boundingRect(contour)
 cropped_image = image.crop((x, y, x + w, y + h))
 filename = os.path.basename(image.filename)
 filename_without_extension, _ = os.path.splitext(filename)
 cropped_image.save(os.path.join(to_dir, f"{filename_without_extension}_{index:03}.png"))

def detect_main_objects_and_save(data_dir: str, to_dir: str, min_height: int, min_width: int):
 dataset = load_dataset("imagefolder", data_dir=data_dir, split="train")
 os.makedirs(to_dir, exist_ok=True)
 dataset.map(lambda example: {"image": image_to_objects(example["image"], to_dir, min_height, min_width)}, batched=False)


min_height, min_width = 256, 256 # YouTubeから保存した画像が1920x1080という前提。キャラクターが普通に写っている場合は高さか幅が256pxを超えているように見える。
detect_main_objects_and_save("../data/nobg/pokemon-games/0Loz61U6CuE/", "../data/cropped/pokemon-games/0Loz61U6CuE/", min_height, min_width)
detect_main_objects_and_save("../data/nobg/pokemon-games/AObd6oPnlyg/", "../data/cropped/pokemon-games/AObd6oPnlyg/", min_height, min_width)
detect_main_objects_and_save("../data/nobg/pokemon-games/cIi40yfs630/", "../data/cropped/pokemon-games/cIi40yfs630/", min_height, min_width)
detect_main_objects_and_save("../data/nobg/pokemon-games/EEupjm0LwUQ/", "../data/cropped/pokemon-games/EEupjm0LwUQ/", min_height, min_width)
detect_main_objects_and_save("../data/nobg/pokemon-games/G9L0LK07lis/", "../data/cropped/pokemon-games/G9L0LK07lis/", min_height, min_width)
detect_main_objects_and_save("../data/nobg/pokemon-games/LG-LZKUUVZI/", "../data/cropped/pokemon-games/LG-LZKUUVZI/", min_height, min_width)
detect_main_objects_and_save("../data/nobg/pokemon-games/Q3-fCEL-JjE/", "../data/cropped/pokemon-games/Q3-fCEL-JjE/", min_height, min_width)

Map: 100%|██████████| 11112/11112 [07:13<00:00, 25.63 examples/s]
Generating train split: 7405 examples [00:00, 40654.13 examples/s]
Map: 100%|██████████| 7405/7405 [06:03<00:00, 20.39 examples/s]
Map: 100%|██████████| 27812/27812 [42:15<00:00, 10.97 examples/s] 
Generating train split: 586 examples [00:00, 46861.05 examples/s]
Map: 100%|██████████| 586/586 [01:09<00:00, 8.42 examples/s]
Generating train split: 3069 examples [00:00, 51572.00 examples/s]
Map: 100%|██████████| 3069/3069 [09:30<00:00, 5.38 examples/s]
Generating train split: 9862 examples [00:00, 43790.95 examples/s]
Map: 100%|██████████| 9862/9862 [12:20<00:00, 13.32 examples/s]
Generating train split: 12420 examples [00:00, 44092.09 examples/s]
Map: 100%|██████████| 12420/12420 [07:52<00:00, 26.28 examples/s]


## Filter images

In [18]:
from safetensors import safe_open
from torchvision import models
import torch

labels = ["etc", "pal", "pokemon"]

model = models.resnet18()
model.fc = torch.nn.Linear(model.fc.in_features, len(labels))

model_save_path = "../models/snapshots/filter.safetensors"
tensors = {}
with safe_open(model_save_path, framework="pt", device="cpu") as f:
 for key in f.keys():
 tensors[key] = f.get_tensor(key)

model.load_state_dict(tensors, strict=False)
model.eval()

ResNet(
 (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
 (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 (relu): ReLU(inplace=True)
 (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
 (layer1): Sequential(
 (0): BasicBlock(
 (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
 (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 (relu): ReLU(inplace=True)
 (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
 (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 )
 (1): BasicBlock(
 (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
 (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
 (relu): ReLU(inplace=True)
 (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 

In [19]:
from torchvision import transforms
from PIL.Image import Image

preprocess = transforms.Compose([
 transforms.Resize(224),
 transforms.ToTensor(),
 transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

def classify_image(input_image: Image):
 img_t = preprocess(input_image)
 batch_t = torch.unsqueeze(img_t, 0)
 
 with torch.no_grad():
 output = model(batch_t)
 _, max_index = torch.max(output, dim=1)
 return max_index.item()


In [12]:
from datasets import load_dataset
from PIL.Image import Image
import os

def save_if_pokemon_or_pal(example: dict[str, any], to_dir: str):
 etc = 0
 image = example["image"]
 # convert('RGB') って不要だった気が...なぜ急に必要に?
 classified_label = classify_image(image.convert('RGB'))
 if classified_label != etc:
 filename = os.path.basename(image.filename)
 image.save(os.path.join(to_dir, filename))
 

def filter_images_and_save(data_dir: str, to_dir: str):
 dataset = load_dataset("imagefolder", data_dir=data_dir, split="train")
 os.makedirs(to_dir, exist_ok=True)
 dataset.map(lambda example: {"image": save_if_pokemon_or_pal(example, to_dir)}, batched=False)

filter_images_and_save("../data/cropped/palworld-fan/1JN5-jr5D_k/", "../data/filtered/palworld-fan_1JN5-jr5D_k/")
filter_images_and_save("../data/cropped/palworld-fan/3qpPt0YLp0g/", "../data/filtered/palworld-fan_3qpPt0YLp0g/")
filter_images_and_save("../data/cropped/palworld-fan/5FdIrKB1SUI/", "../data/filtered/palworld-fan_5FdIrKB1SUI/")
filter_images_and_save("../data/cropped/palworld-fan/7dmUoLu14qs/", "../data/filtered/palworld-fan_7dmUoLu14qs/")
filter_images_and_save("../data/cropped/palworld-fan/AAtemrMzo3s/", "../data/filtered/palworld-fan_AAtemrMzo3s/")
filter_images_and_save("../data/cropped/palworld-fan/bckQkt8aUlo/", "../data/filtered/palworld-fan_bckQkt8aUlo/")
filter_images_and_save("../data/cropped/palworld-fan/GB7rGn3IDpI/", "../data/filtered/palworld-fan_GB7rGn3IDpI/")
filter_images_and_save("../data/cropped/palworld-fan/gM-jmf28GEY/", "../data/filtered/palworld-fan_gM-jmf28GEY/")
filter_images_and_save("../data/cropped/palworld-fan/HBJwXcKymOk/", "../data/filtered/palworld-fan_HBJwXcKymOk/")
filter_images_and_save("../data/cropped/palworld-fan/iiGcw_gq53c/", "../data/filtered/palworld-fan_iiGcw_gq53c/")
filter_images_and_save("../data/cropped/palworld-fan/rNmZXw4zCys/", "../data/filtered/palworld-fan_rNmZXw4zCys/")
filter_images_and_save("../data/cropped/palworld-fan/S8-_o6CEI8M/", "../data/filtered/palworld-fan_S8-_o6CEI8M/")
filter_images_and_save("../data/cropped/palworld-fan/SNCOkUE3A0A/", "../data/filtered/palworld-fan_SNCOkUE3A0A/")
filter_images_and_save("../data/cropped/palworld-fan/utAT6L3Ea00/", "../data/filtered/palworld-fan_utAT6L3Ea00/")
filter_images_and_save("../data/cropped/palworld-fan/v878zGYOGq8/", "../data/filtered/palworld-fan_v878zGYOGq8/")
filter_images_and_save("../data/cropped/palworld-fan/XETrVLff13M/", "../data/filtered/palworld-fan_XETrVLff13M/")
filter_images_and_save("../data/cropped/palworld-fan/YSpO6l5TglA/", "../data/filtered/palworld-fan_YSpO6l5TglA/")
filter_images_and_save("../data/cropped/palworld-fan/zms3ORqAXiQ/", "../data/filtered/palworld-fan_zms3ORqAXiQ/")
filter_images_and_save("../data/cropped/palworld-fan/Zyqgp460xRo/", "../data/filtered/palworld-fan_Zyqgp460xRo/")

filter_images_and_save("../data/cropped/pokemon-games/0Loz61U6CuE/", "../data/filtered/pokemon-games_0Loz61U6CuE/")
filter_images_and_save("../data/cropped/pokemon-games/AObd6oPnlyg/", "../data/filtered/pokemon-games_AObd6oPnlyg/")
filter_images_and_save("../data/cropped/pokemon-games/cIi40yfs630/", "../data/filtered/pokemon-games_cIi40yfs630/")
filter_images_and_save("../data/cropped/pokemon-games/EEupjm0LwUQ/", "../data/filtered/pokemon-games_EEupjm0LwUQ/")
filter_images_and_save("../data/cropped/pokemon-games/G9L0LK07lis/", "../data/filtered/pokemon-games_G9L0LK07lis/")
filter_images_and_save("../data/cropped/pokemon-games/LG-LZKUUVZI/", "../data/filtered/pokemon-games_LG-LZKUUVZI/")
filter_images_and_save("../data/cropped/pokemon-games/Q3-fCEL-JjE/", "../data/filtered/pokemon-games_Q3-fCEL-JjE/")


Map: 100%|██████████| 362/362 [00:29<00:00, 12.15 examples/s]
Generating train split: 1481 examples [00:00, 43102.23 examples/s]
Map: 100%|██████████| 1481/1481 [04:01<00:00, 6.13 examples/s]
Generating train split: 512 examples [00:00, 42667.22 examples/s]
Map: 100%|██████████| 512/512 [00:40<00:00, 12.60 examples/s]
Generating train split: 434 examples [00:00, 39452.27 examples/s]
Map: 100%|██████████| 434/434 [00:37<00:00, 11.62 examples/s]
Generating train split: 718 examples [00:00, 39889.14 examples/s]
Map: 100%|██████████| 718/718 [00:48<00:00, 14.83 examples/s]
Generating train split: 774 examples [00:00, 42979.21 examples/s]
Map: 100%|██████████| 774/774 [00:54<00:00, 14.25 examples/s]
Generating train split: 958 examples [00:00, 39906.87 examples/s]
Map: 100%|██████████| 958/958 [02:40<00:00, 5.95 examples/s]
Generating train split: 534 examples [00:00, 42679.14 examples/s]
Map: 100%|██████████| 534/534 [01:42<00:00, 5.20 examples/s]
Generating train split: 1938 examples [00: