{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "c:\\Users\\hiroga\\miniconda3\\envs\\pokemon-pal\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n" ] } ], "source": [ "import sys\n", "sys.path.append('..')\n", "\n", "from src.pipeline import *\n", "\n", "import sqlite3\n", "\n", "conn = sqlite3.connect(DB_NAME)\n", "cursor = conn.cursor()\n", "\n", "# Convert timestamp from JST to UTC (-09:00) for all records\n", "cursor.execute(\"UPDATE metadata SET created_at = datetime(created_at, '-9 hours')\")\n", "conn.commit()\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# insert nobg job to SQLite manually.\n", "from datetime import datetime\n", "from src.pipeline import Step, create_job\n", "\n", "# Insert a nobg job manually\n", "create_job(Step.nobg, datetime.utcnow())\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# Remove all nobg and crop job, except the latest.\n", "import sys\n", "sys.path.append('..')\n", "\n", "from src.pipeline import *\n", "\n", "for step in [Step.nobg, Step.cropped]:\n", " conn = sqlite3.connect(DB_NAME)\n", " cursor = conn.cursor()\n", "\n", " cursor.execute(\"SELECT MAX(executed_at) FROM jobs WHERE step == ?\", (step.value,))\n", " latest_job = cursor.fetchone()[0]\n", "\n", " cursor.execute(\"DELETE FROM jobs WHERE step == ? AND executed_at != ?\", (step.value, latest_job))\n", " conn.commit()\n" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "import sys\n", "sys.path.append('..')\n", "\n", "from src.pipeline import *\n", "\n", "from datetime import datetime\n", "\n", "# 画像を保存しなおしたことに加え、クロッピングの基準を64x64から512x512に変更したので、それ以前のメタデータを削除\n", "latest_raw_images_saved_at = datetime.strptime('2024-02-04 23:04:03.539261', '%Y-%m-%d %H:%M:%S.%f')\n", "conn = sqlite3.connect(DB_NAME)\n", "cursor = conn.cursor()\n", "\n", "cursor.execute(\"DELETE FROM metadata WHERE created_at < ?\", (latest_raw_images_saved_at,))\n", "conn.commit()\n" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "# jobが中途半端な状態で終わっていたようなので、処理をやり直す\n", "\n", "# Define the datetime string\n", "latest_metadata_saved_at = datetime.strptime('2024-02-05 12:39:45.644526', '%Y-%m-%d %H:%M:%S.%f')\n", "\n", "# Connect to the database\n", "conn = sqlite3.connect(DB_NAME)\n", "cursor = conn.cursor()\n", "\n", "# Execute the deletion query\n", "cursor.execute(\"DELETE FROM metadata WHERE created_at >= ? AND step != ?\", (latest_metadata_saved_at, Step.raw.value,))\n", "conn.commit()\n" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "# Pipeline構築前に保存したデータに対して、metadataを付与する\n", "import sys\n", "sys.path.append('..')\n", "\n", "import os\n", "from datetime import datetime\n", "from src.pipeline import Label, Metadata, Step, create_metadata, part_path\n", "\n", "raw_dirs = {\n", " '../data/raw/#Newポケモンスナップ/': Label.pokemon,\n", " '../data/raw/#パルワールド/': Label.pal,\n", " '../data/raw/every-pal-in-palworld-a-complete-paldeck-list': Label.pal,\n", "}\n", "nobg_dirs = {\n", " '../data/nobg/#Newポケモンスナップ/': Label.pokemon,\n", " '../data/nobg/#パルワールド/': Label.pal,\n", " '../data/nobg/every-pal-in-palworld-a-complete-paldeck-list': Label.pal,\n", "}\n", "cropped_dirs = {\n", " '../data/cropped/#Newポケモンスナップ/': Label.pokemon,\n", " '../data/cropped/#パルワールド/': Label.pal,\n", " '../data/cropped/every-pal-in-palworld-a-complete-paldeck-list': Label.pal,\n", "}\n", "created_at = '2024-01-28 08:00:00.000'\n", "\n", "def create_metadata_in_folders(folders, bucket: str, step: Step):\n", " for folder, label in folders.items():\n", " for root, dirs, files in os.walk(folder):\n", " for filename in files:\n", " bucket, path = part_path(root, filename, bucket).values()\n", " metadata = Metadata(bucket, path, step, label, datetime.strptime(created_at, '%Y-%m-%d %H:%M:%S.%f'))\n", " create_metadata(metadata)\n", "\n", "create_metadata_in_folders(raw_dirs, '../data/raw', Step.raw)\n", "create_metadata_in_folders(nobg_dirs, '../data/nobg', Step.nobg)\n", "create_metadata_in_folders(cropped_dirs, '../data/cropped', Step.cropped)\n" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "# Palworldのファン動画プレイリストの背景切り抜き処理を途中から再開する。\n", "# `2024-02-05 12:52:27`の追加分までは処理が完了している。パイプラインは最後のジョブ実行時間以降に追加されたMetadataを処理するため、その実行日時の`nobg`ジョブを追加する。\n", "import sys\n", "sys.path.append('..')\n", "\n", "import sqlite3\n", "from datetime import datetime\n", "from src.pipeline import DB_NAME, Step\n", "\n", "executed_at = datetime.strptime('2024-02-05 12:52:27.000', '%Y-%m-%d %H:%M:%S.%f')\n", "conn = sqlite3.connect(DB_NAME)\n", "cursor = conn.cursor()\n", "\n", "cursor.execute(\"INSERT INTO jobs (step, executed_at) VALUES (?, ?)\", (Step.nobg.value, executed_at))\n", "conn.commit()\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "pokemon-pal", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.7" } }, "nbformat": 4, "nbformat_minor": 2 }