In [1]:
import sys
sys.path.append('..')

from src.pipeline import *

import sqlite3

conn = sqlite3.connect(DB_NAME)
try:
    cursor = conn.cursor()

    # One-off migration: shift metadata.created_at from JST to UTC (-09:00) for all records.
    # Not idempotent -- every execution subtracts another 9 hours, so run this cell once only.
    # NOTE: SQLite's datetime() returns 'YYYY-MM-DD HH:MM:SS', so any fractional seconds
    # stored in created_at are dropped by this update.
    cursor.execute("UPDATE metadata SET created_at = datetime(created_at, '-9 hours')")
    conn.commit()
finally:
    # Release the database handle even when the UPDATE raises.
    conn.close()


 from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Manually insert a nobg job into SQLite.
from datetime import datetime, timezone
from src.pipeline import Step, create_job

# datetime.utcnow() is deprecated since Python 3.12. Build the same naive UTC value
# explicitly so the timestamp handed to create_job keeps its original (tz-less) form.
create_job(Step.nobg, datetime.now(timezone.utc).replace(tzinfo=None))


In [3]:
# Remove all nobg and cropped jobs except the latest one for each step.
import sys
sys.path.append('..')

from src.pipeline import *

# One connection serves both steps and is closed afterwards; opening a fresh
# connection per step without closing it leaked database handles.
conn = sqlite3.connect(DB_NAME)
try:
    cursor = conn.cursor()
    for step in [Step.nobg, Step.cropped]:
        # Latest execution time recorded for this step (None when the step has no jobs,
        # in which case the DELETE below matches nothing).
        cursor.execute("SELECT MAX(executed_at) FROM jobs WHERE step == ?", (step.value,))
        latest_job = cursor.fetchone()[0]

        # Delete every job of this step whose timestamp differs from the latest one.
        cursor.execute("DELETE FROM jobs WHERE step == ? AND executed_at != ?", (step.value, latest_job))
        # Commit per step so a failure on the second step keeps the first step's cleanup.
        conn.commit()
finally:
    conn.close()


In [5]:
import sys
sys.path.append('..')

from src.pipeline import *

from datetime import datetime

# The images were re-saved and the cropping criterion changed from 64x64 to 512x512,
# so delete the metadata created before that point.
latest_raw_images_saved_at = datetime.strptime('2024-02-04 23:04:03.539261', '%Y-%m-%d %H:%M:%S.%f')
conn = sqlite3.connect(DB_NAME)
try:
    cursor = conn.cursor()

    # NOTE(review): the datetime is bound through sqlite3's datetime adapter; the default
    # adapter is deprecated since Python 3.12 -- confirm whether src.pipeline registers its own.
    cursor.execute("DELETE FROM metadata WHERE created_at < ?", (latest_raw_images_saved_at,))
    conn.commit()
finally:
    # Close the connection even if the DELETE fails.
    conn.close()


In [6]:
# The job appears to have ended in a half-finished state, so redo the processing.

# Cut-off timestamp: derived (non-raw) metadata created at or after this moment is removed.
latest_metadata_saved_at = datetime.strptime('2024-02-05 12:39:45.644526', '%Y-%m-%d %H:%M:%S.%f')

# Connect to the database; the handle is closed in the finally clause below.
conn = sqlite3.connect(DB_NAME)
try:
    cursor = conn.cursor()

    # Delete everything from the cut-off onwards except rows of the raw step.
    cursor.execute("DELETE FROM metadata WHERE created_at >= ? AND step != ?", (latest_metadata_saved_at, Step.raw.value,))
    conn.commit()
finally:
    conn.close()


In [6]:
# Attach metadata to the data that was saved before the pipeline was built.
import sys
sys.path.append('..')

import os
from datetime import datetime
from src.pipeline import Label, Metadata, Step, create_metadata, part_path

# Directory -> label tables for the data saved before the pipeline existed,
# one table per pipeline step (raw, background-removed, cropped).
raw_dirs = dict([
    ('../data/raw/#Newポケモンスナップ/', Label.pokemon),
    ('../data/raw/#パルワールド/', Label.pal),
    ('../data/raw/every-pal-in-palworld-a-complete-paldeck-list', Label.pal),
])
nobg_dirs = dict([
    ('../data/nobg/#Newポケモンスナップ/', Label.pokemon),
    ('../data/nobg/#パルワールド/', Label.pal),
    ('../data/nobg/every-pal-in-palworld-a-complete-paldeck-list', Label.pal),
])
cropped_dirs = dict([
    ('../data/cropped/#Newポケモンスナップ/', Label.pokemon),
    ('../data/cropped/#パルワールド/', Label.pal),
    ('../data/cropped/every-pal-in-palworld-a-complete-paldeck-list', Label.pal),
])

# Timestamp string stamped on every back-filled metadata row.
created_at = '2024-01-28 08:00:00.000'

def create_metadata_in_folders(folders, bucket: str, step: Step):
    """Create a Metadata record for every file found under the given folders.

    Args:
        folders: Mapping of directory path -> label; each directory is walked recursively.
        bucket: Root directory passed to ``part_path`` together with each file's location.
        step: Pipeline step recorded on every created Metadata.

    Every record is stamped with the module-level ``created_at`` string.
    """
    # Parse the shared timestamp once instead of once per file.
    saved_at = datetime.strptime(created_at, '%Y-%m-%d %H:%M:%S.%f')

    for folder, label in folders.items():
        for root, _dirs, files in os.walk(folder):
            for filename in files:
                # Unpack into fresh names: rebinding ``bucket`` here would feed the value
                # returned by part_path into every later call instead of the caller's root.
                # NOTE(review): presumably part_path returns a dict ordered (bucket, path) --
                # verify against src.pipeline.
                file_bucket, file_path = part_path(root, filename, bucket).values()
                create_metadata(Metadata(file_bucket, file_path, step, label, saved_at))

# Back-fill metadata step by step, in pipeline order: raw, nobg, cropped.
for stage_dirs, stage_bucket, stage_step in (
    (raw_dirs, '../data/raw', Step.raw),
    (nobg_dirs, '../data/nobg', Step.nobg),
    (cropped_dirs, '../data/cropped', Step.cropped),
):
    create_metadata_in_folders(stage_dirs, stage_bucket, stage_step)


In [8]:
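# Resume the background-removal processing of the Palworld fan-video playlist partway through.
# Processing is complete up to the items added at `2024-02-05 12:52:27`. Because the pipeline processes Metadata added after the last job's execution time, add a `nobg` job with that execution timestamp.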
import sys
sys.path.append('..')

import sqlite3
from datetime import datetime
from src.pipeline import DB_NAME, Step

# Execution time to record for the nobg job being inserted.
executed_at = datetime.strptime('2024-02-05 12:52:27.000', '%Y-%m-%d %H:%M:%S.%f')
conn = sqlite3.connect(DB_NAME)
try:
    cursor = conn.cursor()

    # Record a nobg job carrying that execution time.
    cursor.execute("INSERT INTO jobs (step, executed_at) VALUES (?, ?)", (Step.nobg.value, executed_at))
    conn.commit()
finally:
    # Close the connection even if the INSERT fails.
    conn.close()
