toolkit / utils /concat_captions_prefix_filename.py
k4d3's picture
refactor stuff
5db1908
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# 画像ファイルと.captionと.tagsファイルを.txtファイルに連結する
# Concatenate image files and .caption and .tags files to .txt files
#
# This script walks through a directory, identifies image files, and checks for the existence of corresponding
# .caption and .tags files. It then writes a .txt file for each image containing a prefix derived from the
# image filename, followed by the contents of the .tags file and then the .caption file.
#
# Usage:
# - Run the script from the directory containing the image files (it scans the current working directory).
# - The script concatenates the filename prefix, .tags and .caption contents into .txt files.
# - Set dry_run to True in main() to preview the changes without writing to the .txt files.
# - Files missing either .caption or .tags will be skipped.
#
# Functions:
# get_files(path): Walks through the directory and yields image files along with their .caption, .tags and .txt paths.
# CaptionPrefixConcatenator.concat(caption_path, tags_path, txt_path, source_file): Combines the filename prefix with the contents of the .tags and .caption files and writes the result to the .txt file (or prints it when dry_run is set).
from pathlib import Path
import os
from utils.file_processor import FileProcessor, ProcessorOptions
FILE_EXTS = {".png", ".jpg", ".jpeg", ".tiff", ".bmp", ".gif", ".jxl", ".webp"}


def get_files(path):
    """Yield image files under *path* together with their sidecar paths.

    The directory tree rooted at *path* is walked recursively. For every file
    whose extension (compared case-insensitively) is in ``FILE_EXTS``, the
    sibling ``.caption`` and ``.tags`` files must both exist; otherwise a
    "Skipping" message is printed and the image is not yielded.

    Args:
        path: Directory to scan, as a string or path-like object.

    Yields:
        Tuples ``(image, caption, tags, txt)`` of ``Path`` objects, where
        ``txt`` is the (possibly not yet existing) output file path.
    """
    for root, dirs, files in os.walk(Path(path)):
        # Sort in place so the traversal order is deterministic across runs.
        dirs.sort()
        # os.walk already returns roots that include the starting path, so
        # they must not be joined onto it again: doing so turned a relative
        # start such as "data" into "data/data" and every file was skipped.
        root = Path(root)
        for name in sorted(files):
            file = root / name
            # Lower-case the suffix so "IMG.PNG" is recognised like "img.png".
            if file.suffix.lower() not in FILE_EXTS:
                continue
            caption = file.with_suffix(".caption")
            tags = file.with_suffix(".tags")
            txt = file.with_suffix(".txt")
            if not caption.exists():
                print(f"Skipping {file}: {caption} does not exist")
                continue
            if not tags.exists():
                print(f"Skipping {file}: {tags} does not exist")
                continue
            yield file, caption, tags, txt
class FilenameProcessor(FileProcessor):
    """Processor that rewrites a raw name into a space-separated phrase.

    NOTE(review): presumably ``process_content`` is invoked by
    ``FileProcessor`` — confirm against ``utils.file_processor``.
    """

    def process_content(self, content: str) -> str:
        """Return *content* without the size marker and with spaces for underscores.

        Every occurrence of ``_24_regular_1024x1024`` is dropped, each
        remaining underscore becomes a space, and surrounding whitespace is
        trimmed. No file extension is removed here.
        """
        # Splitting on the marker and re-joining removes every occurrence.
        without_marker = "".join(content.split('_24_regular_1024x1024'))
        # Map each leftover underscore to a single space.
        spaced = without_marker.translate(str.maketrans('_', ' '))
        return spaced.strip()
class CaptionPrefixConcatenator:
    """Write ``.txt`` files made of a filename prefix, tags and a caption."""

    def __init__(self, dry_run: bool = False):
        """Create a concatenator.

        Args:
            dry_run: When true, ``concat`` prints the combined text instead
                of writing it to disk.
        """
        self.dry_run = dry_run
        # Stored for callers; nothing in this class reads it.
        self.options = ProcessorOptions(
            recursive=False,
            dry_run=dry_run,
            file_extensions={'.caption', '.txt'}
        )

    def concat(self, caption_path: Path, tags_path: Path, txt_path: Path, source_file: Path):
        """Combine prefix, tags and caption and write them to *txt_path*.

        Args:
            caption_path: File holding the caption text.
            tags_path: File holding the comma-separated tags.
            txt_path: Output file; overwritten unless ``dry_run`` is set.
            source_file: Image file whose stem supplies the prefix.

        The result is ``"<prefix>, <tags>, <caption>"``. Parts that are empty
        after trimming are left out so no dangling ``", "`` separators appear.
        """
        # Read explicitly as UTF-8 so non-ASCII captions do not depend on the
        # platform's locale encoding.
        caption = caption_path.read_text(encoding="utf-8").strip()
        # Also trim stray commas so joining never yields ",, ".
        tags = tags_path.read_text(encoding="utf-8").strip(", \n")

        # Get the formatted filename prefix
        prefix = source_file.stem.replace('_24_regular_1024x1024', '').replace('_', ' ').strip()

        # Combine the text with the prefix, skipping empty parts
        txt = ", ".join(part for part in (prefix, tags, caption) if part)

        if self.dry_run:
            print(f"{txt_path}:")
            print(txt)
            print()
        else:
            txt_path.write_text(txt, encoding="utf-8")
            print(f"wrote {txt_path}")
def main():
    """Process every image found under the current working directory.

    Each tuple produced by ``get_files(".")`` is handed to a
    ``CaptionPrefixConcatenator`` created with dry-run disabled.
    """
    concatenator = CaptionPrefixConcatenator(dry_run=False)
    for entry in get_files("."):
        image, caption_file, tags_file, txt_file = entry
        # concat expects the sidecar paths first and the image path last.
        concatenator.concat(caption_file, tags_file, txt_file, image)


if __name__ == "__main__":
    main()