#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Concatenate image files and .caption and .tags files to .txt files
#
# This script walks through a directory, identifies image files, and checks for the existence of corresponding
# .caption and .tags files. It then writes a .txt file next to each image containing a prefix derived from the
# image filename, followed by the contents of the .tags file and the .caption file.
#
# Usage:
# - Place the script in the directory containing the image files.
# - Run the script to concatenate .caption and .tags files into .txt files.
# - Pass dry_run=True to main() to preview the changes without writing to the .txt files.
# - Files missing either .caption or .tags will be skipped.
#
# Functions and classes:
#   get_files(path): Walks through the directory and yields image files along with their .caption, .tags
#       and .txt paths.
#   FilenameProcessor: FileProcessor subclass that cleans up a filename-like string.
#   CaptionPrefixConcatenator.concat(caption_path, tags_path, txt_path, source_file): Combines the filename
#       prefix, the tags and the caption into the .txt file (or prints them when dry_run is set).

import os
from pathlib import Path

from utils.file_processor import FileProcessor, ProcessorOptions

FILE_EXTS = {".png", ".jpg", ".jpeg", ".tiff", ".bmp", ".gif", ".jxl", ".webp"}

# Substring that is removed from image names before they are used as a prefix.
_NAME_MARKER = "_24_regular_1024x1024"


def _clean_name(name: str) -> str:
    """Return *name* without the size marker and with underscores as spaces."""
    return name.replace(_NAME_MARKER, "").replace("_", " ").strip()


def get_files(path):
    """Yield ``(image, caption, tags, txt)`` paths for each image under *path*.

    The directory tree rooted at *path* is walked recursively. A file counts
    as an image when its suffix, compared case-insensitively, is listed in
    ``FILE_EXTS``. The three companion paths are the image path with its
    suffix replaced by ``.caption``, ``.tags`` and ``.txt``.

    Images whose ``.caption`` or ``.tags`` file does not exist are reported
    on stdout and skipped; the ``.txt`` path is never checked for existence.

    Args:
        path: Directory to scan (``str`` or path-like).

    Yields:
        Tuples of four ``Path`` objects: image, caption, tags, txt.
    """
    for root, _dirs, files in os.walk(Path(path)):
        # os.walk() roots already start with *path*; joining them onto *path*
        # again would duplicate the prefix for relative paths such as "data".
        root = Path(root)
        for name in files:
            file = root / name
            # Lower-case the suffix so that e.g. ".PNG" is recognised too.
            if file.suffix.lower() not in FILE_EXTS:
                continue

            caption = file.with_suffix(".caption")
            tags = file.with_suffix(".tags")
            txt = file.with_suffix(".txt")

            if not caption.exists():
                print(f"Skipping {file}: {caption} does not exist")
                continue
            if not tags.exists():
                print(f"Skipping {file}: {tags} does not exist")
                continue

            yield file, caption, tags, txt


class FilenameProcessor(FileProcessor):
    """FileProcessor subclass whose content hook cleans up a name string.

    NOTE(review): how ``FileProcessor`` invokes ``process_content`` is defined
    in ``utils.file_processor`` and is not visible from this file.
    """

    def process_content(self, content: str) -> str:
        """Strip the size marker from *content* and turn underscores into spaces.

        The input is not otherwise altered; in particular a file extension,
        if present, is kept.
        """
        return _clean_name(content)


class CaptionPrefixConcatenator:
    """Builds the ``.txt`` companion file for an image from its tags and caption."""

    def __init__(self, dry_run: bool = False):
        """Store the dry-run flag and build the matching ``ProcessorOptions``.

        Args:
            dry_run: When true, ``concat`` prints the result instead of
                writing it to disk.
        """
        self.dry_run = dry_run
        # Not read anywhere in this file; kept for code that inspects the instance.
        self.options = ProcessorOptions(
            recursive=False,
            dry_run=dry_run,
            file_extensions={'.caption', '.txt'},
        )

    def concat(self, caption_path: Path, tags_path: Path, txt_path: Path, source_file: Path):
        """Combine filename prefix, tags and caption into *txt_path*.

        The output is ``"<prefix>, <tags>, <caption>"`` where the prefix is the
        cleaned-up stem of *source_file*. Parts that are empty after stripping
        are left out so the result never contains dangling separators.

        Args:
            caption_path: File holding the caption text.
            tags_path: File holding the tags; surrounding commas, spaces and
                newlines are stripped.
            txt_path: Destination file (overwritten unless ``dry_run`` is set).
            source_file: Image path the prefix is derived from.
        """
        # Read and write UTF-8 explicitly instead of relying on the locale default.
        caption = caption_path.read_text(encoding="utf-8").strip()
        tags = tags_path.read_text(encoding="utf-8").strip(", \n")

        prefix = _clean_name(source_file.stem)

        txt = ", ".join(part for part in (prefix, tags, caption) if part)

        if self.dry_run:
            print(f"{txt_path}:")
            print(txt)
            print()
        else:
            txt_path.write_text(txt, encoding="utf-8")
            print(f"wrote {txt_path}")


def main(path=".", dry_run: bool = False):
    """Process every image found under *path*.

    Args:
        path: Directory to scan; defaults to the current directory.
        dry_run: Preview the output on stdout without writing any file.
    """
    concatenator = CaptionPrefixConcatenator(dry_run=dry_run)
    for file, caption, tags, txt in get_files(path):
        concatenator.concat(caption, tags, txt, file)


if __name__ == "__main__":
    main()