|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from pathlib import Path |
|
import os |
|
from utils.file_processor import FileProcessor, ProcessorOptions |
|
|
|
# Lower-case image extensions (with leading dot) that get_files() accepts.
FILE_EXTS = {".png", ".jpg", ".jpeg", ".tiff", ".bmp", ".gif", ".jxl", ".webp"}


def get_files(path):
    """Yield image files under *path* together with their sidecar paths.

    The directory tree rooted at *path* is walked recursively. A file is
    considered when its extension, compared case-insensitively, is in
    ``FILE_EXTS``. The sibling ``.caption`` and ``.tags`` files must both
    exist; otherwise a "Skipping" line is printed and the image is ignored.

    Args:
        path: Directory to scan, as a string or path-like object.

    Yields:
        ``(image, caption, tags, txt)`` tuples of ``Path`` objects, where
        ``txt`` is the image path with a ``.txt`` suffix. ``txt`` is not
        checked for existence.
    """
    for root, _dirs, filenames in os.walk(path):
        # os.walk() already prefixes each root with the start directory;
        # joining the start directory again would turn "data" into
        # "data/data" for relative paths.
        root_dir = Path(root)

        for filename in filenames:
            file = root_dir / filename
            # Lower-case the suffix so "IMG.PNG" is treated like "img.png".
            if file.suffix.lower() not in FILE_EXTS:
                continue

            caption = file.with_suffix(".caption")
            tags = file.with_suffix(".tags")
            txt = file.with_suffix(".txt")

            if not caption.exists():
                print(f"Skipping {file}: {caption} does not exist")
                continue
            if not tags.exists():
                print(f"Skipping {file}: {tags} does not exist")
                continue

            yield file, caption, tags, txt
|
|
|
|
|
class FilenameProcessor(FileProcessor):
    """FileProcessor subclass that turns a file-name-like string into a phrase."""

    def process_content(self, content: str) -> str:
        """Return *content* with the size marker removed and underscores spaced.

        Every occurrence of ``_24_regular_1024x1024`` is deleted, the
        remaining underscores are replaced by spaces, and leading/trailing
        whitespace is stripped.
        """
        size_marker = '_24_regular_1024x1024'
        return content.replace(size_marker, '').replace('_', ' ').strip()
|
|
|
|
|
class CaptionPrefixConcatenator:
    """Merge a name-derived prefix, a tags file and a caption file into one text.

    The combined text has the form ``<prefix>, <tags>, <caption>`` and is
    either written to a ``.txt`` path or, in dry-run mode, only printed.
    """

    def __init__(self, dry_run: bool = False):
        """Store the dry-run flag and build the processor options.

        Args:
            dry_run: When true, ``concat`` prints the result instead of
                writing it to disk.
        """
        self.dry_run = dry_run
        # NOTE(review): ``options`` is not read anywhere in this class;
        # presumably kept for external consumers -- confirm before removing.
        self.options = ProcessorOptions(
            recursive=False,
            dry_run=dry_run,
            file_extensions={'.caption', '.txt'},
        )

    def concat(self, caption_path: Path, tags_path: Path, txt_path: Path, source_file: Path):
        """Combine prefix, tags and caption and write (or print) the result.

        Args:
            caption_path: File holding the caption; surrounding whitespace
                is stripped.
            tags_path: File holding the tags; leading/trailing commas,
                spaces and newlines are stripped.
            txt_path: Destination for the combined text.
            source_file: File whose stem supplies the prefix, after removing
                ``_24_regular_1024x1024`` and replacing underscores by spaces.

        Raises:
            OSError: If an input cannot be read or the output cannot be
                written.
            UnicodeDecodeError: If an input file is not valid UTF-8.
        """
        # Explicit UTF-8 keeps the result independent of the platform's
        # locale encoding.
        caption = caption_path.read_text(encoding="utf-8").strip()
        tags = tags_path.read_text(encoding="utf-8").strip(", \n")

        prefix = source_file.stem.replace('_24_regular_1024x1024', '').replace('_', ' ').strip()

        # Skip empty pieces so a blank tags or caption file does not leave a
        # dangling ", ," or trailing ", " in the output.
        txt = ", ".join(part for part in (prefix, tags, caption) if part)

        if self.dry_run:
            print(f"{txt_path}:")
            print(txt)
            print()
        else:
            txt_path.write_text(txt, encoding="utf-8")
            print(f"wrote {txt_path}")
|
|
|
|
|
def main(root=".", *, dry_run=False):
    """Write a combined ``.txt`` file for every complete image set under *root*.

    Args:
        root: Directory to scan; defaults to the current working directory.
        dry_run: When true, the combined text is printed instead of written.
    """
    concatenator = CaptionPrefixConcatenator(dry_run=dry_run)

    for file, caption, tags, txt in get_files(root):
        concatenator.concat(caption, tags, txt, file)


# Run against the current directory when executed as a script.
if __name__ == "__main__":
    main()
|
|