File size: 2,760 Bytes
cff1674
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import os
import argparse
import logging
from pathlib import Path

def writable_dir(target_path):
    """Return *target_path* as a ``Path`` if it is an existing, writable directory.

    Raises ``argparse.ArgumentTypeError`` when the path is not a directory or
    when the current process lacks write permission on it, which lets the
    function serve as an argparse ``type=`` converter.
    """
    candidate = Path(target_path)

    # Guard clauses: reject the two failure cases up front.
    if not candidate.is_dir():
        raise argparse.ArgumentTypeError(f"Directory '{candidate}' does not exist.")
    if not os.access(candidate, os.W_OK):
        raise argparse.ArgumentTypeError(f"Directory '{candidate}' is not writable.")

    return candidate
    
def main(folder_path: Path, extension: str, keywords: set = None):
    """Strip unwanted tags from every comma-separated tag file in a folder.

    Each regular file directly inside *folder_path* whose name ends with
    *extension* is read, split on commas, cleaned, and rewritten in place as a
    ``", "``-joined list. Failures are logged per file and do not stop the run.

    Args:
        folder_path: Directory whose files are processed (not recursive).
        extension: File-name suffix selecting which files to process.
        keywords: Tags to drop (exact match after whitespace stripping).
            ``None`` or an empty collection removes only blank tags.
    """
    unwanted = keywords or ()

    for file_name in os.listdir(folder_path):
        if not file_name.endswith(extension):
            continue
        file_path = os.path.join(folder_path, file_name)
        # A sub-directory may carry the same suffix; it is not a tag file.
        if not os.path.isfile(file_path):
            continue

        try:
            # Explicit UTF-8 so results do not depend on the platform locale.
            with open(file_path, "r", encoding="utf-8") as f:
                text = f.read()

            # Split on commas, trim each tag, and drop blanks and unwanted tags.
            stripped = (raw.strip() for raw in text.split(","))
            tags = [tag for tag in stripped if tag and tag not in unwanted]

            with open(file_path, "w", encoding="utf-8") as f:
                f.write(", ".join(tags))
        except (OSError, UnicodeError) as e:
            # Best effort: report the failure and continue with the next file.
            logging.error(f"Error processing {file_name}: {e}")
        else:
            logging.info(f"Processed {file_name}")

if __name__ == "__main__":
    # Emit INFO-and-above records formatted as "LEVEL: message".
    logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')

    # Tags removed when the user supplies no keywords on the command line.
    default_keywords = {
        "1girl",
        "solo",
        "blue eyes",
        "brown eyes",
        "blonde hair",
        "black hair",
        "realistic",
        "red lips",
        "lips",
        "artist name",
        "makeup",
        "brown hair",
        "dark skin",
        "dark-skinned female",
        "medium breasts",
        "breasts",
        "1boy",
    }

    parser = argparse.ArgumentParser(
        description="Remove specified keywords from all text files in a directory.",
    )
    parser.add_argument(
        "folder_path",
        type=writable_dir,
        help="path to directory containing text files",
    )
    parser.add_argument(
        "-e",
        "--extension",
        type=str,
        default=".txt",
        help="file extension of text files to be processed (default: .txt)",
    )
    parser.add_argument(
        "-k",
        "--keywords",
        type=str,
        nargs="*",
        help="Optional: list of keywords to be removed from text files. If not provided, the default list will be used.",
    )
    args = parser.parse_args()

    folder_path = args.folder_path
    extension = args.extension
    # A missing -k and a bare -k with no values both fall back to the defaults.
    if args.keywords:
        keywords = set(args.keywords)
    else:
        keywords = default_keywords

    main(folder_path, extension, keywords)