import json
import os

import gradio as gr
from waifuc.action import HeadCountAction, AlignMinSizeAction, CCIPAction, ThreeStageSplitAction, ModeConvertAction, ClassFilterAction, PersonSplitAction, TaggingAction, RatingFilterAction, NoMonochromeAction, RandomFilenameAction, FirstNSelectAction, FilterSimilarAction, FileExtAction
from waifuc.export import SaveExporter, TextualInversionExporter
from waifuc.source import DanbooruSource, PixivSearchSource, ZerochanSource, LocalSource, GcharAutoSource
from cyberharem.dataset.crawler import crawl_dataset_to_huggingface
from cyberharem.utils import get_hf_client, get_hf_fs
from hbutils.system import TemporaryDirectory
from cyberharem.utils import download_file as cyber_download_file
from huggingface_hub import hf_hub_url, hf_hub_download

# Dataset repository whose per-game ``pixiv_characters.json`` files are read
# when the "from dghs" option is enabled.
_DEEPGHS_REPO = 'deepghs/game_characters'


def _split_names(chars):
    """Return the non-empty, whitespace-stripped names found in ``chars``.

    Both the ASCII comma and the full-width comma (U+FF0C) are accepted as
    separators, since the latter is what a CJK input method emits by default.
    ``None`` and the empty string yield an empty list.
    """
    normalized = (chars or "").replace("，", ",")
    return [part.strip() for part in normalized.split(",") if part.strip()]


def _crawl_deepghs_characters(token):
    """Crawl every character listed in the deepghs per-game JSON files.

    Each ``*/pixiv_characters.json`` found in ``_DEEPGHS_REPO`` is downloaded
    through the regular Hugging Face cache, and every entry of its
    ``characters`` list is crawled by its ``jpname`` value.
    """
    print("Downloading jsons..")
    hf_fs = get_hf_fs()
    for remote_path in hf_fs.glob(f'datasets/{_DEEPGHS_REPO}/*/pixiv_characters.json'):
        # Hub paths are always '/'-separated, so the repo-relative file name is
        # built by hand instead of with os.path.join (backslashes on Windows).
        game = remote_path.split('/')[-2]
        local_json = hf_hub_download(
            repo_id=_DEEPGHS_REPO,
            repo_type='dataset',
            filename=f'{game}/pixiv_characters.json',
            token=token,
        )
        with open(local_json, 'r', encoding='utf-8') as f:
            characters = json.load(f)['characters']

        for entry in characters:
            jpname = entry.get('jpname')
            if not jpname:
                # NOTE(review): assumes entries without a usable 'jpname' can
                # be skipped rather than aborting the batch -- confirm.
                continue
            print(jpname, 'start...')
            crawl_dataset_to_huggingface(jpname)
            print(jpname + "完成")


def start_func(token, chars, is_cpu, udghs):
    """Gradio callback: crawl character datasets and upload them.

    Args:
        token: Hugging Face access token; exported as ``HF_TOKEN``.
        chars: Comma-separated character names (ignored when ``udghs`` is set).
        is_cpu: When true, ``ONNX_MODE`` is set to ``CPUExecutionProvider``.
        udghs: When true, the character names are taken from the
            ``pixiv_characters.json`` files of ``_DEEPGHS_REPO`` instead of
            ``chars``.

    Returns:
        A status message shown in the output text box.
    """
    if not token:
        return "无令牌"

    # Both variables are process-wide and stay set after this call returns.
    os.environ['HF_TOKEN'] = token
    if is_cpu:
        os.environ['ONNX_MODE'] = 'CPUExecutionProvider'

    if udghs:
        _crawl_deepghs_characters(token)
        return "完成"

    names = _split_names(chars)
    if not names:
        # Nothing to crawl; avoid calling the crawler with an empty name.
        return "角色列表为空"

    for ch in names:
        crawl_dataset_to_huggingface(ch)
        print(ch + "完成")
    return str(chars) + " 上传完成"


with gr.Blocks() as jblock:
    hf_token = gr.Textbox(label="访问令牌", interactive=True)
    char_list = gr.Textbox(label="角色列表", info="用,分隔", placeholder="《输入角色名然后你的数据集就出现在抱脸了》", interactive=True)
    is_cpu = gr.Checkbox(label="无显卡", info="不使用显卡", value=True, interactive=True)
    use_dghs = gr.Checkbox(label="从dghs", info="override", value=False, interactive=True)
    start_button = gr.Button("开始上传", interactive=True)
    opt_msg = gr.Textbox(interactive=False)
    # Wire the button to the crawler; also exposed over the API as "crawlup".
    start_button.click(start_func, [hf_token, char_list, is_cpu, use_dghs], [opt_msg], api_name="crawlup")

if __name__ == "__main__":
    jblock.queue(max_size=64)
    jblock.launch(share=True)