# AppleJupyter-test / waifu_get.py
# LittleApple-fp16 — Update waifu_get.py
# revision d762fa5 · raw · history · blame · 5.41 kB
import argparse
import os
from waifuc.action import HeadCountAction, AlignMinSizeAction, CCIPAction, ThreeStageSplitAction, ModeConvertAction, ClassFilterAction, PersonSplitAction, TaggingAction, RatingFilterAction, NoMonochromeAction, RandomFilenameAction, FirstNSelectAction, FilterSimilarAction, FileExtAction
from waifuc.export import SaveExporter, TextualInversionExporter
from waifuc.source import DanbooruSource, PixivSearchSource, ZerochanSource, LocalSource, GcharAutoSource
from cyberharem.dataset.crawler import crawl_dataset_to_huggingface
import gradio as gr
import os
import json
from waifuc.action import HeadCountAction, AlignMinSizeAction, CCIPAction, ThreeStageSplitAction, ModeConvertAction, ClassFilterAction, PersonSplitAction, TaggingAction, RatingFilterAction, NoMonochromeAction, RandomFilenameAction, FirstNSelectAction, FilterSimilarAction, FileExtAction
from waifuc.export import SaveExporter, TextualInversionExporter
from waifuc.source import DanbooruSource, PixivSearchSource, ZerochanSource, LocalSource, GcharAutoSource
from cyberharem.dataset.crawler import crawl_dataset_to_huggingface
from cyberharem.utils import get_hf_client, get_hf_fs
from hbutils.system import TemporaryDirectory
from cyberharem.utils import download_file as cyber_download_file
from huggingface_hub import hf_hub_url, hf_hub_download
def start_func(chars, is_cpu, udghs, game_index=None):
    """Crawl character datasets and upload them to Hugging Face.

    :param chars: comma-separated character names; used when ``udghs`` is falsy.
    :param is_cpu: when true, force ONNX inference onto the CPU via the
        ``ONNX_MODE`` environment variable.
    :param udghs: when true, ignore ``chars`` and crawl the characters listed
        in the ``deepghs/game_characters`` dataset instead.
    :param game_index: 1-based index selecting a single game's
        ``pixiv_characters.json``; ``None`` processes every game.
    :return: a completion message string.
    """
    if is_cpu:
        # Force CPU execution for ONNX-based models (applies to both branches).
        os.environ['ONNX_MODE'] = 'CPUExecutionProvider'

    if not udghs:
        # Crawl each explicitly named character.
        for ch in chars.split(','):
            crawl_dataset_to_huggingface(ch)
            print(ch + "完成")
        return str(chars)+" 上传完成"

    dgrepo = 'deepghs/game_characters'
    with TemporaryDirectory():
        print("Downloading jsons..")
        hf_fs = get_hf_fs()
        _dgdatas = list(hf_fs.glob(f'datasets/{dgrepo}/*/pixiv_characters.json'))
        if game_index:
            # Single game selected by 1-based index.
            _crawl_game(dgrepo, _dgdatas[game_index - 1], write_log=False)
        else:
            # No index given: process every game, logging progress per game.
            for name in _dgdatas:
                _crawl_game(dgrepo, name, write_log=True)
    return "完成"


def _crawl_game(dgrepo, name, write_log=False):
    """Download one game's ``pixiv_characters.json`` and crawl each character.

    :param dgrepo: Hugging Face dataset repo id.
    :param name: glob path (``datasets/<repo>/<game>/pixiv_characters.json``)
        from which the game directory name is derived.
    :param write_log: when true, record the character currently being crawled
        into ``<game>/log.txt``.
    """
    game_dir = os.path.basename(os.path.dirname(name))
    os.makedirs(game_dir, exist_ok=True)
    js = hf_hub_download(
        repo_id=dgrepo, repo_type='dataset',
        filename=os.path.join(game_dir, 'pixiv_characters.json'),
        # .get() keeps public downloads working when HF_TOKEN is unset
        # (the original raised KeyError here).
        token=os.environ.get('HF_TOKEN'),
    )
    with open(js, 'r', encoding='utf-8') as f:
        chs = json.load(f)['characters']
    for entry in chs:
        jp = entry['jpname']
        print(jp, 'start...')
        if write_log:
            # Append instead of truncating: the original opened with 'w' on
            # every iteration, so the log only ever held the last character.
            with open(os.path.join(game_dir, 'log.txt'), 'a') as log_f:
                print(f'{jp} is in crawl.', file=log_f)
        crawl_dataset_to_huggingface(jp)
        print(jp + "完成")
def main():
    """CLI entry point: crawl the given characters, or every deepghs game."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--char', type=str, help='角色列表', default=None)
    parser.add_argument('--index', type=int, default=None)
    args = parser.parse_args()
    # CPU mode is always on; fall back to the deepghs game listings (udghs)
    # when no --char was supplied ("not args.char" replaces the original
    # "False if args.char else True").
    start_func(args.char, True, not args.char, args.index)
    print("全部完成")
# Run the CLI only when executed as a script, not on import.
if __name__ == "__main__":
    main()