AppleJupyter-test

Runtime error

App Files Files Community

LittleApple_fp16 committed on Dec 15, 2023

Commit

ca96eac

•

1 Parent(s): 2720ecc

1by1

Browse files

Files changed (3) hide show

cyberharem/dataset/crawler.py +1 -1
test.ipynb +2 -12
waifu_get.py +78 -17

cyberharem/dataset/crawler.py CHANGED Viewed

@@ -156,7 +156,7 @@ def crawl_dataset_to_huggingface(
             repository = f'AppleHarem/{get_alphabet_name(name)}'
     hf_fs = get_hf_fs()
-    if hf_fs.exists(f'{repository}/.gitattributes'):
         logging.warn(f'{repository} exists, skipped.')
         return
     origin_source = get_main_source(source, no_r18, bg_color, no_monochrome_check, drop_multi, skip_preprocess)

             repository = f'AppleHarem/{get_alphabet_name(name)}'
     hf_fs = get_hf_fs()
+    if hf_fs.exists(f'datasets/{repository}/.gitattributes'):
         logging.warn(f'{repository} exists, skipped.')
         return
     origin_source = get_main_source(source, no_r18, bg_color, no_monochrome_check, drop_multi, skip_preprocess)

test.ipynb CHANGED Viewed

@@ -1,15 +1,5 @@
 {
  "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "e4b4f4a7-1514-4de7-8594-06b2611746ff",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "mkdir cyberharem && mv cyberharem.zip cyberharem/ && cd cyberharem/ && unzip cyberharem.zip && rm -f cyberharem.zip && cd ../"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -19,7 +9,7 @@
    },
    "outputs": [],
    "source": [
-    "!python waifu_get.py --char 才羽モモイ --token token"
    ]
   },
   {
@@ -59,7 +49,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.1"
   }
  },
  "nbformat": 4,

 {
  "cells": [
   {
    "cell_type": "code",
    "execution_count": null,
    },
    "outputs": [],
    "source": [
+    "!python waifu_get.py --char abc --index 1"
    ]
   },
   {
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
+   "version": "3.10.6"
   }
  },
  "nbformat": 4,

waifu_get.py CHANGED Viewed

@@ -5,27 +5,88 @@ from waifuc.export import SaveExporter, TextualInversionExporter
 from waifuc.source import DanbooruSource, PixivSearchSource, ZerochanSource, LocalSource, GcharAutoSource
 from cyberharem.dataset.crawler import crawl_dataset_to_huggingface
 def main():
-    os.environ['ONNX_MODE'] = 'CPUExecutionProvider'
     parser = argparse.ArgumentParser()
-    parser.add_argument('--char', type=str, help='角色列表')
-    parser.add_argument('--artist', type=str, help='画师列表')
-    parser.add_argument('--token', type=str, help='token')
-    os.environ['HF_TOKEN'] = args.token
     args = parser.parse_args()
-    if args.artist:
-        char_list = args.artist.split(',')
-        for ch in char_list:
-            crawl_dataset_to_huggingface(ch,DanbooruSource)
-            print(ch+"完成")
-        print("全部完成")
-    else:
-        char_list = args.char.split(',')
-        for ch in char_list:
-            crawl_dataset_to_huggingface(ch)
-            print(ch+"完成")
-        print("全部完成")
 if __name__ == "__main__":

 from waifuc.source import DanbooruSource, PixivSearchSource, ZerochanSource, LocalSource, GcharAutoSource
 from cyberharem.dataset.crawler import crawl_dataset_to_huggingface
+import gradio as gr
+import os
+import json
+from waifuc.action import HeadCountAction, AlignMinSizeAction, CCIPAction, ThreeStageSplitAction, ModeConvertAction, ClassFilterAction, PersonSplitAction, TaggingAction, RatingFilterAction, NoMonochromeAction, RandomFilenameAction, FirstNSelectAction, FilterSimilarAction, FileExtAction
+from waifuc.export import SaveExporter, TextualInversionExporter
+from waifuc.source import DanbooruSource, PixivSearchSource, ZerochanSource, LocalSource, GcharAutoSource
+from cyberharem.dataset.crawler import crawl_dataset_to_huggingface
+from cyberharem.utils import get_hf_client, get_hf_fs
+from hbutils.system import TemporaryDirectory
+from cyberharem.utils import download_file as cyber_download_file
+from huggingface_hub import hf_hub_url, hf_hub_download
def _crawl_game_characters(repo_id, dataset_file):
    """Download one game's ``pixiv_characters.json`` and crawl every character in it.

    :param repo_id: Hugging Face dataset repository that holds the per-game folders.
    :param dataset_file: Path of the JSON file as listed by the filesystem glob;
        only the name of its parent folder (the game) is used.
    """
    game = os.path.basename(os.path.dirname(dataset_file))
    local_json = hf_hub_download(
        repo_id=repo_id,
        repo_type='dataset',
        # Paths inside a repo always use '/', so os.path.join (backslashes on
        # Windows) must not be used to build them.
        filename=f'{game}/pixiv_characters.json',
        # .get() avoids a KeyError when HF_TOKEN is unset; None leaves token
        # resolution to huggingface_hub itself.
        token=os.environ.get('HF_TOKEN'),
    )
    with open(local_json, 'r', encoding='utf-8') as f:
        characters = json.load(f)['characters']

    for entry in characters:
        jpname = entry['jpname']
        print(jpname, 'start...')
        crawl_dataset_to_huggingface(jpname)
        print(jpname + "完成")


def start_func(chars, is_cpu, udghs, game_index=None):
    """Crawl character datasets and upload them via ``crawl_dataset_to_huggingface``.

    :param chars: Comma-separated character names. Used only when ``udghs`` is
        false; surrounding whitespace is stripped and empty names are skipped.
    :param is_cpu: When true, set ``ONNX_MODE`` to ``CPUExecutionProvider`` in
        the process environment before crawling.
    :param udghs: When true, ignore ``chars`` and crawl the characters listed in
        the ``pixiv_characters.json`` files of ``deepghs/game_characters``.
    :param game_index: 1-based index into the sorted list of per-game JSON
        files. A falsy value (``None`` or ``0``) processes every game.
    :return: ``"<chars> 上传完成"`` in character-list mode, ``"完成"`` otherwise.
    :raises ValueError: If ``udghs`` is false and ``chars`` is ``None``.
    :raises IndexError: If ``game_index`` is outside ``1..number_of_games``.
    """
    if is_cpu:
        os.environ['ONNX_MODE'] = 'CPUExecutionProvider'

    if not udghs:
        if chars is None:
            raise ValueError('chars must be provided when udghs is false')
        for ch in (part.strip() for part in chars.split(',')):
            if not ch:
                # Skip blanks produced by stray or trailing commas.
                continue
            crawl_dataset_to_huggingface(ch)
            print(ch + "完成")
        return str(chars)+" 上传完成"

    dgrepo = 'deepghs/game_characters'
    print("Downloading jsons..")
    hf_fs = get_hf_fs()
    # Sort explicitly so that game_index always refers to the same game.
    dataset_files = sorted(hf_fs.glob(f'datasets/{dgrepo}/*/pixiv_characters.json'))

    if game_index:
        # Reject out-of-range values up front; a negative index would otherwise
        # silently select an entry counted from the end of the list.
        if not 1 <= game_index <= len(dataset_files):
            raise IndexError(
                f'game_index {game_index} out of range 1..{len(dataset_files)}'
            )
        dataset_files = [dataset_files[game_index - 1]]

    for dataset_file in dataset_files:
        _crawl_game_characters(dgrepo, dataset_file)
    return "完成"
 def main():
     """Command-line entry point: parse ``--char`` / ``--index`` and start crawling.

     With ``--char`` the given comma-separated characters are crawled; without
     it the deepghs game-character lists are used, optionally restricted to
     the game selected by ``--index``. CPU execution is always requested.
     """
     parser = argparse.ArgumentParser()
     parser.add_argument('--char', type=str, help='角色列表', default=None)
     parser.add_argument('--index', type=int, default=None)
     args = parser.parse_args()
     # The original line read "False is args.char else True", which is a syntax
     # error; the intent is to switch to the deepghs lists when no --char is given.
     start_func(args.char, True, not args.char, args.index)
     print("全部完成")
 if __name__ == "__main__":