File size: 43,393 Bytes
cec1023
1
2
{"cells":[{"cell_type":"code","execution_count":null,"metadata":{"trusted":true},"outputs":[],"source":[]},{"cell_type":"code","execution_count":null,"metadata":{"trusted":true},"outputs":[],"source":["from pathlib import Path\n","import os\n","import time\n","import re\n","import subprocess\n","import threading\n","import sys\n","import socket\n","import torch\n","from typing import List\n","import uuid"]},{"cell_type":"code","execution_count":null,"metadata":{"trusted":true},"outputs":[],"source":["# 内置参数默认值,当上下文有参数时可覆盖默认值\n","_runing = False\n","_useFrpc = locals().get('useFrpc') or globals().get('useFrpc') or True\n","\n","_useNgrok = locals().get('useNgrok') or globals().get('useNgrok') or True\n","\n","_server_port = locals().get('server_port') or globals().get('server_port') or 7860\n","    \n","_huggingface_token = locals().get('huggingface_token') or globals().get('huggingface_token') or '{input_path}/configs/huggingface_token.txt'\n","_huggingface_token = _huggingface_token\\\n","    .replace('{sdwui}','stable-diffusion-webui')\\\n","    .replace('{wui}',\"webui\")\n","\n","show_shell_info = locals().get('hidden_console_info') or globals().get('hidden_console_info')\n","if show_shell_info is None: show_shell_info = False\n","\n","run_by_none_device = False\n","\n","_proxy_path = locals().get('proxy_path') or globals().get('proxy_path') or {}\n","\n","_config_args:dict[str, str] =  locals().get('config_args') or globals().get('config_args') or {}"]},{"cell_type":"code","execution_count":null,"metadata":{"trusted":true},"outputs":[],"source":["\n","def run(command, cwd=None, desc=None, errdesc=None, custom_env=None,try_error:bool=True) -> str:\n","    global show_shell_info\n","    if desc is not None:\n","        print(desc)\n","\n","    run_kwargs = {\n","        \"args\": command,\n","        \"shell\": True,\n","        \"cwd\": cwd,\n","        \"env\": os.environ if custom_env is None else custom_env,\n","        \"encoding\": 'utf8',\n","        \"errors\": 'ignore',\n","    }\n","\n","    if not show_shell_info:\n","        run_kwargs[\"stdout\"] = run_kwargs[\"stderr\"] = subprocess.PIPE\n","\n","    result = subprocess.run(**run_kwargs)\n","\n","    if result.returncode != 0:\n","        error_bits = [\n","            f\"{errdesc or 'Error running command'}.\",\n","            f\"Command: {command}\",\n","            f\"Error code: {result.returncode}\",\n","        ]\n","        if result.stdout:\n","            error_bits.append(f\"stdout: {result.stdout}\")\n","        if result.stderr:\n","            error_bits.append(f\"stderr: {result.stderr}\")\n","        if try_error:\n","            print((RuntimeError(\"\\n\".join(error_bits))))\n","        else:\n","            raise RuntimeError(\"\\n\".join(error_bits))\n","\n","    if show_shell_info:\n","        print((result.stdout or \"\"))\n","    return (result.stdout or \"\")\n","\n","def mkdirs(path, exist_ok=True):\n","    if path and not Path(path).exists():\n","        os.makedirs(path,exist_ok=exist_ok)\n","\n","\n","# 检查网络\n","def check_service(host, port):\n","    try:\n","        socket.create_connection((host, port), timeout=5)\n","        return True\n","    except socket.error:\n","        return False\n","\n","\n","# 检查gpu是否存在\n","def check_gpu():\n","    if not run_by_none_device and torch.cuda.device_count() == 0:\n","        raise Exception('当前环境没有GPU')\n","\n","\n","def echoToFile(content:str,path:str):\n","    if path.find('/') >= 0:\n","        _path = '/'.join(path.split('/')[:-1])\n","        run(f'''mkdir -p {_path}''')\n","    with open(path,'w') as sh:\n","        sh.write(content)\n","        \n","def get_freefrp_confog(local_port):\n","    rd_str = uuid.uuid1()\n","    return (f'''\n","[common]\n","server_addr = frp.freefrp.net\n","server_port = 7000\n","token = freefrp.net\n","\n","[{rd_str}_http]\n","type = http\n","local_ip = 127.0.0.1\n","local_port = {local_port}\n","custom_domains = {rd_str}.frp.eaias.com\n","''',f'http://{rd_str}.frp.eaias.com')"]},{"cell_type":"code","execution_count":null,"metadata":{"trusted":true},"outputs":[],"source":["\n","_install_path = f\"{os.environ['HOME']}/sd_webui\" # 安装目录\n","_output_path = '/kaggle/working' if os.path.exists('/kaggle/working/') else f\"{os.environ['HOME']}/.sdwui/Output\" # 输出目录 如果使用google云盘 会在google云盘增加sdwebui/Output\n","_input_path = '/kaggle/input' # 输入目录\n","_ui_dir_name = 'sd_main_dir'\n","\n","_install_path = locals().get('install_path') or globals().get('install_path') or _install_path\n","_output_path = locals().get('output_path') or globals().get('output_path') or _output_path\n","_input_path = locals().get('input_path') or globals().get('input_path') or _input_path\n","_ui_dir_name = locals().get('ui_dir_name') or globals().get('ui_dir_name') or _ui_dir_name\n","\n","install_path = _install_path\n","output_path = _output_path\n","input_path = _input_path\n","ui_dir_name = _ui_dir_name\n","    \n","google_drive = '' \n","\n","\n","_useGooglrDrive = locals().get('useGooglrDrive') or globals().get('useGooglrDrive') or True\n","\n","# 连接谷歌云\n","try:\n","    if _useGooglrDrive:\n","        from google.colab import drive\n","        drive.mount(f'~/google_drive')\n","        google_drive = f\"{os.environ['HOME']}/google_drive/MyDrive\"\n","        _output_path = f'{google_drive}/sdwebui/Output'\n","        _input_path = f'{google_drive}/sdwebui/Input'\n","        run(f'''mkdir -p {_input_path}''')\n","        print('''\n","已经链接到谷歌云盘\n","已在云盘创建Input和Output目录\n","        ''')\n","except:\n","    _useGooglrDrive = False\n","\n","run(f'''mkdir -p {_install_path}''')\n","run(f'''mkdir -p {_output_path}''')\n","\n","\n","os.environ['install_path'] = _install_path\n","os.environ['output_path'] = _output_path\n","os.environ['google_drive'] = google_drive\n","os.environ['input_path'] = _input_path\n","\n","def replace_path(input_str:str):\n","    if not input_str: return ''\n","    for key in _config_args:\n","        input_str = input_str.replace(key,_config_args[key])\n","        \n","    return input_str.replace('$install_path',_install_path)\\\n","    .replace('{install_path}',_install_path)\\\n","    .replace('$input_path',_input_path)\\\n","    .replace('{input_path}',_input_path)\\\n","    .replace('$output_path',_output_path)\\\n","    .replace('{output_path}',_output_path)\\\n","    .replace('{sdwui}','stable-diffusion-webui')\\\n","    .replace('{wui}',\"webui\")\n","\n","space_string = ' \\n\\r\\t\\'\\\",'\n","\n","def config_reader(conf:str):\n","    args = [replace_path(item.split('#')[0].strip(space_string)) for item in conf.split('\\n') if item.strip(space_string)]\n","    return [item.strip() for item in args if item.strip()]\n"]},{"cell_type":"code","execution_count":null,"metadata":{"_kg_hide-input":true,"id":"i3LhnwYHLCtC","trusted":true},"outputs":[],"source":["ngrokTokenFile = os.path.join(_input_path,'configs/ngrok_token.txt') # 非必填 存放ngrokToken的文件的路径\n","frpcConfigFile = os.path.join(_input_path,'configs/frpc_koishi.ini') # 非必填 frp 配置文件\n","# ss证书目录 下载nginx的版本,把pem格式改成crt格式\n","frpcSSLFFlies = [os.path.join(_input_path,'configs/koishi_ssl')]\n","if 'frp_ssl_dir' in locals() or 'frp_ssl_dir' in globals():\n","    frpcSSLFFlies = frpcSSLFFlies + config_reader(locals().get('frp_ssl_dir') or globals().get('frp_ssl_dir'))\n","# frpc 文件目录 如果目录不存在,会自动下载,也可以在数据集搜索 viyiviyi/utils 添加\n","frpcExePath = os.path.join(_input_path,'utils-tools/frpc')\n","# 其他需要加载的webui启动参数 写到【参数列表】这个配置去\n","\n","# 用于使用kaggle api的token文件 参考 https://www.kaggle.com/docs/api\n","# 此文件用于自动上传koishi的相关配置 也可以用于保存重要的输出文件\n","kaggleApiTokenFile = locals().get('kaggle_api_token') or globals().get('kaggle_api_token') or os.path.join(_input_path,'configs/kaggle.json')\n","\n","requirements = []\n"]},{"cell_type":"code","execution_count":null,"metadata":{"_kg_hide-input":true,"id":"a_GtG2ayLCtD","trusted":true},"outputs":[],"source":["# 这下面的是用于初始化一些值或者环境变量的,轻易别改\n","_setting_file = replace_path(locals().get('setting_file') or globals().get('setting_file') or '/kaggle/working/configs/config.json')\n","\n","_ui_config_file = replace_path(locals().get('ui_config_file') or globals().get('ui_config_file') or '/kaggle/working/configs/ui-config.json')\n","\n","# 设置文件路径\n","if Path(f\"{os.environ['HOME']}/google_drive/MyDrive\").exists():\n","    if _setting_file == '/kaggle/working/configs/config.json':\n","        _setting_file = os.path.join(_output_path,'configs/config.json')\n","    if _ui_config_file == '/kaggle/working/configs/ui-config.json':\n","        _ui_config_file = os.path.join(_output_path,'configs/ui-config.json')\n","    \n","frpcStartArg = ''\n","freefrp_url = ''\n","_frp_temp_config_file = ''\n","_frp_config_or_file = replace_path(locals().get('frp_config_or_file') or globals().get('frp_config_or_file')) or frpcConfigFile\n","run(f'''mkdir -p {_install_path}/configFiles''')\n","if _frp_config_or_file:\n","    if '[common]' in _frp_config_or_file:\n","        echoToFile(_frp_config_or_file,f'{_install_path}/configFiles/temp_frpc_webui.ini')\n","        _frp_temp_config_file = f'{_install_path}/configFiles/temp_frpc_webui.ini'\n","    elif '.ini' in _frp_config_or_file:\n","        _frp_temp_config_file = _frp_config_or_file.strip()\n","        \n","    if _frp_temp_config_file:\n","        if Path(_frp_temp_config_file).exists():\n","            run(f'''cp -f {_frp_temp_config_file} {_install_path}/configFiles/frpc_webui.ini''')\n","            run(f'''sed -i \"s/local_port = .*/local_port = {_server_port}/g\" {_install_path}/configFiles/frpc_webui.ini''')\n","            frpcStartArg = f' -c {_install_path}/configFiles/frpc_webui.ini'\n","    elif _frp_config_or_file.strip().startswith('-f'):\n","        frpcStartArg = _frp_config_or_file.strip()\n","        \n","if not frpcStartArg:\n","    conf,url = get_freefrp_confog(_server_port)\n","    echoToFile(conf,f'{_install_path}/configFiles/frpc_webui.ini')\n","    freefrp_url = url\n","    frpcStartArg = f' -c {_install_path}/configFiles/frpc_webui.ini'\n","\n","ngrokToken=''\n","_ngrok_config_or_file = replace_path(locals().get('ngrok_config_or_file') or globals().get('ngrok_config_or_file')) or ngrokTokenFile\n","if _ngrok_config_or_file:\n","    if Path(_ngrok_config_or_file.strip()).exists():\n","        ngrokTokenFile = _ngrok_config_or_file.strip()\n","    if Path(ngrokTokenFile).exists():\n","        with open(ngrokTokenFile,encoding = \"utf-8\") as nkfile:\n","            ngrokToken = nkfile.readline()\n","    elif not _ngrok_config_or_file.strip().startswith('/'):\n","        ngrokToken=_ngrok_config_or_file.strip()\n","    \n","huggingface_headers:dict = None  "]},{"cell_type":"markdown","metadata":{},"source":["## 文件下载工具\n","\n","---\n","\n","link_or_download_flie(config:str, skip_url:bool=False, _link_instead_of_copy:bool=True, base_path:str = '',sync:bool=False,thread_num:int=None)"]},{"cell_type":"code","execution_count":null,"metadata":{"trusted":true},"outputs":[],"source":["import concurrent.futures\n","import importlib\n","import os\n","import pprint\n","import re\n","from pathlib import Path\n","from typing import List\n","\n","import requests\n","\n","show_shell_info = False\n","\n","def is_installed(package):\n","    try:\n","        spec = importlib.util.find_spec(package)\n","    except ModuleNotFoundError:\n","        return False\n","\n","    return spec is not None\n","\n","def download_file(url:str, filename:str, dist_path:str, cache_path = '',_link_instead_of_copy:bool=True,headers={}):\n","    # 获取文件的真实文件名\n","    if not filename:\n","        with requests.get(url, stream=True,headers=headers) as r:\n","            if 'Content-Disposition' in r.headers:\n","                filename = r.headers['Content-Disposition'].split('filename=')[1].strip('\"')\n","            r.close()\n","    if not filename and re.search(r'/[^/]+\\.[^/]+$',url):\n","        filename = url.split('/')[-1].split('?')[0]\n","    \n","    filename = re.sub(r'[\\\\/:*?\"<>|;]', '', filename)\n","    filename = re.sub(r'[\\s\\t]+', '_', filename)\n","    \n","    print(f'下载 {filename} url: {url} --> {dist_path}')\n","    \n","    # 创建目录\n","    if cache_path and not Path(cache_path).exists():\n","        os.makedirs(cache_path,exist_ok=True)\n","    if dist_path and not Path(dist_path).exists():\n","        os.makedirs(dist_path,exist_ok=True)\n","        \n","    # 拼接文件的完整路径\n","    filepath = os.path.join(dist_path, filename)\n","\n","    if cache_path:\n","        cache_path = os.path.join(cache_path, filename)\n","        \n","    # 判断文件是否已存在\n","    if Path(filepath).exists():\n","        print(f'文件 {filename} 已存在 {dist_path}')\n","        return\n","    \n","    if cache_path and Path(cache_path).exists():\n","        run(f'cp -n -r -f {\"-s\" if _link_instead_of_copy else \"\"} {cache_path} {dist_path}')\n","        print(f'文件缓存 {cache_path} --> {dist_path}')\n","        return\n","    # 下载文件\n","    with requests.get(url, stream=True, headers=headers) as r:\n","        r.raise_for_status()\n","        with open(cache_path or filepath, 'wb') as f:\n","            for chunk in r.iter_content(chunk_size=1024):\n","                if chunk:\n","                    f.write(chunk)\n","    # 如果使用了缓存目录 需要复制或链接文件到目标目录\n","    if cache_path:\n","        run(f'cp -n -r -f {\"-s\" if _link_instead_of_copy else \"\"} {cache_path} {dist_path}')\n","    print(f'下载完成 {filename} --> {dist_path}')\n","        \n","def download_git(url, dist_path, cache_path = '',_link_instead_of_copy:bool=True):\n","    if not Path(dist_path).exists():\n","        os.makedirs(dist_path,exist_ok=True)\n","    if show_shell_info:\n","        print(f'git 下载 {url} --> {dist_path}')\n","    if cache_path and not Path(cache_path).exists():\n","        os.makedirs(cache_path,exist_ok=True)\n","        run(f'git clone {url}',cwd = cache_path)\n","    if cache_path:\n","        run(f'cp -n -r -f {cache_path}/* {dist_path}')\n","    else:\n","        run(f'git clone {url}',cwd = dist_path)\n","    if show_shell_info:\n","        print(f'git 下载完成 {url} --> {dist_path}')\n","    \n","    \n","def download_huggingface(url:str, filename:str, dist_path, cache_path = '',_link_instead_of_copy:bool=True):\n","    fileReg = r'^https:\\/\\/huggingface.co(\\/([^\\/]+\\/)?[^\\/]+\\/[^\\/]+\\/(resolve|blob)\\/[^\\/]+\\/|[^\\.]+\\.[^\\.]+$|download=true)'\n","    def isFile(url:str):\n","        if re.match(fileReg,url):\n","            return True\n","        return False\n","    if isFile(url):\n","        download_file(url,filename,dist_path,cache_path,_link_instead_of_copy,headers=huggingface_headers)\n","    else:\n","        download_git(url,dist_path,cache_path,_link_instead_of_copy)\n","    \n","# 加入文件到下载列表\n","def pause_url(url:str,dist_path:str):\n","    file_name = ''\n","    if re.match(r'^[^:]+:(https?|ftps?)://', url, flags=0):\n","        file_name = re.findall(r'^[^:]+:',url)[0][:-1]\n","        url = url[len(file_name)+1:]\n","    if not re.match(r'^(https?|ftps?)://',url):\n","        return\n","    file_name = re.sub(r'\\s+','_',file_name or '')\n","    path_hash = str(hash(url)).replace('-','')\n","    \n","    return {'file_name':file_name,'path_hash':path_hash,'url':url,'dist_path':dist_path}\n","\n","def download_urls(download_list:List[dict],sync:bool=False,thread_num:int=5, \n","                  cache_path:str=os.path.join(os.environ['HOME'],'.cache','download_util'),\n","                  _link_instead_of_copy:bool=True,is_await:bool=False):\n","    if sync:\n","        for conf in download_list:\n","            cache_dir = os.path.join(cache_path,conf['path_hash'])\n","            if conf['url'].startswith('https://github.com'):\n","                download_git(conf['url'],conf['dist_path'],cache_path=cache_dir,_link_instead_of_copy=_link_instead_of_copy)\n","                continue\n","            if conf['url'].startswith('https://huggingface.co'):\n","                download_huggingface(conf['url'],conf['file_name'],conf['dist_path'],cache_path=cache_dir,_link_instead_of_copy=_link_instead_of_copy)\n","                continue\n","            if conf['url'].startswith('https://civitai.com'):\n","                if not re.search(r'token=.+', conf['url']):\n","                    if conf['url'].find('?') == -1:\n","                        conf['url'] = conf['url']+'?token=fee8bb78b75566eddfd04d061996185c'\n","                    else:\n","                        conf['url'] = conf['url']+'&token=fee8bb78b75566eddfd04d061996185c'\n","            download_file(conf['url'],conf['file_name'],conf['dist_path'],cache_path=cache_dir,_link_instead_of_copy=_link_instead_of_copy)\n","    else:\n","        executor = concurrent.futures.ThreadPoolExecutor(max_workers=thread_num)\n","        futures = []\n","        for conf in download_list:\n","            cache_dir = os.path.join(cache_path,conf['path_hash'])\n","            if conf['url'].startswith('https://github.com'):\n","                futures.append(executor.submit(download_git, conf['url'],conf['dist_path'],\n","                                                cache_path=cache_dir,_link_instead_of_copy=_link_instead_of_copy))\n","                continue\n","            if conf['url'].startswith('https://huggingface.co'):\n","                futures.append(executor.submit(download_huggingface,conf['url'],conf['file_name'],conf['dist_path'],cache_path=cache_dir,_link_instead_of_copy=_link_instead_of_copy))\n","                continue\n","            if conf['url'].startswith('https://civitai.com'):\n","                if not re.search(r'token=.+', conf['url']):\n","                    if conf['url'].find('?') == -1:\n","                        conf['url'] = conf['url']+'?token=fee8bb78b75566eddfd04d061996185c'\n","                    else:\n","                        conf['url'] = conf['url']+'&token=fee8bb78b75566eddfd04d061996185c'\n","            futures.append(executor.submit(download_file, conf['url'],conf['file_name'],conf['dist_path'],\n","                                            cache_path=cache_dir,_link_instead_of_copy=_link_instead_of_copy))\n","        if is_await:\n","            concurrent.futures.wait(futures)\n","            \n","                          \n","def parse_config(config:str):\n","    space_string = ' \\n\\r\\t\\'\\\",'\n","    other_flie_list = [item.split('#')[0].strip(space_string) for item in config.split('\\n') if item.strip(space_string)]\n","    other_flie_list = [item.strip() for item in other_flie_list if item.strip()]\n","    other_flie_list_store = {}\n","    other_flie_list_store_name='default'\n","    other_flie_list_store_list_cache=[]\n","    \n","    for item in other_flie_list:\n","        if item.startswith('[') and item.endswith(']'):\n","            if not other_flie_list_store_name == 'default':\n","                other_flie_list_store[other_flie_list_store_name]=other_flie_list_store_list_cache\n","                other_flie_list_store_list_cache = []\n","            other_flie_list_store_name = item[1:-1]\n","        else:\n","            other_flie_list_store_list_cache.append(item)\n","    other_flie_list_store[other_flie_list_store_name]=other_flie_list_store_list_cache\n","    \n","    return other_flie_list_store\n","\n","\n","def link_or_download_flie(config:str, skip_url:bool=False, _link_instead_of_copy:bool=True, base_path:str = '',\n","                          sync:bool=False,thread_num:int=None, is_await:bool=False):\n","    store:dict[str,List[str]] = parse_config(config)\n","    download_list = []\n","    for dist_dir in store.keys():\n","        dist_path = os.path.join(base_path,dist_dir)\n","        os.makedirs(dist_path,exist_ok=True)\n","        for path in store[dist_dir]:\n","            if 'https://' in path or 'http://' in path:\n","                if skip_url:\n","                    continue\n","                if sync:\n","                    download_urls([pause_url(path,dist_path)],_link_instead_of_copy = _link_instead_of_copy, sync=sync)\n","                    continue\n","                download_list.append(pause_url(path,dist_path))\n","            else:\n","                run(f'cp -n -r -f {\"-s\" if _link_instead_of_copy else \"\"} {path} {dist_path}')\n","                if show_shell_info:\n","                    print(f'{\"链接\" if _link_instead_of_copy else \"复制\"} {path} --> {dist_path}')\n","        run(f'rm -f {dist_path}/\\*.* ')\n","    if not skip_url:\n","        if show_shell_info:\n","            pprint.pprint(download_list)\n","        download_urls(download_list,_link_instead_of_copy = _link_instead_of_copy, sync=sync, thread_num=thread_num or 3,is_await=is_await)"]},{"cell_type":"markdown","metadata":{"id":"p0uS-BLULCtD"},"source":["## kaggle public API\n","\n","**不能使用%cd这种会改变当前工作目录的命令,会导致和其他线程冲突**\n","\n","---"]},{"cell_type":"code","execution_count":null,"metadata":{"_kg_hide-input":true,"id":"m8FJi4j0LCtD","trusted":true},"outputs":[],"source":["# 安装kaggle的api token文件\n","def initKaggleConfig():\n","    if Path('~/.kaggle/kaggle.json').exists():\n","        return True\n","    if Path(kaggleApiTokenFile).exists():\n","        run(f'''mkdir -p ~/.kaggle/''')\n","        run('cp '+kaggleApiTokenFile+' ~/.kaggle/kaggle.json')\n","        run(f'''chmod 600 ~/.kaggle/kaggle.json''')\n","        return True\n","    print('缺少kaggle的apiToken文件,访问:https://www.kaggle.com/你的kaggle用户名/account 获取')\n","    return False\n","\n","def getUserName():\n","    if not initKaggleConfig(): return\n","    import kaggle\n","    return kaggle.KaggleApi().read_config_file()['username']\n","\n","def createOrUpdateDataSet(path:str,datasetName:str):\n","    if not initKaggleConfig(): return\n","    print('创建或更新数据集 '+datasetName)\n","    import kaggle\n","    run(f'mkdir -p {_install_path}/kaggle_cache')\n","    run(f'rm -rf {_install_path}/kaggle_cache/*')\n","    datasetDirPath = _install_path+'/kaggle_cache/'+datasetName\n","    run('mkdir -p '+datasetDirPath)\n","    run('cp -f '+path+' '+datasetDirPath+'/')\n","    username = getUserName()\n","    print(\"kaggle username:\"+username)\n","    datasetPath = username+'/'+datasetName\n","    datasetList = kaggle.api.dataset_list(mine=True,search=datasetPath)\n","    print(datasetList)\n","    if len(datasetList) == 0 or datasetPath not in [str(d) for d in datasetList]: # 创建 create\n","        run('kaggle datasets init -p' + datasetDirPath)\n","        metadataFile = datasetDirPath+'/dataset-metadata.json'\n","        run('sed -i s/INSERT_TITLE_HERE/'+ datasetName + '/g ' + metadataFile)\n","        run('sed -i s/INSERT_SLUG_HERE/'+ datasetName + '/g ' + metadataFile)\n","        run('cat '+metadataFile)\n","        run('kaggle datasets create -p '+datasetDirPath)\n","        print('create database done')\n","    else:\n","        kaggle.api.dataset_metadata(datasetPath,datasetDirPath)\n","        kaggle.api.dataset_create_version(datasetDirPath, 'auto update',dir_mode='zip')\n","        print('upload database done')\n","\n","def downloadDatasetFiles(datasetName:str,outputPath:str):\n","    if not initKaggleConfig(): return\n","    print('下载数据集文件 '+datasetName)\n","    import kaggle\n","    username = getUserName()\n","    datasetPath = username+'/'+datasetName\n","    datasetList = kaggle.api.dataset_list(mine=True,search=datasetPath)\n","    if datasetPath not in [str(d) for d in datasetList]:\n","        return False\n","    run('mkdir -p '+outputPath)\n","    kaggle.api.dataset_download_files(datasetPath,path=outputPath,unzip=True)\n","    return True\n","\n"]},{"cell_type":"markdown","metadata":{},"source":["## 同步文件夹到 huggingface\n","\n","---"]},{"cell_type":"code","execution_count":null,"metadata":{"trusted":true},"outputs":[],"source":["# 文件夹与 huggingface 同步\n","if _huggingface_token:\n","    if not is_installed('watchdog'):\n","        requirements.append('watchdog')\n","    if not is_installed('huggingface_hub'):\n","        requirements.append('huggingface_hub')\n","    else:\n","        try:\n","            from huggingface_hub  import HfApi,login,snapshot_download\n","        except:\n","            requirements.append('huggingface_hub')\n","\n","huggingface_is_init = False\n","\n","def init_huggingface():\n","    if not _huggingface_token:\n","        return False\n","\n","    global huggingface_headers\n","    global huggingface_is_init\n","    \n","    from huggingface_hub  import login\n","    token = replace_path(_huggingface_token)\n","    if not _huggingface_token.startswith('hf_') and Path(token).exists():\n","        with open(token,encoding = \"utf-8\") as nkfile:\n","            token = nkfile.readline()\n","    if not token.startswith('hf_'):\n","        print('huggingface token 不正确,请将 token 或 仅存放token 的txt文件路径填入 _huggingface_token 配置')\n","        return False\n","    login(token,add_to_git_credential=True)\n","    huggingface_headers = {'Authorization': 'Bearer '+token}\n","    print('huggingface token 已经加载,可以下载私有仓库或文件')\n","    \n","    huggingface_is_init = True\n","    return True\n","\n","\n","def start_sync_log_to_huggingface(repo_id:str,directory_to_watch,repo_type='dataset',file_types=['.png','.jpg','.txt','.webp','.jpeg']):\n","    if not huggingface_is_init:\n","        print('huggingface 相关功能未初始化 请调用 init_huggingface() 初始化')\n","    \n","    if not directory_to_watch:\n","        print('请指定需要同步的本地目录 directory_to_watch')\n","        return\n","    if not Path(directory_to_watch).exists():\n","        run(f'mkdir -p {directory_to_watch}')\n","    from watchdog.observers import Observer\n","    from watchdog.events import FileSystemEventHandler\n","    from huggingface_hub  import HfApi,login,snapshot_download\n","    \n","    # 配置监视的目录和 Hugging Face 仓库信息\n","    class FileChangeHandler(FileSystemEventHandler):\n","        def __init__(self, api, repo_id, repo_type,directory_to_watch):\n","            self.api = api\n","            self.repo_id = repo_id\n","            self.repo_type = repo_type\n","            self.directory_to_watch = directory_to_watch\n","        def on_created(self, event):\n","            if not event.is_directory:\n","                # 上传新文件到 Hugging Face 仓库\n","                file_path = event.src_path\n","                file_name:str = os.path.basename(file_path)\n","                print(file_name)\n","                if file_name[file_name.rindex('.'):] not in file_types: return\n","                print(file_name,'>>','huggingface')\n","                try:\n","                    self.api.upload_file(\n","                        path_or_fileobj=file_path,\n","                        path_in_repo=file_path.replace(self.directory_to_watch,''),\n","                        repo_id=self.repo_id,\n","                        repo_type=self.repo_type,\n","                    )\n","                except IOError as error:\n","                    print(error)\n","\n","        def on_deleted(self, event):\n","            if not event.is_directory:\n","                # 从 Hugging Face 仓库删除文件\n","                file_path = event.src_path\n","                file_name = os.path.basename(file_path)\n","                if file_name[file_name.rindex('.'):] not in file_types: return\n","                try:\n","                    self.api.delete_file(\n","                        path_in_repo=file_path.replace(self.directory_to_watch,''),\n","                        repo_id=self.repo_id,\n","                        repo_type=self.repo_type,\n","                        )\n","                except IOError as error:\n","                    print(error)\n","\n","        def on_modified(self, event):\n","            if not event.is_directory:\n","                # 更新 Hugging Face 仓库中的文件\n","                file_path = event.src_path\n","                file_name = os.path.basename(file_path)\n","                if file_name[file_name.rindex('.'):] not in ['.png','.jpg','.txt','.webp','.jpeg']: return\n","                try:\n","                    self.api.upload_file(\n","                        path_or_fileobj=file_path,\n","                        path_in_repo=file_path.replace(self.directory_to_watch,''),\n","                        repo_id=self.repo_id,\n","                        repo_type=self.repo_type,\n","                    )\n","                except IOError as error:\n","                    print(error)\n","\n","        def on_moved(self, event):\n","            if not event.is_directory:\n","                file_path = event.dest_path\n","                file_name = os.path.basename(file_path)\n","                if file_name[file_name.rindex('.'):] not in file_types: return\n","                if event.dest_path.startswith(self.directory_to_watch):\n","                    try:\n","                        self.api.upload_file(\n","                            path_or_fileobj=file_path,\n","                            path_in_repo=file_path.replace(self.directory_to_watch,''),\n","                            repo_id=self.repo_id,\n","                            repo_type=self.repo_type,\n","                        )\n","                    except IOError as error:\n","                        print(error)\n","\n","    api = HfApi()\n","    \n","    # 创建观察者对象并注册文件变化处理程序\n","    event_handler = FileChangeHandler(api,repo_id,repo_type,directory_to_watch)\n","    observer = Observer()\n","    observer.schedule(event_handler, directory_to_watch, recursive=True)\n","\n","    # 启动观察者\n","    observer.name = \"solo_directory_to_watch\"\n","    print(f'启动目录同步,{directory_to_watch} 将自动同步到 huggingface {repo_type} : {repo_id}')\n","    observer.start()"]},{"cell_type":"markdown","metadata":{"id":"sswa04veLCtE"},"source":["## 工具函数\n","**不能使用%cd这种会改变当前工作目录的命令,会导致和其他线程冲突**\n","\n","---"]},{"cell_type":"code","execution_count":null,"metadata":{"_kg_hide-input":true,"trusted":true},"outputs":[],"source":["\n","def zipPath(path:str,zipName:str,format='tar'):\n","    if path.startswith('$install_path'):\n","        path = path.replace('$install_path',_install_path)\n","    if path.startswith('$output_path'):\n","        path = path.replace('$install_path',_output_path)\n","    if not path.startswith('/'):\n","        path = f'{_install_path}/{_ui_dir_name}/{path}'\n","    if Path(path).exists():\n","        if 'tar' == format:\n","            run(f'tar -cf {_output_path}/'+ zipName +'.tar -C '+ path +' . ')\n","        elif 'gz' == format:\n","            run(f'tar -czf {_output_path}/'+ zipName +'.tar.gz -C '+ path +' . ')\n","        return\n","    print('指定的目录不存在:'+path)\n"]},{"cell_type":"markdown","metadata":{},"source":["## 内网穿透\n","\n","---"]},{"cell_type":"code","execution_count":null,"metadata":{"_kg_hide-input":true,"_kg_hide-output":true,"id":"coqQvTSLLCtE","trusted":true},"outputs":[],"source":["def printUrl(url,name=''):\n","    print(f'{name} 访问地址:{url}')\n","    for key in sorted(_proxy_path.keys(), key=len)[::-1]:\n","        print(f'{name} 本地服务:{_proxy_path[key]}  访问地址:{url}{key}')\n","# ngrok\n","def startNgrok(ngrokToken:str,ngrokLocalPort:int):\n","    if not is_installed('pyngrok'):\n","        run('pip install pyngrok')\n","    from pyngrok import conf, ngrok\n","    try:\n","        conf.get_default().auth_token = ngrokToken\n","        conf.get_default().monitor_thread = False\n","        ssh_tunnels = ngrok.get_tunnels(conf.get_default())\n","        url = ''\n","        if len(ssh_tunnels) == 0:\n","            ssh_tunnel = ngrok.connect(ngrokLocalPort)\n","            url = ssh_tunnel.public_url\n","            print('ngrok 访问地址:'+ssh_tunnel.public_url)\n","        else:\n","            print('ngrok 访问地址:'+ssh_tunnels[0].public_url)\n","            url = ssh_tunnels[0].public_url\n","        printUrl(url,'ngrok')\n","        def auto_request_ngrok():\n","            if url:\n","                while(_runing):\n","                    time.sleep(60*1)\n","                    try:\n","                        res = requests.get(url+'/',headers={\"ngrok-skip-browser-warning\" : \"1\"},timeout=10)\n","                    except:\n","                        ''\n","                    # print('自动调用ngrok链接以保存链接不会断开',res.status_code)\n","\n","        # threading.Thread(target = auto_request_ngrok,daemon=True,name='solo_auto_request_ngrok').start()\n","    except:\n","        print('启动ngrok出错')\n","        \n","def load_frpc_config(conf_or_file,dost_port=None):\n","    frpcStartArg = ''\n","    _frp_temp_config_file = ''\n","    _frp_config_or_file = conf_or_file\n","    run(f'''mkdir -p {_install_path}/configFiles''')\n","    if _frp_config_or_file:\n","        if '[common]' in _frp_config_or_file:\n","            echoToFile(_frp_config_or_file,f'{_install_path}/configFiles/temp_frpc_{dost_port or _server_port}.ini')\n","            _frp_temp_config_file = f'{_install_path}/configFiles/temp_frpc_{dost_port or _server_port}.ini'\n","        elif '.ini' in _frp_config_or_file:\n","            _frp_temp_config_file = _frp_config_or_file.strip()\n","            \n","        if _frp_temp_config_file:\n","            if Path(_frp_temp_config_file).exists():\n","                run(f'''cp -f {_frp_temp_config_file} {_install_path}/configFiles/frpc_{dost_port or _server_port}.ini''')\n","                run(f'''sed -i \"s/local_port = .*/local_port = {dost_port or _server_port}/g\" {_install_path}/configFiles/frpc_{dost_port or _server_port}.ini''')\n","                frpcStartArg = f' -c {_install_path}/configFiles/frpc_{dost_port or _server_port}.ini'\n","        elif _frp_config_or_file.strip().startswith('-f'):\n","            frpcStartArg = _frp_config_or_file.strip()\n","    return frpcStartArg\n","\n","def startFrpc(name,configFile):\n","    if not Path(f'{_install_path}/frpc/frpc').exists():\n","        installFrpExe()\n","    if freefrp_url:\n","        printUrl(freefrp_url,'freefrp')\n","    echoToFile(f'''\n","cd {_install_path}/frpc/\n","{_install_path}/frpc/frpc {configFile}\n","''',f'{_install_path}/frpc/start.sh')\n","    get_ipython().system(f'''bash {_install_path}/frpc/start.sh''')\n","        \n","def installFrpExe():\n","    if _useFrpc:\n","        print('安装frpc')\n","        run(f'mkdir -p {_install_path}/frpc')\n","        if Path(frpcExePath).exists():\n","            run(f'cp -f -n {frpcExePath} {_install_path}/frpc/frpc')\n","        else:\n","            run(f'wget \"https://huggingface.co/datasets/ACCA225/Frp/resolve/main/frpc\" -O {_install_path}/frpc/frpc')\n","        \n","        for ssl in frpcSSLFFlies:\n","            if Path(ssl).exists():\n","                run(f'cp -f -n {ssl}/* {_install_path}/frpc/')\n","        run(f'chmod +x {_install_path}/frpc/frpc')\n","        run(f'{_install_path}/frpc/frpc -v')\n","\n","def startProxy():\n","    if _useNgrok:\n","        startNgrok(ngrokToken,_server_port)\n","    if _useFrpc:\n","        startFrpc('frpc_proxy',frpcStartArg)"]},{"cell_type":"markdown","metadata":{},"source":["## NGINX 反向代理\n","\n","---"]},{"cell_type":"code","execution_count":null,"metadata":{"_kg_hide-input":true,"_kg_hide-output":true,"trusted":true},"outputs":[],"source":["\n","# nginx 反向代理配置文件\n","def localProxy():\n","    def getProxyLocation(subPath:str, localServer:str):\n","        return '''\n","    location '''+ subPath +'''\n","    {\n","        proxy_pass '''+ localServer +''';\n","        proxy_set_header Host $host;\n","        proxy_set_header X-Real-IP $remote_addr;\n","        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;\n","        proxy_set_header REMOTE-HOST $remote_addr;\n","        proxy_set_header   Upgrade $http_upgrade;\n","        proxy_set_header   Connection upgrade;\n","        proxy_http_version 1.1;\n","        proxy_connect_timeout 10m;\n","        proxy_read_timeout 10m;\n","    }\n","    \n","    '''\n","    \n","    conf = '''\n","server\n","{\n","    listen '''+str(_server_port)+''';\n","    listen [::]:'''+str(_server_port)+''';\n","    server_name 127.0.0.1 localhost 0.0.0.0 \"\";\n","    \n","    if ($request_method = OPTIONS) {\n","        return 200;\n","    }\n","    fastcgi_send_timeout                 10m;\n","    fastcgi_read_timeout                 10m;\n","    fastcgi_connect_timeout              10m;\n","    \n","    '''+ ''.join([getProxyLocation(key,_proxy_path[key]) for key in sorted(_proxy_path.keys(), key=len)[::-1]]) +'''\n","}\n","'''\n","    echoToFile(conf,'/etc/nginx/conf.d/proxy_nginx.conf')\n","    if not check_service('localhost',_server_port):\n","        run(f'''nginx -c /etc/nginx/nginx.conf''')\n","    run(f'''nginx -s reload''')"]},{"cell_type":"markdown","metadata":{},"source":["## 线程清理工具\n","\n","---\n","\n","清理线程名以 solo_ 开头的所有线程"]},{"cell_type":"code","execution_count":null,"metadata":{"_kg_hide-input":true,"trusted":true},"outputs":[],"source":["import inspect\n","import ctypes\n","\n","def _async_raise(tid, exctype):\n","    \"\"\"raises the exception, performs cleanup if needed\"\"\"\n","    tid = ctypes.c_long(tid)\n","    if not inspect.isclass(exctype):\n","        exctype = type(exctype)\n","    res = ctypes.pythonapi.PyThreadState_SetAsyncExc(tid, ctypes.py_object(exctype))\n","    if res == 0:\n","        raise ValueError(\"invalid thread id\")\n","    elif res != 1:\n","        # \"\"\"if it returns a number greater than one, you're in trouble,\n","        # and you should call it again with exc=NULL to revert the effect\"\"\"\n","        ctypes.pythonapi.PyThreadState_SetAsyncExc(tid, None)\n","        raise SystemError(\"PyThreadState_SetAsyncExc failed\")\n","\n","def stop_thread(thread):\n","    _async_raise(thread.ident, SystemExit)\n","\n","def stop_solo_threads():\n","    global _runing\n","    _runing = False\n","    # 获取当前所有活动的线程\n","    threads = threading.enumerate()\n","    # 关闭之前创建的子线程\n","    for thread in threads:\n","        if thread.name.startswith('solo_'):\n","            print(f'结束线程:{thread.name}')\n","            try:\n","                stop_thread(thread)\n","            except socket.error:\n","                print(f'结束线程:{thread.name} 执行失败')\n","                "]},{"cell_type":"code","execution_count":null,"metadata":{},"outputs":[],"source":["docs = '''\n","# 配置项\n","server_port int 本地服务端口,穿透使用的端口\n","huggingface_token string huggingface token,用于同步文件或者下载需要登录的文件\n","hidden_console_info bool 是否隐藏控制台信息\n","proxy_path {string:string} 将哪个路径映射到哪个服务,格式为:{'/':'http://127.0.0.1:5000/'}\n","kaggle_api_token string kaggle api token,用于访问kaggle的api\n","ngrok_config_or_file ngrok的token文件内容或者放token的文件的路径\n","frp_config_or_file frp的配置内容或者配置文件的路径\n","frp_ssl_dir frp的https证书存放目录\n","#  工具函数\n","执行cmd命令:\n","run(command, cwd=None, desc=None, errdesc=None, custom_env=None,try_error:bool=True)\n","检查网络是否可以访问\n","check_service(host, port)\n","检查gpu是否存在\n","check_gpu()\n","写入文本到文件\n","echoToFile(content:str,path:str)\n","获取一个免费frp配置\n","get_freefrp_confog(local_port)\n","\n","下载文件\n","link_or_download_flie(config:str, \n","skip_url:bool=False, \n","_link_instead_of_copy:bool=True, \n","base_path:str = '',\n","sync:bool=False, # 同步且按顺序下载\n","thread_num:int=None, \n","is_await:bool=False) # 异步不按顺序但等待下载完成\n","---- 下载文件功能的配置的格式\n","[目标目录] # 中括号必须写\n","下载链接1\n","名称:下载链接2 # 链接前面的名称可以用于对文件重命名\n","# 备注 井号后面的内容会被忽略\n","可以下载git仓库\n","可以下载huggingfacec仓库或者文件,会使用 huggingface_token 作为凭证下载\n","可以下载其他可以直接下载的互联网资源\n","可以作为文件链接功能,将某个目录或目录下的全部文件链接到目标目录\n","---- 下载文件功能结束结束\n","\n","创建kaggle数据集\n","createOrUpdateDataSet(path:str,datasetName:str)\n","下载kaggle数据集\n","downloadDatasetFiles(datasetName:str,outputPath:str)\n","同步目录到huggingface,可指定同步的文件类型\n","start_sync_log_to_huggingface(repo_id:str,directory_to_watch,repo_type='dataset',file_types=['.png','.jpg','.txt','.webp','.jpeg'])\n","压缩指定目录到指定位置\n","zipPath(path:str,zipName:str,format='tar') # 可用tar或gz\n","停止后台线程 # 仅停止线程名称前缀为 solo_ 的线程\n","stop_solo_threads()\n","'''\n","def get_docs():\n","    print(docs)\n","    \n","print('可以执行get_docs()查看文档')"]},{"cell_type":"code","execution_count":null,"metadata":{"trusted":true},"outputs":[],"source":["if not (True if os.getenv('IS_INSTALL_NGINX','False') == 'True' else False):\n","    run('git lfs install')\n","    run('git config --global credential.helper store')\n","    run('sudo apt update -y')\n","    run('sudo apt install nginx -y')\n","    os.environ['IS_INSTALL_NGINX'] = 'True'\n","stop_solo_threads()\n","time.sleep(2)\n","threading.Thread(target = startProxy, daemon=True, name='solo_startProxy').start()\n","localProxy()\n","init_huggingface()"]}],"metadata":{"kaggle":{"accelerator":"nvidiaTeslaT4","dataSources":[{"datasetId":2716934,"sourceId":6167400,"sourceType":"datasetVersion"},{"datasetId":3654544,"sourceId":6346544,"sourceType":"datasetVersion"},{"datasetId":2962375,"sourceId":6720235,"sourceType":"datasetVersion"},{"datasetId":3074484,"sourceId":6817788,"sourceType":"datasetVersion"}],"isGpuEnabled":true,"isInternetEnabled":true,"language":"python","sourceType":"notebook"},"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.10.13"}},"nbformat":4,"nbformat_minor":4}