# sync_run.py (project: code2) — last updated in commit 82b233b (verified), author: gallyg.
# NOTE: this header replaces pasted web-UI residue that was not valid Python.
import os
import time
import subprocess
import threading
from huggingface_hub import HfApi, snapshot_download
# Credentials and the target dataset repo come from environment variables
# (set as Space secrets); both may be None when unset.
TOKEN = os.getenv("HF_TOKEN")
DATASET_ID = os.getenv("DATASET_ID")
# Folders and files that must be persisted across restarts.
# The app's main data lives under data/; config may be a root-level .json or .env.
# NOTE(review): this list is not referenced in the visible code — the sync
# functions use their own allow/ignore patterns instead; confirm it is still needed.
PERSIST_FILES = ["data", "output", "config.json", "settings.json", ".env"]
api = HfApi(token=TOKEN)
def download_data():
    """Restore persisted data/config files from the dataset repo into the CWD.

    No-op when DATASET_ID is unset. Any failure is logged and swallowed so a
    missing or unreachable dataset never blocks startup.
    """
    if not DATASET_ID:
        return
    try:
        print(f"[System] 正在从 Dataset 拉取持久化数据...")
        # Pull only data/config artifacts; code assets (.py, static assets,
        # templates, logs) are deliberately excluded.
        snapshot_download(
            repo_id=DATASET_ID,
            repo_type="dataset",
            local_dir=".",
            token=TOKEN,
            allow_patterns=["data/*", "output/*", "*.json", "*.env", "*.db"],
            ignore_patterns=["*.py", "static/*", "templates/*", "logs/*"],
        )
        print("[System] 拉取完成。")
    except Exception as exc:
        print(f"[System] 拉取失败: {exc}")
def upload_data():
    """Periodically back up persisted data/config files to the dataset repo.

    Intended to run forever in a daemon thread. Each cycle sleeps first
    (2 minutes) so the initial restore settles before the first backup.
    Failures are logged and the loop continues; an "empty commit" error is
    silenced as expected noise.
    """
    while True:
        time.sleep(120)  # back up every 2 minutes
        if not DATASET_ID:
            continue
        try:
            # FIX: the original passed run_as_future=True, which makes
            # upload_folder return a Future immediately; any upload error then
            # surfaced in the background worker thread, so this except clause
            # (including the "No files have been modified" filter below) was
            # dead code. Uploading synchronously restores the intended error
            # handling — blocking is fine since this already runs in its own
            # daemon thread.
            api.upload_folder(
                folder_path=".",
                repo_id=DATASET_ID,
                repo_type="dataset",
                allow_patterns=["data/*", "output/*", "*.json", "*.env", "*.db"],
                ignore_patterns=["logs/*", "__pycache__/*", "*.py", "static/*", "templates/*"],
            )
            print(f"[Backup] {time.strftime('%H:%M:%S')} 同步已提交至 Dataset")
        except Exception as e:
            # A no-change upload is expected between real edits — not an error.
            if "No files have been modified" not in str(e):
                print(f"[Backup] 备份出错: {e}")
if __name__ == "__main__":
os.makedirs("data", exist_ok=True)
os.makedirs("output", exist_ok=True)
download_data()
threading.Thread(target=upload_data, daemon=True).start()
print("[System] 正在启动 Web UI...")
# 强制端口 7860
subprocess.run(["python", "webui.py", "--host", "0.0.0.0", "--port", "7860"])