gallyg committed on
Commit
9e80337
·
verified ·
1 Parent(s): 2177a43

Update sync_run.py

Browse files
Files changed (1) hide show
  1. sync_run.py +23 -47
sync_run.py CHANGED
@@ -4,83 +4,59 @@ import subprocess
4
  import threading
5
  from huggingface_hub import HfApi, snapshot_download
6
 
7
- # 配置
8
  TOKEN = os.getenv("HF_TOKEN")
9
  DATASET_ID = os.getenv("DATASET_ID")
10
 
11
- # 定义需要监控目录和文件类型
12
- SYNC_FOLDERS = ["data", "output", "config"]
13
- SYNC_EXTENSIONS = [".json", ".db", ".yaml", ".yml", ".txt", ".env"]
14
 
15
  api = HfApi(token=TOKEN)
16
 
17
  def download_data():
18
- """启动前:从 Dataset 下载所有备份"""
19
- if not DATASET_ID:
20
- print("[System] 警告: 未配置 DATASET_ID")
21
- return
22
  try:
23
- print(f"[System] 正在从 Dataset ({DATASET_ID}) 拉取持久化数据...")
24
- # 下载到当前目录允许覆盖,但排除代码文件防止版本冲突
25
  snapshot_download(
26
  repo_id=DATASET_ID,
27
  repo_type="dataset",
28
- local_dir=".",
29
  token=TOKEN,
30
- ignore_patterns=["*.py", "Dockerfile", "requirements.txt", "logs/*", ".git/*"]
 
31
  )
32
- print("[System] 数据拉取完成。")
33
  except Exception as e:
34
- print(f"[System] 拉取失败 (首次运行或网络问题): {e}")
35
 
36
  def upload_data():
37
- """运行中:定时将改动上传至 Dataset"""
38
  while True:
39
- # 每 2 分钟备份一次(缩短时间,防止改完密码还没备份就休眠了)
40
- time.sleep(120)
41
- if not DATASET_ID:
42
- continue
43
-
44
  try:
45
- # 扫描当前目录下所有符合条件的文件进行上传
46
- # 我们直接上传整个根目录,但通过 allow_patterns 过滤出我们要的文件
47
  api.upload_folder(
48
  folder_path=".",
49
  repo_id=DATASET_ID,
50
  repo_type="dataset",
51
- commit_message="Auto-backup data and configs",
52
- allow_patterns=[
53
- "data/**",
54
- "output/**",
55
- "config/**",
56
- "*.json",
57
- "*.db",
58
- "*.yaml",
59
- "*.yml",
60
- ".env"
61
- ],
62
- ignore_patterns=["logs/**", "__pycache__/**"],
63
  run_as_future=True
64
  )
65
- print(f"[Backup] {time.strftime('%H:%M:%S')} 备份同步任务已提交")
66
  except Exception as e:
67
  if "No files have been modified" not in str(e):
68
  print(f"[Backup] 备份出错: {e}")
69
 
70
  if __name__ == "__main__":
71
- # 确保基础目录存在
72
- for folder in SYNC_FOLDERS:
73
- os.makedirs(folder, exist_ok=True)
74
 
75
- # 1. 启动前同步云端数据到本地
76
  download_data()
 
 
77
 
78
- # 2. 启动后台备份线程
79
- backup_thread = threading.Thread(target=upload_data, daemon=True)
80
- backup_thread.start()
81
-
82
- # 3. 启动主程序
83
- # 注意:根据你的日志,程序默认端口是 8000,但 HF 要求 7860
84
- # 我们强制它运行在 7860
85
- print("[System] 正在启动 OpenAI/Codex CLI...")
86
  subprocess.run(["python", "webui.py", "--host", "0.0.0.0", "--port", "7860"])
 
4
  import threading
5
  from huggingface_hub import HfApi, snapshot_download
6
 
 
7
  TOKEN = os.getenv("HF_TOKEN")
8
  DATASET_ID = os.getenv("DATASET_ID")
9
 
10
+ # 定义需要持久化文件夹和文件
11
+ # 该项目主要数据在 data,配置可能在根目录的 .json 或 .env
12
+ PERSIST_FILES = ["data", "output", "config.json", "settings.json", ".env"]
13
 
14
  api = HfApi(token=TOKEN)
15
 
16
  def download_data():
17
+ if not DATASET_ID: return
 
 
 
18
  try:
19
+ print(f"[System] 正在从 Dataset 拉取持久化数据...")
20
+ # 下载数据文件不下载代码文件 (.py, .html, .js)
21
  snapshot_download(
22
  repo_id=DATASET_ID,
23
  repo_type="dataset",
24
+ local_dir=".",
25
  token=TOKEN,
26
+ allow_patterns=["data/*", "output/*", "*.json", "*.env", "*.db"],
27
+ ignore_patterns=["*.py", "static/*", "templates/*", "logs/*"]
28
  )
29
+ print("[System] 拉取完成。")
30
  except Exception as e:
31
+ print(f"[System] 拉取失败: {e}")
32
 
33
  def upload_data():
 
34
  while True:
35
+ time.sleep(120) # 每 2 分钟备份一次
36
+ if not DATASET_ID: continue
 
 
 
37
  try:
38
+ # 上传所有数据和配置文件
 
39
  api.upload_folder(
40
  folder_path=".",
41
  repo_id=DATASET_ID,
42
  repo_type="dataset",
43
+ allow_patterns=["data/*", "output/*", "*.json", "*.env", "*.db"],
44
+ ignore_patterns=["logs/*", "__pycache__/*", "*.py", "static/*", "templates/*"],
 
 
 
 
 
 
 
 
 
 
45
  run_as_future=True
46
  )
47
+ print(f"[Backup] {time.strftime('%H:%M:%S')} 同步已提交至 Dataset")
48
  except Exception as e:
49
  if "No files have been modified" not in str(e):
50
  print(f"[Backup] 备份出错: {e}")
51
 
52
  if __name__ == "__main__":
53
+ os.makedirs("data", exist_ok=True)
54
+ os.makedirs("output", exist_ok=True)
 
55
 
 
56
  download_data()
57
+
58
+ threading.Thread(target=upload_data, daemon=True).start()
59
 
60
+ print("[System] 正在启动 Web UI...")
61
+ # 强制端口 7860
 
 
 
 
 
 
62
  subprocess.run(["python", "webui.py", "--host", "0.0.0.0", "--port", "7860"])