countclaw committed (verified)
Commit 6dedae0 · 1 Parent(s): 29c0b7d

Update sync_run.py

Files changed (1): sync_run.py +119 -63
sync_run.py CHANGED
@@ -1,86 +1,142 @@
 
 
+ from __future__ import annotations
+
  import os
- import time
  import subprocess
  import threading
+ import time
+ from pathlib import Path
+
  from huggingface_hub import HfApi, snapshot_download

+
- # Configuration
- TOKEN = os.getenv("HF_TOKEN")
- DATASET_ID = os.getenv("DATASET_ID")
-
- # Directories and file types to watch for sync
- SYNC_FOLDERS = ["data", "output", "config"]
- SYNC_EXTENSIONS = [".json", ".db", ".yaml", ".yml", ".txt", ".env"]
-
- api = HfApi(token=TOKEN)
-
- def download_data():
-     """Before startup: download all backups from the Dataset."""
+ BASE_DIR = Path(__file__).resolve().parent
+ PROJECT_DIR = BASE_DIR / "grok2api"
+ DATA_DIR = Path(os.getenv("DATA_DIR", str(PROJECT_DIR / "data"))).expanduser()
+ LOG_DIR = Path(os.getenv("LOG_DIR", str(PROJECT_DIR / "logs"))).expanduser()
+
+ HF_TOKEN = os.getenv("HF_TOKEN", "")
+ DATASET_ID = os.getenv("DATASET_ID", "")
+ SYNC_INTERVAL = max(int(os.getenv("HF_SYNC_INTERVAL", "1800")), 60)
+
+ SERVER_HOST = os.getenv("SERVER_HOST", "0.0.0.0")
+ SERVER_PORT = os.getenv("SERVER_PORT") or os.getenv("PORT") or "8000"
+ SERVER_WORKERS = os.getenv("SERVER_WORKERS", "1")
+
+ SYNC_ALLOW_PATTERNS = ["data/**"]
+ SYNC_IGNORE_PATTERNS = [
+     "data/.locks/**",
+     "data/tmp/**",
+     "logs/**",
+     "**/__pycache__/**",
+ ]
+
+
+ def log(message: str) -> None:
+     print(f"[HF-Space] {message}", flush=True)
+
+
+ def ensure_local_dirs() -> None:
+     DATA_DIR.mkdir(parents=True, exist_ok=True)
+     LOG_DIR.mkdir(parents=True, exist_ok=True)
+
+
+ def download_data() -> None:
      if not DATASET_ID:
-         print("[System] Warning: DATASET_ID is not set")
+         log("DATASET_ID is not set; skipping startup data sync.")
          return
+
      try:
-         print(f"[System] Pulling persisted data from Dataset ({DATASET_ID})...")
-         # Download into the current directory, allowing overwrites, but exclude code files to avoid version conflicts
+         log(f"Pulling data from Dataset: {DATASET_ID}")
          snapshot_download(
              repo_id=DATASET_ID,
              repo_type="dataset",
-             local_dir=".",
-             token=TOKEN,
-             ignore_patterns=["*.py", "Dockerfile", "requirements.txt", "logs/*", ".git/*"]
+             local_dir=str(PROJECT_DIR),
+             token=HF_TOKEN or None,
+             allow_patterns=SYNC_ALLOW_PATTERNS,
+             ignore_patterns=SYNC_IGNORE_PATTERNS,
          )
-         print("[System] Data pull complete.")
-     except Exception as e:
-         print(f"[System] Pull failed (first run or network issue): {e}")
-
- def upload_data():
-     """While running: periodically upload changes to the Dataset."""
+         log("Data pull complete.")
+     except Exception as exc:
+         log(f"Data pull failed; continuing local startup: {exc}")
+
+
+ def upload_data(run_as_future: bool) -> None:
+     if not DATASET_ID:
+         return
+
+     if not HF_TOKEN:
+         log("DATASET_ID is set but HF_TOKEN is missing; skipping data upload.")
+         return
+
+     try:
+         api = HfApi(token=HF_TOKEN)
+         api.upload_folder(
+             folder_path=str(PROJECT_DIR),
+             repo_id=DATASET_ID,
+             repo_type="dataset",
+             commit_message="chore: sync Grok2API data from Space",
+             allow_patterns=SYNC_ALLOW_PATTERNS,
+             ignore_patterns=SYNC_IGNORE_PATTERNS,
+             run_as_future=run_as_future,
+         )
+         if run_as_future:
+             log("Background data sync task submitted.")
+         else:
+             log("Pre-exit data sync complete.")
+     except Exception as exc:
+         if "No files have been modified" not in str(exc):
+             log(f"Data upload failed: {exc}")
+
+
+ def upload_loop() -> None:
      while True:
-         # Back up frequently (interval kept short so a password change is saved before the Space goes to sleep)
-         time.sleep(1800)
-         if not DATASET_ID:
-             continue
-
-         try:
-             # Scan the current directory for matching files and upload them:
-             # push the whole root folder, filtered through allow_patterns
-             api.upload_folder(
-                 folder_path=".",
-                 repo_id=DATASET_ID,
-                 repo_type="dataset",
-                 commit_message="Auto-backup data and configs",
-                 allow_patterns=[
-                     "data/**",
-                     "output/**",
-                     "config/**",
-                     "*.json",
-                     "*.db",
-                     "*.yaml",
-                     "*.yml",
-                     ".env"
-                 ],
-                 ignore_patterns=["logs/**", "__pycache__/**"],
-                 run_as_future=True
-             )
-             print(f"[Backup] {time.strftime('%H:%M:%S')} backup sync task submitted")
-         except Exception as e:
-             if "No files have been modified" not in str(e):
-                 print(f"[Backup] Backup error: {e}")
-
- if __name__ == "__main__":
-     # Make sure the base directories exist
-     for folder in SYNC_FOLDERS:
-         os.makedirs(folder, exist_ok=True)
-
-     # 1. Sync cloud data down to local storage before startup
+         time.sleep(SYNC_INTERVAL)
+         upload_data(run_as_future=True)
+
+
+ def init_storage() -> None:
+     subprocess.run(
+         ["sh", "scripts/init_storage.sh"],
+         cwd=PROJECT_DIR,
+         check=True,
+         env=os.environ.copy(),
+     )
+
+
+ def run_server() -> None:
+     env = os.environ.copy()
+     env.setdefault("DATA_DIR", str(DATA_DIR))
+     env.setdefault("LOG_DIR", str(LOG_DIR))
+     env.setdefault("LOG_FILE_ENABLED", "false")
+
+     command = [
+         "granian",
+         "--interface",
+         "asgi",
+         "--host",
+         SERVER_HOST,
+         "--port",
+         SERVER_PORT,
+         "--workers",
+         SERVER_WORKERS,
+         "main:app",
+     ]
+
+     log(
+         f"Starting Grok2API: host={SERVER_HOST} port={SERVER_PORT} workers={SERVER_WORKERS}"
+     )
+     subprocess.run(command, cwd=PROJECT_DIR, check=True, env=env)
+
+
+ if __name__ == "__main__":
+     ensure_local_dirs()
      download_data()
+     init_storage()

-     # 2. Start the background backup thread
-     backup_thread = threading.Thread(target=upload_data, daemon=True)
+     backup_thread = threading.Thread(target=upload_loop, daemon=True)
      backup_thread.start()

-     # 3. Launch the main program
-     # Note: per your logs the app defaults to port 8000, but HF expects 7860
-     # We force it to run on 7860
-     print("[System] Starting OpenAI/Codex CLI...")
-     subprocess.run(["python", "webui.py", "--host", "0.0.0.0", "--port", "7860"])
+     try:
+         run_server()
+     finally:
+         upload_data(run_as_future=False)
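
One detail worth noting for anyone adapting this script: when `run_as_future=True`, `HfApi.upload_folder` returns a `concurrent.futures.Future`, so the loop could keep a handle to the last submitted commit and drain it at shutdown, rather than relying only on the final blocking call. A minimal sketch, assuming the constants and `log()` defined in the new file above; the names `_pending`, `upload_loop_tracked`, and `wait_for_pending` are hypothetical and not part of this commit:

# Sketch only: assumes HF_TOKEN, DATASET_ID, PROJECT_DIR, SYNC_INTERVAL,
# SYNC_ALLOW_PATTERNS, SYNC_IGNORE_PATTERNS, and log() from sync_run.py above.
# _pending / upload_loop_tracked / wait_for_pending are hypothetical names.
from concurrent.futures import Future
from typing import Optional

_pending: Optional[Future] = None

def upload_loop_tracked() -> None:
    global _pending
    while True:
        time.sleep(SYNC_INTERVAL)
        if not (DATASET_ID and HF_TOKEN):
            continue
        # With run_as_future=True, upload_folder returns a Future that
        # resolves once the commit has been pushed to the Hub.
        _pending = HfApi(token=HF_TOKEN).upload_folder(
            folder_path=str(PROJECT_DIR),
            repo_id=DATASET_ID,
            repo_type="dataset",
            allow_patterns=SYNC_ALLOW_PATTERNS,
            ignore_patterns=SYNC_IGNORE_PATTERNS,
            run_as_future=True,
        )

def wait_for_pending(timeout: float = 300.0) -> None:
    # Call on the shutdown path, before the final blocking upload_data(),
    # so an in-flight background commit is not abandoned mid-push.
    if _pending is None or _pending.done():
        return
    try:
        _pending.result(timeout=timeout)
    except Exception as exc:
        if "No files have been modified" not in str(exc):
            log(f"Pending upload failed: {exc}")

The commit's `finally: upload_data(run_as_future=False)` already covers the common case; tracking the Future would only add safety when a background commit may still be in flight at exit.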