| |
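"""
OpenClaw configuration sync script.

Purpose: pull configuration from a Hugging Face Dataset and periodically
push local changes back to that Dataset.
Author: generated from user requirements
Date: 2026-02-08
Update: fixed the huggingface_hub import problem
"""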
|
|
| import os |
| import json |
| import time |
| import logging |
| import hashlib |
| import shutil |
| from pathlib import Path |
| from huggingface_hub import HfApi, hf_hub_download, upload_file, list_repo_files |
|
|
| |
# Root logging setup: INFO and above, each record rendered as
# "<timestamp> - <level> - <message>".
_LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)

# Module-level logger shared by everything in this file.
logger = logging.getLogger(__name__)
|
|
class OpenClawConfigSync:
    """Two-way sync of OpenClaw config files with a Hugging Face Dataset.

    Files ending in one of ``CONFIG_SUFFIXES`` are pulled from the Dataset
    repo into ``local_config_dir`` and pushed back when their content
    changes. Change detection compares the MD5 of each local file with the
    digest recorded at the last successful download or upload of that file.

    Required environment variables:
        HF_TOKEN:   access token passed to every Hub call.
        HF_DATASET: repo id of the Dataset that stores the configuration.
    """

    # File extensions that are treated as configuration files.
    CONFIG_SUFFIXES = ('.json', '.yaml', '.yml')

    def __init__(self):
        """Read settings from the environment and prepare the Hub client.

        Raises:
            ValueError: if ``HF_TOKEN`` or ``HF_DATASET`` is unset or empty.
        """
        self.hf_token = os.getenv('HF_TOKEN', '')
        self.dataset_repo = os.getenv('HF_DATASET', '')
        self.local_config_dir = Path('/root/.openclaw')
        self.sync_interval = 300  # seconds between periodic upload checks

        if not self.hf_token or not self.dataset_repo:
            logger.error('HF_TOKEN 或 HF_DATASET 环境变量未设置')
            raise ValueError('缺少必要的环境变量')

        self.api = HfApi(token=self.hf_token)
        self.repo_dir = Path('/tmp/openclaw_dataset')
        # file name -> MD5 of the content last known to match the Dataset
        self._synced_hashes = {}

    def calculate_file_hash(self, file_path):
        """Return the MD5 hex digest of ``file_path``, or None on failure.

        The digest is only used to detect content changes, not for security.
        Read errors are logged and reported as None instead of raising.
        """
        digest = hashlib.md5()
        try:
            with open(file_path, "rb") as f:
                for chunk in iter(lambda: f.read(4096), b""):
                    digest.update(chunk)
        except (OSError, TypeError, ValueError) as e:
            logger.error(f"计算文件哈希失败 {file_path}: {e}")
            return None
        return digest.hexdigest()

    def ensure_local_dir(self):
        """Create the local configuration directory if it does not exist."""
        self.local_config_dir.mkdir(parents=True, exist_ok=True)

    def _sync_state(self):
        """Return the name -> digest map, creating it on first use."""
        state = getattr(self, '_synced_hashes', None)
        if state is None:
            state = self._synced_hashes = {}
        return state

    def _local_config_files(self):
        """Return the regular config files directly inside the local dir."""
        return sorted(
            path for path in self.local_config_dir.glob('*')
            if path.suffix in self.CONFIG_SUFFIXES and path.is_file()
        )

    def _install_config(self, source, dest):
        """Back up ``dest`` if present, then replace it with ``source``.

        The backup keeps the full original file name (``x.json.bak<ts>``) so
        that ``x.json`` and ``x.yaml`` never share a backup path. The new
        content is staged next to ``dest`` and swapped in with ``os.replace``
        so a failed copy does not leave the live config missing or partial.
        """
        if dest.exists():
            backup_file = dest.with_name(f'{dest.name}.bak{int(time.time())}')
            shutil.copy2(dest, backup_file)
            logger.debug(f'已备份原文件: {backup_file.name}')

        staging = dest.with_name(f'{dest.name}.tmp')
        shutil.copy2(source, staging)
        os.replace(staging, dest)

    def download_from_dataset(self):
        """Pull every config file from the Dataset into the local directory.

        Files are stored under their base name only, so repo paths that
        differ only by directory map to the same local file. A local file
        whose content already equals the downloaded one is left untouched
        (no backup is created).

        Returns:
            bool: True if at least one config file was fetched, else False.
                  Errors are logged, never raised.
        """
        try:
            logger.info(f'从Dataset拉取配置: {self.dataset_repo}')

            self.repo_dir.mkdir(parents=True, exist_ok=True)
            self.ensure_local_dir()

            repo_files = list_repo_files(
                repo_id=self.dataset_repo,
                repo_type="dataset",
                token=self.hf_token,
            )
            config_names = [
                name for name in repo_files
                if name.endswith(self.CONFIG_SUFFIXES)
            ]
            if not config_names:
                logger.warning('Dataset中未找到配置文件')
                return False

            downloaded_count = 0
            for file_name in config_names:
                try:
                    local_path = hf_hub_download(
                        repo_id=self.dataset_repo,
                        filename=file_name,
                        repo_type="dataset",
                        token=self.hf_token,
                        local_dir=self.repo_dir,
                    )
                    config_file = Path(local_path)
                    dest_file = self.local_config_dir / config_file.name

                    new_digest = self.calculate_file_hash(config_file)
                    already_current = (
                        new_digest is not None
                        and dest_file.exists()
                        and self.calculate_file_hash(dest_file) == new_digest
                    )
                    if not already_current:
                        self._install_config(config_file, dest_file)

                    # Local content now matches the Dataset for this file.
                    self._sync_state()[dest_file.name] = new_digest
                    logger.info(f'已恢复配置: {config_file.name}')
                    downloaded_count += 1
                except Exception as e:
                    # One bad file must not abort the remaining downloads.
                    logger.error(f'下载文件 {file_name} 失败: {e}')

            logger.info(f'配置文件下载完成,共下载 {downloaded_count} 个文件')
            return downloaded_count > 0

        except Exception as e:
            logger.error(f'从Dataset拉取配置失败: {e}')
            return False

    def upload_to_dataset(self):
        """Push every local config file to the root of the Dataset repo.

        Each file is uploaded in its own commit. The digest taken just before
        the upload is recorded on success, so an edit made while the upload
        is in flight is still detected on the next check.

        Returns:
            bool: True if at least one file was uploaded, else False.
                  Errors are logged, never raised.
        """
        try:
            logger.info('推送配置变更到Dataset')

            config_files = self._local_config_files()
            if not config_files:
                logger.warning('没有配置文件需要上传')
                return False

            uploaded_count = 0
            for config_file in config_files:
                try:
                    digest = self.calculate_file_hash(config_file)
                    upload_file(
                        path_or_fileobj=str(config_file),
                        path_in_repo=config_file.name,
                        repo_id=self.dataset_repo,
                        repo_type="dataset",
                        token=self.hf_token,
                        commit_message=f"自动同步配置: {config_file.name} - {time.strftime('%Y-%m-%d %H:%M:%S')}"
                    )
                    self._sync_state()[config_file.name] = digest
                    logger.info(f'已上传配置: {config_file.name}')
                    uploaded_count += 1
                except Exception as e:
                    # Keep going so one failure does not block other files.
                    logger.error(f'上传文件 {config_file.name} 失败: {e}')

            logger.info(f'配置文件上传完成,共上传 {uploaded_count} 个文件')
            return uploaded_count > 0

        except Exception as e:
            logger.error(f'推送配置到Dataset失败: {e}')
            return False

    def config_changed(self):
        """Report whether any local config file differs from the last sync.

        Returns:
            bool: False when there are no local config files or every file's
                  digest equals the recorded one; True when a file is new,
                  modified or unreadable, or when the check itself fails
                  (so that an upload is attempted rather than skipped).
        """
        try:
            synced = self._sync_state()
            for config_file in self._local_config_files():
                digest = self.calculate_file_hash(config_file)
                if digest is None or synced.get(config_file.name) != digest:
                    return True
            return False
        except Exception as e:
            logger.error(f'检查配置变更失败: {e}')
            return True

    def run_sync(self, mode='download'):
        """Run one sync pass.

        Args:
            mode: ``'download'`` pulls from the Dataset; ``'upload'`` pushes
                  local files, but only when ``config_changed()`` is True.

        Returns:
            bool: result of the underlying operation; True when an upload was
                  skipped because nothing changed; False for an unknown mode.
        """
        self.ensure_local_dir()

        if mode == 'download':
            return self.download_from_dataset()
        if mode == 'upload':
            if self.config_changed():
                return self.upload_to_dataset()
            logger.info('配置无变更,跳过上传')
            return True

        logger.warning(f'未知的同步模式: {mode}')
        return False

    def start_periodic_sync(self):
        """Loop forever: sleep ``sync_interval`` seconds, then try an upload.

        Exceptions from a pass are logged and followed by a 60-second pause
        so a persistent failure does not spin the loop.
        """
        logger.info('启动定时同步服务')
        while True:
            try:
                time.sleep(self.sync_interval)
                self.run_sync('upload')
            except Exception as e:
                logger.error(f'定时同步失败: {e}')
                time.sleep(60)
|
|
def main():
    """Command-line entry point: ``python sync.py [download|upload|sync]``.

    * ``download`` - pull configuration from the Dataset once.
    * ``upload``   - push local configuration once (if it changed).
    * ``sync``     - pull once, then run the periodic upload loop until the
                     process is stopped.

    Exits with status 1 on bad usage or an unexpected error, and with
    status 130 when interrupted from the keyboard.
    """
    import sys

    modes = ('download', 'upload', 'sync')
    if len(sys.argv) != 2 or sys.argv[1] not in modes:
        print('用法: python sync.py [download|upload|sync]')
        sys.exit(1)

    mode = sys.argv[1]

    try:
        sync = OpenClawConfigSync()

        if mode == 'sync':
            sync.run_sync('download')
            # The loop never returns, so it runs in the main thread; a
            # helper thread that is joined immediately adds nothing.
            sync.start_periodic_sync()
        else:
            sync.run_sync(mode)

    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop the loop; exit without a traceback.
        logger.info('收到中断信号,同步服务已停止')
        sys.exit(130)
    except Exception as e:
        logger.error(f'同步服务失败: {e}')
        sys.exit(1)


if __name__ == '__main__':
    main()