opencode / scripts /save_to_dataset.py
tao-shen's picture
fix: OPENCODE_DATASET_REPO 支持填完整 URL,自动解析为 repo_id
140619f
#!/usr/bin/env python3
"""
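Upload ~/.local/share/opencode to a Hugging Face Dataset repository.
Requires the environment variables HF_TOKEN and OPENCODE_DATASET_REPO
(a repo_id such as namespace/repo_name, or a full dataset URL).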
"""
import os
import re
import sys
def _normalize_repo_id(value):
"""接受 repo_id 或完整 URL,返回 namespace/repo_name。"""
if not value or not value.strip():
return None
value = value.strip()
m = re.search(r"(?:huggingface\.co/datasets/|^)([\w.-]+/[\w.-]+)/?$", value)
if m:
return m.group(1)
if "/" in value:
return value
return None
def main():
    """Upload ~/.local/share/opencode to the configured dataset repo.

    Reads ``HF_TOKEN`` and ``OPENCODE_DATASET_REPO`` from the environment.
    Saving is best-effort: the function returns 0 (skip) when either
    variable is missing, the data directory does not exist, or
    ``huggingface_hub`` is not installed.

    Returns:
        0 when the upload succeeded or was skipped, 1 when the upload
        itself raised an exception.
    """
    token = os.environ.get("HF_TOKEN")
    raw = os.environ.get("OPENCODE_DATASET_REPO")
    repo_id = _normalize_repo_id(raw)
    data_dir = os.path.expanduser("~/.local/share/opencode")

    # A repo that is configured but unusable is a misconfiguration, not a
    # "feature disabled" state: say so instead of skipping silently.
    if raw and raw.strip() and not repo_id:
        print(
            f"save: OPENCODE_DATASET_REPO={raw!r} is not a repo_id or dataset URL, skip save",
            file=sys.stderr,
        )
        return 0
    if not token or not repo_id:
        return 0
    if not os.path.isdir(data_dir):
        return 0

    # Imported lazily so that a missing optional dependency only disables
    # saving rather than breaking the script at import time.
    try:
        from huggingface_hub import HfApi
    except ImportError:
        print("save: huggingface_hub not installed, skip save", file=sys.stderr)
        return 0

    try:
        api = HfApi(token=token)
        api.upload_folder(
            folder_path=data_dir,
            path_in_repo=".",
            repo_id=repo_id,
            repo_type="dataset",
            token=token,
        )
    except Exception as e:
        # Top-level boundary: report any upload failure and signal it via
        # the exit status instead of a traceback.
        print(f"save: upload failed ({e})", file=sys.stderr)
        return 1
    return 0
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())