| # data_sync.py | |
| import json | |
| import os | |
| from datasets import Dataset | |
| from huggingface_hub import login | |
| from dotenv import load_dotenv | |
# Run python-dotenv first so the environment lookups below can see any
# values it loads.
load_dotenv()

# Token later passed to huggingface_hub.login(); None when the variable is unset.
HF_TOKEN = os.environ.get("HF_TOKEN")

# Local JSON file that sync_to_hub() reads.
FEEDBACK_FILE = "feedback.json"

# Destination dataset repo id; the environment may override the default.
HF_DATASET_REPO = os.environ.get(
    "HF_DATASET_REPO",
    "modular-ai/rlhf_feedback_dataset",
)
def sync_to_hub():
    """Upload the local feedback file to the Hub as a private dataset.

    Loads ``FEEDBACK_FILE`` as JSON and, when it contains a non-empty list,
    builds a ``Dataset`` from it and pushes it to ``HF_DATASET_REPO`` with
    ``private=True``. A short status line is printed in every case.

    Returns:
        None. If the file is missing or holds no records, the function prints
        a message and returns without contacting the Hub.

    Raises:
        json.JSONDecodeError: If the feedback file is not valid JSON.
        ValueError: If the JSON top-level value is non-empty but not a list
            (``Dataset.from_list`` is given the value as-is).
    """
    # EAFP: open directly rather than exists()+open(), which can race with the
    # file being removed in between. JSON is read as UTF-8 explicitly so the
    # result does not depend on the platform's default encoding.
    try:
        with open(FEEDBACK_FILE, "r", encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        print("No feedback file.")
        return

    if not data:
        print("No data to sync.")
        return

    # Fail with a clear message instead of an opaque error from inside
    # Dataset.from_list when the file has the wrong shape.
    if not isinstance(data, list):
        raise ValueError(
            f"{FEEDBACK_FILE} must contain a JSON list of records, "
            f"got {type(data).__name__}"
        )

    # Authenticate only once there is something to push, so a missing or
    # empty feedback file never triggers a login.
    login(token=HF_TOKEN)

    dataset = Dataset.from_list(data)
    dataset.push_to_hub(HF_DATASET_REPO, private=True)
    print(f"Pushed {len(data)} samples to {HF_DATASET_REPO}")
# Script entry point: perform a single sync when this file is executed
# directly; importing the module does not trigger an upload.
if __name__ == "__main__":
    sync_to_hub()