"""Load several benchmark dataset splits and dump each one to a JSON file.

Output files are written relative to the directory that contains this script.
"""

import json
import os

from datasets import load_dataset

# Directory containing this script. Made absolute so that output paths do not
# depend on the working directory the script happens to be launched from.
current_dir = os.path.dirname(os.path.abspath(__file__))


def save_dataset_to_json(dataset_split, output_path):
    """Save one dataset split as a JSON file.

    Args:
        dataset_split: Object exposing ``to_list()``, whose result is
            serialized with ``json.dump``.
        output_path: Target file name or path. It is joined onto
            ``current_dir``; an absolute path is used unchanged, because
            ``os.path.join`` discards earlier components in that case.
    """
    output_data = dataset_split.to_list()
    output_path = os.path.join(current_dir, output_path)

    # Create the parent directory so that paths containing a subdirectory
    # (e.g. "data/foo.json") do not fail with FileNotFoundError.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    # Explicit encoding: the default is locale-dependent and differs between
    # platforms.
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(output_data, f, indent=2)
    print(f"Saved {len(output_data)} items to {output_path}")


def main():
    """Load the MATH-500, GSM8K and AIME 2024 splits and save each to JSON."""
    # Load datasets
    math500 = load_dataset("HuggingFaceH4/MATH-500")["test"]
    gsm8k = load_dataset("openai/gsm8k", "main")["test"]
    aime2024 = load_dataset("Maxwell-Jia/AIME_2024")["train"]

    # Save each dataset to JSON
    save_dataset_to_json(math500, "MATH-500.json")
    save_dataset_to_json(gsm8k, "GSM8K_test.json")
    save_dataset_to_json(aime2024, "AIME_2024.json")


if __name__ == "__main__":
    main()