# llm_math_reasoning/data/load_data.py
# MingLi
# code
# 63c6bf0
import json
import os
from datasets import load_dataset
# Directory part of this script's path; output files are joined onto it.
current_dir = os.path.split(__file__)[0]
def save_dataset_to_json(dataset_split, output_path: str) -> None:
    """Save one dataset split to a JSON file.

    Args:
        dataset_split: Object exposing ``to_list()``; the list it returns is
            what gets serialized. Callers in this file pass splits obtained
            from ``load_dataset``.
        output_path: Target file name or path. It is joined onto the
            module-level ``current_dir``; per ``os.path.join`` semantics an
            absolute value is used unchanged.

    Prints a one-line summary with the item count and the final path.
    """
    records = dataset_split.to_list()
    output_path = os.path.join(current_dir, output_path)
    # Pin the encoding so the written file does not depend on the platform's
    # locale default. json.dump escapes non-ASCII by default, so the bytes
    # produced are the same as before on ASCII-compatible locales.
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(records, f, indent=2)
    print(f"Saved {len(records)} items to {output_path}")
def main():
    """Load the three benchmark splits and write each one to a JSON file.

    All splits are loaded before any file is written, and the files are
    written in the order MATH-500, GSM8K, AIME 2024.
    """
    # Pair each loaded split with the file name it is exported under.
    exports = [
        (load_dataset("HuggingFaceH4/MATH-500")["test"], "MATH-500.json"),
        (load_dataset("openai/gsm8k", "main")["test"], "GSM8K_test.json"),
        (load_dataset("Maxwell-Jia/AIME_2024")["train"], "AIME_2024.json"),
    ]

    for split, filename in exports:
        save_dataset_to_json(split, filename)
# Run the export only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()