File size: 930 Bytes
63c6bf0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
import json
import os
from datasets import load_dataset

# Directory containing this script; save_dataset_to_json joins output
# paths onto it so the JSON files are written next to the script.
current_dir = os.path.dirname(__file__)

def save_dataset_to_json(dataset_split, output_path):
    """Save one dataset split as a pretty-printed JSON file.

    Args:
        dataset_split: Object exposing ``to_list()``, which must return a
            JSON-serializable list of rows.
        output_path: Target file name or path. Relative paths are resolved
            against the module-level ``current_dir``; an absolute path is
            used as-is (``os.path.join`` semantics).

    Raises:
        OSError: If the target directory or file cannot be created/written.
        TypeError: If a row contains a value ``json`` cannot serialize.
    """
    output_data = dataset_split.to_list()
    output_path = os.path.join(current_dir, output_path)

    # Create missing parent directories so nested targets such as
    # "out/MATH-500.json" work instead of raising FileNotFoundError.
    parent_dir = os.path.dirname(output_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # Explicit encoding keeps the file independent of the platform locale.
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(output_data, f, indent=2)
    print(f"Saved {len(output_data)} items to {output_path}")

def main():
    """Load three benchmark splits and write each one to a JSON file.

    Every split is loaded before the first file is written, so a failed
    load leaves no partial output behind.
    """
    # (loaded split, output file name) pairs, evaluated in listed order.
    exports = [
        (load_dataset("HuggingFaceH4/MATH-500")["test"], "MATH-500.json"),
        (load_dataset("openai/gsm8k", "main")["test"], "GSM8K_test.json"),
        (load_dataset("Maxwell-Jia/AIME_2024")["train"], "AIME_2024.json"),
    ]

    for split, file_name in exports:
        save_dataset_to_json(split, file_name)

# Run the export only when executed as a script, not when imported.
if __name__ == "__main__":
    main()