|
import json |
|
import os |
|
from datasets import load_dataset |
|
|
|
|
|
# Absolute directory containing this script; output files are written relative
# to it. abspath() is applied first so the value stays valid even when
# __file__ is a relative path and the working directory changes later.
current_dir = os.path.dirname(os.path.abspath(__file__))
|
|
|
def save_dataset_to_json(dataset_split, output_path):
    """Save one dataset split to a JSON file.

    Args:
        dataset_split: Object exposing ``to_list()``; the list it returns is
            serialized as-is.
        output_path: Destination file path. It is joined onto the module-level
            ``current_dir`` with ``os.path.join``, so an absolute path is used
            unchanged.

    Prints the number of saved items and the final path once the file has
    been written.
    """
    records = dataset_split.to_list()
    output_path = os.path.join(current_dir, output_path)

    # Pin the encoding instead of relying on the locale default. json.dump
    # keeps its default ensure_ascii behavior, so the emitted text is ASCII.
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(records, f, indent=2)

    print(f"Saved {len(records)} items to {output_path}")
|
|
|
def main():
    """Load three dataset splits and write each one to its own JSON file.

    The splits are the ``test`` split of ``HuggingFaceH4/MATH-500``, the
    ``test`` split of ``openai/gsm8k`` (config ``main``), and the ``train``
    split of ``Maxwell-Jia/AIME_2024``. Each is passed to
    ``save_dataset_to_json`` with its output file name.
    """
    # Each entry: (positional args for load_dataset, split name, output file).
    exports = [
        (("HuggingFaceH4/MATH-500",), "test", "MATH-500.json"),
        (("openai/gsm8k", "main"), "test", "GSM8K_test.json"),
        (("Maxwell-Jia/AIME_2024",), "train", "AIME_2024.json"),
    ]

    # All splits are loaded before the first file is written.
    loaded = [
        (load_dataset(*load_args)[split_name], file_name)
        for load_args, split_name, file_name in exports
    ]

    for split_data, file_name in loaded:
        save_dataset_to_json(split_data, file_name)
|
|
|
# Run the export only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()