"""Write the names of the cached dataset subsets to a JSON index file."""

# import datasets
# import logging
import json
import os

# Disabled one-off caching step, kept for reference. It saved each subset
# under ``cache_ds/<subset>``, which is the directory listed below.
# NOTE(review): ``PATH`` is not defined anywhere in this snippet.
#
# from tqdm import tqdm
# dataset_subs = os.listdir(PATH)
# print(dataset_subs)
# for ds in tqdm(dataset_subs):
#     try:
#         print(ds)
#         dataset = datasets.load_dataset("CarperAI/pile-v2-small-filtered",data_files=f"data/{ds}/data.json", split="train")
#         dataset.save_to_disk(f"cache_ds/{ds}")
#     except:
#         print(f"Error at {ds}")


def write_subset_index(cache_dir="cache_ds", output_path="documentation.json"):
    """Dump the entry names found in *cache_dir* to *output_path* as JSON.

    Args:
        cache_dir: Directory whose immediate entries are listed.
        output_path: File that receives the JSON array of entry names;
            it is created or overwritten.

    Returns:
        The sorted list of entry names that was written.

    Raises:
        FileNotFoundError: If *cache_dir* does not exist.
    """
    # os.listdir() yields names in arbitrary order; sort so the generated
    # file is reproducible across runs and machines.
    ds_subsets = sorted(os.listdir(cache_dir))
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(ds_subsets, f)
    return ds_subsets


if __name__ == "__main__":
    write_subset_index()