"""Print a JSON mapping between qid-sorted positions and stored positions.

The script loads the dataset saved at ``humaneval_v_test_hf``, orders its
``qid`` values numerically, and prints a JSON object whose keys are positions
in the sorted order and whose values are positions in the stored order.
"""
import json


def get_float_qid(qid):
    """Return *qid* as a float: drop the first character, read ``-`` as ``.``.

    Example: ``"q12-3"`` -> ``12.3``.

    NOTE: this is not a safe ordering key. ``"q1-1"`` and ``"q1-10"`` both
    map to ``1.1``, and ``"q1-10"`` sorts before ``"q1-2"``. The function is
    kept unchanged for existing callers; sorting uses ``_qid_sort_key``.

    Raises:
        ValueError: if the remainder of *qid* is not a valid float literal.
    """
    return float(qid[1:].replace('-', '.'))


def _qid_sort_key(qid):
    """Return the integer components of *qid* as a tuple for numeric sorting.

    Assumes qids are a one-character prefix followed by dash-separated
    integers, e.g. ``"q12"`` or ``"q12-3"`` -- TODO confirm against the data.
    Comparing tuples of ints orders ``"q1-2"`` before ``"q1-10"`` and keeps
    ``"q1-1"`` distinct from ``"q1-10"``.

    Raises:
        ValueError: if a component is not an integer literal.
    """
    return tuple(int(part) for part in qid[1:].split('-'))


def build_old_to_new_id(qids):
    """Map each qid's position in sorted order to its position in *qids*.

    Args:
        qids: sequence of qid strings in their stored order.

    Returns:
        A dict ``{sorted_position: stored_position}``, with entries inserted
        in the order of *qids*. If a qid occurs more than once, its last
        position wins on both sides.

    NOTE(review): the name follows the original script's variable; keys are
    positions after sorting and values are positions before sorting --
    confirm which side is meant to be "old".
    """
    stored_position = {qid: idx for idx, qid in enumerate(qids)}
    sorted_position = {
        qid: idx for idx, qid in enumerate(sorted(qids, key=_qid_sort_key))
    }
    return {sorted_position[qid]: stored_position[qid] for qid in qids}


def main():
    """Load the dataset, build the position mapping, and print it as JSON."""
    # Imported here so the pure helpers above can be imported (and tested)
    # without the third-party ``datasets`` package being installed.
    import datasets

    humaneval_v_data = datasets.load_from_disk("humaneval_v_test_hf")
    # Only the qid column is needed; walk the rows once and keep just that.
    qids = [row["qid"] for row in humaneval_v_data]
    old_to_new_id = build_old_to_new_id(qids)
    # json.dumps turns the integer keys into strings in the output.
    print(json.dumps(old_to_new_id, indent=4))


if __name__ == "__main__":
    main()