ASSERT-KTH
/

codi-trace

Model card Files Files and versions

codi-trace / code /data /sources.py

sirui6011's picture

add code/ loader snapshot

aedd6ab verified 2 days ago

History Blame Contribute Delete

1.35 kB

	"""Dataset name(s) -> merged {id, code, input, output} rows.

	Add a converted dataset by running its folder's convert.py (saves ./data via
	save_to_disk) and listing it in _LOCAL. cruxeval keeps its own Hub-fallback
	loader and is held out entirely for eval (eval_cruxeval_*.py), never trained on.
	"""

	from __future__ import annotations

	import os
	from pathlib import Path

	_LOCAL = {"mbpp": "MBPP", "humaneval": "HumanEval", "pyx": "PyX"} # name -> folder, data in ./data


	def load_cruxeval():
	"""Full CRUXEval-O (the held-out test set). Prefer a local save_to_disk copy;
	the HF builder FileLock dies on NFS caches."""
	local_dir = os.environ.get("CRUXEVAL_DIR")
	if local_dir and os.path.isdir(local_dir):
	from datasets import load_from_disk

	return list(load_from_disk(local_dir))
	from datasets import load_dataset

	return list(load_dataset("cruxeval-org/cruxeval", split="test"))


	def load_one(name: str) -> list[dict]:
	key = name.strip().lower()
	if key == "cruxeval":
	return load_cruxeval()
	if key in _LOCAL:
	from datasets import load_from_disk

	d = os.environ.get(key.upper() + "_DIR") or str(Path(__file__).parent / _LOCAL[key] / "data")
	return list(load_from_disk(d))
	raise ValueError(f"unknown data source {name!r}; pick from {['cruxeval', *_LOCAL]}")