giskard-evaluator / fetch_utils.py
ZeroCommand's picture
add trust remote code to get dataset config names
4b59401
raw
history blame
894 Bytes
import logging
import datasets
def check_dataset_and_get_config(dataset_id):
    """Return the configuration names available for *dataset_id*.

    This is a best-effort lookup: any failure while querying the dataset
    is logged as a warning and reported to the caller as ``None`` instead
    of being raised.

    Args:
        dataset_id: Identifier passed straight through to
            ``datasets.get_dataset_config_names``.

    Returns:
        Whatever ``datasets.get_dataset_config_names`` returns on success
        (presumably a list of config-name strings -- confirm against the
        library docs), or ``None`` if the lookup raised.
    """
    try:
        # NOTE(review): trust_remote_code=True presumably lets the library
        # run loading code shipped with the dataset repository. If
        # dataset_id can come from untrusted input, confirm this is intended.
        return datasets.get_dataset_config_names(
            dataset_id, trust_remote_code=True
        )
    except Exception as e:
        # The dataset may not exist, or the lookup may have failed for some
        # other reason; log it so the failure is diagnosable, matching
        # check_dataset_and_get_split's handling.
        logging.warning(
            f"Failed to get config names for dataset {dataset_id}: {e}"
        )
        return None
def check_dataset_and_get_split(dataset_id, dataset_config):
    """Return the split names of *dataset_id* under *dataset_config*.

    The dataset is loaded with ``datasets.load_dataset`` and the keys of
    the loaded object are returned as a list. Both stages are best-effort:
    a failure in either one is logged as a warning and yields ``None``.

    Args:
        dataset_id: Identifier passed to ``datasets.load_dataset``.
        dataset_config: Configuration name passed as the second positional
            argument to ``datasets.load_dataset``.

    Returns:
        A list of the loaded object's keys, or ``None`` if loading failed
        or the keys could not be read.
    """
    # Stage 1: load the dataset. Any error here (e.g. the dataset does not
    # exist) is reported and turned into a None result.
    # NOTE(review): presumably this loads the full dataset just to read its
    # split names -- confirm whether a lighter metadata query would suffice.
    try:
        loaded = datasets.load_dataset(
            dataset_id,
            dataset_config,
            trust_remote_code=True,
        )
    except Exception as e:
        logging.warning(
            f"Failed to load dataset {dataset_id} with config {dataset_config}: {e}"
        )
        return None

    # Stage 2: read the split names off the loaded object. An object
    # without usable keys is treated as "no splits".
    try:
        return list(loaded.keys())
    except Exception as e:
        logging.warning(
            f"Dataset {dataset_id} with config {dataset_config} has no splits: {e}"
        )
        return None