Spaces:

InsanAlex
/

iris-at-text2sparql

Running on CPU Upgrade

Alex Latipov

Harden frozen eval prompts and judge JSON handling

d745844 7 days ago

2.98 kB

	"""Dataset-specific logic for the Text2SPARQL repair pipeline.

	Handles loading dataset configs, KG profiles, and endpoint URLs.
	All dataset-specific hacks remain inside this file.
	"""

	from __future__ import annotations

	import json
	import logging
	from pathlib import Path

	from .config import RuntimeConfig
	from .models import DatasetConfig

	logger = logging.getLogger(__name__)


	def load_dataset_config(dataset_id: str, config: RuntimeConfig) -> DatasetConfig:
	    """Load dataset configuration from the runtime config.

	    Looks up ``dataset_id`` in ``config.datasets`` and wraps the raw mapping
	    found there in a ``DatasetConfig``.

	    Args:
	        dataset_id: The dataset identifier URL.
	        config: The runtime configuration containing dataset definitions.

	    Returns:
	        DatasetConfig for the requested dataset. Optional fields fall back to
	        ``kg_profile_path=""``, ``default_prefixes={}`` and ``mode="dbpedia"``.

	    Raises:
	        ValueError: If dataset_id is not found in the config.
	        KeyError: If the dataset entry has no ``endpoint_url``.
	    """
	    if dataset_id not in config.datasets:
	        available = list(config.datasets)
	        raise ValueError(
	            f"Unknown dataset_id: {dataset_id!r}. "
	            f"Available datasets: {available}"
	        )

	    ds_raw = config.datasets[dataset_id]

	    # Copy the prefix mapping so later changes to the DatasetConfig cannot
	    # leak back into the shared runtime config (and vice versa). An explicit
	    # null in the config is treated like a missing key.
	    raw_prefixes = ds_raw.get("default_prefixes")
	    default_prefixes = dict(raw_prefixes) if raw_prefixes is not None else {}

	    return DatasetConfig(
	        dataset_id=dataset_id,
	        endpoint_url=ds_raw["endpoint_url"],
	        kg_profile_path=ds_raw.get("kg_profile_path", ""),
	        default_prefixes=default_prefixes,
	        mode=ds_raw.get("mode", "dbpedia"),
	    )


	def load_kg_profile(dataset: DatasetConfig) -> dict:
	    """Load the KG profile for a dataset.

	    The KG profile contains entity labels, property labels, and class labels
	    used for context building. Loading is best-effort: if the profile path is
	    unset, the file does not exist yet, or the file cannot be read or parsed
	    as a JSON object, an empty profile structure is returned instead of
	    raising.

	    Args:
	        dataset: The dataset configuration.

	    Returns:
	        Dictionary with at least the keys: entities, properties, classes.
	        The empty fallback profile additionally carries a ``metadata`` entry
	        with the dataset's ``dataset_id`` and ``mode``.
	    """
	    empty_profile = {
	        "entities": [],
	        "properties": [],
	        "classes": [],
	        "metadata": {
	            "dataset_id": dataset.dataset_id,
	            "mode": dataset.mode,
	        },
	    }

	    # An unset path must be caught before building a Path: Path("") is the
	    # current directory, which exists and would otherwise be reported as a
	    # failed load rather than a missing profile.
	    raw_path = dataset.kg_profile_path
	    if not raw_path or not Path(raw_path).exists():
	        logger.warning(
	            "KG profile not found at %s — using empty profile. "
	            "Context building will rely on prefix hints only.",
	            dataset.kg_profile_path,
	        )
	        return empty_profile

	    profile_path = Path(raw_path)
	    try:
	        # Explicit UTF-8 so label text decodes the same on every platform.
	        with open(profile_path, "r", encoding="utf-8") as f:
	            profile = json.load(f)
	    except (json.JSONDecodeError, UnicodeDecodeError, OSError) as exc:
	        logger.error("Failed to load KG profile from %s: %s", profile_path, exc)
	        return empty_profile

	    # Valid JSON that is not an object (e.g. a top-level list) cannot hold
	    # the required keys; treat it like any other unusable profile.
	    if not isinstance(profile, dict):
	        logger.error(
	            "KG profile at %s is not a JSON object (got %s) — using empty profile.",
	            profile_path,
	            type(profile).__name__,
	        )
	        return empty_profile

	    # Ensure required keys exist
	    for key in ("entities", "properties", "classes"):
	        profile.setdefault(key, [])
	    return profile


	def get_endpoint_url(dataset: DatasetConfig) -> str:
	    """Get the SPARQL endpoint URL for a dataset.

	    Args:
	        dataset: The dataset configuration.

	    Returns:
	        The ``endpoint_url`` attribute of ``dataset``, returned unchanged.
	    """
	    return dataset.endpoint_url