"""Manages local data storage and metadata tracking."""
from __future__ import annotations
import json
from dataclasses import dataclass
from loguru import logger
from tools.config import get_settings
from tools.models import StandardSetResponse
settings = get_settings()
# Data directories (from config)
RAW_DATA_DIR = settings.raw_data_dir
STANDARD_SETS_DIR = settings.standard_sets_dir
PROCESSED_DATA_DIR = settings.processed_data_dir


@dataclass
class StandardSetInfo:
    """Information about a downloaded standard set with processing status."""

    set_id: str
    title: str
    subject: str
    education_levels: list[str]
    jurisdiction: str
    publication_status: str
    valid_year: str
    processed: bool
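

# Illustrative sketch for the TODO in list_downloaded_standard_sets() below:
# one way to resolve the `processed` flag. The on-disk layout of
# PROCESSED_DATA_DIR is an assumption here (one "<set_id>.json" file per
# processed set); swap in the real convention once it is decided.
def _is_processed(set_id: str) -> bool:
    """Return True if a processed artifact exists for this set (assumed layout)."""
    # Assumed layout: PROCESSED_DATA_DIR/<set_id>.json written by the
    # processing step.
    return (PROCESSED_DATA_DIR / f"{set_id}.json").exists()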


def list_downloaded_standard_sets() -> list[StandardSetInfo]:
    """List all downloaded standard sets from the standardSets directory.

    Returns:
        One StandardSetInfo per downloaded set, including its processing status.
    """
    if not STANDARD_SETS_DIR.exists():
        return []
datasets = []
for set_dir in STANDARD_SETS_DIR.iterdir():
if not set_dir.is_dir():
continue
data_file = set_dir / "data.json"
if not data_file.exists():
continue
        try:
            with open(data_file, encoding="utf-8") as f:
                raw_data = json.load(f)

            # Parse the API response wrapper
            response = StandardSetResponse(**raw_data)
            standard_set = response.data

            # Build the dataset info
            dataset_info = StandardSetInfo(
                set_id=standard_set.id,
                title=standard_set.title,
                subject=standard_set.subject,
                education_levels=standard_set.educationLevels,
                jurisdiction=standard_set.jurisdiction.title,
                publication_status=standard_set.document.publicationStatus or "Unknown",
                valid_year=standard_set.document.valid,
                # TODO: check against the processed directory
                # (see the _is_processed sketch above).
                processed=False,
            )
            datasets.append(dataset_info)
        except Exception as e:
            # Catch broadly (JSON decode, I/O, model validation) so one
            # malformed set does not abort the whole scan.
            logger.warning(f"Failed to read {data_file}: {e}")
            continue
logger.debug(f"Found {len(datasets)} downloaded standard sets")
return datasets
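

# Example usage (illustrative): print a one-line summary per downloaded set.
if __name__ == "__main__":
    for info in list_downloaded_standard_sets():
        status = "processed" if info.processed else "unprocessed"
        print(f"{info.set_id}: {info.title} ({info.subject}, {info.valid_year}) [{status}]")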