"""Manages local data storage and metadata tracking."""
from __future__ import annotations
import json
from dataclasses import dataclass
from loguru import logger
from tools.config import get_settings
from tools.models import StandardSetResponse
settings = get_settings()
# Data directories (from config)
RAW_DATA_DIR = settings.raw_data_dir
STANDARD_SETS_DIR = settings.standard_sets_dir
PROCESSED_DATA_DIR = settings.processed_data_dir


@dataclass
class StandardSetInfo:
    """Information about a downloaded standard set with processing status."""

    set_id: str
    title: str
    subject: str
    education_levels: list[str]
    jurisdiction: str
    publication_status: str
    valid_year: str
    processed: bool
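

# Illustrative sketch for the TODO in list_downloaded_standard_sets() below:
# one way to resolve the `processed` flag. The on-disk layout of
# PROCESSED_DATA_DIR is an assumption here (one "<set_id>.json" file per
# processed set); swap in the real convention once it is decided.
def _is_processed(set_id: str) -> bool:
    """Return True if a processed artifact exists for this set (assumed layout)."""
    # Assumed layout: PROCESSED_DATA_DIR/<set_id>.json written by the
    # processing step.
    return (PROCESSED_DATA_DIR / f"{set_id}.json").exists()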


def list_downloaded_standard_sets() -> list[StandardSetInfo]:
    """List all downloaded standard sets from the standardSets directory.

    Returns:
        One StandardSetInfo per downloaded set, including its processing status.
    """
    if not STANDARD_SETS_DIR.exists():
        return []
datasets = []
for set_dir in STANDARD_SETS_DIR.iterdir():
if not set_dir.is_dir():
continue
data_file = set_dir / "data.json"
if not data_file.exists():
continue
        try:
            with open(data_file, encoding="utf-8") as f:
                raw_data = json.load(f)

            # Parse the API response wrapper
            response = StandardSetResponse(**raw_data)
            standard_set = response.data

            # Build the dataset info
            dataset_info = StandardSetInfo(
                set_id=standard_set.id,
                title=standard_set.title,
                subject=standard_set.subject,
                education_levels=standard_set.educationLevels,
                jurisdiction=standard_set.jurisdiction.title,
                publication_status=standard_set.document.publicationStatus or "Unknown",
                valid_year=standard_set.document.valid,
                # TODO: check against the processed directory
                # (see the _is_processed sketch above).
                processed=False,
            )
            datasets.append(dataset_info)
        except Exception as e:
            # Catch broadly (JSON decode, I/O, model validation) so one
            # malformed set does not abort the whole scan.
            logger.warning(f"Failed to read {data_file}: {e}")
            continue
logger.debug(f"Found {len(datasets)} downloaded standard sets")
return datasets
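

# Example usage (illustrative): print a one-line summary per downloaded set.
if __name__ == "__main__":
    for info in list_downloaded_standard_sets():
        status = "processed" if info.processed else "unprocessed"
        print(f"{info.set_id}: {info.title} ({info.subject}, {info.valid_year}) [{status}]")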