# Spatial-Omics-Viewer / utils / data_source_manager.py
# anway's picture
# h5ad_viewer
# 05fdb87 verified
from typing import Dict, List, Optional, Tuple
from dataclasses import dataclass
from pathlib import Path
from anndata import AnnData
import datetime
@dataclass
class DataSource:
    """Represents an h5ad data source, loaded or registered for lazy loading.

    ``adata`` and ``loaded_at`` are ``None`` while the source has been
    registered but its data has not been attached yet.
    """

    id: str  # Unique identifier
    name: str  # Display name
    source_type: str  # 'demo', 'url', 'upload'
    source_path: str  # Original source (URL, file path, etc.)
    # Quoted forward reference: the name is not evaluated when the class
    # body runs, only when a tool resolves the type hints.
    adata: Optional["AnnData"]  # The loaded AnnData object (None until loaded)
    loaded_at: Optional[datetime.datetime]  # When it was loaded (None until loaded)
    n_obs: int = 0  # Number of observations
    n_vars: int = 0  # Number of variables

    def get_display_name(self) -> str:
        """Get formatted display name with metadata.

        Returns:
            ``"<name> (<n_obs> cells, <n_vars> genes)"`` when ``adata`` is
            set, otherwise ``"<name> (Not loaded)"``.
        """
        if self.adata is not None:
            return f"{self.name} ({self.n_obs:,} cells, {self.n_vars:,} genes)"
        return f"{self.name} (Not loaded)"

    def get_info(self) -> str:
        """Get detailed multi-line information string.

        Returns:
            Five newline-separated lines: dataset name, source type, number
            of cells/spots, number of genes and the load timestamp. The
            timestamp line reads ``"Loaded: Not loaded"`` when ``loaded_at``
            is ``None``.
        """
        # loaded_at is None for sources that were registered without data;
        # calling strftime on it unconditionally would raise AttributeError.
        if self.loaded_at is not None:
            loaded_text = self.loaded_at.strftime('%Y-%m-%d %H:%M:%S')
        else:
            loaded_text = "Not loaded"
        return (
            f"Dataset: {self.name}\n"
            f"Source: {self.source_type}\n"
            f"Cells/Spots: {self.n_obs:,}\n"
            f"Genes: {self.n_vars:,}\n"
            f"Loaded: {loaded_text}"
        )
class DataSourceManager:
    """
    Registry of h5ad datasets with one of them marked as "current".

    Responsibilities:
    - Keep every registered dataset, keyed by a generated ID
    - Track and switch the currently active dataset
    - Expose dataset metadata for UI selection widgets
    """

    def __init__(self):
        # Counter used to build the next "ds_<n>" identifier.
        self._id_counter = 0
        # ID of the active source, or None when nothing is selected.
        self.current_id: Optional[str] = None
        # Registered sources keyed by their generated ID.
        self.sources: Dict[str, DataSource] = {}

    def add_source(
        self,
        name: str,
        source_type: str,
        source_path: str,
        adata: Optional[AnnData] = None
    ) -> str:
        """
        Register a data source, reusing an existing entry for the same path.

        Args:
            name: Display name for the dataset
            source_type: Type of source ('demo', 'url', 'upload')
            source_path: Original source location; used as the duplicate key
            adata: Optional loaded AnnData object

        Returns:
            ID of the new source, or of the already registered source that
            has the same ``source_path``
        """
        # A source with the same path is never registered twice.
        for known_id, known in self.sources.items():
            if known.source_path != source_path:
                continue
            # Attach data only to an entry that has none yet; an entry that
            # already holds data is left untouched.
            if known.adata is None and adata is not None:
                known.adata = adata
                known.loaded_at = datetime.datetime.now()
                known.n_obs = adata.n_obs
                known.n_vars = adata.n_vars
            return known_id

        # No match: allocate the next sequential identifier.
        new_id = f"ds_{self._id_counter}"
        self._id_counter += 1

        # Metadata is only available when the data object was supplied.
        if adata is None:
            loaded_at = None
            n_obs = 0
            n_vars = 0
        else:
            loaded_at = datetime.datetime.now()
            n_obs = adata.n_obs
            n_vars = adata.n_vars

        self.sources[new_id] = DataSource(
            id=new_id,
            name=name,
            source_type=source_type,
            source_path=source_path,
            adata=adata,
            loaded_at=loaded_at,
            n_obs=n_obs,
            n_vars=n_vars,
        )

        # The first source registered becomes the active one.
        if self.current_id is None:
            self.current_id = new_id
        return new_id

    def get_source(self, source_id: str) -> Optional[DataSource]:
        """Look up a data source by ID; returns None for an unknown ID"""
        return self.sources.get(source_id)

    def get_current_source(self) -> Optional[DataSource]:
        """Return the active data source, or None when there is none"""
        active_id = self.current_id
        return None if active_id is None else self.sources.get(active_id)

    def set_current(self, source_id: str) -> bool:
        """
        Mark a registered source as the active one

        Args:
            source_id: ID of the source to activate

        Returns:
            True if the ID is registered and was activated, False otherwise
        """
        if source_id not in self.sources:
            return False
        self.current_id = source_id
        return True

    def get_all_sources(self) -> List[DataSource]:
        """Return a new list holding every registered data source"""
        return [*self.sources.values()]

    def get_source_choices(self) -> List[Tuple[str, str]]:
        """
        Build the option list for a dropdown/radio selector

        Returns:
            List of (display_name, source_id) tuples
        """
        return [
            (entry.get_display_name(), entry.id)
            for entry in self.sources.values()
        ]

    def get_source_names(self) -> List[str]:
        """Return the plain ``name`` of every registered source"""
        return [entry.name for entry in self.sources.values()]

    def remove_source(self, source_id: str) -> bool:
        """
        Unregister a data source

        Args:
            source_id: ID of source to remove

        Returns:
            True if removed, False if the ID was not registered
        """
        if source_id not in self.sources:
            return False
        del self.sources[source_id]
        # When the active source was removed, fall back to the first
        # remaining source, or to None when the registry is now empty.
        if self.current_id == source_id:
            self.current_id = next(iter(self.sources), None)
        return True

    def has_sources(self) -> bool:
        """Tell whether at least one source is registered"""
        return bool(self.sources)

    def count_sources(self) -> int:
        """Return how many sources are registered"""
        return len(self.sources)

    def clear_all(self):
        """Drop every data source and reset the selection and ID counter"""
        self._id_counter = 0
        self.current_id = None
        self.sources.clear()