File size: 2,100 Bytes
1c73b10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
from typing import Optional, Dict
import pandas as pd
from functools import lru_cache
from huggingface_hub import snapshot_download
import logging
from config import CONFIG

# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Configure the root logger at INFO level when this module is imported.
logging.basicConfig(level=logging.INFO)

class DataManager:
    """Lazily load and memoize the leaderboard, responses and section-results datasets.

    Each dataset is read with ``pd.read_parquet`` from the path configured in
    ``CONFIG["dataset"]`` the first time its property is accessed, and is then
    kept on the instance until :meth:`refresh_datasets` succeeds.
    """

    def __init__(self):
        # ``None`` means "not loaded yet"; the properties fill these on demand.
        self._leaderboard_data: Optional[pd.DataFrame] = None
        self._responses_data: Optional[pd.DataFrame] = None
        self._section_results_data: Optional[pd.DataFrame] = None

    # NOTE(review): lru_cache on a method keys on (self, path) and keeps the
    # instance alive for the cache's lifetime (ruff B019). The decorator is
    # kept so that ``_load_dataset.cache_clear()`` stays available.
    @lru_cache(maxsize=1)
    def _load_dataset(self, path: str) -> pd.DataFrame:
        """Read the parquet file at *path* into a DataFrame.

        Args:
            path: Location of the parquet file to read.

        Returns:
            The loaded DataFrame.

        Raises:
            RuntimeError: If reading fails; the original exception is chained
                as ``__cause__``.
        """
        try:
            return pd.read_parquet(path)
        except Exception as e:
            logger.error(f"Error loading dataset from {path}: {e}")
            raise RuntimeError(f"Failed to load dataset: {e}") from e

    def _invalidate_loaded_data(self) -> None:
        """Drop every cached DataFrame so the next property access reloads it."""
        self._load_dataset.cache_clear()
        # The properties memoize on these attributes, so clearing only the
        # lru_cache would leave callers with the pre-refresh DataFrames.
        self._leaderboard_data = None
        self._responses_data = None
        self._section_results_data = None

    def refresh_datasets(self) -> None:
        """Download the dataset snapshot again and invalidate cached data.

        Best-effort: a failed download is logged and the previously loaded
        data is kept; no exception is propagated to the caller.
        """
        try:
            snapshot_download(
                # NOTE(review): repo_id has no "<namespace>/<name>" form;
                # confirm this is the intended dataset repository.
                repo_id="alibayram",
                repo_type="dataset",
                local_dir=CONFIG["dataset"].cache_dir
            )
        except Exception as e:
            logger.error(f"Error refreshing datasets: {e}")
            return
        # Only discard loaded data once the new snapshot is on disk.
        self._invalidate_loaded_data()
        logger.info("Datasets refreshed successfully")

    @property
    def leaderboard_data(self) -> pd.DataFrame:
        """Leaderboard DataFrame, loaded from ``leaderboard_path`` on first access."""
        if self._leaderboard_data is None:
            self._leaderboard_data = self._load_dataset(CONFIG["dataset"].leaderboard_path)
        return self._leaderboard_data

    @property
    def responses_data(self) -> pd.DataFrame:
        """Responses DataFrame, loaded from ``responses_path`` on first access."""
        if self._responses_data is None:
            self._responses_data = self._load_dataset(CONFIG["dataset"].responses_path)
        return self._responses_data

    @property
    def section_results_data(self) -> pd.DataFrame:
        """Section-results DataFrame, loaded from ``section_results_path`` on first access."""
        if self._section_results_data is None:
            self._section_results_data = self._load_dataset(CONFIG["dataset"].section_results_path)
        return self._section_results_data

# Shared module-level DataManager instance, created when this module is imported.
data_manager = DataManager()