Spaces:
Running
Running
| """ | |
| Data loading functionality for the Tox21 leaderboard. | |
| Handles loading and processing results from HuggingFace datasets. | |
| """ | |
| import pandas as pd | |
| from datasets import load_dataset, Dataset | |
| from config.settings import RESULTS_DATASET, TEST_DATASET, HF_TOKEN | |
| from config.tasks import TOX21_TASKS | |
def load_leaderboard_data() -> Dataset:
    """
    Fetch the results dataset and return its 'test' split.

    Progress and a preview of the first entry (when the split is not empty)
    are printed to stdout along the way.

    Returns:
        The "test" split of the dataset named by RESULTS_DATASET.

    Raises:
        ValueError: If the loaded dataset has no "test" split.
    """
    print(f"Loading dataset: {RESULTS_DATASET}")
    token_status = "Yes" if HF_TOKEN else "No"
    print(f"Using HF token: {token_status}")

    # No token is passed here: authentication is expected to have been set
    # globally via login in settings.
    all_splits = load_dataset(RESULTS_DATASET)
    print(f"Dataset loaded successfully. Keys: {all_splits.keys()}")

    # Results live in the test split; bail out early when it is missing.
    if "test" not in all_splits:
        raise ValueError("Dataset does not contain a 'test' split.")

    results_data = all_splits["test"]
    num_entries = len(results_data)
    print(f"Test split has {num_entries} entries")

    # Preview the first row only when there is one to show.
    if num_entries > 0:
        print(f"First entry keys: {results_data[0].keys()}")
        print(f"First entry: {results_data[0]}")

    return results_data
def load_test_dataset() -> tuple[list[str], list[dict[str, float]]]:
    """
    Load the SMILES strings and per-task labels of the test split.

    Returns:
        A ``(smiles, labels)`` pair. ``smiles`` is the "smiles" column of the
        test split of TEST_DATASET as a list. ``labels`` holds one dict per
        row, in row order, mapping each task key from TOX21_TASKS to that
        row's value for the task.
    """
    # Get test smiles and labels (token already set globally via login in settings)
    dset = load_dataset(TEST_DATASET, split="test")
    tasks = [t.key for t in TOX21_TASKS]
    smiles = list(dset["smiles"])

    # Iterate the dataset directly instead of first copying every row into a
    # temporary list; each row is reduced to just the task columns.
    labels = [{task: sample[task] for task in tasks} for sample in dset]

    print("Loaded test dataset")
    return smiles, labels