switch to dual eval system

Files changed:
- about.py (+2 -1)
- app.py (+12 -8)
- evaluate.py (+16 -6)
- requirements.txt (+2 -1)
- utils.py (+4 -3)

about.py (CHANGED)

@@ -35,5 +35,6 @@ THROTTLE_MINUTES = 480 # minutes between submissions
 API = HfApi(token=TOKEN)
 organization="OpenADMET"
 submissions_repo = f'{organization}/openadmet-expansionrx-challenge-submissions' # private
-
+results_repo_test = f'{organization}/openadmet-expansionrx-challenge-results' # public
+results_repo_validation = f'{organization}/openadmet-expansionrx-challenge-results-validation' # public
 test_repo = f'{organization}/openadmet-expansionrx-challenge-test-data' # private
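
The two new `results_repo_*` constants are what make the dual-eval split visible to the rest of the app. As a quick smoke test (not part of the commit; the `HF_TOKEN` environment variable is an assumption, and `HfApi.repo_exists` requires a recent `huggingface_hub`), one might check that all four dataset repos resolve:

```python
# Minimal sketch: sanity-check that the repos named in about.py resolve.
# Assumes a token with read access in the HF_TOKEN environment variable.
import os
from huggingface_hub import HfApi

api = HfApi(token=os.environ.get("HF_TOKEN"))
organization = "OpenADMET"

repos = {
    "submissions (private)": f"{organization}/openadmet-expansionrx-challenge-submissions",
    "results, test split (public)": f"{organization}/openadmet-expansionrx-challenge-results",
    "results, validation split (public)": f"{organization}/openadmet-expansionrx-challenge-results-validation",
    "test data (private)": f"{organization}/openadmet-expansionrx-challenge-test-data",
}

for label, repo_id in repos.items():
    # All four are dataset repos, so pass repo_type="dataset".
    ok = api.repo_exists(repo_id, repo_type="dataset")
    print(f"{label}: {repo_id} -> {'found' if ok else 'missing or no access'}")
```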
    	
app.py (CHANGED)

@@ -12,13 +12,15 @@ from utils import (
 )
 from datasets import load_dataset
 import tempfile
-
+from loguru import logger
 from about import ENDPOINTS, LB_COLS, LB_AVG, LB_DTYPES


+
 ALL_EPS = ['Average'] + ENDPOINTS

 def build_leaderboard(df_results):
+    logger.info("Rebuilding leaderboard data...")
     per_ep = {}
     for ep in ALL_EPS:
         df = df_results[df_results["Endpoint"] == ep].copy()
@@ -45,7 +47,7 @@ def build_leaderboard(df_results):
         sorted_df = df.sort_values(by="mean_MAE", ascending=True, kind="stable")
         sorted_df = map_metric_to_stats(sorted_df)
         per_ep[ep] = sorted_df[LB_COLS]
-
+    logger.info("Finished rebuilding leaderboard data.")
     return per_ep

 # Initialize global dataframe
@@ -55,8 +57,8 @@ def gradio_interface():

     with gr.Blocks(title="OpenADMET ADMET Challenge", fill_height=False,
                    theme=gr.themes.Default(text_size=sizes.text_lg)) as demo:
-        timer = gr.Timer(
-        data_version = gr.State(0)  # Track data changes
+        timer = gr.Timer(30)  # Run every 30 seconds when page is focused
+        data_version = gr.State(0)  # Track data changes
         def update_current_dataframe(v):
             global current_df
             new_df = fetch_dataset_df()
@@ -174,7 +176,7 @@ def gradio_interface():

         📅 **Timeline**:
         - **September 16:** Challenge announcement
-        - **October 
+        - **October 10:** Second announcement and sample data release
         - **October 27:** Challenge starts
         - **October-November:** Online Q&A sessions and support via the Discord channel
         - **January 19, 2026:** Submission closes
@@ -244,7 +246,7 @@ def gradio_interface():
                     select_columns=LB_AVG,
                     search_columns=["user"],
                     render=True,
-                    every=
+                    every=30,
                 )
             # per-endpoint leaderboard
             for endpoint in ENDPOINTS:
@@ -255,7 +257,7 @@ def gradio_interface():
                         select_columns=LB_COLS,
                         search_columns=["user"],
                         render=True,
-                        every=
+                        every=30,
                     )
             # Auto-refresh
             def refresh_if_changed():
@@ -395,4 +397,6 @@ def gradio_interface():
     return demo

 if __name__ == "__main__":
-
+    logger.info("Starting Gradio app...")
+    gradio_interface().launch(ssr_mode=False)
+    logger.info("Gradio app closed.")
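
The refresh wiring above pairs a `gr.Timer(30)` with `every=30` on the leaderboard components so the page polls for new results. A minimal, self-contained sketch of that polling pattern (not the app's actual code: it swaps `gradio-leaderboard` for a plain `gr.Dataframe`, and `fetch_scores` stands in for `fetch_dataset_df`):

```python
# Sketch of the polling pattern: a gr.Timer drives a refresh callback, and a
# gr.State version counter is the hook for "only update when data changed".
import random
import gradio as gr
import pandas as pd

def fetch_scores() -> pd.DataFrame:
    # Stand-in for pulling the results dataset from the Hub.
    return pd.DataFrame({"user": ["alice", "bob"],
                         "mean_MAE": [round(random.random(), 3) for _ in range(2)]})

with gr.Blocks() as demo:
    timer = gr.Timer(30)        # fires every 30 s while the page is focused
    data_version = gr.State(0)  # bump to signal that the data changed
    table = gr.Dataframe(value=fetch_scores())

    def refresh(v):
        # The real app compares the freshly fetched dataframe against a cached
        # copy and only bumps the version when something actually changed.
        return fetch_scores(), v + 1

    timer.tick(refresh, inputs=data_version, outputs=[table, data_version])

if __name__ == "__main__":
    demo.launch()
```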
    	
evaluate.py (CHANGED)

@@ -5,9 +5,9 @@ from typing import Optional
 from about import (
     ENDPOINTS, API, 
     submissions_repo, 
-
+    results_repo_test, 
+    results_repo_validation, 
     test_repo,
-    multiplier_dict,
     THROTTLE_MINUTES
 )
 from utils import bootstrap_metrics, clip_and_log_transform, fetch_dataset_df
@@ -23,6 +23,7 @@ from pydantic import (
     field_validator, 
     ValidationError
 )
+from loguru import logger

 HF_USERNAME_RE = re.compile(r"^[A-Za-z0-9](?:[A-Za-z0-9-_]{1,38})$")
 def _safeify_username(username: str) -> str:
@@ -189,6 +190,15 @@ def submit_data(predictions_file: str,
     return "✅ Your submission has been received! Your scores will appear on the leaderboard shortly.", destination_csv

 def evaluate_data(filename: str) -> None:
+    # do test set first as a more stringent check of the submission w.r.t matching molecules
+    logger.info(f"Evaluating submission file {filename}")
+    # evaluate on the test set
+    _evaluate_data(filename, test_repo=test_repo, split_filename="data/expansion_data_test.csv", results_repo=results_repo_test)
+    # evaluate on the validation set
+    _evaluate_data(filename, test_repo=test_repo, split_filename="data/expansion_data_test_validation.csv", results_repo=results_repo_validation)
+    logger.info(f"Finished evaluating submission file {filename}")
+
+def _evaluate_data(filename: str, test_repo: str, split_filename: str, results_repo: str) -> None:

     # Load the submission csv
     try:
@@ -205,7 +215,7 @@ def evaluate_data(filename: str) -> None:
         test_path = hf_hub_download(
             repo_id=test_repo,
             repo_type="dataset",
-            filename=
+            filename=split_filename
         )
     except Exception as e:
         raise gr.Error(f"Failed to download test file: {e}")
@@ -277,9 +287,9 @@ def calculate_metrics(
     _check_required_columns(test_dataframe, "Test file", ["Molecule Name"] + ENDPOINTS)


-
-    if not (
-        raise gr.Error("
+    # 2) Check all Molecules in the test set are present in the predictions
+    if not (test_dataframe['Molecule Name']).isin(results_dataframe['Molecule Name']).all():
+        raise gr.Error("Some molecules in the test set are missing from the predictions file. Please ensure all molecules are included.")


     # 3) check no duplicated molecules in the predictions file
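
The check restored in `calculate_metrics` relies on pandas `isin(...).all()` to confirm every test-set molecule has a prediction. A standalone illustration with toy dataframes (the molecule names are made up; the real code raises `gr.Error` rather than printing):

```python
# Toy demonstration of the membership check added in calculate_metrics().
import pandas as pd

test_dataframe = pd.DataFrame({"Molecule Name": ["mol-1", "mol-2", "mol-3"]})
results_dataframe = pd.DataFrame({"Molecule Name": ["mol-1", "mol-3"]})  # mol-2 has no prediction

# True per row only if that test molecule appears in the predictions.
present = test_dataframe["Molecule Name"].isin(results_dataframe["Molecule Name"])
if not present.all():
    missing = test_dataframe.loc[~present, "Molecule Name"].tolist()
    print(f"Missing predictions for: {missing}")  # the app raises gr.Error here
```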
    	
requirements.txt (CHANGED)

@@ -4,4 +4,5 @@ huggingface_hub
 gradio-leaderboard
 plotly
 scipy
-scikit-learn
+scikit-learn
+loguru
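
`loguru` is the only genuinely new dependency; it needs no handler or formatter setup, which is why the diffs above can simply import the shared `logger` object:

```python
# loguru ships a sensible default sink, so logging is one import away.
from loguru import logger

logger.info("Evaluating submission file {}", "predictions.csv")
logger.warning("Last submission was {} minutes ago; throttling", 12)
```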
    	
utils.py (CHANGED)

@@ -3,8 +3,9 @@ import pandas as pd
 import numpy as np
 from typing import Tuple
 from datasets import load_dataset, Features, Value
-from about import 
+from about import results_repo_validation, results_repo_test
 from about import METRICS, STANDARD_COLS
+from loguru import logger

 def make_user_clickable(name: str):
     link =f'https://huggingface.co/{name}'
@@ -13,7 +14,7 @@ def make_tag_clickable(tag: str):
     return f'<a target="_blank" href="{tag}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">link</a>'

 def fetch_dataset_df():
-
+    logger.info("Fetching latest results dataset from Hugging Face Hub...")
     # Specify feature types to load results dataset
     metric_features = {
         f'mean_{m}': Value('float64') for m in METRICS
@@ -30,7 +31,7 @@ def fetch_dataset_df():
     }
     feature_schema = Features(metric_features | other_features)

-    dset = load_dataset(
+    dset = load_dataset(results_repo_validation, # change to results_repo_test for test set
                         split='train', 
                         features=feature_schema,
                         download_mode="force_redownload")
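
`fetch_dataset_df` now hard-codes `results_repo_validation` and leaves a comment to hand-edit when the test leaderboard should be shown. A hedged alternative sketch (not in the commit; the wrapper and its parameter names are illustrative) that makes the repo an argument instead:

```python
# Sketch: parameterize the results repo rather than editing the source.
from typing import Optional
from datasets import Features, load_dataset

def fetch_dataset_df(repo_id: str = "OpenADMET/openadmet-expansionrx-challenge-results-validation",
                     feature_schema: Optional[Features] = None):
    # force_redownload bypasses the local cache so the leaderboard always
    # reflects the most recently pushed results.
    dset = load_dataset(repo_id,
                        split="train",
                        features=feature_schema,
                        download_mode="force_redownload")
    return dset.to_pandas()

# fetch_dataset_df() -> validation leaderboard (current default behaviour)
# fetch_dataset_df("OpenADMET/openadmet-expansionrx-challenge-results") -> test split
```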

