Merge branch 'main' of hf.co:spaces/openadmet/OpenADMET_Challenge_Demo

Changed files:
- about.py +5 -3
- app.py +28 -11
- data/expansion_data_test_blinded.csv +0 -0
- evaluate.py +81 -44
- requirements.txt +2 -1
- utils.py +19 -14
about.py
CHANGED
@@ -31,8 +31,10 @@ multiplier_dict = {"LogD": 1,
 
 TOKEN = os.environ.get("HF_TOKEN")
 CACHE_PATH=os.getenv("HF_HOME", ".")
+THROTTLE_MINUTES = 480 # minutes between submissions
 API = HfApi(token=TOKEN)
 organization="OpenADMET"
-submissions_repo = f'{organization}/openadmet-challenge-submissions' # private
-
-
+submissions_repo = f'{organization}/openadmet-expansionrx-challenge-submissions' # private
+results_repo_test = f'{organization}/openadmet-expansionrx-challenge-results' # public
+results_repo_validation = f'{organization}/openadmet-expansionrx-challenge-results-validation' # public
+test_repo = f'{organization}/openadmet-expansionrx-challenge-test-data' # private
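For reference, `THROTTLE_MINUTES` feeds the per-user submission throttle enforced in `evaluate.py` below. A minimal sketch of the underlying timedelta arithmetic (stdlib only; the example timestamp is made up):

```python
import datetime

THROTTLE_MINUTES = 480  # value set in about.py above; 480 minutes = 8 hours

# Pretend the user last submitted 100 minutes ago (made-up timestamp).
now = datetime.datetime.now(datetime.timezone.utc)
last_time = now - datetime.timedelta(minutes=100)

delta = now - last_time
if delta < datetime.timedelta(minutes=THROTTLE_MINUTES):
    # same arithmetic as the throttle check added to evaluate.py
    remaining = THROTTLE_MINUTES - int(delta.total_seconds() // 60)
    print(f"Please wait {remaining} minutes before submitting again.")  # -> 380
```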
app.py
CHANGED
@@ -10,13 +10,17 @@ from utils import (
     fetch_dataset_df,
     map_metric_to_stats,
 )
-
+from datasets import load_dataset
+import tempfile
+from loguru import logger
 from about import ENDPOINTS, LB_COLS, LB_AVG, LB_DTYPES
 
 
+
 ALL_EPS = ['Average'] + ENDPOINTS
 
 def build_leaderboard(df_results):
+    logger.info("Rebuilding leaderboard data...")
     per_ep = {}
     for ep in ALL_EPS:
         df = df_results[df_results["Endpoint"] == ep].copy()
@@ -43,7 +47,7 @@ def build_leaderboard(df_results):
         sorted_df = df.sort_values(by="mean_MAE", ascending=True, kind="stable")
         sorted_df = map_metric_to_stats(sorted_df)
         per_ep[ep] = sorted_df[LB_COLS]
-
+    logger.info("Finished rebuilding leaderboard data.")
     return per_ep
 
 # Initialize global dataframe
@@ -53,8 +57,8 @@ def gradio_interface():
 
     with gr.Blocks(title="OpenADMET ADMET Challenge", fill_height=False,
                    theme=gr.themes.Default(text_size=sizes.text_lg)) as demo:
-        timer = gr.Timer(
-        data_version = gr.State(0)  # Track data changes
+        timer = gr.Timer(30)  # Run every 30 seconds when page is focused
+        data_version = gr.State(0)  # Track data changes
         def update_current_dataframe(v):
             global current_df
             new_df = fetch_dataset_df()
@@ -105,7 +109,7 @@
 
    **OpenADMET** aims to address these challenges through an open science effort to build predictive models of ADMET properties by characterizing the proteins and mechanisms
    that give rise to these properties through integrated structural biology, high throughput experimentation and integrative computational models.
-   Read more about our strategy to transform drug discovery on our [website](https://openadmet.
+   Read more about our strategy to transform drug discovery on our [website](https://openadmet.ghost.io/what-is-openadmet/).
 
    Critical to our mission is developing open datasets and running community blind challenges to assess the current state of the art in ADMET modeling.
    Building on the success of the recent [ASAP-Polaris-OpenADMET blind challenge](https://chemrxiv.org/engage/chemrxiv/article-details/68ac00d1728bf9025e22fe45) in computational methods for drug discovery,
@@ -128,7 +132,7 @@
    - Mouse Brain Protein Binding (**MBPB**): % Unbound
    - Mouse Gastrocnemius Muscle Binding (**MGMB**): % Unbound
 
-   Find more information about these endpoints on our [blog](https://openadmet.
+   Find more information about these endpoints on our [blog](https://openadmet.ghost.io/openadmet-expansionrx-blind-challenge/).
 
    ## ✅ How to Participate
    1. **Register**: Create an account with Hugging Face.
@@ -157,6 +161,9 @@
 
    You can download the training data from the [Hugging Face dataset](https://huggingface.co/datasets/openadmet/openadmet-challenge-train-data).
    The test set will remain blinded until the challenge submission deadline. You will be tasked with predicting the same set of ADMET endpoints for the test set molecules.
+
+   The training and blinded test set will also be made available on the [CDD Vault](https://www.collaborativedrug.com/). An account to access the CDD Vault can be requested by emailing **openadmet@omsf.io**.
+   Note that by joining the Vault, your account will be visible to other participants, so this option is **not recommended for those wishing to remain anonymous.**
 
    ## 📝 Evaluation
    The challenge will be judged based on the following criteria:
@@ -172,12 +179,16 @@
 
    📅 **Timeline**:
    - **September 16:** Challenge announcement
-   - **October
+   - **October 14:** Second announcement and sample data release
    - **October 27:** Challenge starts
    - **October-November:** Online Q&A sessions and support via the Discord channel
    - **January 19, 2026:** Submission closes
    - **January 26, 2026:** Winners announced
 
+   ## Acknowledgements
+   We gratefully acknowledge Jon Ainsley, Andrew Good, Elyse Bourque, Lakshminarayana Vogeti, Renato Skerlj, Tiansheng Wang, and Mark Ledeboer for generously
+   providing the Expansion Therapeutics dataset used in this challenge as an in-kind contribution.
+
    ---
 
    """
@@ -242,7 +253,7 @@
                 select_columns=LB_AVG,
                 search_columns=["user"],
                 render=True,
-                every=
+                every=30,
             )
             # per-endpoint leaderboard
             for endpoint in ENDPOINTS:
@@ -253,7 +264,7 @@
                 select_columns=LB_COLS,
                 search_columns=["user"],
                 render=True,
-                every=
+                every=30,
             )
             # Auto-refresh
             def refresh_if_changed():
@@ -346,11 +357,15 @@
                 Only your latest submission will be considered.
 
                 Download a CSV file with the compounds in the test set here:
+
+                **NOTE: Submission can sometimes take a few minutes to process**
+                **Please be patient and wait for the status message to update and your submission to reach the leaderboard.**
                 """
             )
+
             download_btn = gr.DownloadButton(
                 label="📥 Download Test Set Compounds",
-                value="data/expansion_data_test_blinded.csv",
+                value="./data/expansion_data_test_blinded.csv",
                 variant="secondary",
             )
         with gr.Column():
@@ -392,4 +407,6 @@ def gradio_interface():
     return demo
 
 if __name__ == "__main__":
-
+    logger.info("Starting Gradio app...")
+    gradio_interface().launch(ssr_mode=False)
+    logger.info("Gradio app closed.")
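The `timer = gr.Timer(30)` plus `every=30` changes above implement the periodic leaderboard refresh. A minimal standalone sketch of the same pattern, assuming a Gradio release that ships `gr.Timer` and its `tick` event (`fetch_number` is a hypothetical stand-in for `fetch_dataset_df`):

```python
import random
import gradio as gr

def fetch_number():
    # hypothetical stand-in for fetch_dataset_df(): return fresh data on each tick
    return random.randint(0, 100)

with gr.Blocks() as demo:
    timer = gr.Timer(30)  # fires every 30 seconds while the page has focus
    box = gr.Number(label="Live value")
    timer.tick(fetch_number, outputs=box)  # re-run and push the result on each tick

if __name__ == "__main__":
    demo.launch()
```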
data/expansion_data_test_blinded.csv
ADDED
The diff for this file is too large to render. See raw diff.
evaluate.py
CHANGED
@@ -5,11 +5,12 @@ from typing import Optional
 from about import (
     ENDPOINTS, API,
     submissions_repo,
-
+    results_repo_test,
+    results_repo_validation,
     test_repo,
-
+    THROTTLE_MINUTES
 )
-from utils import bootstrap_metrics,
+from utils import bootstrap_metrics, clip_and_log_transform, fetch_dataset_df
 from huggingface_hub import hf_hub_download
 import datetime
 import io
@@ -22,6 +23,7 @@ from pydantic import (
     field_validator,
     ValidationError
 )
+from loguru import logger
 
 HF_USERNAME_RE = re.compile(r"^[A-Za-z0-9](?:[A-Za-z0-9-_]{1,38})$")
 def _safeify_username(username: str) -> str:
@@ -107,6 +109,16 @@ def submit_data(predictions_file: str,
     if user_state is None:
         raise gr.Error("Username or alias is required for submission.")
 
+
+    # check the last time the user submitted
+    data = fetch_dataset_df()
+    if not data[data['user'] == user_state].empty:
+        last_time = data[data['user'] == user_state]['submission time'].max()
+        delta = datetime.datetime.now(datetime.timezone.utc) - last_time.to_pydatetime()
+        if delta < datetime.timedelta(minutes=THROTTLE_MINUTES):
+            raise gr.Error(f"You have submitted within the last {THROTTLE_MINUTES} minutes. Please wait {THROTTLE_MINUTES - int(delta.total_seconds() // 60)} minutes before submitting again.")
+
+
     file_path = Path(predictions_file).resolve()
     if not file_path.exists():
         raise gr.Error("Uploaded file object does not have a valid file path.")
@@ -178,6 +190,15 @@
     return "✅ Your submission has been received! Your scores will appear on the leaderboard shortly.", destination_csv
 
 def evaluate_data(filename: str) -> None:
+    # do test set first as a more stringent check of the submission w.r.t. matching molecules
+    logger.info(f"Evaluating submission file {filename}")
+    # evaluate on the test set
+    _evaluate_data(filename, test_repo=test_repo, split_filename="data/expansion_data_test.csv", results_repo=results_repo_test)
+    # evaluate on the validation set
+    _evaluate_data(filename, test_repo=test_repo, split_filename="data/expansion_data_test_validation.csv", results_repo=results_repo_validation)
+    logger.info(f"Finished evaluating submission file {filename}")
+
+def _evaluate_data(filename: str, test_repo: str, split_filename: str, results_repo: str) -> None:
 
     # Load the submission csv
     try:
@@ -194,7 +215,7 @@ def evaluate_data(filename: str) -> None:
         test_path = hf_hub_download(
             repo_id=test_repo,
             repo_type="dataset",
-            filename=
+            filename=split_filename
         )
     except Exception as e:
         raise gr.Error(f"Failed to download test file: {e}")
@@ -252,6 +273,7 @@
     Path(tmp_name).unlink()
 
 
+
 def calculate_metrics(
     results_dataframe: pd.DataFrame,
     test_dataframe: pd.DataFrame
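The new `_evaluate_data` helper fetches the blinded labels with `hf_hub_download`. A minimal sketch of that call pattern, using the repo and filename named in this diff (reading the token from `HF_TOKEN` is an assumption; a token is only needed because the repo is private):

```python
import os
from huggingface_hub import hf_hub_download

# Downloads one file from a (private) dataset repo into the local HF cache
# and returns the cached path; repo_id and filename are taken from this diff.
test_path = hf_hub_download(
    repo_id="OpenADMET/openadmet-expansionrx-challenge-test-data",
    repo_type="dataset",
    filename="data/expansion_data_test.csv",
    token=os.environ.get("HF_TOKEN"),  # assumption: token supplied via env var
)
print(test_path)
```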
@@ -263,60 +285,75 @@
     # 1) Check all columns are present
     _check_required_columns(results_dataframe, "Results file", ["Molecule Name"] + ENDPOINTS)
     _check_required_columns(test_dataframe, "Test file", ["Molecule Name"] + ENDPOINTS)
-
-
-
-
-
+
+
+    # 2) Check all Molecules in the test set are present in the predictions
+    if not (test_dataframe['Molecule Name']).isin(results_dataframe['Molecule Name']).all():
+        raise gr.Error("Some molecules in the test set are missing from the predictions file. Please ensure all molecules are included.")
+
+
+    # 3) check no duplicated molecules in the predictions file
+    if results_dataframe['Molecule Name'].duplicated().any():
+        raise gr.Error("The predictions file contains duplicated molecules. Please ensure each molecule is only listed once.")
+
+    # 4) Merge dataframes to ensure alignment
+    merged_df = results_dataframe.merge(
+        test_dataframe,
+        on="Molecule Name",
+        suffixes=('_pred', '_true'),
+        how="inner"
+    )
+    merged_df = merged_df.sort_values("Molecule Name")
+
+    # 5) loop over endpoints
 
-    # Compute leaderboard DataFrame
     final_cols = ["MAE", "RAE", "R2", "Spearman R", "Kendall's Tau"]
     all_endpoint_results = []
-
-
-
-
-
-
-
-
-
-
+
+    for ept in ENDPOINTS:
+        pred_col = f"{ept}_pred"
+        true_col = f"{ept}_true"
+
+        # cast to numeric, coerce errors to NaN
+        merged_df[pred_col] = pd.to_numeric(merged_df[pred_col], errors="coerce")
+        merged_df[true_col] = pd.to_numeric(merged_df[true_col], errors="coerce")
+
+        if merged_df[pred_col].isnull().all():
+            raise gr.Error(f"All predictions are missing for endpoint {ept}. Please provide valid predictions.")
 
-    #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        y_true = merged[true_col].to_numpy()
-        # Calculate dataframe with the metrics for 1000 bootstraps
-        bootstrap_df = bootstrap_metrics(y_pred, y_true, measurement, n_bootstrap_samples=1000)
+        # subset and drop NaNs
+        subset = merged_df[[pred_col, true_col]].dropna()
+        if subset.empty:
+            raise gr.Error(f"No valid data available for endpoint {ept} after removing NaNs.")
+
+        # extract numpy arrays
+        y_pred = subset[pred_col].to_numpy()
+        y_true = subset[true_col].to_numpy()
+
+        # apply log10 + 1 transform except for logD
+        if ept.lower() not in ['logd']:
+            y_true_log = clip_and_log_transform(y_true)
+            y_pred_log = clip_and_log_transform(y_pred)
+
+        else:
+            y_true_log = y_true
+            y_pred_log = y_pred
+
+        # calculate metrics with bootstrapping
+        bootstrap_df = bootstrap_metrics(y_pred_log, y_true_log, ept, n_bootstrap_samples=1000)
         df_endpoint = bootstrap_df.pivot_table(
             index=["Endpoint"],
             columns="Metric",
             values="Value",
             aggfunc=["mean", "std"]
         ).reset_index()
+
         # Get a df with columns 'mean_MAE', 'std_MAE', ...
         df_endpoint.columns = [
             f'{i}_{j}' if i != '' else j for i, j in df_endpoint.columns
         ]
-
+
+        df_endpoint.rename(columns={'Endpoint_': 'Endpoint'}, inplace=True)
         all_endpoint_results.append(df_endpoint)
 
     df_results = pd.concat(all_endpoint_results, ignore_index=True)
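The pivot-and-flatten step above turns the long bootstrap table into one row per endpoint with `mean_*`/`std_*` columns, and explains the `Endpoint_` rename. A small self-contained demo with toy values:

```python
import pandas as pd

# long-format bootstrap results: one row per (endpoint, metric, bootstrap sample)
bootstrap_df = pd.DataFrame({
    "Endpoint": ["LogD"] * 4,
    "Metric":   ["MAE", "MAE", "R2", "R2"],
    "Value":    [0.50, 0.54, 0.70, 0.66],
})

df_endpoint = bootstrap_df.pivot_table(
    index=["Endpoint"],
    columns="Metric",
    values="Value",
    aggfunc=["mean", "std"],
).reset_index()

# flatten the (aggfunc, metric) MultiIndex into 'mean_MAE', 'std_MAE', ...
df_endpoint.columns = [f'{i}_{j}' if i != '' else j for i, j in df_endpoint.columns]
# ('Endpoint', '') flattened to 'Endpoint_', hence the rename in the diff
df_endpoint.rename(columns={'Endpoint_': 'Endpoint'}, inplace=True)

print(df_endpoint.columns.tolist())
# ['Endpoint', 'mean_MAE', 'mean_R2', 'std_MAE', 'std_R2']
```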
requirements.txt
CHANGED
@@ -4,4 +4,5 @@ huggingface_hub
 gradio-leaderboard
 plotly
 scipy
-scikit-learn
+scikit-learn
+loguru
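`loguru` is the only new dependency; it is used as a zero-configuration, drop-in logger throughout `app.py`, `evaluate.py`, and `utils.py`:

```python
from loguru import logger

# no handler setup required: messages go to stderr with timestamp and level
logger.info("Fetching latest results dataset from Hugging Face Hub...")
```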
utils.py
CHANGED
@@ -3,8 +3,9 @@ import pandas as pd
 import numpy as np
 from typing import Tuple
 from datasets import load_dataset, Features, Value
-from about import
+from about import results_repo_validation, results_repo_test
 from about import METRICS, STANDARD_COLS
+from loguru import logger
 
 def make_user_clickable(name: str):
     link =f'https://huggingface.co/{name}'
@@ -13,7 +14,7 @@ def make_tag_clickable(tag: str):
     return f'<a target="_blank" href="{tag}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">link</a>'
 
 def fetch_dataset_df():
-
+    logger.info("Fetching latest results dataset from Hugging Face Hub...")
     # Specify feature types to load results dataset
     metric_features = {
         f'mean_{m}': Value('float64') for m in METRICS
@@ -30,7 +31,7 @@ def fetch_dataset_df():
     }
     feature_schema = Features(metric_features | other_features)
 
-    dset = load_dataset(
+    dset = load_dataset(results_repo_validation, # change to results_repo_test for test set
                         split='train',
                         features=feature_schema,
                         download_mode="force_redownload")
@@ -57,11 +58,19 @@ def fetch_dataset_df():
     latest.rename(columns={"submission_time": "submission time"}, inplace=True)
     return latest
 
-
-
-
-
-
+
+def clip_and_log_transform(y: np.ndarray):
+    """
+    Clip to a detection limit and transform to log10 scale.
+
+    Parameters
+    ----------
+    y : np.ndarray
+        The array to be clipped and transformed.
+    """
+    y = np.clip(y, a_min=0, a_max=None)
+    return np.log10(y + 1)
+
 
 def bootstrap_sampling(size: int, n_samples: int) -> np.ndarray:
     """
@@ -87,14 +96,12 @@ def metrics_per_ep(pred: np.ndarray,
                    true: np.ndarray
                    )->Tuple[float, float, float, float]:
     """Predict evaluation metrics for a single sample
-
     Parameters
     ----------
     pred : np.ndarray
         Array with predictions
     true : np.ndarray
         Array with actual values
-
     Returns
     -------
     Tuple[float, float, float, float]
@@ -119,12 +126,11 @@ def metrics_per_ep(pred: np.ndarray,
     return mae, rae, r2, spr, ktau
 
 def bootstrap_metrics(pred: np.ndarray,
-                     true: np.ndarray,
-                     endpoint: str,
+                      true: np.ndarray,
+                      endpoint: str,
                       n_bootstrap_samples=1000
                       )->pd.DataFrame:
     """Calculate bootstrap metrics given predicted and true values
-
     Parameters
     ----------
     pred : np.ndarray
@@ -135,7 +141,6 @@ def bootstrap_metrics(pred: np.ndarray,
         String with endpoint
     n_bootstrap_samples : int, optional
         Size of bootstrap sample, by default 1000
-
     Returns
     -------
     pd.DataFrame
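The new `clip_and_log_transform` underlies the log-scale scoring of every endpoint except LogD. A quick worked example of its behaviour (illustrative values only):

```python
import numpy as np

def clip_and_log_transform(y: np.ndarray):
    # clip negatives to 0, then log10(y + 1) so that 0 maps to 0
    y = np.clip(y, a_min=0, a_max=None)
    return np.log10(y + 1)

y = np.array([-5.0, 0.0, 9.0, 99.0])
print(clip_and_log_transform(y))  # [0. 0. 1. 2.]
```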