hmacdope committed
Commit cbd8177 · 1 Parent(s): dcf55ea

switch to dual eval system

Files changed (5)
  1. about.py +2 -1
  2. app.py +12 -8
  3. evaluate.py +16 -6
  4. requirements.txt +2 -1
  5. utils.py +4 -3
about.py CHANGED
@@ -35,5 +35,6 @@ THROTTLE_MINUTES = 480 # minutes between submissions
 API = HfApi(token=TOKEN)
 organization="OpenADMET"
 submissions_repo = f'{organization}/openadmet-expansionrx-challenge-submissions' # private
-results_repo = f'{organization}/openadmet-expansionrx-challenge-results' # public
+results_repo_test = f'{organization}/openadmet-expansionrx-challenge-results' # public
+results_repo_validation = f'{organization}/openadmet-expansionrx-challenge-results-validation' # public
 test_repo = f'{organization}/openadmet-expansionrx-challenge-test-data' # private
app.py CHANGED
@@ -12,13 +12,15 @@ from utils import (
 )
 from datasets import load_dataset
 import tempfile
-
+from loguru import logger
 from about import ENDPOINTS, LB_COLS, LB_AVG, LB_DTYPES


+
 ALL_EPS = ['Average'] + ENDPOINTS

 def build_leaderboard(df_results):
+    logger.info("Rebuilding leaderboard data...")
     per_ep = {}
     for ep in ALL_EPS:
         df = df_results[df_results["Endpoint"] == ep].copy()
@@ -45,7 +47,7 @@ def build_leaderboard(df_results):
         sorted_df = df.sort_values(by="mean_MAE", ascending=True, kind="stable")
         sorted_df = map_metric_to_stats(sorted_df)
         per_ep[ep] = sorted_df[LB_COLS]
-
+    logger.info("Finished rebuilding leaderboard data.")
     return per_ep

 # Initialize global dataframe
@@ -55,8 +57,8 @@ def gradio_interface():

     with gr.Blocks(title="OpenADMET ADMET Challenge", fill_height=False,
                    theme=gr.themes.Default(text_size=sizes.text_lg)) as demo:
-        timer = gr.Timer(15) # Run every 15 seconds when page is focused
-        data_version = gr.State(0) # Track data changes
+        timer = gr.Timer(30) # Run every 30 seconds when page is focused
+        data_version = gr.State(0) # Track data changes
         def update_current_dataframe(v):
             global current_df
             new_df = fetch_dataset_df()
@@ -174,7 +176,7 @@ def gradio_interface():

     📅 **Timeline**:
     - **September 16:** Challenge announcement
-    - **October XX:** Second announcement and sample data release
+    - **October 10:** Second announcement and sample data release
     - **October 27:** Challenge starts
     - **October-November:** Online Q&A sessions and support via the Discord channel
     - **January 19, 2026:** Submission closes
@@ -244,7 +246,7 @@ def gradio_interface():
         select_columns=LB_AVG,
         search_columns=["user"],
         render=True,
-        every=15,
+        every=30,
     )
     # per-endpoint leaderboard
     for endpoint in ENDPOINTS:
@@ -255,7 +257,7 @@ def gradio_interface():
         select_columns=LB_COLS,
         search_columns=["user"],
         render=True,
-        every=15,
+        every=30,
     )
     # Auto-refresh
     def refresh_if_changed():
@@ -395,4 +397,6 @@ def gradio_interface():
     return demo

 if __name__ == "__main__":
-    gradio_interface().launch(ssr_mode=False)
+    logger.info("Starting Gradio app...")
+    gradio_interface().launch(ssr_mode=False)
+    logger.info("Gradio app closed.")
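Note on the refresh changes above: the app polls for new results with the gr.Timer / gr.State pattern, now on a 30-second interval to match the leaderboards' every=30. Below is a minimal, self-contained sketch of that pattern, assuming Gradio with gr.Timer support; the component names and the stand-in fetch function are illustrative, not taken from this repo.

import gradio as gr

def bump_version(version):
    # stand-in for fetch_dataset_df(): pretend new results arrived and bump the counter
    new_version = version + 1
    return new_version, f"data version: {new_version}"

with gr.Blocks() as demo:
    timer = gr.Timer(30)        # fires every 30 seconds while the page is focused
    data_version = gr.State(0)  # tracks data changes between ticks
    status = gr.Markdown("waiting for first refresh...")
    # on each tick, re-fetch and update only the components that depend on the data
    timer.tick(bump_version, inputs=data_version, outputs=[data_version, status])

if __name__ == "__main__":
    demo.launch()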
evaluate.py CHANGED
@@ -5,9 +5,9 @@ from typing import Optional
 from about import (
     ENDPOINTS, API,
     submissions_repo,
-    results_repo,
+    results_repo_test,
+    results_repo_validation,
     test_repo,
-    multiplier_dict,
     THROTTLE_MINUTES
 )
 from utils import bootstrap_metrics, clip_and_log_transform, fetch_dataset_df
@@ -23,6 +23,7 @@ from pydantic import (
     field_validator,
     ValidationError
 )
+from loguru import logger

 HF_USERNAME_RE = re.compile(r"^[A-Za-z0-9](?:[A-Za-z0-9-_]{1,38})$")
 def _safeify_username(username: str) -> str:
@@ -189,6 +190,15 @@ def submit_data(predictions_file: str,
     return "✅ Your submission has been received! Your scores will appear on the leaderboard shortly.", destination_csv

 def evaluate_data(filename: str) -> None:
+    # do test set first as a more stringent check of the submission w.r.t matching molecules
+    logger.info(f"Evaluating submission file {filename}")
+    # evaluate on the test set
+    _evaluate_data(filename, test_repo=test_repo, split_filename="data/expansion_data_test.csv", results_repo=results_repo_test)
+    # evaluate on the validation set
+    _evaluate_data(filename, test_repo=test_repo, split_filename="data/expansion_data_test_validation.csv", results_repo=results_repo_validation)
+    logger.info(f"Finished evaluating submission file {filename}")
+
+def _evaluate_data(filename: str, test_repo: str, split_filename: str, results_repo: str) -> None:

     # Load the submission csv
     try:
@@ -205,7 +215,7 @@ def evaluate_data(filename: str) -> None:
         test_path = hf_hub_download(
             repo_id=test_repo,
             repo_type="dataset",
-            filename="data/expansion_data_test.csv",
+            filename=split_filename
         )
     except Exception as e:
         raise gr.Error(f"Failed to download test file: {e}")
@@ -277,9 +287,9 @@ def calculate_metrics(
     _check_required_columns(test_dataframe, "Test file", ["Molecule Name"] + ENDPOINTS)


-    # 2) Check all Molecules in the test set are present in the predictions
-    if not (results_dataframe['Molecule Name'].isin(test_dataframe['Molecule Name'])).all():
-        raise gr.Error("The predictions file is missing some molecules present in the test set. Please ensure all molecules are included.")
+    # 2) Check all Molecules in the test set are present in the predictions
+    if not (test_dataframe['Molecule Name']).isin(results_dataframe['Molecule Name']).all():
+        raise gr.Error("Some molecules in the test set are missing from the predictions file. Please ensure all molecules are included.")


     # 3) check no duplicated molecules in the predictions file
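Note on the membership-check hunk above: the direction of the isin() test is flipped. The old check asked whether every predicted molecule appears in the test set, which passes even when predictions are missing; the new check asks whether every test-set molecule is covered by the predictions, matching the updated error message. A small illustrative example with toy data (not from the challenge):

import pandas as pd

test_dataframe = pd.DataFrame({"Molecule Name": ["mol-A", "mol-B", "mol-C"]})
results_dataframe = pd.DataFrame({"Molecule Name": ["mol-A", "mol-B"]})  # mol-C missing

# old direction: every prediction is a known test molecule -> True, misses the gap
print(results_dataframe["Molecule Name"].isin(test_dataframe["Molecule Name"]).all())  # True

# new direction: every test molecule has a prediction -> False, flags the missing mol-C
print(test_dataframe["Molecule Name"].isin(results_dataframe["Molecule Name"]).all())  # False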
requirements.txt CHANGED
@@ -4,4 +4,5 @@ huggingface_hub
 gradio-leaderboard
 plotly
 scipy
-scikit-learn
+scikit-learn
+loguru
utils.py CHANGED
@@ -3,8 +3,9 @@ import pandas as pd
 import numpy as np
 from typing import Tuple
 from datasets import load_dataset, Features, Value
-from about import results_repo
+from about import results_repo_validation, results_repo_test
 from about import METRICS, STANDARD_COLS
+from loguru import logger

 def make_user_clickable(name: str):
     link =f'https://huggingface.co/{name}'
@@ -13,7 +14,7 @@ def make_tag_clickable(tag: str):
     return f'<a target="_blank" href="{tag}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">link</a>'

 def fetch_dataset_df():
-
+    logger.info("Fetching latest results dataset from Hugging Face Hub...")
     # Specify feature types to load results dataset
     metric_features = {
         f'mean_{m}': Value('float64') for m in METRICS
@@ -30,7 +31,7 @@ def fetch_dataset_df():
     }
     feature_schema = Features(metric_features | other_features)

-    dset = load_dataset(results_repo,
+    dset = load_dataset(results_repo_validation, # change to results_repo_test for test set
                         split='train',
                         features=feature_schema,
                         download_mode="force_redownload")
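Note on the last hunk: the leaderboard now reads from results_repo_validation, with an in-line comment suggesting a manual edit to switch to results_repo_test. If that switch were wanted without editing code, one possible approach (a hypothetical helper, not part of this commit) is selecting the repo from an environment variable:

import os
from about import results_repo_validation, results_repo_test

def select_results_repo() -> str:
    # hypothetical: choose which results split the leaderboard reads via LEADERBOARD_SPLIT
    split = os.environ.get("LEADERBOARD_SPLIT", "validation").lower()
    return results_repo_test if split == "test" else results_repo_validation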