import hashlib
import io
import json
import os
import zipfile
from datetime import datetime, timezone
from typing import Any

import pandas as pd
from huggingface_hub import HfApi, hf_hub_download

API = HfApi()

SUBMISSIONS_REPO = os.environ.get("SUBMISSIONS_REPO", "your-org/the-well-submissions")
RESULTS_REPO = os.environ.get("RESULTS_REPO", "your-org/the-well-results")
HF_TOKEN = os.environ.get("HF_TOKEN")
MAX_SUBMISSION_MB = int(os.environ.get("MAX_SUBMISSION_MB", "200"))

EXPECTED_TASK = "turbulent_radiative_layer_2D_1step"
RESULT_COLUMNS = [
    "rank",
    "model_name",
    "team_name",
    "avg_vrmse",
    "density_vrmse",
    "pressure_vrmse",
    "velocity_x_vrmse",
    "velocity_y_vrmse",
    "submitted_at",
    "status",
]


def _utc_now_iso() -> str:
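    """Return the current UTC time as an ISO 8601 string, truncated to whole seconds."""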
    return datetime.now(timezone.utc).replace(microsecond=0).isoformat()


def _safe_slug(value: str) -> str:
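    """Sanitize a user-provided name so it is safe to use in repository paths (80 chars max)."""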
    cleaned = "".join(ch if ch.isalnum() or ch in "-_." else "_" for ch in value.strip())
    return cleaned[:80] or "submission"


def _read_submission_manifest(zip_bytes: bytes) -> dict[str, Any]:
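    """Validate the archive layout and required manifest fields, then return the manifest."""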
    with zipfile.ZipFile(io.BytesIO(zip_bytes), "r") as zf:
        names = sorted(zf.namelist())
        if names != ["predictions.npz", "submission.json"]:
            raise ValueError(
                "The zip must contain exactly two root files: submission.json and predictions.npz."
            )
        with zf.open("submission.json") as f:
            manifest = json.load(f)
    if not isinstance(manifest, dict):
        raise ValueError("submission.json must contain a JSON object.")
    if manifest.get("task_name") != EXPECTED_TASK:
        raise ValueError(f"task_name must be '{EXPECTED_TASK}'.")
    if not str(manifest.get("model_name", "")).strip():
        raise ValueError("submission.json must include a non-empty model_name.")
    if not str(manifest.get("team_name", "")).strip():
        raise ValueError("submission.json must include a non-empty team_name.")
    return manifest


def submit_zip(zip_file) -> str:
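    """Validate an uploaded submission zip, push it to the submissions dataset, and return a status message."""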
    if zip_file is None:
        return "Please upload a submission `.zip` file."

    # Gradio may pass either a file path string or a tempfile-like object with a .name attribute.
    local_path = zip_file if isinstance(zip_file, str) else zip_file.name
    if not local_path.lower().endswith(".zip"):
        return "Invalid file type. Please upload a `.zip` file."

    file_size = os.path.getsize(local_path)
    if file_size > MAX_SUBMISSION_MB * 1024 * 1024:
        return f"Submission too large. Limit is {MAX_SUBMISSION_MB} MB."

    with open(local_path, "rb") as f:
        zip_bytes = f.read()

    try:
        manifest = _read_submission_manifest(zip_bytes)
    except Exception as exc:
        return f"Submission rejected: {exc}"

    submitted_at = _utc_now_iso()
    base_name = _safe_slug(manifest["model_name"])
    submission_id = f"{base_name}_{submitted_at}".replace(":", "-")
    sha256 = hashlib.sha256(zip_bytes).hexdigest()

    package_path = f"packages/{submission_id}.zip"
    metadata_path = f"metadata/{submission_id}.json"

    metadata = {
        "submission_id": submission_id,
        "task_name": manifest["task_name"],
        "model_name": manifest["model_name"],
        "team_name": manifest["team_name"],
        "method_name": manifest.get("method_name", ""),
        "submitted_at": submitted_at,
        "package_path": package_path,
        "sha256": sha256,
        "status": "pending",
    }

    # Store the raw package plus a small metadata record (status "pending")
    # for the private evaluator to process.
    API.upload_file(
        path_or_fileobj=zip_bytes,
        path_in_repo=package_path,
        repo_id=SUBMISSIONS_REPO,
        repo_type="dataset",
        token=HF_TOKEN,
    )
    API.upload_file(
        path_or_fileobj=json.dumps(metadata, indent=2).encode("utf-8"),
        path_in_repo=metadata_path,
        repo_id=SUBMISSIONS_REPO,
        repo_type="dataset",
        token=HF_TOKEN,
    )

    return (
        f"Submission received: `{submission_id}`\n\n"
        "It was uploaded to the submissions dataset and will appear on the leaderboard "
        "after the private evaluator processes it."
    )


def _download_json_records(repo_id: str, prefix: str) -> list[dict[str, Any]]:
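    """Download and parse every JSON file under ``prefix`` in the given dataset repo."""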
    files = [
        path
        for path in API.list_repo_files(repo_id=repo_id, repo_type="dataset", token=HF_TOKEN)
        if path.startswith(prefix) and path.endswith(".json")
    ]
    records = []
    for path in files:
        local_path = hf_hub_download(
            repo_id=repo_id,
            repo_type="dataset",
            filename=path,
            token=HF_TOKEN,
        )
        with open(local_path, "r", encoding="utf-8") as f:
            records.append(json.load(f))
    return records


def load_results_dataframe() -> pd.DataFrame:
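    """Build the leaderboard table from succeeded results, ranked by ascending avg_vrmse."""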
    try:
        records = _download_json_records(RESULTS_REPO, "results/")
    except Exception:
        return pd.DataFrame(columns=RESULT_COLUMNS)

    if not records:
        return pd.DataFrame(columns=RESULT_COLUMNS)

    df = pd.DataFrame.from_records(records)
    if "status" in df.columns:
        df = df[df["status"] == "succeeded"].copy()
    if df.empty:
        return pd.DataFrame(columns=RESULT_COLUMNS)

    metric_columns = [
        "avg_vrmse",
        "density_vrmse",
        "pressure_vrmse",
        "velocity_x_vrmse",
        "velocity_y_vrmse",
    ]
    for column in metric_columns:
        # Tolerate partial result records so the conversion and sort below cannot KeyError.
        if column not in df.columns:
            df[column] = None
        df[column] = pd.to_numeric(df[column], errors="coerce")

    df = df.sort_values("avg_vrmse", ascending=True).reset_index(drop=True)
    df.insert(0, "rank", range(1, len(df) + 1))

    for column in RESULT_COLUMNS:
        if column not in df.columns:
            df[column] = None
    return df[RESULT_COLUMNS]