Harshit Ghosh committed on
Commit 65e6c8d · 1 Parent(s): 8b1df1a

some correction

Files changed (2):
  1. README.md +24 -0
  2. app.py +99 -23
README.md CHANGED
@@ -22,6 +22,9 @@ Model weights and related inference artifacts are hosted on Hugging Face:
 
 - [Hugging Face Model Repository](https://huggingface.co/HarshCode/eff_b4_brain)
 
+When model files are not present locally (for example on Render), the app can
+download required artifacts from this Hugging Face repository at runtime.
+
 ## Detailed Performance Report
 
 Detailed performance and B4-specific analysis are documented separately in:
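
The runtime-download behavior described in the added lines is easy to sanity-check before deploying. A minimal sketch, assuming `huggingface_hub` is installed locally; `calibration_params.json` is one of the artifacts the app itself lists as required:

```python
# Smoke test for repo access: fetch one known artifact from the repo the
# README links to. Fails fast if the repo id or token is wrong.
import os

from huggingface_hub import hf_hub_download

path = hf_hub_download(
    repo_id="HarshCode/eff_b4_brain",
    filename="calibration_params.json",  # always required by the app
    repo_type="model",
    token=os.environ.get("ICH_HF_TOKEN") or None,  # needed only for private repos
)
print("downloaded to:", path)
```
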
@@ -72,6 +75,8 @@ Important variables in `.env`:
 - `ICH_FOLD_SELECTION`: `ensemble`, `best`, or fold id (`0` to `4`)
 - `ICH_LOCAL_MODE`: enables local directory scanning mode
 - `ICH_LOG_LEVEL`: `DEBUG`, `INFO`, `WARNING`, `ERROR`
+- `ICH_HF_MODEL_REPO`: Hugging Face model repo used for runtime artifact download
+- `ICH_HF_TOKEN`: optional token (required only if the Hugging Face repo is private)
 
 ## Run the Application
 
@@ -85,6 +90,25 @@ Open in browser:
 http://127.0.0.1:7860
 ```
 
+## Deploy on Render
+
+This repository includes `render.yaml` for Render deployment.
+
+1. Push the repository to GitHub.
+2. In Render, create a new Blueprint/Web Service from the repository.
+3. Ensure these environment variables are set in Render:
+   - `ICH_HF_MODEL_REPO=HarshCode/eff_b4_brain`
+   - `ICH_HF_TOKEN` (only if repo is private)
+   - `ICH_SECRET_KEY` (recommended custom value)
+4. Deploy. The service will start with:
+
+```bash
+gunicorn app:app --bind 0.0.0.0:$PORT --workers 1 --timeout 180
+```
+
+Note: first startup can take longer because model artifacts may be downloaded
+from Hugging Face.
+
 ## Basic Usage
 
 1. Go to the upload page.
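
The gunicorn command binds to the `$PORT` value Render injects; the app.py change in this same commit (see the `APP_PORT` hunk below) makes the Flask dev entry point honor the same variable. A minimal sketch of that resolution order, assuming `_env_int` behaves as a guarded integer parse (`resolve_port` is a hypothetical name, not from the repository):

```python
import os

def resolve_port(default: int = 7860) -> int:
    # Mirrors the commit's APP_PORT logic: ICH_APP_PORT wins, then Render's
    # injected PORT, then the local default. The real app routes this
    # through its _env_int helper with minimum=1.
    for var in ("ICH_APP_PORT", "PORT"):
        raw = os.environ.get(var, "").strip()
        if raw.isdigit() and int(raw) >= 1:
            return int(raw)
    return default
```
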
 
app.py CHANGED
@@ -27,7 +27,7 @@ import time
 import uuid
 import zipfile
 from collections import Counter
-from dataclasses import dataclass, field
+from dataclasses import dataclass
 from pathlib import Path
 from typing import Any
 
@@ -36,23 +36,30 @@ try:
 except Exception:
     load_dotenv = None
 
+hf_hub_download: Any = None
 try:
-    import blackbox_recorder as bbr
+    import huggingface_hub
+    hf_hub_download = getattr(huggingface_hub, "hf_hub_download", None)
+except Exception:
+    hf_hub_download = None
+
+try:
+    import blackbox_recorder as bbr  # type: ignore[import-untyped]
 except Exception:
     class _NoopRecorder:
-        def configure(self, **_kwargs):
+        def configure(self, **_kwargs: Any) -> None:
             return None
 
-        def start(self):
+        def start(self) -> None:
             return None
 
-        def stop(self):
+        def stop(self) -> None:
             return None
 
-        def save_report(self, _path: str):
+        def save_report(self, _path: str) -> None:
             return None
 
-        def save_json(self, _path: str):
+        def save_json(self, _path: str) -> None:
             return None
 
     bbr = _NoopRecorder()
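
Resolving `hf_hub_download` via `getattr(..., None)` rather than a direct `from huggingface_hub import hf_hub_download` means an installed-but-incompatible version of the library degrades to the same `None` path as a missing install; `_download_runtime_artifacts_if_needed` (added further down) checks for that `None` before attempting any download.
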
@@ -110,11 +117,13 @@ if load_dotenv is not None:
     load_dotenv(BASE_DIR / ".env")
 
 APP_DEBUG = _env_bool("ICH_APP_DEBUG", True)
-APP_PORT = _env_int("ICH_APP_PORT", 7860, minimum=1)
+APP_PORT = _env_int("ICH_APP_PORT", _env_int("PORT", 7860, minimum=1), minimum=1)
 MAX_UPLOAD_MB = _env_int("ICH_MAX_UPLOAD_MB", 2048, minimum=1)
 LOG_LEVEL_NAME = os.environ.get("ICH_LOG_LEVEL", "INFO").strip().upper()
 LOG_LEVEL = getattr(logging, LOG_LEVEL_NAME, logging.INFO)
 SECRET_KEY = os.environ.get("ICH_SECRET_KEY", "").strip()
+HF_MODEL_REPO = os.environ.get("ICH_HF_MODEL_REPO", os.environ.get("HF_REPO_ID", "")).strip()
+HF_TOKEN = os.environ.get("ICH_HF_TOKEN", os.environ.get("HF_TOKEN", "")).strip()
 
 app = Flask(__name__, template_folder="templates", static_folder="static")
 app.secret_key = SECRET_KEY or os.urandom(24)
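
`_env_int` and `_env_bool` are defined earlier in app.py and are not part of this diff. The nested call above only works if the helper falls back cleanly when a variable is unset, so the inner `_env_int("PORT", 7860, minimum=1)` can supply the default for the outer one. A hypothetical reading of `_env_int`, consistent with how the diff uses it but not taken from the repository:

```python
import os

def _env_int(name: str, default: int, minimum: int | None = None) -> int:
    # Hypothetical sketch: parse an int from the environment, fall back
    # to the default on absence or garbage, clamp to an optional minimum.
    raw = os.environ.get(name, "").strip()
    try:
        value = int(raw)
    except ValueError:
        value = default
    if minimum is not None:
        value = max(value, minimum)
    return value
```
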
@@ -149,7 +158,7 @@ bbr.configure(
 )
 
 
-def _save_trace(image_id: str) -> dict:
+def _save_trace(image_id: str) -> dict[str, str | None]:
     """
     Save the current blackbox trace to logs/ and return metadata about it.
     Called immediately after bbr.stop().
@@ -208,7 +217,7 @@ def _new_batch(total: int, temp_dir: str | None = None) -> str:
     return batch_id
 
 
-def _batch_update(batch_id: str, **kw):
+def _batch_update(batch_id: str, **kw: Any) -> None:
     """Thread-safe update of a batch record."""
     with _BATCHES_LOCK:
         if batch_id in _BATCHES:
@@ -313,6 +322,63 @@ _MODEL: dict[str, Any] = {
 }
 
 
+def _required_model_files(fold_selection: str) -> list[str]:
+    files = [
+        "calibration_params.json",
+        "normalization_stats.json",
+    ]
+    raw = (fold_selection or "ensemble").strip().lower()
+    if raw in ("", "ensemble", "all"):
+        files.extend([f"best_model_fold{i}.pth" for i in range(5)])
+        return files
+    if raw == "best":
+        files.append("best_model_fold4.pth")
+        return files
+    if raw.isdigit():
+        files.append(f"best_model_fold{int(raw)}.pth")
+        return files
+    # Fallback to ensemble behavior for unknown values.
+    files.extend([f"best_model_fold{i}.pth" for i in range(5)])
+    return files
+
+
+def _download_runtime_artifacts_if_needed(fold_selection: str) -> bool:
+    required_files = _required_model_files(fold_selection)
+    missing = [name for name in required_files if not (MODEL_DIR / name).exists()]
+    if not missing:
+        return True
+
+    if not HF_MODEL_REPO:
+        logger.warning(
+            "Missing runtime model files (%s) and ICH_HF_MODEL_REPO/HF_REPO_ID is not set.",
+            ", ".join(missing),
+        )
+        return False
+
+    if hf_hub_download is None:
+        logger.error(
+            "huggingface_hub is not installed, cannot download missing model artifacts."
+        )
+        return False
+
+    MODEL_DIR.mkdir(parents=True, exist_ok=True)
+    logger.info("Downloading missing model artifacts from Hugging Face repo: %s", HF_MODEL_REPO)
+    try:
+        for filename in missing:
+            hf_hub_download(
+                repo_id=HF_MODEL_REPO,
+                filename=filename,
+                repo_type="model",
+                local_dir=str(MODEL_DIR),
+                token=HF_TOKEN or None,
+            )
+            logger.info("Downloaded artifact: %s", filename)
+        return True
+    except Exception as exc:
+        logger.error("Failed downloading model artifacts from Hugging Face: %s", exc)
+        return False
+
+
 def _ensure_model_loaded() -> bool:
     """Lazy-load the ML model on first inference request."""
     if _MODEL["loaded"]:
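
For reference, the artifact sets `_required_model_files` produces for the documented `ICH_FOLD_SELECTION` values, derived by reading the function above (the two JSON files always come first; the `best` -> fold 4 mapping is hardwired):

```python
# Derived from _required_model_files above; [2:] skips the two JSON entries.
assert _required_model_files("ensemble")[2:] == [f"best_model_fold{i}.pth" for i in range(5)]
assert _required_model_files("best")[2:] == ["best_model_fold4.pth"]
assert _required_model_files("3")[2:] == ["best_model_fold3.pth"]
assert _required_model_files("typo")[2:] == [f"best_model_fold{i}.pth" for i in range(5)]  # fallback
```

Note also that in recent `huggingface_hub` versions, `hf_hub_download(..., local_dir=str(MODEL_DIR))` places each file directly under `MODEL_DIR` with its repository filename, which keeps the `(MODEL_DIR / name).exists()` check and the download destination in agreement.
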
@@ -326,6 +392,15 @@ def _ensure_model_loaded() -> bool:
     device = "cuda" if torch.cuda.is_available() else "cpu"
     fold_selection = os.environ.get("ICH_FOLD_SELECTION", "ensemble")
 
+    _download_runtime_artifacts_if_needed(fold_selection)
+
+    if not CALIB_JSON.exists():
+        logger.error(
+            "Missing calibration file at %s. Provide local files or set ICH_HF_MODEL_REPO.",
+            CALIB_JSON,
+        )
+        return False
+
     with open(CALIB_JSON) as f:
         calib_cfg = json.load(f)
 
@@ -372,7 +447,7 @@ def _ensure_model_loaded() -> bool:
     return False
 
 
-def _run_inference_on_dcm(dcm_path: Path) -> tuple[dict | None, dict | None]:
+def _run_inference_on_dcm(dcm_path: Path) -> tuple[dict[str, Any] | None, dict[str, str | None] | None]:
     """
     Run inference on one .dcm file, with blackbox tracing.
     Returns (report_dict, trace_metadata) or (None, None) on failure.
@@ -427,10 +502,10 @@ def _run_inference_on_dcm(dcm_path: Path) -> tuple[dict | None, dict | None]:
     return report, trace_meta
 
 
-def _append_to_summary_csv(image_id: str, report: dict):
+def _append_to_summary_csv(image_id: str, report: dict[str, Any]) -> None:
     """Append one report row to the summary CSV."""
     pred = report["prediction"]
-    row = {
+    row: dict[str, Any] = {
         "image_id": image_id,
         "true_label": "",
         "screening_outcome": pred["screening_outcome"],
@@ -446,7 +521,7 @@ def _append_to_summary_csv(image_id: str, report: dict):
     OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
 
     with open(SUMMARY_CSV, "a", newline="", encoding="utf-8") as f:
-        writer = csv.DictWriter(f, fieldnames=row.keys())
+        writer = csv.DictWriter(f, fieldnames=list(row.keys()))
         if not file_exists:
             writer.writeheader()
         writer.writerow(row)
@@ -767,12 +842,12 @@ def filter_cases(
     return rows
 
 
-def load_logs() -> list[dict]:
+def load_logs() -> list[dict[str, Any]]:
     """Scan the logs/ directory and return metadata for each trace."""
     if not LOGS_DIR.exists():
         return []
 
-    log_files: dict[str, dict] = {}  # base_name -> {txt_file, json_file, ...}
+    log_files: dict[str, dict[str, Any]] = {}  # base_name -> {txt_file, json_file, ...}
 
     for path in sorted(LOGS_DIR.iterdir(), reverse=True):
         if not path.is_file():
@@ -789,7 +864,7 @@ def load_logs() -> list[dict]:
         elif path.suffix == ".json":
             log_files.setdefault(stem, {})["json_file"] = path.name
 
-    entries = []
+    entries: list[dict[str, Any]] = []
     for stem in sorted(log_files, reverse=True):
         info = log_files[stem]
         ts_raw = info.get("timestamp", "")
@@ -815,12 +890,12 @@ def load_logs() -> list[dict]:
 # ══════════════════════════════════════════════════════════════════════════
 
 @app.before_request
-def _start_timer():
+def _start_timer() -> None:  # pyright: ignore[reportUnusedFunction]
     g._start_time = time.perf_counter()
 
 
 @app.after_request
-def _log_timing(response):
+def _log_timing(response: Any) -> Any:  # pyright: ignore[reportUnusedFunction]
     elapsed = (time.perf_counter() - getattr(g, "_start_time", time.perf_counter())) * 1000
     logger.info("%s %s -> %s (%.1f ms)", request.method, request.path, response.status_code, elapsed)
     return response
@@ -867,11 +942,12 @@ def analyze():
     temp_dir: str | None = None  # set if a zip needed extraction
 
     for f in files:
-        fname = f.filename.lower()
+        filename = f.filename or ""
+        fname = filename.lower()
 
         if fname.endswith(".zip"):
             temp_dir = tempfile.mkdtemp(prefix="ich_zip_")
-            zip_save = Path(temp_dir) / secure_filename(f.filename)
+            zip_save = Path(temp_dir) / secure_filename(filename)
             f.save(str(zip_save))
             try:
                 with zipfile.ZipFile(zip_save, "r") as zf:
884
  dcm_paths.extend(sorted(Path(temp_dir).rglob("*.dcm")))
885
 
886
  elif fname.endswith(".dcm"):
887
- safe = secure_filename(f.filename)
888
  save_path = UPLOAD_DIR / safe
889
  f.save(str(save_path))
890
  dcm_paths.append(save_path)
@@ -903,7 +979,7 @@ def analyze():
903
  if len(dcm_paths) == 1 and temp_dir is None:
904
  single_path = dcm_paths[0]
905
  try:
906
- report, trace = _run_inference_on_dcm(single_path)
907
  if report is None:
908
  flash("Model failed to load. Check server logs.", "error")
909
  return redirect(url_for("upload"))
 
27
  import uuid
28
  import zipfile
29
  from collections import Counter
30
+ from dataclasses import dataclass
31
  from pathlib import Path
32
  from typing import Any
33
 
 
36
  except Exception:
37
  load_dotenv = None
38
 
39
+ hf_hub_download: Any = None
40
  try:
41
+ import huggingface_hub
42
+ hf_hub_download = getattr(huggingface_hub, "hf_hub_download", None)
43
+ except Exception:
44
+ hf_hub_download = None
45
+
46
+ try:
47
+ import blackbox_recorder as bbr # type: ignore[import-untyped]
48
  except Exception:
49
  class _NoopRecorder:
50
+ def configure(self, **_kwargs: Any) -> None:
51
  return None
52
 
53
+ def start(self) -> None:
54
  return None
55
 
56
+ def stop(self) -> None:
57
  return None
58
 
59
+ def save_report(self, _path: str) -> None:
60
  return None
61
 
62
+ def save_json(self, _path: str) -> None:
63
  return None
64
 
65
  bbr = _NoopRecorder()
 
117
  load_dotenv(BASE_DIR / ".env")
118
 
119
  APP_DEBUG = _env_bool("ICH_APP_DEBUG", True)
120
+ APP_PORT = _env_int("ICH_APP_PORT", _env_int("PORT", 7860, minimum=1), minimum=1)
121
  MAX_UPLOAD_MB = _env_int("ICH_MAX_UPLOAD_MB", 2048, minimum=1)
122
  LOG_LEVEL_NAME = os.environ.get("ICH_LOG_LEVEL", "INFO").strip().upper()
123
  LOG_LEVEL = getattr(logging, LOG_LEVEL_NAME, logging.INFO)
124
  SECRET_KEY = os.environ.get("ICH_SECRET_KEY", "").strip()
125
+ HF_MODEL_REPO = os.environ.get("ICH_HF_MODEL_REPO", os.environ.get("HF_REPO_ID", "")).strip()
126
+ HF_TOKEN = os.environ.get("ICH_HF_TOKEN", os.environ.get("HF_TOKEN", "")).strip()
127
 
128
  app = Flask(__name__, template_folder="templates", static_folder="static")
129
  app.secret_key = SECRET_KEY or os.urandom(24)
 
158
  )
159
 
160
 
161
+ def _save_trace(image_id: str) -> dict[str, str | None]:
162
  """
163
  Save the current blackbox trace to logs/ and return metadata about it.
164
  Called immediately after bbr.stop().
 
217
  return batch_id
218
 
219
 
220
+ def _batch_update(batch_id: str, **kw: Any) -> None:
221
  """Thread-safe update of a batch record."""
222
  with _BATCHES_LOCK:
223
  if batch_id in _BATCHES:
 
322
  }
323
 
324
 
325
+ def _required_model_files(fold_selection: str) -> list[str]:
326
+ files = [
327
+ "calibration_params.json",
328
+ "normalization_stats.json",
329
+ ]
330
+ raw = (fold_selection or "ensemble").strip().lower()
331
+ if raw in ("", "ensemble", "all"):
332
+ files.extend([f"best_model_fold{i}.pth" for i in range(5)])
333
+ return files
334
+ if raw == "best":
335
+ files.append("best_model_fold4.pth")
336
+ return files
337
+ if raw.isdigit():
338
+ files.append(f"best_model_fold{int(raw)}.pth")
339
+ return files
340
+ # Fallback to ensemble behavior for unknown values.
341
+ files.extend([f"best_model_fold{i}.pth" for i in range(5)])
342
+ return files
343
+
344
+
345
+ def _download_runtime_artifacts_if_needed(fold_selection: str) -> bool:
346
+ required_files = _required_model_files(fold_selection)
347
+ missing = [name for name in required_files if not (MODEL_DIR / name).exists()]
348
+ if not missing:
349
+ return True
350
+
351
+ if not HF_MODEL_REPO:
352
+ logger.warning(
353
+ "Missing runtime model files (%s) and ICH_HF_MODEL_REPO/HF_REPO_ID is not set.",
354
+ ", ".join(missing),
355
+ )
356
+ return False
357
+
358
+ if hf_hub_download is None:
359
+ logger.error(
360
+ "huggingface_hub is not installed, cannot download missing model artifacts."
361
+ )
362
+ return False
363
+
364
+ MODEL_DIR.mkdir(parents=True, exist_ok=True)
365
+ logger.info("Downloading missing model artifacts from Hugging Face repo: %s", HF_MODEL_REPO)
366
+ try:
367
+ for filename in missing:
368
+ hf_hub_download(
369
+ repo_id=HF_MODEL_REPO,
370
+ filename=filename,
371
+ repo_type="model",
372
+ local_dir=str(MODEL_DIR),
373
+ token=HF_TOKEN or None,
374
+ )
375
+ logger.info("Downloaded artifact: %s", filename)
376
+ return True
377
+ except Exception as exc:
378
+ logger.error("Failed downloading model artifacts from Hugging Face: %s", exc)
379
+ return False
380
+
381
+
382
  def _ensure_model_loaded() -> bool:
383
  """Lazy-load the ML model on first inference request."""
384
  if _MODEL["loaded"]:
 
392
  device = "cuda" if torch.cuda.is_available() else "cpu"
393
  fold_selection = os.environ.get("ICH_FOLD_SELECTION", "ensemble")
394
 
395
+ _download_runtime_artifacts_if_needed(fold_selection)
396
+
397
+ if not CALIB_JSON.exists():
398
+ logger.error(
399
+ "Missing calibration file at %s. Provide local files or set ICH_HF_MODEL_REPO.",
400
+ CALIB_JSON,
401
+ )
402
+ return False
403
+
404
  with open(CALIB_JSON) as f:
405
  calib_cfg = json.load(f)
406
 
 
447
  return False
448
 
449
 
450
+ def _run_inference_on_dcm(dcm_path: Path) -> tuple[dict[str, Any] | None, dict[str, str | None] | None]:
451
  """
452
  Run inference on one .dcm file, with blackbox tracing.
453
  Returns (report_dict, trace_metadata) or (None, None) on failure.
 
502
  return report, trace_meta
503
 
504
 
505
+ def _append_to_summary_csv(image_id: str, report: dict[str, Any]) -> None:
506
  """Append one report row to the summary CSV."""
507
  pred = report["prediction"]
508
+ row: dict[str, Any] = {
509
  "image_id": image_id,
510
  "true_label": "",
511
  "screening_outcome": pred["screening_outcome"],
 
521
  OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
522
 
523
  with open(SUMMARY_CSV, "a", newline="", encoding="utf-8") as f:
524
+ writer = csv.DictWriter(f, fieldnames=list(row.keys()))
525
  if not file_exists:
526
  writer.writeheader()
527
  writer.writerow(row)
 
842
  return rows
843
 
844
 
845
+ def load_logs() -> list[dict[str, Any]]:
846
  """Scan the logs/ directory and return metadata for each trace."""
847
  if not LOGS_DIR.exists():
848
  return []
849
 
850
+ log_files: dict[str, dict[str, Any]] = {} # base_name -> {txt_file, json_file, ...}
851
 
852
  for path in sorted(LOGS_DIR.iterdir(), reverse=True):
853
  if not path.is_file():
 
864
  elif path.suffix == ".json":
865
  log_files.setdefault(stem, {})["json_file"] = path.name
866
 
867
+ entries: list[dict[str, Any]] = []
868
  for stem in sorted(log_files, reverse=True):
869
  info = log_files[stem]
870
  ts_raw = info.get("timestamp", "")
 
890
  # ══════════════════════════════════════════════════════════════════════════
891
 
892
  @app.before_request
893
+ def _start_timer() -> None: # pyright: ignore[reportUnusedFunction]
894
  g._start_time = time.perf_counter()
895
 
896
 
897
  @app.after_request
898
+ def _log_timing(response: Any) -> Any: # pyright: ignore[reportUnusedFunction]
899
  elapsed = (time.perf_counter() - getattr(g, "_start_time", time.perf_counter())) * 1000
900
  logger.info("%s %s -> %s (%.1f ms)", request.method, request.path, response.status_code, elapsed)
901
  return response
 
942
  temp_dir: str | None = None # set if a zip needed extraction
943
 
944
  for f in files:
945
+ filename = f.filename or ""
946
+ fname = filename.lower()
947
 
948
  if fname.endswith(".zip"):
949
  temp_dir = tempfile.mkdtemp(prefix="ich_zip_")
950
+ zip_save = Path(temp_dir) / secure_filename(filename)
951
  f.save(str(zip_save))
952
  try:
953
  with zipfile.ZipFile(zip_save, "r") as zf:
 
960
  dcm_paths.extend(sorted(Path(temp_dir).rglob("*.dcm")))
961
 
962
  elif fname.endswith(".dcm"):
963
+ safe = secure_filename(filename)
964
  save_path = UPLOAD_DIR / safe
965
  f.save(str(save_path))
966
  dcm_paths.append(save_path)
 
979
  if len(dcm_paths) == 1 and temp_dir is None:
980
  single_path = dcm_paths[0]
981
  try:
982
+ report, _trace = _run_inference_on_dcm(single_path)
983
  if report is None:
984
  flash("Model failed to load. Check server logs.", "error")
985
  return redirect(url_for("upload"))