Upload ProtoMorph-DINO scaffold and random head checkpoint
Browse files- .env.example +8 -0
- .gitattributes +5 -32
- Dockerfile +22 -0
- HF_UPLOAD_GUIDE.md +67 -0
- LICENSE-WEIGHTS.md +15 -0
- README.md +132 -172
- README_RUNPOD.md +181 -0
- checkpoints/config.json +17 -0
- checkpoints/labels.txt +10 -0
- checkpoints/protomorph_head.safetensors +3 -0
- config.json +17 -0
- infer.py +47 -0
- labels.txt +10 -0
- notebooks/ProtoMorph_DINOv3_Inference.ipynb +127 -0
- pyproject.toml +8 -0
- requirements-core.txt +10 -0
- runpod/setup_runpod.sh +30 -0
- runpod/start_jupyter.sh +19 -0
- runpod/upload_to_hf.sh +22 -0
- scripts/create_random_head.py +62 -0
- scripts/smoke_test_head_only.py +35 -0
- scripts/upload_to_hf.py +149 -0
- src/protomorph/__init__.py +15 -0
- src/protomorph/config.py +51 -0
- src/protomorph/hf_utils.py +67 -0
- src/protomorph/inference.py +97 -0
- src/protomorph/model.py +420 -0
.env.example
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# RunPod environment variables
|
| 2 |
+
# Do not commit real secrets.
|
| 3 |
+
hf_key=hf_your_huggingface_write_token_here
|
| 4 |
+
hf_repo=shiowo/DINO-Protomorph
|
| 5 |
+
|
| 6 |
+
# Standard names are also supported:
|
| 7 |
+
# HF_TOKEN=hf_your_huggingface_write_token_here
|
| 8 |
+
# HF_REPO_ID=shiowo/DINO-Protomorph
|
.gitattributes
CHANGED
|
@@ -1,35 +1,8 @@
|
|
| 1 |
-
*.
|
| 2 |
-
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
-
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
-
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
-
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
-
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
-
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
-
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
-
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
-
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
-
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
-
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
-
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
-
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
-
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
-
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
-
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
-
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
-
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
-
*.
|
| 25 |
-
*.
|
| 26 |
-
|
| 27 |
-
*.
|
| 28 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
-
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
-
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
-
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
-
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
-
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
-
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
-
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
| 1 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
|
|
|
| 2 |
*.bin filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 4 |
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.jpg filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.jpeg filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.png filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Dockerfile
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Optional Dockerfile. On RunPod, it is usually easier to start from a PyTorch
|
| 2 |
+
# 2.4.0 / CUDA 12.4 template and run runpod/setup_runpod.sh.
|
| 3 |
+
FROM nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04
|
| 4 |
+
|
| 5 |
+
ENV DEBIAN_FRONTEND=noninteractive \
|
| 6 |
+
HF_HOME=/workspace/hf_cache \
|
| 7 |
+
TRANSFORMERS_CACHE=/workspace/hf_cache \
|
| 8 |
+
PYTHONUNBUFFERED=1
|
| 9 |
+
|
| 10 |
+
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 11 |
+
software-properties-common git curl wget ca-certificates build-essential \
|
| 12 |
+
&& add-apt-repository ppa:deadsnakes/ppa -y \
|
| 13 |
+
&& apt-get update && apt-get install -y --no-install-recommends \
|
| 14 |
+
python3.11 python3.11-venv python3.11-dev \
|
| 15 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 16 |
+
|
| 17 |
+
WORKDIR /workspace/protomorph_dinov3_runpod
|
| 18 |
+
COPY . /workspace/protomorph_dinov3_runpod
|
| 19 |
+
RUN bash runpod/setup_runpod.sh
|
| 20 |
+
|
| 21 |
+
EXPOSE 8888
|
| 22 |
+
CMD ["bash", "runpod/start_jupyter.sh"]
|
HF_UPLOAD_GUIDE.md
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Hugging Face Upload Guide
|
| 2 |
+
|
| 3 |
+
This project is configured for the Hugging Face model repo:
|
| 4 |
+
|
| 5 |
+
```text
|
| 6 |
+
shiowo/DINO-Protomorph
|
| 7 |
+
```
|
| 8 |
+
|
| 9 |
+
## 1. Set RunPod environment variables
|
| 10 |
+
|
| 11 |
+
In RunPod, add:
|
| 12 |
+
|
| 13 |
+
```text
|
| 14 |
+
hf_key=hf_your_huggingface_write_token_here
|
| 15 |
+
hf_repo=shiowo/DINO-Protomorph
|
| 16 |
+
```
|
| 17 |
+
|
| 18 |
+
`hf_key` must be a Hugging Face token with write access to the target repo.
|
| 19 |
+
|
| 20 |
+
The script also supports standard names:
|
| 21 |
+
|
| 22 |
+
```text
|
| 23 |
+
HF_TOKEN=hf_your_huggingface_write_token_here
|
| 24 |
+
HF_REPO_ID=shiowo/DINO-Protomorph
|
| 25 |
+
```
|
| 26 |
+
|
| 27 |
+
Never commit real tokens.
|
| 28 |
+
|
| 29 |
+
## 2. Install dependencies
|
| 30 |
+
|
| 31 |
+
```bash
|
| 32 |
+
cd /workspace/protomorph_dinov3_runpod
|
| 33 |
+
bash runpod/setup_runpod.sh
|
| 34 |
+
source .venv/bin/activate
|
| 35 |
+
```
|
| 36 |
+
|
| 37 |
+
## 3. Dry run
|
| 38 |
+
|
| 39 |
+
```bash
|
| 40 |
+
python scripts/upload_to_hf.py --dry-run
|
| 41 |
+
```
|
| 42 |
+
|
| 43 |
+
This checks the required files and prints the file list without uploading.
|
| 44 |
+
|
| 45 |
+
## 4. Upload
|
| 46 |
+
|
| 47 |
+
```bash
|
| 48 |
+
python scripts/upload_to_hf.py
|
| 49 |
+
```
|
| 50 |
+
|
| 51 |
+
Or:
|
| 52 |
+
|
| 53 |
+
```bash
|
| 54 |
+
bash runpod/upload_to_hf.sh
|
| 55 |
+
```
|
| 56 |
+
|
| 57 |
+
## 5. Important notes
|
| 58 |
+
|
| 59 |
+
This upload includes the custom ProtoMorph head checkpoint:
|
| 60 |
+
|
| 61 |
+
```text
|
| 62 |
+
checkpoints/protomorph_head.safetensors
|
| 63 |
+
```
|
| 64 |
+
|
| 65 |
+
It does not include DINOv3 backbone weights. DINOv3 is loaded separately during inference.
|
| 66 |
+
|
| 67 |
+
The model card marks all results as **Pending** because training and benchmarking have not been completed yet.
|
LICENSE-WEIGHTS.md
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ProtoMorph Head Weights License
|
| 2 |
+
|
| 3 |
+
The file below is licensed under the Creative Commons Attribution-ShareAlike 4.0 International License:
|
| 4 |
+
|
| 5 |
+
```text
|
| 6 |
+
checkpoints/protomorph_head.safetensors
|
| 7 |
+
```
|
| 8 |
+
|
| 9 |
+
You may use, share, and adapt these weights, including for commercial purposes, provided that you give appropriate credit and distribute adapted versions under CC BY-SA 4.0 or a compatible license.
|
| 10 |
+
|
| 11 |
+
This license applies only to the ProtoMorph head weights released by this project.
|
| 12 |
+
|
| 13 |
+
It does not apply to DINOv3, PyTorch, Hugging Face Transformers, third-party datasets, third-party model weights, or upstream dependencies.
|
| 14 |
+
|
| 15 |
+
DINOv3 weights are not redistributed in this repository. Users are responsible for obtaining DINOv3 separately from its official source and complying with its license.
|
README.md
CHANGED
|
@@ -9,15 +9,10 @@ tags:
|
|
| 9 |
- dinov3
|
| 10 |
- pytorch
|
| 11 |
- safetensors
|
| 12 |
-
- architecture
|
| 13 |
-
- research
|
| 14 |
-
- untrained
|
| 15 |
- prototype-learning
|
| 16 |
- hard-example-mining
|
| 17 |
- feedback-routing
|
| 18 |
- experimental
|
| 19 |
-
datasets:
|
| 20 |
-
- pending
|
| 21 |
metrics:
|
| 22 |
- accuracy
|
| 23 |
- f1
|
|
@@ -25,27 +20,21 @@ metrics:
|
|
| 25 |
- recall
|
| 26 |
---
|
| 27 |
|
| 28 |
-
#
|
| 29 |
|
| 30 |
**Feedback-Gated Prototype Morphing for Hard-Case Image Classification**
|
| 31 |
|
| 32 |
ProtoMorph-DINO is an experimental image classification head designed to run on top of a frozen DINOv3 vision backbone.
|
| 33 |
|
| 34 |
-
|
| 35 |
|
| 36 |
-
|
| 37 |
-
-
|
| 38 |
-
|
| 39 |
-
- confidence-based hard-case routing
|
| 40 |
-
- top-2 probability feedback
|
| 41 |
-
- Delta-RBF hard expert refinement
|
| 42 |
-
- logit fusion for difficult samples
|
| 43 |
-
|
| 44 |
-
This repository currently contains the early project/model-card setup for ProtoMorph-DINO. Training and evaluation results are still pending.
|
| 45 |
|
| 46 |
-
This repository
|
| 47 |
|
| 48 |
-
This project is
|
| 49 |
|
| 50 |
---
|
| 51 |
|
|
@@ -81,13 +70,48 @@ Hard-case gate
|
|
| 81 |
|
| 82 |
## Model Summary
|
| 83 |
|
| 84 |
-
ProtoMorph-DINO
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
|
| 86 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
|
| 88 |
-
|
| 89 |
|
| 90 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
|
| 92 |
---
|
| 93 |
|
|
@@ -100,11 +124,11 @@ This model is intended for:
|
|
| 100 |
- prototype learning experiments
|
| 101 |
- frozen-backbone classifier research
|
| 102 |
- fine-grained classification experiments
|
| 103 |
-
- educational
|
| 104 |
|
| 105 |
This model is **not** intended for safety-critical use.
|
| 106 |
|
| 107 |
-
Do not use this model for medical, legal, financial, biometric, security-critical, or production decisions without
|
| 108 |
|
| 109 |
---
|
| 110 |
|
|
@@ -115,23 +139,29 @@ Recommended repository layout:
|
|
| 115 |
```text
|
| 116 |
.
|
| 117 |
├── README.md
|
|
|
|
| 118 |
├── config.json
|
| 119 |
├── labels.txt
|
| 120 |
-
├──
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 124 |
```
|
| 125 |
|
| 126 |
-
The main weight file is
|
| 127 |
|
| 128 |
```text
|
| 129 |
-
protomorph_head.safetensors
|
| 130 |
```
|
| 131 |
|
| 132 |
This file contains only the custom ProtoMorph classification head.
|
| 133 |
|
| 134 |
-
DINOv3 backbone weights are not included.
|
| 135 |
|
| 136 |
---
|
| 137 |
|
|
@@ -145,7 +175,7 @@ facebook/dinov3-vits16-pretrain-lvd1689m
|
|
| 145 |
|
| 146 |
The backbone is used as a frozen visual feature extractor.
|
| 147 |
|
| 148 |
-
For RTX 3090-class GPUs,
|
| 149 |
|
| 150 |
---
|
| 151 |
|
|
@@ -162,159 +192,105 @@ CUDA 12.4 PyTorch wheel
|
|
| 162 |
Install PyTorch:
|
| 163 |
|
| 164 |
```bash
|
| 165 |
-
pip install torch==2.4.0 torchvision==0.19.0 --index-url https://download.pytorch.org/whl/cu124
|
| 166 |
```
|
| 167 |
|
| 168 |
Install dependencies:
|
| 169 |
|
| 170 |
```bash
|
| 171 |
-
pip install
|
| 172 |
```
|
| 173 |
|
| 174 |
---
|
| 175 |
|
| 176 |
-
##
|
| 177 |
-
|
| 178 |
-
```python
|
| 179 |
-
import torch
|
| 180 |
-
from PIL import Image
|
| 181 |
-
from transformers import AutoImageProcessor, AutoModel
|
| 182 |
-
from safetensors.torch import load_file
|
| 183 |
-
|
| 184 |
-
# Replace with your local or Hugging Face repo path.
|
| 185 |
-
REPO_ID = "shiowo/DINO-Protomorph"
|
| 186 |
-
|
| 187 |
-
# DINOv3 is loaded separately.
|
| 188 |
-
BACKBONE_NAME = "facebook/dinov3-vits16-pretrain-lvd1689m"
|
| 189 |
-
|
| 190 |
-
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 191 |
-
|
| 192 |
-
processor = AutoImageProcessor.from_pretrained(BACKBONE_NAME)
|
| 193 |
-
backbone = AutoModel.from_pretrained(
|
| 194 |
-
BACKBONE_NAME,
|
| 195 |
-
torch_dtype=torch.float16 if device == "cuda" else torch.float32,
|
| 196 |
-
).to(device)
|
| 197 |
-
|
| 198 |
-
backbone.eval()
|
| 199 |
-
for p in backbone.parameters():
|
| 200 |
-
p.requires_grad = False
|
| 201 |
-
|
| 202 |
-
# Load your ProtoMorph model class from your local code.
|
| 203 |
-
# from model import ProtoMorphDINOClassifier
|
| 204 |
-
#
|
| 205 |
-
# model = ProtoMorphDINOClassifier(...)
|
| 206 |
-
# state = load_file("protomorph_head.safetensors")
|
| 207 |
-
# model.load_state_dict(state, strict=True)
|
| 208 |
-
# model.to(device)
|
| 209 |
-
# model.eval()
|
| 210 |
-
|
| 211 |
-
image = Image.open("example.jpg").convert("RGB")
|
| 212 |
-
inputs = processor(images=image, return_tensors="pt").to(device)
|
| 213 |
-
|
| 214 |
-
with torch.no_grad():
|
| 215 |
-
outputs = backbone(**inputs)
|
| 216 |
-
tokens = outputs.last_hidden_state
|
| 217 |
-
|
| 218 |
-
# DINOv3 ViT outputs include special tokens before patch tokens.
|
| 219 |
-
# Your implementation should remove CLS/register tokens according to its config.
|
| 220 |
-
#
|
| 221 |
-
# logits = model(tokens)
|
| 222 |
-
# probs = torch.softmax(logits, dim=-1)
|
| 223 |
-
# print(probs)
|
| 224 |
-
```
|
| 225 |
|
| 226 |
-
|
| 227 |
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
```json
|
| 233 |
-
{
|
| 234 |
-
"model_name": "ProtoMorph-DINO",
|
| 235 |
-
"backbone_name": "facebook/dinov3-vits16-pretrain-lvd1689m",
|
| 236 |
-
"num_classes": "pending",
|
| 237 |
-
"patch_dim": 384,
|
| 238 |
-
"hidden_dim": 512,
|
| 239 |
-
"num_prototypes": 64,
|
| 240 |
-
"memory_heads": 8,
|
| 241 |
-
"hard_gate_confidence_threshold": 0.65,
|
| 242 |
-
"hard_gate_margin_threshold": 0.15,
|
| 243 |
-
"hard_expert_weight": 0.5,
|
| 244 |
-
"dtype": "float16"
|
| 245 |
-
}
|
| 246 |
```
|
| 247 |
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
## Training Status
|
| 251 |
-
|
| 252 |
-
**Status: Pending**
|
| 253 |
|
| 254 |
-
|
|
|
|
|
|
|
|
|
|
| 255 |
|
| 256 |
-
|
| 257 |
|
| 258 |
---
|
| 259 |
|
| 260 |
-
##
|
| 261 |
|
| 262 |
-
|
| 263 |
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
-
|
| 269 |
-
-
|
| 270 |
-
-
|
| 271 |
-
|
| 272 |
-
- augmentation strategy
|
| 273 |
-
- label mapping
|
| 274 |
|
| 275 |
-
|
| 276 |
|
| 277 |
-
```
|
| 278 |
-
|
| 279 |
```
|
| 280 |
|
|
|
|
|
|
|
| 281 |
---
|
| 282 |
|
| 283 |
-
##
|
| 284 |
|
| 285 |
-
|
| 286 |
|
| 287 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 288 |
|
| 289 |
-
|
| 290 |
-
|---|---:|
|
| 291 |
-
| Accuracy | Pending |
|
| 292 |
-
| F1 | Pending |
|
| 293 |
-
| Precision | Pending |
|
| 294 |
-
| Recall | Pending |
|
| 295 |
|
| 296 |
-
|
|
|
|
|
|
|
| 297 |
|
| 298 |
-
|
| 299 |
-
|---|---|
|
| 300 |
-
| DINOv3 + Linear Probe | Minimal frozen-backbone baseline |
|
| 301 |
-
| DINOv3 + MLP Head | Strong simple head baseline |
|
| 302 |
-
| CLIP + Linear Probe | Popular vision-language baseline |
|
| 303 |
-
| ConvNeXt | Strong CNN-style baseline |
|
| 304 |
-
| ViT | Standard transformer baseline |
|
| 305 |
|
| 306 |
-
|
|
|
|
|
|
|
| 307 |
|
| 308 |
-
|
| 309 |
|
| 310 |
-
|
| 311 |
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 318 |
|
| 319 |
---
|
| 320 |
|
|
@@ -323,12 +299,12 @@ Planned research questions:
|
|
| 323 |
Known limitations:
|
| 324 |
|
| 325 |
- The architecture is experimental.
|
| 326 |
-
-
|
| 327 |
- The hard-case gate requires threshold tuning.
|
| 328 |
- The Delta-RBF hard expert may overfit small datasets.
|
| 329 |
- Inference may be slower for hard samples.
|
| 330 |
- The model should be compared against simple baselines before claiming improvement.
|
| 331 |
-
- This
|
| 332 |
- The custom head may not generalize outside the dataset it was trained on.
|
| 333 |
|
| 334 |
---
|
|
@@ -365,8 +341,8 @@ If you use this model or build on it, please credit:
|
|
| 365 |
|
| 366 |
```text
|
| 367 |
ProtoMorph-DINO: Feedback-Gated Prototype Morphing for Hard-Case Image Classification
|
| 368 |
-
Author:
|
| 369 |
-
Repository: https://huggingface.co/
|
| 370 |
```
|
| 371 |
|
| 372 |
BibTeX:
|
|
@@ -374,9 +350,9 @@ BibTeX:
|
|
| 374 |
```bibtex
|
| 375 |
@software{protomorph_dino_2026,
|
| 376 |
title = {ProtoMorph-DINO: Feedback-Gated Prototype Morphing for Hard-Case Image Classification},
|
| 377 |
-
author = {
|
| 378 |
year = {2026},
|
| 379 |
-
url = {https://huggingface.co/
|
| 380 |
}
|
| 381 |
```
|
| 382 |
|
|
@@ -387,19 +363,3 @@ BibTeX:
|
|
| 387 |
This is a research prototype.
|
| 388 |
|
| 389 |
The model is provided for experimentation and educational use. It should not be used in production or high-stakes environments without independent validation, dataset auditing, robustness testing, and bias evaluation.
|
| 390 |
-
|
| 391 |
-
---
|
| 392 |
-
|
| 393 |
-
## Project Links
|
| 394 |
-
|
| 395 |
-
GitHub repository: coming soon
|
| 396 |
-
|
| 397 |
-
```text
|
| 398 |
-
https://github.com/shiowo/DINO-Protomorph
|
| 399 |
-
```
|
| 400 |
-
|
| 401 |
-
Hugging Face model page:
|
| 402 |
-
|
| 403 |
-
```text
|
| 404 |
-
https://huggingface.co/shiowo/DINO-Protomorph
|
| 405 |
-
```
|
|
|
|
| 9 |
- dinov3
|
| 10 |
- pytorch
|
| 11 |
- safetensors
|
|
|
|
|
|
|
|
|
|
| 12 |
- prototype-learning
|
| 13 |
- hard-example-mining
|
| 14 |
- feedback-routing
|
| 15 |
- experimental
|
|
|
|
|
|
|
| 16 |
metrics:
|
| 17 |
- accuracy
|
| 18 |
- f1
|
|
|
|
| 20 |
- recall
|
| 21 |
---
|
| 22 |
|
| 23 |
+
# ProtoMorph-DINO
|
| 24 |
|
| 25 |
**Feedback-Gated Prototype Morphing for Hard-Case Image Classification**
|
| 26 |
|
| 27 |
ProtoMorph-DINO is an experimental image classification head designed to run on top of a frozen DINOv3 vision backbone.
|
| 28 |
|
| 29 |
+
This model card is for the Hugging Face repository:
|
| 30 |
|
| 31 |
+
```text
|
| 32 |
+
shiowo/DINO-Protomorph
|
| 33 |
+
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
|
| 35 |
+
This repository currently contains an initial research scaffold and custom ProtoMorph head checkpoint. Evaluation results are **pending** because the repository is being created before full training and benchmarking.
|
| 36 |
|
| 37 |
+
This project is independent and is not affiliated with Meta AI, Hugging Face, or the official DINOv3 project.
|
| 38 |
|
| 39 |
---
|
| 40 |
|
|
|
|
| 70 |
|
| 71 |
## Model Summary
|
| 72 |
|
| 73 |
+
ProtoMorph-DINO explores whether a frozen foundation vision backbone can be improved with a custom hard-case refinement head.
|
| 74 |
+
|
| 75 |
+
For easy images, the model returns the main classifier output directly. For difficult or ambiguous images, the model activates a feedback branch. The feedback branch uses the top-2 predicted probabilities to modulate the DINO patch map, sends the modified representation through a Delta-RBF hard expert, and fuses the refined logits with the main logits.
|
| 76 |
+
|
| 77 |
+
The main research question is whether feedback-guided hard-case refinement can improve classification performance over simpler frozen-backbone heads such as a linear probe or MLP classifier.
|
| 78 |
+
|
| 79 |
+
---
|
| 80 |
+
|
| 81 |
+
## Current Status
|
| 82 |
+
|
| 83 |
+
**Status: research scaffold / pre-training setup**
|
| 84 |
+
|
| 85 |
+
The current checkpoint may be randomly initialized or only intended for smoke testing unless a later release says otherwise.
|
| 86 |
+
|
| 87 |
+
Predictions are **not meaningful** until the ProtoMorph head is trained on a real dataset.
|
| 88 |
|
| 89 |
+
---
|
| 90 |
+
|
| 91 |
+
## Results
|
| 92 |
+
|
| 93 |
+
**Evaluation results: Pending**
|
| 94 |
+
|
| 95 |
+
No benchmark results are reported yet because the repository is being prepared before training and evaluation.
|
| 96 |
+
|
| 97 |
+
| Metric | Value |
|
| 98 |
+
|---|---:|
|
| 99 |
+
| Accuracy | Pending |
|
| 100 |
+
| F1 | Pending |
|
| 101 |
+
| Precision | Pending |
|
| 102 |
+
| Recall | Pending |
|
| 103 |
+
| Confusion-pair improvement | Pending |
|
| 104 |
+
| Hard-case routing benefit | Pending |
|
| 105 |
|
| 106 |
+
Recommended future baselines:
|
| 107 |
|
| 108 |
+
| Baseline | Purpose |
|
| 109 |
+
|---|---|
|
| 110 |
+
| DINOv3 + Linear Probe | Minimal frozen-backbone baseline |
|
| 111 |
+
| DINOv3 + MLP Head | Strong simple head baseline |
|
| 112 |
+
| CLIP + Linear Probe | Popular vision-language comparison |
|
| 113 |
+
| ConvNeXt | Strong CNN-style baseline |
|
| 114 |
+
| ViT | Standard transformer baseline |
|
| 115 |
|
| 116 |
---
|
| 117 |
|
|
|
|
| 124 |
- prototype learning experiments
|
| 125 |
- frozen-backbone classifier research
|
| 126 |
- fine-grained classification experiments
|
| 127 |
+
- educational computer vision experiments
|
| 128 |
|
| 129 |
This model is **not** intended for safety-critical use.
|
| 130 |
|
| 131 |
+
Do not use this model for medical, legal, financial, biometric, security-critical, or production decisions without independent validation.
|
| 132 |
|
| 133 |
---
|
| 134 |
|
|
|
|
| 139 |
```text
|
| 140 |
.
|
| 141 |
├── README.md
|
| 142 |
+
├── LICENSE-WEIGHTS.md
|
| 143 |
├── config.json
|
| 144 |
├── labels.txt
|
| 145 |
+
├── checkpoints/
|
| 146 |
+
│ ├── config.json
|
| 147 |
+
│ ├── labels.txt
|
| 148 |
+
│ └── protomorph_head.safetensors
|
| 149 |
+
├── infer.py
|
| 150 |
+
├── scripts/
|
| 151 |
+
│ └── upload_to_hf.py
|
| 152 |
+
└── src/
|
| 153 |
+
└── protomorph/
|
| 154 |
```
|
| 155 |
|
| 156 |
+
The main weight file is:
|
| 157 |
|
| 158 |
```text
|
| 159 |
+
checkpoints/protomorph_head.safetensors
|
| 160 |
```
|
| 161 |
|
| 162 |
This file contains only the custom ProtoMorph classification head.
|
| 163 |
|
| 164 |
+
DINOv3 backbone weights are **not** included in this repository.
|
| 165 |
|
| 166 |
---
|
| 167 |
|
|
|
|
| 175 |
|
| 176 |
The backbone is used as a frozen visual feature extractor.
|
| 177 |
|
| 178 |
+
For RTX 3090-class GPUs, ViT-S/16 is a practical starting point because it keeps VRAM usage manageable while still producing useful patch embeddings.
|
| 179 |
|
| 180 |
---
|
| 181 |
|
|
|
|
| 192 |
Install PyTorch:
|
| 193 |
|
| 194 |
```bash
|
| 195 |
+
pip install torch==2.4.0 torchvision==0.19.0 torchaudio==2.4.0 --index-url https://download.pytorch.org/whl/cu124
|
| 196 |
```
|
| 197 |
|
| 198 |
Install dependencies:
|
| 199 |
|
| 200 |
```bash
|
| 201 |
+
pip install -r requirements-core.txt
|
| 202 |
```
|
| 203 |
|
| 204 |
---
|
| 205 |
|
| 206 |
+
## RunPod Environment Variables
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 207 |
|
| 208 |
+
This project supports the RunPod environment variable names shown below:
|
| 209 |
|
| 210 |
+
```text
|
| 211 |
+
hf_key=hf_your_huggingface_write_token_here
|
| 212 |
+
hf_repo=shiowo/DINO-Protomorph
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 213 |
```
|
| 214 |
|
| 215 |
+
Standard Hugging Face names are also supported:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 216 |
|
| 217 |
+
```text
|
| 218 |
+
HF_TOKEN=hf_your_huggingface_write_token_here
|
| 219 |
+
HF_REPO_ID=shiowo/DINO-Protomorph
|
| 220 |
+
```
|
| 221 |
|
| 222 |
+
Never commit your real Hugging Face token to the repository.
|
| 223 |
|
| 224 |
---
|
| 225 |
|
| 226 |
+
## Inference
|
| 227 |
|
| 228 |
+
Run inference from the command line:
|
| 229 |
|
| 230 |
+
```bash
|
| 231 |
+
python infer.py \
|
| 232 |
+
--image examples/sample_image.jpg \
|
| 233 |
+
--config checkpoints/config.json \
|
| 234 |
+
--checkpoint checkpoints/protomorph_head.safetensors \
|
| 235 |
+
--labels checkpoints/labels.txt \
|
| 236 |
+
--topk 5
|
| 237 |
+
```
|
|
|
|
|
|
|
| 238 |
|
| 239 |
+
For smoke testing only:
|
| 240 |
|
| 241 |
+
```bash
|
| 242 |
+
python infer.py --image examples/sample_image.jpg --allow-random-head
|
| 243 |
```
|
| 244 |
|
| 245 |
+
If the head is untrained, the output is only useful for checking that the pipeline runs.
|
| 246 |
+
|
| 247 |
---
|
| 248 |
|
| 249 |
+
## Upload to Hugging Face from RunPod
|
| 250 |
|
| 251 |
+
After setting `hf_key` and `hf_repo` in RunPod, run:
|
| 252 |
|
| 253 |
+
```bash
|
| 254 |
+
cd /workspace/protomorph_dinov3_runpod
|
| 255 |
+
source .venv/bin/activate
|
| 256 |
+
python scripts/upload_to_hf.py
|
| 257 |
+
```
|
| 258 |
|
| 259 |
+
Or use the helper script:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 260 |
|
| 261 |
+
```bash
|
| 262 |
+
bash runpod/upload_to_hf.sh
|
| 263 |
+
```
|
| 264 |
|
| 265 |
+
Dry run before upload:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 266 |
|
| 267 |
+
```bash
|
| 268 |
+
python scripts/upload_to_hf.py --dry-run
|
| 269 |
+
```
|
| 270 |
|
| 271 |
+
---
|
| 272 |
|
| 273 |
+
## Config Example
|
| 274 |
|
| 275 |
+
```json
|
| 276 |
+
{
|
| 277 |
+
"dino_model_name": "facebook/dinov3-vits16-pretrain-lvd1689m",
|
| 278 |
+
"num_classes": 10,
|
| 279 |
+
"embed_dim": 384,
|
| 280 |
+
"patch_size": 16,
|
| 281 |
+
"proto_count": 64,
|
| 282 |
+
"memory_tokens": 16,
|
| 283 |
+
"rbf_count": 128,
|
| 284 |
+
"num_heads": 8,
|
| 285 |
+
"dropout": 0.0,
|
| 286 |
+
"hard_pmax_threshold": 0.65,
|
| 287 |
+
"hard_margin_threshold": 0.15,
|
| 288 |
+
"hard_entropy_threshold": 1.35,
|
| 289 |
+
"image_size": 512,
|
| 290 |
+
"use_bf16_autocast": true,
|
| 291 |
+
"normalize_patch_tokens": true
|
| 292 |
+
}
|
| 293 |
+
```
|
| 294 |
|
| 295 |
---
|
| 296 |
|
|
|
|
| 299 |
Known limitations:
|
| 300 |
|
| 301 |
- The architecture is experimental.
|
| 302 |
+
- Evaluation results are pending.
|
| 303 |
- The hard-case gate requires threshold tuning.
|
| 304 |
- The Delta-RBF hard expert may overfit small datasets.
|
| 305 |
- Inference may be slower for hard samples.
|
| 306 |
- The model should be compared against simple baselines before claiming improvement.
|
| 307 |
+
- This repository does not include DINOv3 weights.
|
| 308 |
- The custom head may not generalize outside the dataset it was trained on.
|
| 309 |
|
| 310 |
---
|
|
|
|
| 341 |
|
| 342 |
```text
|
| 343 |
ProtoMorph-DINO: Feedback-Gated Prototype Morphing for Hard-Case Image Classification
|
| 344 |
+
Author: shiowo
|
| 345 |
+
Repository: https://huggingface.co/shiowo/DINO-Protomorph
|
| 346 |
```
|
| 347 |
|
| 348 |
BibTeX:
|
|
|
|
| 350 |
```bibtex
|
| 351 |
@software{protomorph_dino_2026,
|
| 352 |
title = {ProtoMorph-DINO: Feedback-Gated Prototype Morphing for Hard-Case Image Classification},
|
| 353 |
+
author = {shiowo},
|
| 354 |
year = {2026},
|
| 355 |
+
url = {https://huggingface.co/shiowo/DINO-Protomorph}
|
| 356 |
}
|
| 357 |
```
|
| 358 |
|
|
|
|
| 363 |
This is a research prototype.
|
| 364 |
|
| 365 |
The model is provided for experimentation and educational use. It should not be used in production or high-stakes environments without independent validation, dataset auditing, robustness testing, and bias evaluation.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
README_RUNPOD.md
ADDED
|
@@ -0,0 +1,181 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ProtoMorph-DINOv3 RunPod Inference + Hugging Face Upload Template
|
| 2 |
+
|
| 3 |
+
This is a runnable experimental inference scaffold for:
|
| 4 |
+
|
| 5 |
+
```text
|
| 6 |
+
Image
|
| 7 |
+
↓
|
| 8 |
+
Frozen DINOv3
|
| 9 |
+
↓
|
| 10 |
+
Patch map z0
|
| 11 |
+
↓
|
| 12 |
+
ProtoMorph block 1
|
| 13 |
+
↓
|
| 14 |
+
Layer Memory Attention
|
| 15 |
+
↓
|
| 16 |
+
ProtoMorph block 2
|
| 17 |
+
↓
|
| 18 |
+
Layer Memory Attention
|
| 19 |
+
↓
|
| 20 |
+
Main logits
|
| 21 |
+
↓
|
| 22 |
+
Hard-case gate
|
| 23 |
+
├── easy: return main logits
|
| 24 |
+
└── hard:
|
| 25 |
+
feedback from top-2 probabilities
|
| 26 |
+
modulate DINO patch map
|
| 27 |
+
run Delta-RBF hard expert
|
| 28 |
+
fuse logits
|
| 29 |
+
```
|
| 30 |
+
|
| 31 |
+
## Practical GPU choice
|
| 32 |
+
|
| 33 |
+
Default backbone: `facebook/dinov3-vits16-pretrain-lvd1689m`.
|
| 34 |
+
|
| 35 |
+
Reason: RTX 3090 has 24 GB VRAM. ViT-S/16 gives 384-dim patch tokens, leaves room for Jupyter, batch inference, the custom hard expert, and future training experiments. You can switch to ViT-B/16 by recreating the head with `--dino-model-name facebook/dinov3-vitb16-pretrain-lvd1689m --embed-dim 768`, but start with ViT-S until the plumbing is stable.
|
| 36 |
+
|
| 37 |
+
## Important compatibility note
|
| 38 |
+
|
| 39 |
+
You said PyTorch 2.4.0 and CUDA 13. PyTorch 2.4.0 official wheels are for CUDA 11.8, 12.1, and 12.4. On RunPod, use the CUDA 12.4 wheel even when the host driver/toolkit is newer:
|
| 40 |
+
|
| 41 |
+
```bash
|
| 42 |
+
pip install torch==2.4.0 torchvision==0.19.0 torchaudio==2.4.0 --index-url https://download.pytorch.org/whl/cu124
|
| 43 |
+
```
|
| 44 |
+
|
| 45 |
+
## RunPod setup
|
| 46 |
+
|
| 47 |
+
Recommended RunPod template:
|
| 48 |
+
|
| 49 |
+
- RTX 3090
|
| 50 |
+
- Python 3.11
|
| 51 |
+
- PyTorch 2.4.0 if available, otherwise a clean CUDA 12.4 Ubuntu image
|
| 52 |
+
- Persistent volume mounted at `/workspace`
|
| 53 |
+
|
| 54 |
+
```bash
|
| 55 |
+
cd /workspace
|
| 56 |
+
git clone <your-repo-or-upload-this-folder> protomorph_dinov3_runpod
|
| 57 |
+
cd /workspace/protomorph_dinov3_runpod
|
| 58 |
+
bash runpod/setup_runpod.sh
|
| 59 |
+
bash runpod/start_jupyter.sh
|
| 60 |
+
```
|
| 61 |
+
|
| 62 |
+
Open Jupyter on port `8888`. Default token: `protomorph`, or set `JUPYTER_TOKEN`.
|
| 63 |
+
|
| 64 |
+
## Hugging Face access and upload env
|
| 65 |
+
|
| 66 |
+
This package is configured for your Hugging Face model repo:
|
| 67 |
+
|
| 68 |
+
```text
|
| 69 |
+
https://huggingface.co/shiowo/DINO-Protomorph
|
| 70 |
+
```
|
| 71 |
+
|
| 72 |
+
In RunPod, you can use the environment variable names you added:
|
| 73 |
+
|
| 74 |
+
```text
|
| 75 |
+
hf_key=hf_your_huggingface_write_token_here
|
| 76 |
+
hf_repo=shiowo/DINO-Protomorph
|
| 77 |
+
```
|
| 78 |
+
|
| 79 |
+
The code also supports standard Hugging Face names:
|
| 80 |
+
|
| 81 |
+
```text
|
| 82 |
+
HF_TOKEN=hf_your_huggingface_write_token_here
|
| 83 |
+
HF_REPO_ID=shiowo/DINO-Protomorph
|
| 84 |
+
```
|
| 85 |
+
|
| 86 |
+
`hf_key` is never printed by the scripts. Do not commit real tokens.
|
| 87 |
+
|
| 88 |
+
DINOv3 checkpoints may require accepting the model/license on Hugging Face before the frozen backbone can be downloaded. The inference code automatically passes `hf_key`/`HF_TOKEN` to `transformers`.
|
| 89 |
+
|
| 90 |
+
## Create the initial safetensors head
|
| 91 |
+
|
| 92 |
+
The setup script creates a random custom head only if the checkpoint bundle does not already exist:
|
| 93 |
+
|
| 94 |
+
```bash
|
| 95 |
+
python scripts/create_random_head.py --num-classes 10 --out-dir checkpoints
|
| 96 |
+
```
|
| 97 |
+
|
| 98 |
+
This writes:
|
| 99 |
+
|
| 100 |
+
- `checkpoints/config.json`
|
| 101 |
+
- `checkpoints/protomorph_head.safetensors`
|
| 102 |
+
- `checkpoints/labels.txt`
|
| 103 |
+
|
| 104 |
+
The random head is only for smoke tests. Train the head before trusting predictions.
|
| 105 |
+
|
| 106 |
+
To intentionally overwrite it:
|
| 107 |
+
|
| 108 |
+
```bash
|
| 109 |
+
python scripts/create_random_head.py --num-classes 10 --out-dir checkpoints --force
|
| 110 |
+
```
|
| 111 |
+
|
| 112 |
+
## CLI inference
|
| 113 |
+
|
| 114 |
+
```bash
|
| 115 |
+
source .venv/bin/activate
|
| 116 |
+
python infer.py \
|
| 117 |
+
--image /workspace/my_image.jpg \
|
| 118 |
+
--config checkpoints/config.json \
|
| 119 |
+
--checkpoint checkpoints/protomorph_head.safetensors \
|
| 120 |
+
--labels checkpoints/labels.txt \
|
| 121 |
+
--topk 5
|
| 122 |
+
```
|
| 123 |
+
|
| 124 |
+
For plumbing tests without a trained checkpoint:
|
| 125 |
+
|
| 126 |
+
```bash
|
| 127 |
+
python infer.py --image /workspace/my_image.jpg --allow-random-head
|
| 128 |
+
```
|
| 129 |
+
|
| 130 |
+
## Switch to DINOv3 ViT-B/16
|
| 131 |
+
|
| 132 |
+
```bash
|
| 133 |
+
python scripts/create_random_head.py \
|
| 134 |
+
--dino-model-name facebook/dinov3-vitb16-pretrain-lvd1689m \
|
| 135 |
+
--embed-dim 768 \
|
| 136 |
+
--num-classes 10 \
|
| 137 |
+
--out-dir checkpoints_vitb
|
| 138 |
+
```
|
| 139 |
+
|
| 140 |
+
Then use `--config checkpoints_vitb/config.json --checkpoint checkpoints_vitb/protomorph_head.safetensors`.
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
## Upload to Hugging Face
|
| 144 |
+
|
| 145 |
+
After setting `hf_key` and `hf_repo` in RunPod, upload the model card, config, labels, custom head checkpoint, and related inference files with:
|
| 146 |
+
|
| 147 |
+
```bash
|
| 148 |
+
source .venv/bin/activate
|
| 149 |
+
python scripts/upload_to_hf.py --dry-run
|
| 150 |
+
python scripts/upload_to_hf.py
|
| 151 |
+
```
|
| 152 |
+
|
| 153 |
+
Or use the helper script:
|
| 154 |
+
|
| 155 |
+
```bash
|
| 156 |
+
bash runpod/upload_to_hf.sh
|
| 157 |
+
```
|
| 158 |
+
|
| 159 |
+
The target repo defaults to:
|
| 160 |
+
|
| 161 |
+
```text
|
| 162 |
+
shiowo/DINO-Protomorph
|
| 163 |
+
```
|
| 164 |
+
|
| 165 |
+
The upload includes `checkpoints/protomorph_head.safetensors`, but it does not include DINOv3 backbone weights. DINOv3 is loaded separately from Hugging Face during inference.
|
| 166 |
+
|
| 167 |
+
## What is actually saved in safetensors?
|
| 168 |
+
|
| 169 |
+
The `.safetensors` file stores the custom ProtoMorph head only. DINOv3 remains frozen and is loaded from Hugging Face cache. This keeps the experiment checkpoint small and avoids duplicating the foundation model weights.
|
| 170 |
+
|
| 171 |
+
## Files
|
| 172 |
+
|
| 173 |
+
- `src/protomorph/model.py`: architecture implementation
|
| 174 |
+
- `src/protomorph/config.py`: config dataclass
|
| 175 |
+
- `src/protomorph/inference.py`: image loading and prediction helpers
|
| 176 |
+
- `infer.py`: CLI inference
|
| 177 |
+
- `scripts/create_random_head.py`: initialize config + safetensors
|
| 178 |
+
- `scripts/smoke_test_head_only.py`: tests custom head without downloading DINOv3
|
| 179 |
+
- `scripts/upload_to_hf.py`: uploads model card/checkpoint/source files to Hugging Face
|
| 180 |
+
- `runpod/upload_to_hf.sh`: RunPod helper for `hf_key` and `hf_repo` env variables
|
| 181 |
+
- `notebooks/ProtoMorph_DINOv3_Inference.ipynb`: Jupyter inference notebook
|
checkpoints/config.json
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dino_model_name": "facebook/dinov3-vits16-pretrain-lvd1689m",
|
| 3 |
+
"num_classes": 10,
|
| 4 |
+
"embed_dim": 384,
|
| 5 |
+
"patch_size": 16,
|
| 6 |
+
"proto_count": 64,
|
| 7 |
+
"memory_tokens": 16,
|
| 8 |
+
"rbf_count": 128,
|
| 9 |
+
"num_heads": 8,
|
| 10 |
+
"dropout": 0.0,
|
| 11 |
+
"hard_pmax_threshold": 0.65,
|
| 12 |
+
"hard_margin_threshold": 0.15,
|
| 13 |
+
"hard_entropy_threshold": 1.35,
|
| 14 |
+
"image_size": 512,
|
| 15 |
+
"use_bf16_autocast": true,
|
| 16 |
+
"normalize_patch_tokens": true
|
| 17 |
+
}
|
checkpoints/labels.txt
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
class_0
|
| 2 |
+
class_1
|
| 3 |
+
class_2
|
| 4 |
+
class_3
|
| 5 |
+
class_4
|
| 6 |
+
class_5
|
| 7 |
+
class_6
|
| 8 |
+
class_7
|
| 9 |
+
class_8
|
| 10 |
+
class_9
|
checkpoints/protomorph_head.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ab309078e2db41027fe0148415bb3e2e8e3e6059e0dd89633fd3476d0b72bebe
|
| 3 |
+
size 32451516
|
config.json
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dino_model_name": "facebook/dinov3-vits16-pretrain-lvd1689m",
|
| 3 |
+
"num_classes": 10,
|
| 4 |
+
"embed_dim": 384,
|
| 5 |
+
"patch_size": 16,
|
| 6 |
+
"proto_count": 64,
|
| 7 |
+
"memory_tokens": 16,
|
| 8 |
+
"rbf_count": 128,
|
| 9 |
+
"num_heads": 8,
|
| 10 |
+
"dropout": 0.0,
|
| 11 |
+
"hard_pmax_threshold": 0.65,
|
| 12 |
+
"hard_margin_threshold": 0.15,
|
| 13 |
+
"hard_entropy_threshold": 1.35,
|
| 14 |
+
"image_size": 512,
|
| 15 |
+
"use_bf16_autocast": true,
|
| 16 |
+
"normalize_patch_tokens": true
|
| 17 |
+
}
|
infer.py
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import argparse
|
| 4 |
+
import json
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
|
| 7 |
+
import torch
|
| 8 |
+
|
| 9 |
+
from src.protomorph.inference import build_model, load_labels, predict_paths
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def parse_args() -> argparse.Namespace:
    """Build and parse the command-line arguments for the inference CLI."""
    parser = argparse.ArgumentParser(description="ProtoMorph-DINOv3 inference CLI")
    parser.add_argument("--config", default="checkpoints/config.json")
    parser.add_argument("--checkpoint", default="checkpoints/protomorph_head.safetensors")
    parser.add_argument("--labels", default=None, help="txt/json labels. Defaults to class_0..class_N")
    parser.add_argument("--image", action="append", required=True, help="Image path. Repeat for batch inference.")
    parser.add_argument("--topk", type=int, default=5)
    parser.add_argument("--device", default="cuda")
    parser.add_argument("--force-hard", action="store_true", help="Always run/fuse hard expert branch.")
    parser.add_argument("--local-files-only", action="store_true")
    parser.add_argument("--allow-random-head", action="store_true", help="Smoke test only; logits are random.")
    parser.add_argument("--output", default=None, help="Optional JSON output path")
    return parser.parse_args()
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def main() -> None:
    """CLI entry point: load the model, run inference, print and optionally save JSON results."""
    args = parse_args()
    # Fall back to CPU when a CUDA device was requested but none is available.
    if args.device == "cpu" or torch.cuda.is_available():
        device = args.device
    else:
        device = "cpu"
    model = build_model(
        args.config,
        args.checkpoint,
        device=device,
        local_files_only=args.local_files_only,
        allow_random_head=args.allow_random_head,
    )
    labels = load_labels(args.labels, model.cfg.num_classes)
    results = predict_paths(model, args.image, labels, topk=args.topk, device=device, force_hard=args.force_hard)
    text = json.dumps(results, indent=2)
    print(text)
    if args.output:
        out_path = Path(args.output)
        out_path.parent.mkdir(parents=True, exist_ok=True)
        out_path.write_text(text + "\n")
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
if __name__ == "__main__":
|
| 47 |
+
main()
|
labels.txt
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
class_0
|
| 2 |
+
class_1
|
| 3 |
+
class_2
|
| 4 |
+
class_3
|
| 5 |
+
class_4
|
| 6 |
+
class_5
|
| 7 |
+
class_6
|
| 8 |
+
class_7
|
| 9 |
+
class_8
|
| 10 |
+
class_9
|
notebooks/ProtoMorph_DINOv3_Inference.ipynb
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"metadata": {},
|
| 6 |
+
"source": [
|
| 7 |
+
"# ProtoMorph-DINOv3 Inference Notebook\n",
|
| 8 |
+
"\n",
|
| 9 |
+
"This notebook loads a frozen DINOv3 backbone, the custom ProtoMorph head from `safetensors`, and runs the hard-case gated inference path.\n"
|
| 10 |
+
]
|
| 11 |
+
},
|
| 12 |
+
{
|
| 13 |
+
"cell_type": "code",
|
| 14 |
+
"execution_count": null,
|
| 15 |
+
"metadata": {},
|
| 16 |
+
"outputs": [],
|
| 17 |
+
"source": [
|
| 18 |
+
"import os, sys, json, torch\n",
|
| 19 |
+
"from pathlib import Path\n",
|
| 20 |
+
"ROOT = Path('/workspace/protomorph_dinov3_runpod') if Path('/workspace/protomorph_dinov3_runpod').exists() else Path.cwd().parent\n",
|
| 21 |
+
"os.chdir(ROOT)\n",
|
| 22 |
+
"sys.path.insert(0, str(ROOT))\n",
|
| 23 |
+
"print('cwd:', ROOT)\n",
|
| 24 |
+
"print('torch:', torch.__version__)\n",
|
| 25 |
+
"print('cuda available:', torch.cuda.is_available())\n",
|
| 26 |
+
"if torch.cuda.is_available():\n",
|
| 27 |
+
" print('gpu:', torch.cuda.get_device_name(0))\n",
|
| 28 |
+
" print('torch cuda runtime:', torch.version.cuda)\n"
|
| 29 |
+
]
|
| 30 |
+
},
|
| 31 |
+
{
|
| 32 |
+
"cell_type": "markdown",
|
| 33 |
+
"metadata": {},
|
| 34 |
+
"source": [
|
| 35 |
+
"## Create a random head checkpoint for smoke testing\n",
|
| 36 |
+
"Run this once. Random logits are not meaningful; this just proves the pipeline works.\n"
|
| 37 |
+
]
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"cell_type": "code",
|
| 41 |
+
"execution_count": null,
|
| 42 |
+
"metadata": {},
|
| 43 |
+
"outputs": [],
|
| 44 |
+
"source": [
|
| 45 |
+
"!python scripts/create_random_head.py --num-classes 10 --out-dir checkpoints\n"
|
| 46 |
+
]
|
| 47 |
+
},
|
| 48 |
+
{
|
| 49 |
+
"cell_type": "code",
|
| 50 |
+
"execution_count": null,
|
| 51 |
+
"metadata": {},
|
| 52 |
+
"outputs": [],
|
| 53 |
+
"source": [
|
| 54 |
+
"from src.protomorph.inference import build_model, load_labels, predict_paths\n",
|
| 55 |
+
"CONFIG = 'checkpoints/config.json'\n",
|
| 56 |
+
"CKPT = 'checkpoints/protomorph_head.safetensors'\n",
|
| 57 |
+
"LABELS = 'checkpoints/labels.txt'\n",
|
| 58 |
+
"device = 'cuda' if torch.cuda.is_available() else 'cpu'\n",
|
| 59 |
+
"labels = load_labels(LABELS, num_classes=json.load(open(CONFIG))['num_classes'])\n",
|
| 60 |
+
"model = build_model(CONFIG, CKPT, device=device, allow_random_head=False)\n",
|
| 61 |
+
"print('loaded model on', device)\n"
|
| 62 |
+
]
|
| 63 |
+
},
|
| 64 |
+
{
|
| 65 |
+
"cell_type": "markdown",
|
| 66 |
+
"metadata": {},
|
| 67 |
+
"source": [
|
| 68 |
+
"## Run inference\n",
|
| 69 |
+
"Set `IMAGE_PATH` to an image on your RunPod volume.\n"
|
| 70 |
+
]
|
| 71 |
+
},
|
| 72 |
+
{
|
| 73 |
+
"cell_type": "code",
|
| 74 |
+
"execution_count": null,
|
| 75 |
+
"metadata": {},
|
| 76 |
+
"outputs": [],
|
| 77 |
+
"source": [
|
| 78 |
+
"from PIL import Image\n",
|
| 79 |
+
"import matplotlib.pyplot as plt\n",
|
| 80 |
+
"IMAGE_PATH = '/workspace/my_image.jpg' # change this\n",
|
| 81 |
+
"img = Image.open(IMAGE_PATH).convert('RGB')\n",
|
| 82 |
+
"plt.imshow(img); plt.axis('off');\n"
|
| 83 |
+
]
|
| 84 |
+
},
|
| 85 |
+
{
|
| 86 |
+
"cell_type": "code",
|
| 87 |
+
"execution_count": null,
|
| 88 |
+
"metadata": {},
|
| 89 |
+
"outputs": [],
|
| 90 |
+
"source": [
|
| 91 |
+
"results = predict_paths(model, [IMAGE_PATH], labels, topk=5, device=device)\n",
|
| 92 |
+
"print(json.dumps(results[0], indent=2))\n"
|
| 93 |
+
]
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"cell_type": "markdown",
|
| 97 |
+
"metadata": {},
|
| 98 |
+
"source": [
|
| 99 |
+
"## Force hard branch for debugging\n",
|
| 100 |
+
"This runs the feedback + Delta-RBF expert even if the gate says the image is easy.\n"
|
| 101 |
+
]
|
| 102 |
+
},
|
| 103 |
+
{
|
| 104 |
+
"cell_type": "code",
|
| 105 |
+
"execution_count": null,
|
| 106 |
+
"metadata": {},
|
| 107 |
+
"outputs": [],
|
| 108 |
+
"source": [
|
| 109 |
+
"debug_results = predict_paths(model, [IMAGE_PATH], labels, topk=5, device=device, force_hard=True)\n",
|
| 110 |
+
"print(json.dumps(debug_results[0], indent=2))\n"
|
| 111 |
+
]
|
| 112 |
+
}
|
| 113 |
+
],
|
| 114 |
+
"metadata": {
|
| 115 |
+
"kernelspec": {
|
| 116 |
+
"display_name": "Python 3",
|
| 117 |
+
"language": "python",
|
| 118 |
+
"name": "python3"
|
| 119 |
+
},
|
| 120 |
+
"language_info": {
|
| 121 |
+
"name": "python",
|
| 122 |
+
"version": "3.11"
|
| 123 |
+
}
|
| 124 |
+
},
|
| 125 |
+
"nbformat": 4,
|
| 126 |
+
"nbformat_minor": 5
|
| 127 |
+
}
|
pyproject.toml
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[project]
|
| 2 |
+
name = "protomorph-dinov3-runpod"
|
| 3 |
+
version = "0.1.0"
|
| 4 |
+
description = "Experimental ProtoMorph + frozen DINOv3 hard-case inference scaffold"
|
| 5 |
+
requires-python = ">=3.11"
|
| 6 |
+
|
| 7 |
+
[tool.setuptools]
|
| 8 |
+
packages = ["src.protomorph"]
|
requirements-core.txt
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
transformers>=4.56.0,<5
|
| 2 |
+
safetensors>=0.4.5
|
| 3 |
+
accelerate>=0.33.0
|
| 4 |
+
pillow>=10.0.0
|
| 5 |
+
numpy>=1.26.0
|
| 6 |
+
tqdm>=4.66.0
|
| 7 |
+
jupyterlab>=4.2.0
|
| 8 |
+
ipywidgets>=8.1.0
|
| 9 |
+
matplotlib>=3.8.0
|
| 10 |
+
huggingface_hub>=0.24.0
|
runpod/setup_runpod.sh
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
# One-time RunPod setup: create a venv, install PyTorch + deps, map env vars,
# then create a random head checkpoint and run the head-only smoke test.
set -euo pipefail

PROJECT_DIR="${PROJECT_DIR:-/workspace/protomorph_dinov3_runpod}"
cd "$PROJECT_DIR"

# Fresh virtualenv with up-to-date packaging tools.
python3.11 -m venv .venv
source .venv/bin/activate
python -m pip install --upgrade pip wheel setuptools

# PyTorch 2.4.0 does not have official CUDA 13 wheels. Use cu124 on RunPod/RTX 3090.
pip install torch==2.4.0 torchvision==0.19.0 torchaudio==2.4.0 --index-url https://download.pytorch.org/whl/cu124
pip install -r requirements-core.txt

# Map RunPod variable names to standard Hugging Face names for download/upload tools.
if [[ -n "${hf_key:-}" && -z "${HF_TOKEN:-}" ]]; then export HF_TOKEN="$hf_key"; fi
if [[ -n "${hf_repo:-}" && -z "${HF_REPO_ID:-}" ]]; then export HF_REPO_ID="$hf_repo"; fi

mkdir -p "${HF_HOME:-/workspace/hf_cache}"
python scripts/create_random_head.py --num-classes 10 --out-dir checkpoints
python scripts/smoke_test_head_only.py

echo
echo "Setup complete."
echo "To start Jupyter: bash runpod/start_jupyter.sh"
echo "To upload to Hugging Face: bash runpod/upload_to_hf.sh"
|
runpod/start_jupyter.sh
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
# Start JupyterLab on the RunPod instance with HF env mapping and a fixed token.
set -euo pipefail

PROJECT_DIR="${PROJECT_DIR:-/workspace/protomorph_dinov3_runpod}"
cd "$PROJECT_DIR"
source .venv/bin/activate

# Map RunPod-style env names to the standard Hugging Face ones.
if [[ -n "${hf_key:-}" && -z "${HF_TOKEN:-}" ]]; then
  export HF_TOKEN="$hf_key"
fi
if [[ -n "${hf_repo:-}" && -z "${HF_REPO_ID:-}" ]]; then
  export HF_REPO_ID="$hf_repo"
fi

export HF_HOME="${HF_HOME:-/workspace/hf_cache}"
export JUPYTER_TOKEN="${JUPYTER_TOKEN:-protomorph}"

echo "Jupyter token: $JUPYTER_TOKEN"
# JupyterLab 4 / jupyter-server 2 reads --IdentityProvider.token; the legacy
# --NotebookApp.token spelling is deprecated and may be silently ignored,
# leaving the server protected by a random token instead of $JUPYTER_TOKEN.
jupyter lab --ip=0.0.0.0 --port=8888 --no-browser --allow-root --IdentityProvider.token="$JUPYTER_TOKEN"
|
runpod/upload_to_hf.sh
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
# Helper: push the project (model card, config, labels, head checkpoint, sources)
# to the Hugging Face Hub using the Python uploader.
set -euo pipefail

PROJECT_DIR="${PROJECT_DIR:-/workspace/protomorph_dinov3_runpod}"
cd "$PROJECT_DIR"

# Use the project venv when present.
if [[ -d .venv ]]; then source .venv/bin/activate; fi

# Map the user's RunPod env names to Hugging Face standard names for tools
# that only look for HF_TOKEN/HF_REPO_ID.
if [[ -n "${hf_key:-}" && -z "${HF_TOKEN:-}" ]]; then export HF_TOKEN="$hf_key"; fi
if [[ -n "${hf_repo:-}" && -z "${HF_REPO_ID:-}" ]]; then export HF_REPO_ID="$hf_repo"; fi

python scripts/upload_to_hf.py \
  --repo-id "${HF_REPO_ID:-${hf_repo:-shiowo/DINO-Protomorph}}" \
  --commit-message "${HF_COMMIT_MESSAGE:-Upload ProtoMorph-DINO scaffold and checkpoint}"
|
scripts/create_random_head.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import argparse
|
| 4 |
+
import sys
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
|
| 7 |
+
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def parse_args() -> argparse.Namespace:
    """Build and parse the CLI arguments for checkpoint-bundle creation."""
    parser = argparse.ArgumentParser(description="Create initial ProtoMorph custom-head safetensors checkpoint")
    parser.add_argument("--out-dir", default="checkpoints")
    parser.add_argument("--dino-model-name", default="facebook/dinov3-vits16-pretrain-lvd1689m")
    parser.add_argument("--num-classes", type=int, default=10)
    parser.add_argument("--embed-dim", type=int, default=None)
    parser.add_argument("--image-size", type=int, default=512)
    parser.add_argument("--proto-count", type=int, default=64)
    parser.add_argument("--memory-tokens", type=int, default=16)
    parser.add_argument("--rbf-count", type=int, default=128)
    parser.add_argument("--num-heads", type=int, default=8)
    parser.add_argument("--force", action="store_true", help="Overwrite existing config/checkpoint/labels")
    return parser.parse_args()
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def main() -> None:
    """Create a freshly initialized (random) ProtoMorph head checkpoint bundle.

    Writes config.json, protomorph_head.safetensors, and labels.txt into
    --out-dir. The head is randomly initialized, so it is only useful for
    plumbing/smoke tests until trained.
    """
    args = parse_args()
    out_dir = Path(args.out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)
    cfg_path = out_dir / "config.json"
    ckpt_path = out_dir / "protomorph_head.safetensors"
    labels_path = out_dir / "labels.txt"

    # Idempotent by default: refuse to clobber an existing bundle.
    if not args.force and cfg_path.exists() and ckpt_path.exists() and labels_path.exists():
        print(f"Existing checkpoint bundle found in {out_dir}; not overwriting. Pass --force to recreate it.")
        return

    # Deferred imports so --help and the overwrite check work without torch installed.
    from safetensors.torch import save_file
    from src.protomorph.config import ProtoMorphConfig
    from src.protomorph.model import ProtoMorphHead, infer_embed_dim_from_model_name

    # Derive the embedding width from the backbone name unless overridden.
    embed_dim = args.embed_dim or infer_embed_dim_from_model_name(args.dino_model_name)
    cfg = ProtoMorphConfig(
        dino_model_name=args.dino_model_name,
        num_classes=args.num_classes,
        embed_dim=embed_dim,
        image_size=args.image_size,
        proto_count=args.proto_count,
        memory_tokens=args.memory_tokens,
        rbf_count=args.rbf_count,
        num_heads=args.num_heads,
    )
    head = ProtoMorphHead(cfg)
    cfg.to_json(cfg_path)
    save_file(head.state_dict(), str(ckpt_path))
    labels_path.write_text("\n".join(f"class_{i}" for i in range(args.num_classes)) + "\n")
    print(f"Wrote {cfg_path}")
    print(f"Wrote {ckpt_path}")
    # Previously labels.txt was written but never reported; list all three outputs.
    print(f"Wrote {labels_path}")
    print("Important: this is a random head for plumbing/smoke tests. Train it before real predictions.")
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
if __name__ == "__main__":
|
| 62 |
+
main()
|
scripts/smoke_test_head_only.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import sys
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
|
| 6 |
+
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
|
| 7 |
+
|
| 8 |
+
import torch
|
| 9 |
+
|
| 10 |
+
from src.protomorph.config import ProtoMorphConfig
|
| 11 |
+
from src.protomorph.model import ProtoMorphHead
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def main() -> None:
    """Forward a tiny random batch through ProtoMorphHead on CPU and sanity-check output shapes."""
    torch.set_num_threads(1)
    # Deliberately tiny dimensions so the test runs fast without a GPU.
    small = dict(num_classes=7, embed_dim=32, proto_count=8, memory_tokens=4, rbf_count=16, num_heads=4)
    cfg = ProtoMorphConfig(**small)
    head = ProtoMorphHead(cfg).eval()
    batch = 2
    cls_tokens = torch.randn(batch, cfg.embed_dim)
    patch_tokens = torch.randn(batch, 8 * 8, cfg.embed_dim)
    with torch.no_grad():
        out = head(cls_tokens, patch_tokens)
    assert out["logits"].shape == (batch, cfg.num_classes)
    assert out["hard_mask"].shape == (batch,)
    print("OK head-only smoke test", out["logits"].shape, out["hard_mask"].tolist())
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
if __name__ == "__main__":
|
| 35 |
+
main()
|
scripts/upload_to_hf.py
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
|
| 4 |
+
import argparse
|
| 5 |
+
import os
|
| 6 |
+
import sys
|
| 7 |
+
from pathlib import Path
|
| 8 |
+
from typing import Iterable, List
|
| 9 |
+
|
| 10 |
+
# Allow running from the repo root without installing the package.
|
| 11 |
+
ROOT = Path(__file__).resolve().parents[1]
|
| 12 |
+
if str(ROOT) not in sys.path:
|
| 13 |
+
sys.path.insert(0, str(ROOT))
|
| 14 |
+
|
| 15 |
+
from src.protomorph.hf_utils import get_hf_repo_id, get_hf_token, normalize_repo_id
|
| 16 |
+
|
| 17 |
+
DEFAULT_REPO = "shiowo/DINO-Protomorph"
|
| 18 |
+
REQUIRED_FILES = [
|
| 19 |
+
"README.md",
|
| 20 |
+
"checkpoints/config.json",
|
| 21 |
+
"checkpoints/labels.txt",
|
| 22 |
+
"checkpoints/protomorph_head.safetensors",
|
| 23 |
+
"src/protomorph/model.py",
|
| 24 |
+
"src/protomorph/config.py",
|
| 25 |
+
"infer.py",
|
| 26 |
+
]
|
| 27 |
+
IGNORE_PATTERNS = [
|
| 28 |
+
".git/*",
|
| 29 |
+
".venv/*",
|
| 30 |
+
"venv/*",
|
| 31 |
+
"env/*",
|
| 32 |
+
"__pycache__/*",
|
| 33 |
+
"**/__pycache__/*",
|
| 34 |
+
"*.pyc",
|
| 35 |
+
".ipynb_checkpoints/*",
|
| 36 |
+
"**/.ipynb_checkpoints/*",
|
| 37 |
+
".cache/*",
|
| 38 |
+
"hf_cache/*",
|
| 39 |
+
"outputs/*",
|
| 40 |
+
"wandb/*",
|
| 41 |
+
"data/*",
|
| 42 |
+
"datasets/*",
|
| 43 |
+
"*.zip",
|
| 44 |
+
"*.tar",
|
| 45 |
+
"*.tar.gz",
|
| 46 |
+
"*.7z",
|
| 47 |
+
]
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def human_size(n: int) -> str:
    """Format a byte count as a short human-readable string (e.g. '30.9 MB')."""
    value = float(n)
    for unit in ("B", "KB", "MB", "GB", "TB"):
        # Stop at the first unit under 1024, or at TB regardless of magnitude.
        if value < 1024 or unit == "TB":
            # Bytes are shown as an integer; larger units get one decimal place.
            return f"{int(value)} B" if unit == "B" else f"{value:.1f} {unit}"
        value /= 1024
    return f"{n} B"  # unreachable; kept for parity with the original fallback
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def iter_upload_files(source: Path, ignore_dirs: Iterable[str]) -> List[Path]:
    """Collect files under `source` for upload, skipping ignored directories and archives.

    Only *directory* components of the relative path are matched against
    `ignore_dirs`, so a regular file that happens to share a name with an
    ignored directory (e.g. a file literally named `data`) is still included.
    The suffix skip-list matches IGNORE_PATTERNS (.pyc/.zip/.7z/.tar/.tar.gz).

    Returns the sorted list of paths relative to `source`.
    """
    ignore_dir_names = set(ignore_dirs)
    skip_suffixes = (".pyc", ".zip", ".7z", ".tar", ".tar.gz")
    files: List[Path] = []
    for path in source.rglob("*"):
        if path.is_dir():
            continue
        rel = path.relative_to(source)
        # rel.parts[-1] is the filename itself; only filter the parent dirs.
        if ignore_dir_names.intersection(rel.parts[:-1]):
            continue
        if rel.name.endswith(skip_suffixes):
            continue
        files.append(rel)
    return sorted(files)
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
def check_required(source: Path) -> None:
    """Raise FileNotFoundError listing every required upload file absent under `source`."""
    missing = [rel for rel in REQUIRED_FILES if not (source / rel).exists()]
    if not missing:
        return
    joined = "\n - ".join(missing)
    raise FileNotFoundError(f"Missing required files for HF upload:\n - {joined}")
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
def parse_args() -> argparse.Namespace:
    """Build and parse the CLI arguments for the Hugging Face uploader."""
    parser = argparse.ArgumentParser(description="Upload ProtoMorph-DINO files to a Hugging Face model repo.")
    parser.add_argument("--source", default=".", help="Folder to upload. Default: current project root.")
    parser.add_argument("--repo-id", default=None, help="HF repo id or URL. Default: env hf_repo/HF_REPO_ID, then shiowo/DINO-Protomorph.")
    parser.add_argument("--token", default=None, help="HF token. Default: env hf_key/HF_TOKEN/etc. Do not paste this into logs.")
    parser.add_argument("--revision", default="main", help="Target branch/revision. Default: main.")
    parser.add_argument("--private", action="store_true", help="Create repo as private if it does not exist yet.")
    parser.add_argument("--no-create", action="store_true", help="Do not create the repo if missing.")
    parser.add_argument("--dry-run", action="store_true", help="Print what would be uploaded, then exit.")
    parser.add_argument("--commit-message", default="Upload ProtoMorph-DINO scaffold and checkpoint", help="HF commit message.")
    return parser.parse_args()
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
def main() -> None:
    """Validate the source tree, then upload it to the Hugging Face Hub.

    Honors RunPod-style env vars (hf_key/hf_repo) as well as the standard
    HF_TOKEN/HF_REPO_ID names. With --dry-run, prints the file list and exits
    before requiring a token or touching the network.
    """
    args = parse_args()
    source = Path(args.source).resolve()
    if not source.exists() or not source.is_dir():
        raise NotADirectoryError(f"Source folder does not exist: {source}")

    # CLI flag wins; otherwise fall back to env vars, then the project default.
    repo_id = normalize_repo_id(args.repo_id) if args.repo_id else get_hf_repo_id(DEFAULT_REPO)
    token = args.token or get_hf_token()

    check_required(source)

    skip_dirs = {
        ".git", ".venv", "venv", "env", "__pycache__", ".ipynb_checkpoints",
        ".cache", "hf_cache", "outputs", "wandb", "data", "datasets",
    }
    files = iter_upload_files(source, ignore_dirs=skip_dirs)
    total_bytes = sum((source / f).stat().st_size for f in files)

    print(f"HF repo: {repo_id}")
    print(f"Source: {source}")
    print(f"Files: {len(files)} files, {human_size(total_bytes)}")
    # Never print the token itself, only whether one was found.
    print("Token: " + ("found" if token else "missing"))

    if args.dry_run:
        print("\nDry run file list:")
        for rel in files:
            print(f"  {rel}")
        print("\nNo upload performed.")
        return

    if not token:
        raise RuntimeError(
            "No Hugging Face token found. In RunPod environment variables, set `hf_key=hf_xxx`, "
            "or set standard `HF_TOKEN=hf_xxx`."
        )

    try:
        from huggingface_hub import HfApi
    except ImportError as e:
        raise ImportError("Install huggingface_hub first: pip install huggingface_hub") from e

    api = HfApi(token=token)
    if not args.no_create:
        api.create_repo(repo_id=repo_id, repo_type="model", private=args.private, exist_ok=True)

    api.upload_folder(
        folder_path=str(source),
        repo_id=repo_id,
        repo_type="model",
        revision=args.revision,
        commit_message=args.commit_message,
        ignore_patterns=IGNORE_PATTERNS,
    )
    print(f"\nUpload complete: https://huggingface.co/{repo_id}")
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
if __name__ == "__main__":
|
| 149 |
+
main()
|
src/protomorph/__init__.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Public package surface for src.protomorph.
# All re-exports are grouped before __all__ so the declared API matches the
# imports (previously the hf_utils names were imported after __all__ and
# omitted from it).
from .config import ProtoMorphConfig
from .hf_utils import get_hf_repo_id, get_hf_token, normalize_repo_id
from .inference import build_model, load_labels, predict_paths
from .model import ProtoMorphDINOv3, ProtoMorphHead, infer_embed_dim_from_model_name

__all__ = [
    "ProtoMorphConfig",
    "ProtoMorphDINOv3",
    "ProtoMorphHead",
    "infer_embed_dim_from_model_name",
    "build_model",
    "predict_paths",
    "load_labels",
    "get_hf_token",
    "get_hf_repo_id",
    "normalize_repo_id",
]
|
src/protomorph/config.py
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from dataclasses import asdict, dataclass
|
| 4 |
+
import json
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
from typing import Any, Dict
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
@dataclass
class ProtoMorphConfig:
    """Configuration for the custom ProtoMorph head.

    The frozen DINOv3 backbone is loaded separately from Hugging Face. The
    safetensors checkpoint stores only the trainable experimental head, which is
    what you will train/tune for your dataset.
    """

    # Backbone / task
    dino_model_name: str = "facebook/dinov3-vits16-pretrain-lvd1689m"
    num_classes: int = 10
    embed_dim: int = 384
    patch_size: int = 16

    # ProtoMorph blocks
    proto_count: int = 64
    memory_tokens: int = 16
    rbf_count: int = 128
    num_heads: int = 8
    dropout: float = 0.0

    # Hard-case gate thresholds
    hard_pmax_threshold: float = 0.65
    hard_margin_threshold: float = 0.15
    hard_entropy_threshold: float = 1.35

    # Inference / performance knobs
    image_size: int = 512
    use_bf16_autocast: bool = True
    normalize_patch_tokens: bool = True

    @classmethod
    def from_json(cls, path: str | Path) -> "ProtoMorphConfig":
        """Load a config from a JSON file, ignoring keys this version does not declare.

        Filtering unknown keys keeps older code able to read configs written by
        newer project versions instead of crashing in the dataclass constructor.
        """
        data = json.loads(Path(path).read_text())
        known = set(cls.__dataclass_fields__)
        return cls(**{k: v for k, v in data.items() if k in known})

    def to_dict(self) -> Dict[str, Any]:
        """Return the config as a plain JSON-serializable dict."""
        return asdict(self)

    def to_json(self, path: str | Path) -> None:
        """Write the config as pretty-printed JSON, creating parent dirs as needed."""
        p = Path(path)
        p.parent.mkdir(parents=True, exist_ok=True)
        p.write_text(json.dumps(self.to_dict(), indent=2) + "\n")
|
src/protomorph/hf_utils.py
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
from typing import Iterable, Optional
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
HF_TOKEN_ENV_NAMES = (
|
| 8 |
+
"hf_key", # RunPod env name used by this project
|
| 9 |
+
"HF_TOKEN", # Hugging Face standard
|
| 10 |
+
"HUGGINGFACE_HUB_TOKEN",
|
| 11 |
+
"HUGGING_FACE_HUB_TOKEN",
|
| 12 |
+
"HUGGINGFACE_TOKEN",
|
| 13 |
+
"HF_API_TOKEN",
|
| 14 |
+
)
|
| 15 |
+
|
| 16 |
+
HF_REPO_ENV_NAMES = (
|
| 17 |
+
"hf_repo", # RunPod env name used by this project
|
| 18 |
+
"HF_REPO",
|
| 19 |
+
"HF_REPO_ID",
|
| 20 |
+
"HUGGINGFACE_REPO",
|
| 21 |
+
"HUGGINGFACE_REPO_ID",
|
| 22 |
+
)
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def first_env(names: Iterable[str]) -> Optional[str]:
    """Return the first non-empty environment variable value from names."""
    for candidate in names:
        raw = os.environ.get(candidate)
        if raw is None:
            continue
        stripped = raw.strip()
        if stripped:
            return stripped
    return None
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def get_hf_token() -> Optional[str]:
    """Read a Hugging Face token from common env names.

    RunPod users can set `hf_key=hf_...`. This helper maps that to the token
    argument used by `transformers` and `huggingface_hub` without printing it.
    """
    token = first_env(HF_TOKEN_ENV_NAMES)
    return token
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def normalize_repo_id(repo_id_or_url: str) -> str:
    """Accept `shiowo/DINO-Protomorph` or full HF URLs and return a repo_id.

    Handles http(s)://huggingface.co/ prefixes, a leading `models/` path
    segment, and /tree/, /blob/, /resolve/ suffixes that browser-copied URLs
    often carry (e.g. .../resolve/main/model.safetensors).
    """
    value = repo_id_or_url.strip()
    for prefix in (
        "https://huggingface.co/",
        "http://huggingface.co/",
        "huggingface.co/",
    ):
        if value.startswith(prefix):
            value = value[len(prefix):]
            break
    value = value.strip("/")
    if value.startswith("models/"):
        value = value[len("models/"):]
    # Strip any trailing revision/file path copied from the web UI.
    for marker in ("/tree/", "/blob/", "/resolve/"):
        if marker in value:
            value = value.split(marker, 1)[0]
    return value
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
def get_hf_repo_id(default: Optional[str] = None) -> Optional[str]:
    """Resolve the target repo id from env vars, falling back to *default*.

    Returns None when neither the environment nor *default* provides a value.
    """
    raw = first_env(HF_REPO_ENV_NAMES)
    if raw is None:
        raw = default
    if not raw:
        return None
    return normalize_repo_id(raw)
|
src/protomorph/inference.py
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
from typing import Dict, List, Optional, Sequence
|
| 6 |
+
|
| 7 |
+
import torch
|
| 8 |
+
from PIL import Image
|
| 9 |
+
|
| 10 |
+
from .config import ProtoMorphConfig
|
| 11 |
+
from .model import ProtoMorphDINOv3
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def load_image(path: str | Path) -> Image.Image:
    """Open *path* with PIL and return it converted to RGB."""
    img = Image.open(path)
    return img.convert("RGB")
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def load_labels(path: Optional[str | Path], num_classes: int) -> List[str]:
|
| 19 |
+
if path is None:
|
| 20 |
+
return [f"class_{i}" for i in range(num_classes)]
|
| 21 |
+
p = Path(path)
|
| 22 |
+
if p.suffix.lower() == ".json":
|
| 23 |
+
data = json.loads(p.read_text())
|
| 24 |
+
if isinstance(data, dict):
|
| 25 |
+
return [data.get(str(i), data.get(i, f"class_{i}")) for i in range(num_classes)]
|
| 26 |
+
return list(data)
|
| 27 |
+
labels = [line.strip() for line in p.read_text().splitlines() if line.strip()]
|
| 28 |
+
if len(labels) < num_classes:
|
| 29 |
+
labels += [f"class_{i}" for i in range(len(labels), num_classes)]
|
| 30 |
+
return labels[:num_classes]
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def build_model(
    config_path: str | Path,
    checkpoint_path: Optional[str | Path],
    device: str = "cuda",
    local_files_only: bool = False,
    allow_random_head: bool = False,
) -> ProtoMorphDINOv3:
    """Build the frozen-backbone model and load the trained head checkpoint.

    Falls back to CPU when the requested device is not "cpu" and CUDA is
    unavailable. Raises FileNotFoundError when the head checkpoint is missing,
    unless *allow_random_head* is set (smoke tests only).
    """
    cfg = ProtoMorphConfig.from_json(config_path)
    if device == "cpu" or torch.cuda.is_available():
        target = torch.device(device)
    else:
        target = torch.device("cpu")
    model = ProtoMorphDINOv3(cfg, local_files_only=local_files_only).to(target).eval()

    if checkpoint_path is not None and Path(checkpoint_path).exists():
        model.load_custom_head(checkpoint_path)
        return model
    if allow_random_head:
        return model
    raise FileNotFoundError(
        f"Missing custom-head checkpoint: {checkpoint_path}. "
        "Pass --allow-random-head only for smoke tests; random logits are not meaningful."
    )
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
@torch.no_grad()
def predict_paths(
    model: ProtoMorphDINOv3,
    image_paths: Sequence[str | Path],
    labels: List[str],
    topk: int = 5,
    device: str = "cuda",
    force_hard: bool = False,
) -> List[Dict]:
    """Run inference on image files and return one JSON-friendly dict per image.

    Each record carries the fused and main-head top-k predictions, the
    hard-case flag, the gate's confidence statistics, and the patch/pixel
    geometry reported by the backbone.
    """
    batch = [load_image(p) for p in image_paths]
    out = model(batch, device=device, force_hard=force_hard)

    fused_probs = out["logits"].softmax(dim=-1).float().cpu()
    main_probs = out["main_logits"].softmax(dim=-1).float().cpu()
    hard_flags = out["hard_mask"].cpu().tolist()
    pmax_vals = out["gate_pmax"].float().cpu().tolist()
    margin_vals = out["gate_margin"].float().cpu().tolist()
    entropy_vals = out["gate_entropy"].float().cpu().tolist()

    def _ranked(prob_row, k: int) -> List[Dict]:
        # One {rank, class_id, label, prob} entry per top-k class.
        vals, idxs = prob_row.topk(k)
        return [
            {"rank": r + 1, "class_id": int(ci), "label": labels[int(ci)], "prob": float(pv)}
            for r, (ci, pv) in enumerate(zip(idxs.tolist(), vals.tolist()))
        ]

    records: List[Dict] = []
    for i, path in enumerate(image_paths):
        k = min(topk, fused_probs.shape[-1])
        records.append(
            {
                "image": str(path),
                "hard_case": bool(hard_flags[i]),
                "gate": {
                    "pmax": float(pmax_vals[i]),
                    "margin": float(margin_vals[i]),
                    "entropy": float(entropy_vals[i]),
                },
                "topk": _ranked(fused_probs[i], k),
                "main_topk": _ranked(main_probs[i], k),
                "patch_hw": out["patch_hw"],
                "pixel_hw": out["pixel_hw"],
            }
        )
    return records
|
src/protomorph/model.py
ADDED
|
@@ -0,0 +1,420 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from dataclasses import dataclass
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
from typing import Dict, Iterable, List, Optional, Sequence, Tuple
|
| 6 |
+
|
| 7 |
+
import torch
|
| 8 |
+
from torch import Tensor, nn
|
| 9 |
+
import torch.nn.functional as F
|
| 10 |
+
from PIL import Image
|
| 11 |
+
|
| 12 |
+
try:
|
| 13 |
+
from safetensors.torch import load_file as safe_load_file
|
| 14 |
+
from safetensors.torch import save_file as safe_save_file
|
| 15 |
+
except Exception: # pragma: no cover - handled at runtime with better error.
|
| 16 |
+
safe_load_file = None
|
| 17 |
+
safe_save_file = None
|
| 18 |
+
|
| 19 |
+
try:
|
| 20 |
+
from transformers import AutoImageProcessor, AutoModel
|
| 21 |
+
except Exception: # pragma: no cover - handled at runtime with better error.
|
| 22 |
+
AutoImageProcessor = None
|
| 23 |
+
AutoModel = None
|
| 24 |
+
|
| 25 |
+
from .config import ProtoMorphConfig
|
| 26 |
+
from .hf_utils import get_hf_token
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
@dataclass
class DinoFeatures:
    """Token bundle produced by one frozen DINOv3 forward pass."""

    # CLS token (tokens[:, 0] of the backbone output).
    cls: Tensor
    # Register tokens, or None when the checkpoint has no register tokens.
    registers: Optional[Tensor]
    # Remaining per-patch tokens.
    patches: Tensor
    # Patch-grid size as (rows, cols); may be a fallback shape when the
    # token count does not match the pixel grid (see FrozenDINOv3.forward).
    patch_hw: Tuple[int, int]
    # Preprocessed input resolution in pixels as (height, width).
    pixel_hw: Tuple[int, int]
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
class FrozenDINOv3(nn.Module):
    """Hugging Face DINOv3 wrapper that returns CLS/register/patch tokens.

    DINOv3 is kept frozen. Use torch.autocast during forward for memory savings
    on RTX 3090; the custom head remains regular PyTorch modules.
    """

    def __init__(self, model_name: str, image_size: int = 512, local_files_only: bool = False):
        """Load the HF processor and model; freeze all backbone weights.

        Raises ImportError when `transformers` is unavailable (the optional
        import at module top sets AutoImageProcessor/AutoModel to None).
        """
        super().__init__()
        if AutoImageProcessor is None or AutoModel is None:
            raise ImportError(
                "transformers is required. Install transformers>=4.56.0 before loading DINOv3."
            )
        self.model_name = model_name
        self.image_size = image_size
        hf_token = get_hf_token()
        hf_kwargs = {"local_files_only": local_files_only}
        if hf_token:
            # Supports RunPod env variable `hf_key` as well as standard HF_TOKEN.
            hf_kwargs["token"] = hf_token
        self.processor = AutoImageProcessor.from_pretrained(model_name, **hf_kwargs)
        self.model = AutoModel.from_pretrained(model_name, **hf_kwargs)
        # Frozen backbone: eval mode, no gradients anywhere.
        self.model.eval().requires_grad_(False)

        # Cached config values; defaults are fallbacks for configs that omit
        # the attribute (patch_size=16 matches common DINOv3 ViT checkpoints).
        config = self.model.config
        self.patch_size = int(getattr(config, "patch_size", 16))
        self.hidden_size = int(getattr(config, "hidden_size", 0))
        self.num_register_tokens = int(getattr(config, "num_register_tokens", 0))

    def _prepare_images(self, images: Image.Image | Sequence[Image.Image]) -> Dict[str, Tensor]:
        """Preprocess one image or a sequence into a pixel_values batch."""
        if isinstance(images, Image.Image):
            images = [images]
        # HF processors support overriding target size at call time for ViT-like image processors.
        # We request a square size that is divisible by patch_size for clean patch grids.
        size = {"height": self.image_size, "width": self.image_size}
        return self.processor(images=list(images), return_tensors="pt", size=size)

    @torch.no_grad()
    def forward(self, images: Image.Image | Sequence[Image.Image], device: torch.device | str) -> DinoFeatures:
        """Encode images and split the token sequence into CLS/registers/patches.

        Token layout assumed: [CLS, registers..., patches...] — TODO confirm
        this holds for every DINOv3 checkpoint variant used with this wrapper.
        """
        inputs = self._prepare_images(images)
        pixel_values = inputs["pixel_values"].to(device, non_blocking=True)
        outputs = self.model(pixel_values=pixel_values)

        tokens = outputs.last_hidden_state
        cls = tokens[:, 0]
        reg_start = 1
        reg_end = 1 + self.num_register_tokens
        registers = tokens[:, reg_start:reg_end] if self.num_register_tokens > 0 else None
        patches = tokens[:, reg_end:]

        # Derive the patch grid from the actual pixel resolution.
        h, w = pixel_values.shape[-2:]
        ph, pw = h // self.patch_size, w // self.patch_size
        expected = ph * pw
        if patches.shape[1] != expected:
            # Fallback for processors/checkpoints that return a non-square crop or resize.
            # This keeps inference running and makes the mismatch visible to the caller.
            side = int(patches.shape[1] ** 0.5)
            if side * side == patches.shape[1]:
                ph, pw = side, side
            else:
                ph, pw = patches.shape[1], 1
        return DinoFeatures(cls=cls, registers=registers, patches=patches, patch_hw=(ph, pw), pixel_hw=(h, w))
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
class FeedForward(nn.Module):
    """Position-wise MLP: Linear -> GELU -> Linear with dropout on both halves.

    Submodule layout (`self.net`) is kept stable so existing checkpoints load.
    """

    def __init__(self, dim: int, expansion: int = 4, dropout: float = 0.0):
        super().__init__()
        inner = dim * expansion
        self.net = nn.Sequential(
            nn.Linear(dim, inner),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(inner, dim),
            nn.Dropout(dropout),
        )

    def forward(self, x: Tensor) -> Tensor:
        """Apply the MLP; output shape matches the input's."""
        return self.net(x)
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
class ProtoMorphBlock(nn.Module):
    """Prototype-morphing residual block over DINO patch tokens.

    It computes soft assignment of each patch token to learnable prototypes, then
    mixes original token, nearest prototype context, difference, and product.
    This creates a lightweight nonstandard CNN replacement over patch embeddings.
    """

    def __init__(self, dim: int, proto_count: int, dropout: float = 0.0):
        super().__init__()
        self.norm = nn.LayerNorm(dim)
        # Learnable prototype bank [proto_count, dim]; small init (0.02 std).
        self.prototypes = nn.Parameter(torch.randn(proto_count, dim) * 0.02)
        # softplus(log_temperature) keeps the assignment temperature positive.
        self.log_temperature = nn.Parameter(torch.tensor(0.0))
        self.mix = nn.Sequential(
            nn.Linear(dim * 4, dim * 2),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(dim * 2, dim),
        )
        # tanh-bounded residual gain, initialised small.
        self.gamma = nn.Parameter(torch.tensor(0.1))
        self.out_norm = nn.LayerNorm(dim)

    def forward(self, z: Tensor) -> Tuple[Tensor, Tensor]:
        """Return (updated tokens, soft prototype assignments)."""
        zn = self.norm(z)
        # Unit-normalise both sides so the matmul below is cosine similarity.
        p = F.normalize(self.prototypes, dim=-1)
        q = F.normalize(zn, dim=-1)
        # cosine distance in [0, 2]
        dist = 1.0 - torch.matmul(q, p.t())
        temp = F.softplus(self.log_temperature) + 1e-4
        assign = F.softmax(-dist / temp, dim=-1)
        # Expected prototype under the soft assignment (un-normalised bank).
        context = torch.matmul(assign, self.prototypes)
        # Mix token, prototype context, and their difference/product features.
        mixed = self.mix(torch.cat([zn, context, zn - context, zn * context], dim=-1))
        z = z + self.gamma.tanh() * mixed
        return self.out_norm(z), assign
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
class LayerMemoryAttention(nn.Module):
    """A small learned memory bank attended by every patch token."""

    def __init__(self, dim: int, memory_tokens: int, num_heads: int, dropout: float = 0.0):
        super().__init__()
        # Learned memory [memory_tokens, dim]; shared across the batch.
        self.memory = nn.Parameter(torch.randn(memory_tokens, dim) * 0.02)
        self.norm_q = nn.LayerNorm(dim)
        self.norm_out = nn.LayerNorm(dim)
        self.attn = nn.MultiheadAttention(dim, num_heads=num_heads, dropout=dropout, batch_first=True)
        self.ffn = FeedForward(dim, expansion=4, dropout=dropout)
        # tanh-bounded residual gains for the attention and FFN branches.
        self.gamma_attn = nn.Parameter(torch.tensor(0.1))
        self.gamma_ffn = nn.Parameter(torch.tensor(0.1))

    def forward(self, z: Tensor) -> Tuple[Tensor, Tensor]:
        """Return (updated tokens, attention weights over the memory bank)."""
        b = z.shape[0]
        # Broadcast the shared memory across the batch dimension.
        mem = self.memory.unsqueeze(0).expand(b, -1, -1)
        q = self.norm_q(z)
        # Cross-attention: patch tokens query the memory (keys and values).
        attn_out, attn_weights = self.attn(q, mem, mem, need_weights=True)
        z = z + self.gamma_attn.tanh() * attn_out
        z = z + self.gamma_ffn.tanh() * self.ffn(self.norm_out(z))
        return z, attn_weights
|
| 175 |
+
|
| 176 |
+
|
| 177 |
+
class MainClassifier(nn.Module):
    """Primary classifier over the CLS token plus mean/max-pooled patch tokens.

    Submodule names (`norm`, `head`) are kept stable so checkpoints load.
    """

    def __init__(self, dim: int, num_classes: int, dropout: float = 0.0):
        super().__init__()
        self.norm = nn.LayerNorm(dim * 3)
        self.head = nn.Sequential(
            nn.Linear(dim * 3, dim),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(dim, num_classes),
        )

    def forward(self, cls: Tensor, z: Tensor) -> Tensor:
        """Pool patch tokens, concatenate with CLS, and return class logits."""
        pooled_avg = z.mean(dim=1)
        pooled_max = z.max(dim=1).values
        combined = torch.cat([cls, pooled_avg, pooled_max], dim=-1)
        return self.head(self.norm(combined))
|
| 193 |
+
|
| 194 |
+
|
| 195 |
+
class Top2FeedbackModulator(nn.Module):
    """Turns top-2 class probabilities into scale/shift over patch tokens."""

    def __init__(self, dim: int, num_classes: int):
        super().__init__()
        self.class_embed = nn.Embedding(num_classes, dim)
        # Maps the 4 confidence stats (p1, p2, margin, entropy) to a vector.
        self.stats_mlp = nn.Sequential(
            nn.Linear(4, dim),
            nn.GELU(),
            nn.Linear(dim, dim),
        )
        # Produces concatenated [scale, shift] from class + stat vectors.
        self.to_scale_shift = nn.Sequential(
            nn.LayerNorm(dim * 2),
            nn.Linear(dim * 2, dim * 2),
        )

    def forward(self, z0: Tensor, logits: Tensor) -> Tuple[Tensor, Dict[str, Tensor]]:
        """Return (modulated tokens, confidence stats) from main-head logits."""
        probs = logits.softmax(dim=-1)
        top_probs, top_idx = probs.topk(k=min(2, probs.shape[-1]), dim=-1)
        if top_probs.shape[-1] == 1:
            # Single-class edge case: pad a zero-probability duplicate so the
            # top-2 slicing below is always valid.
            top_probs = torch.cat([top_probs, torch.zeros_like(top_probs)], dim=-1)
            top_idx = torch.cat([top_idx, top_idx], dim=-1)

        p1 = top_probs[:, 0]
        p2 = top_probs[:, 1]
        margin = p1 - p2
        # clamp_min avoids log(0) for zero-probability classes.
        entropy = -(probs * (probs.clamp_min(1e-8)).log()).sum(dim=-1)
        class_vecs = self.class_embed(top_idx)  # [B, 2, C]
        # Probability-weighted mixture of the two candidate class embeddings.
        weighted_class_vec = (class_vecs * top_probs.unsqueeze(-1)).sum(dim=1)
        stats = torch.stack([p1, p2, margin, entropy], dim=-1)
        stat_vec = self.stats_mlp(stats)
        scale_shift = self.to_scale_shift(torch.cat([weighted_class_vec, stat_vec], dim=-1))
        scale, shift = scale_shift.chunk(2, dim=-1)
        # FiLM-style modulation; tanh and the 0.25 factor bound the change.
        z_mod = z0 * (1.0 + 0.25 * torch.tanh(scale).unsqueeze(1)) + 0.25 * torch.tanh(shift).unsqueeze(1)
        return z_mod, {
            "p1": p1,
            "p2": p2,
            "margin": margin,
            "entropy": entropy,
            "top_idx": top_idx,
            "top_probs": top_probs,
        }
|
| 237 |
+
|
| 238 |
+
|
| 239 |
+
class DeltaRBFHardExpert(nn.Module):
    """RBF expert for hard examples, driven by feedback-modulated patch deltas."""

    def __init__(self, dim: int, rbf_count: int, num_classes: int, dropout: float = 0.0):
        super().__init__()
        self.delta_norm = nn.LayerNorm(dim)
        # Learnable RBF centers [rbf_count, dim].
        self.rbf_centers = nn.Parameter(torch.randn(rbf_count, dim) * 0.02)
        # Per-center bandwidth; softplus in forward keeps it positive.
        self.log_sigma = nn.Parameter(torch.zeros(rbf_count))
        self.rbf_to_logits = nn.Linear(rbf_count, num_classes)
        # Parallel MLP branch over pooled delta statistics.
        self.delta_mlp = nn.Sequential(
            nn.Linear(dim * 2, dim),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(dim, num_classes),
        )

    def forward(self, z_base: Tensor, z_mod: Tensor) -> Tuple[Tensor, Tensor]:
        """Return (expert logits, pooled RBF activations) from the token delta."""
        delta = self.delta_norm(z_mod - z_base)
        delta_mean = delta.mean(dim=1)
        delta_max = delta.max(dim=1).values

        # Cosine distance between normalised deltas and normalised centers.
        q = F.normalize(delta, dim=-1)
        c = F.normalize(self.rbf_centers, dim=-1)
        dist = 1.0 - torch.matmul(q, c.t())  # [B, N, R]
        sigma = F.softplus(self.log_sigma).view(1, 1, -1) + 1e-4
        # Mean over patch tokens pools the per-token RBF responses.
        rbf = torch.exp(-dist / sigma).mean(dim=1)  # [B, R]
        expert_logits = self.rbf_to_logits(rbf) + self.delta_mlp(torch.cat([delta_mean, delta_max], dim=-1))
        return expert_logits, rbf
|
| 267 |
+
|
| 268 |
+
|
| 269 |
+
class LogitFusion(nn.Module):
    """Blend main and expert logits with a learned gate plus a calibration residual.

    Submodule names (`alpha`, `calibrate`) are kept stable so checkpoints load.
    """

    def __init__(self, num_classes: int):
        super().__init__()
        self.alpha = nn.Parameter(torch.tensor(0.35))
        self.calibrate = nn.Sequential(
            nn.LayerNorm(num_classes * 2),
            nn.Linear(num_classes * 2, num_classes),
        )

    def forward(self, main_logits: Tensor, expert_logits: Tensor) -> Tensor:
        """Return main + sigmoid(alpha)*expert + 0.1*calibration(main||expert)."""
        stacked = torch.cat([main_logits, expert_logits], dim=-1)
        correction = self.calibrate(stacked)
        blend_weight = self.alpha.sigmoid()
        return main_logits + blend_weight * expert_logits + 0.1 * correction
|
| 281 |
+
|
| 282 |
+
|
| 283 |
+
class HardCaseGate(nn.Module):
    """Deterministic inference gate from probability confidence signals.

    A sample is "hard" when its top probability is low, its top-2 margin is
    small, or its predictive entropy is high. No learned parameters.
    """

    def __init__(self, pmax_threshold: float, margin_threshold: float, entropy_threshold: float):
        super().__init__()
        self.pmax_threshold = pmax_threshold
        self.margin_threshold = margin_threshold
        self.entropy_threshold = entropy_threshold

    def forward(self, logits: Tensor) -> Tuple[Tensor, Dict[str, Tensor]]:
        """Return (boolean hard mask, {pmax, margin, entropy} stats)."""
        probs = logits.softmax(dim=-1)
        k = min(2, probs.shape[-1])
        ranked = probs.topk(k=k, dim=-1).values
        p1 = ranked[:, 0]
        # Single-class edge case: treat the runner-up probability as zero.
        p2 = ranked[:, 1] if k > 1 else torch.zeros_like(p1)
        margin = p1 - p2
        # clamp_min avoids log(0) for zero-probability classes.
        entropy = -(probs * probs.clamp_min(1e-8).log()).sum(dim=-1)
        low_confidence = p1 < self.pmax_threshold
        close_call = margin < self.margin_threshold
        diffuse = entropy > self.entropy_threshold
        hard = low_confidence | close_call | diffuse
        return hard, {"pmax": p1, "margin": margin, "entropy": entropy}
|
| 304 |
+
|
| 305 |
+
|
| 306 |
+
class ProtoMorphHead(nn.Module):
    """Full custom head over frozen DINO features.

    Pipeline: two ProtoMorph + memory-attention stages -> main classifier ->
    confidence gate -> top-2 feedback modulation -> RBF hard expert -> fusion.
    The expert branch always runs; the gate only selects which logits each
    sample reports as final.
    """

    def __init__(self, cfg: ProtoMorphConfig):
        super().__init__()
        self.cfg = cfg
        d = cfg.embed_dim
        self.input_norm = nn.LayerNorm(d)
        self.block1 = ProtoMorphBlock(d, cfg.proto_count, cfg.dropout)
        self.mem1 = LayerMemoryAttention(d, cfg.memory_tokens, cfg.num_heads, cfg.dropout)
        self.block2 = ProtoMorphBlock(d, cfg.proto_count, cfg.dropout)
        self.mem2 = LayerMemoryAttention(d, cfg.memory_tokens, cfg.num_heads, cfg.dropout)
        self.main = MainClassifier(d, cfg.num_classes, cfg.dropout)
        self.gate = HardCaseGate(cfg.hard_pmax_threshold, cfg.hard_margin_threshold, cfg.hard_entropy_threshold)
        self.feedback = Top2FeedbackModulator(d, cfg.num_classes)
        self.hard_expert = DeltaRBFHardExpert(d, cfg.rbf_count, cfg.num_classes, cfg.dropout)
        self.fusion = LogitFusion(cfg.num_classes)

    def forward(self, cls: Tensor, patches: Tensor, force_hard: bool = False) -> Dict[str, Tensor]:
        """Classify from CLS + patch tokens.

        Returns a dict with final/main/expert logits, the hard-case mask,
        RBF activations, mean prototype assignments and memory-attention maps,
        plus `gate_*` and `fb_*` diagnostic stats.
        """
        z0 = self.input_norm(patches)
        z, assign1 = self.block1(z0)
        z, mem_attn1 = self.mem1(z)
        z, assign2 = self.block2(z)
        z, mem_attn2 = self.mem2(z)

        main_logits = self.main(cls, z)
        hard_mask, gate_stats = self.gate(main_logits)
        if force_hard:
            # Override: route every sample through the expert path.
            hard_mask = torch.ones_like(hard_mask, dtype=torch.bool)

        # Feedback + expert run unconditionally; the mask only picks outputs.
        z_mod, fb_stats = self.feedback(z0, main_logits)
        expert_logits, rbf = self.hard_expert(z0, z_mod)
        fused_logits = self.fusion(main_logits, expert_logits)
        # Per-sample selection: fused logits for hard cases, main otherwise.
        final_logits = torch.where(hard_mask[:, None], fused_logits, main_logits)

        out = {
            "logits": final_logits,
            "main_logits": main_logits,
            "expert_logits": expert_logits,
            "hard_mask": hard_mask,
            "rbf": rbf,
            "assign1_mean": assign1.mean(dim=1),
            "assign2_mean": assign2.mean(dim=1),
            "mem_attn1_mean": mem_attn1.mean(dim=1),
            "mem_attn2_mean": mem_attn2.mean(dim=1),
        }
        out.update({f"gate_{k}": v for k, v in gate_stats.items()})
        # fb_stats may contain non-tensor entries; only tensors are exported.
        out.update({f"fb_{k}": v for k, v in fb_stats.items() if isinstance(v, Tensor)})
        return out
|
| 353 |
+
|
| 354 |
+
|
| 355 |
+
class ProtoMorphDINOv3(nn.Module):
    """Full inference graph: frozen DINOv3 + custom ProtoMorph head."""

    def __init__(self, cfg: ProtoMorphConfig, local_files_only: bool = False):
        """Build the backbone and head; raises ValueError on embed_dim mismatch."""
        super().__init__()
        self.cfg = cfg
        self.backbone = FrozenDINOv3(cfg.dino_model_name, image_size=cfg.image_size, local_files_only=local_files_only)
        actual_dim = self.backbone.hidden_size
        # hidden_size may be 0 when the HF config omits it; only validate when known.
        if actual_dim and actual_dim != cfg.embed_dim:
            raise ValueError(
                f"Config embed_dim={cfg.embed_dim} but DINO hidden_size={actual_dim}. "
                f"Use the matching config or run scripts/create_random_head.py with --embed-dim {actual_dim}."
            )
        self.head = ProtoMorphHead(cfg)

    @torch.no_grad()
    def forward(
        self,
        images: Image.Image | Sequence[Image.Image],
        device: torch.device | str,
        force_hard: bool = False,
        use_bf16_autocast: Optional[bool] = None,
    ) -> Dict[str, Tensor | Tuple[int, int]]:
        """Run the full pipeline on PIL images.

        `use_bf16_autocast=None` defers to the config flag; autocast is only
        enabled on CUDA devices. Returns the head's output dict plus the
        backbone's `patch_hw`/`pixel_hw` geometry.
        """
        use_amp = self.cfg.use_bf16_autocast if use_bf16_autocast is None else use_bf16_autocast
        device_obj = torch.device(device)
        amp_enabled = bool(use_amp and device_obj.type == "cuda")
        amp_dtype = torch.bfloat16

        # enabled=False makes this a no-op on CPU despite device_type="cuda".
        with torch.autocast(device_type="cuda", dtype=amp_dtype, enabled=amp_enabled):
            feats = self.backbone(images, device=device_obj)
            cls = feats.cls
            patches = feats.patches
            if self.cfg.normalize_patch_tokens:
                # Parameter-free LayerNorm over the channel dimension.
                cls = F.layer_norm(cls, cls.shape[-1:])
                patches = F.layer_norm(patches, patches.shape[-1:])
            head_out = self.head(cls, patches, force_hard=force_hard)
        head_out["patch_hw"] = feats.patch_hw
        head_out["pixel_hw"] = feats.pixel_hw
        return head_out

    def save_custom_head(self, checkpoint_path: str | Path) -> None:
        """Write only the head's state_dict to a safetensors file."""
        if safe_save_file is None:
            raise ImportError("safetensors is required: pip install safetensors")
        p = Path(checkpoint_path)
        p.parent.mkdir(parents=True, exist_ok=True)
        safe_save_file(self.head.state_dict(), str(p))

    def load_custom_head(self, checkpoint_path: str | Path, strict: bool = True) -> None:
        """Load head weights from a safetensors checkpoint (backbone untouched)."""
        if safe_load_file is None:
            raise ImportError("safetensors is required: pip install safetensors")
        sd = safe_load_file(str(checkpoint_path), device="cpu")
        self.head.load_state_dict(sd, strict=strict)
|
| 407 |
+
|
| 408 |
+
|
| 409 |
+
def infer_embed_dim_from_model_name(model_name: str) -> int:
    """Useful defaults for DINOv3 ViT checkpoints.

    Matches size markers in precedence order (vits, vitb, vitl, vith) and
    falls back to the ViT-S width when nothing matches.
    """
    lowered = model_name.lower()
    for marker, dim in (("vits", 384), ("vitb", 768), ("vitl", 1024), ("vith", 1280)):
        if marker in lowered:
            return dim
    return 384
|