{
  "model_type": "vit-captioner-bias-decoder",
  "feature_extractor": "google/vit-base-patch16-224-in21k",
  "vocab_size": 75460,
  "seq_len": 32,
  "feature_dim": 768,
  "training_epochs": 3,
  "dataset": "ROCO-radiology (train + val + test)",
  "trainable": "Decoder + ViT biases only",
  "description": "ROCO radiology captioner trained for 3 epochs on full dataset using cached ViT features."
}