{ "context_length": 328, "emb_dim": 768, "embed_dim": 512, "fpn_in": [ 512, 768, 768 ], "fpn_out": [ 768, 768, 768, 512 ], "image_resolution": 224, "output_dim": 512, "patch_size": 32, "ratio": 0.9, "transformer_heads": 8, "transformer_layers": 12, "transformer_width": 512, "txt_length": 328, "vision_layers": 12, "vision_patch_size": 32, "vision_width": 768, "vocab_size": 49408 }