{
  "_name_or_path": "vsearch/vdr-cross-modal",
  "architectures": [
    "Retriever"
  ],
  "device": null,
  "encoder_p": {
    "heads": 12,
    "layers": 12,
    "patch_size": 32,
    "resolution": 224,
    "tokenizer_id": "bert-base-uncased",
    "topk": 768,
    "type": "vdr_crossmodal_image",
    "width": 768
  },
  "encoder_q": {
    "max_len": 512,
    "model_id": "bert-base-uncased",
    "topk": 768,
    "type": "vdr_crossmodal_text"
  },
  "shared_encoder": false,
  "torch_dtype": "float32",
  "transformers_version": "4.34.0",
  "type": null
}