{
  "model_cfg": {
    "embed_dim": 256,
    "seed": 42,
    "max_length": 200,
    "flip_ratio": 0.5,
    "tensor_ratio": 1.0,
    "batch_size": 4,
    "num_workers": 4,
    "num_projection_layers": 1,
    "dropout": 0.1,
    "temperature": 1.0,
    "head_lr": 0.001,
    "weight_decay": 0.001,
    "patience": 1,
    "factor": 0.8,
    "epochs": 2,
    "vision_cfg": {
      "image_size": 336,
      "patch_size": 14,
      "image_embedding": 1024,
      "image_encoder": "vit_large_patch14_clip_336",
      "image_encoder_lr": 0.0001,
      "imgs_pixels": 255.0
    },
    "text_cfg": {
      "context_length": 512,
      "vocab_size": 501153,
      "text_embedding": 768,
      "text_encoder": "sartifyllc/AviLaBSE",
      "text_encoder_lr": 1e-05,
      "text_tokenizer": "sartifyllc/AviLaBSE"
    }
  },
  "preprocess_cfg": {
    "mean": [
      0.48145466,
      0.4578275,
      0.40821073
    ],
    "std": [
      0.26862954,
      0.26130258,
      0.27577711
    ]
  }
}