---
# Configuration for a `SplitVectorDiffusion` component that wraps an inner
# `CLIPImageGridPointDiffusionTransformer`.
#
# NOTE(review): the nesting below follows the per-level alphabetical key
# order (`latent_ctx` sorts before `layers`, so it is a top-level key, not a
# child of `inner`). Confirm against the code that consumes this file.

# 1048576 == 1024 * 1024.
# Presumably the flattened latent size, divided across `latent_ctx`
# positions -- TODO confirm with the consumer.
d_latent: 1048576

# Settings for the wrapped inner model; `name` selects which one.
inner:
  cond_drop_prob: 0.1
  heads: 8
  init_scale: 0.25
  layers: 24
  name: CLIPImageGridPointDiffusionTransformer
  pos_emb_init_scale: 0.05
  time_token_cond: true
  use_pos_emb: true
  width: 1024

latent_ctx: 1024

# Identifier of the outer (wrapping) component.
name: SplitVectorDiffusion