# Initial commit: FloodDiffusionTiny - Tiny text-to-motion model with UMT5-Base

# e86746e 6 days ago

# 877 Bytes

	exp_name: ldf
	seed: 1234
	debug: false
	train: false

	save_dir: ./outputs
	resume_ckpt: null
	test_ckpt: "model.safetensors"
	test_vae_ckpt: "vae.safetensors"

	test_vae:
	target: ldf_models.vae_wan_1d.VAEWanModel
	ema_decay: 0.99
	params:
	input_dim: 263
	z_dim: 4

	test_setting:
	render: false
	simple: true
	recover_dim: 263

	val_repeat: 1

	model:
	target: ldf_models.diffusion_forcing_wan_tiny.DiffForcingWanModel
	ema_decay: 0.99
	params:
	model_name: "google/umt5-base"
	input_dim: 4
	noise_steps: 10
	hidden_dim: 256
	ffn_dim: 1024
	freq_dim: 64
	num_heads: 8
	num_layers: 8
	time_embedding_scale: 1.0
	chunk_size: 5
	use_text_cond: True
	text_len: 128
	drop_out: 0.1
	cfg_scale: 5.0
	prediction_type: "vel"
	causal: False