# prediff_code/scripts/train_diffusion/cfg.yaml
#
# Upload folder using huggingface_hub
#
# 7667a87 (verified), 7 months ago
#
# 5.76 kB

	dataset:
	dataset_name: "sevirlr"
	img_height: 128
	img_width: 128
	in_len: 7
	out_len: 6
	seq_len: 13
	plot_stride: 1
	interval_real_time: 10
	sample_mode: "sequent"
	stride: 6
	layout: "NTHWC"
	start_date: null
	train_test_split_date: [2019, 6, 1]
	end_date: null
	val_ratio: 0.1
	metrics_mode: "0"
	metrics_list: ['csi', 'pod', 'sucr', 'bias']
	threshold_list: [16, 74, 133, 160, 181, 219]
	aug_mode: "2"
	layout:
	in_len: 7
	out_len: 6
	in_step: &in_step 1
	out_step: &out_step 1
	in_out_diff: &in_out_diff 1
	img_height: 128
	img_width: 128
	data_channels: 1
	layout: "NTHWC"
	optim:
	total_batch_size: 8
	micro_batch_size: 2
	seed: 0
	float32_matmul_precision: "high"
	method: "adamw"
	lr: 1.0e-3
	wd: 1.0e-5
	betas: [0.9, 0.999]
	gradient_clip_val: 1.0
	max_epochs: 2000
	loss_type: "l2"
	# scheduler
	warmup_percentage: 0.1
	lr_scheduler_mode: "cosine"
	min_lr_ratio: 1.0e-3
	warmup_min_lr_ratio: 0.1
	# early stopping
	monitor: "val/loss"
	# monitor: "valid_loss_epoch"
	early_stop: false
	early_stop_mode: "min"
	early_stop_patience: 100
	save_top_k: 3
	logging:
	logging_prefix: "PreDiff"
	monitor_lr: true
	monitor_device: false
	track_grad_norm: -1
	use_wandb: false
	profiler: null
	save_npy: true
	trainer:
	check_val_every_n_epoch: 50
	log_step_ratio: 0.001
	precision: 32
	find_unused_parameters: false
	num_sanity_val_steps: 2
	eval:
	train_example_data_idx_list: [0, ]
	val_example_data_idx_list: [0, 16, 32, 48, 64, 72, 96, 108, 128]
	test_example_data_idx_list: [0, 16, 32, 48, 64, 72, 96, 108, 128]
	eval_example_only: true
	eval_aligned: true
	eval_unaligned: true
	num_samples_per_context: 1
	fs: 20
	label_offset: [-0.5, 0.5]
	label_avg_int: false
	fvd_features: 400
	model:
	diffusion:
	data_shape: [6, 128, 128, 1]
	beta_schedule: "linear"
	use_ema: true
	log_every_t: 100
	clip_denoised: false
	linear_start: 1e-4
	linear_end: 2e-2
	cosine_s: 8e-3
	given_betas: null
	original_elbo_weight: 0.
	v_posterior: 0.
	l_simple_weight: 1.
	parameterization: "eps"
	learn_logvar: true
	logvar_init: 0.
	# latent diffusion
	latent_shape: [6, 16, 16, 64]
	cond_stage_model: "__is_first_stage__"
	num_timesteps_cond: null
	cond_stage_trainable: false
	cond_stage_forward: null
	scale_by_std: false
	scale_factor: 1.0
	latent_cond_shape: [7, 16, 16, 64]
	align:
	alignment_type: "avg_x"
	guide_scale: 50.0
	model_type: "cuboid"
	model_args:
	input_shape: [6, 16, 16, 64]
	out_channels: 1
	base_units: 128
	scale_alpha: 1.0
	depth: [1, 1]
	downsample: 2
	downsample_type: "patch_merge"
	block_attn_patterns: "axial"
	num_heads: 4
	attn_drop: 0.1
	proj_drop: 0.1
	ffn_drop: 0.1
	ffn_activation: "gelu"
	gated_ffn: false
	norm_layer: "layer_norm"
	use_inter_ffn: true
	hierarchical_pos_embed: false
	pos_embed_type: "t+h+w"
	padding_type: "zeros"
	checkpoint_level: 0
	use_relative_pos: true
	self_attn_use_final_proj: true
	# global vectors
	num_global_vectors: 0
	use_global_vector_ffn: true
	use_global_self_attn: false
	separate_global_qkv: false
	global_dim_ratio: 1
	# initialization
	attn_linear_init_mode: "0"
	ffn_linear_init_mode: "0"
	ffn2_linear_init_mode: "2"
	attn_proj_linear_init_mode: "2"
	conv_init_mode: "0"
	down_linear_init_mode: "0"
	global_proj_linear_init_mode: "2"
	norm_init_mode: "0"
	# timestep embedding for diffusion
	time_embed_channels_mult: 4
	time_embed_use_scale_shift_norm: false
	time_embed_dropout: 0.0
	# readout
	pool: "attention"
	readout_seq: true
	out_len: 6
	model_ckpt_path: "pretrained_sevirlr_alignment_avg_x_cuboid_v1.pt"
	latent_model:
	input_shape: [7, 16, 16, 64]
	target_shape: [6, 16, 16, 64]
	base_units: 256
	# block_units: null
	scale_alpha: 1.0
	num_heads: 4
	attn_drop: 0.1
	proj_drop: 0.1
	ffn_drop: 0.1
	# inter-attn downsample/upsample
	downsample: 2
	downsample_type: "patch_merge"
	upsample_type: "upsample"
	upsample_kernel_size: 3
	# cuboid attention
	depth: [4, 4]
	self_pattern: "axial"
	# global vectors
	num_global_vectors: 0
	use_dec_self_global: false
	dec_self_update_global: true
	use_dec_cross_global: false
	use_global_vector_ffn: false
	use_global_self_attn: true
	separate_global_qkv: true
	global_dim_ratio: 1
	# mise
	ffn_activation: "gelu"
	gated_ffn: false
	norm_layer: "layer_norm"
	padding_type: "zeros"
	pos_embed_type: "t+h+w"
	checkpoint_level: 0
	use_relative_pos: true
	self_attn_use_final_proj: true
	# initialization
	attn_linear_init_mode: "0"
	ffn_linear_init_mode: "0"
	ffn2_linear_init_mode: "2"
	attn_proj_linear_init_mode: "2"
	conv_init_mode: "0"
	down_up_linear_init_mode: "0"
	global_proj_linear_init_mode: "2"
	norm_init_mode: "0"
	# timestep embedding for diffusion
	time_embed_channels_mult: 4
	time_embed_use_scale_shift_norm: false
	time_embed_dropout: 0.0
	unet_res_connect: true
	vae:
	pretrained_ckpt_path: "pretrained_sevirlr_vae_8x8x64_v1_2.pt"
	data_channels: 1
	down_block_types: ['DownEncoderBlock2D', 'DownEncoderBlock2D', 'DownEncoderBlock2D', 'DownEncoderBlock2D']
	in_channels: 1
	block_out_channels: [128, 256, 512, 512] # downsample `len(block_out_channels) - 1` times
	act_fn: 'silu'
	latent_channels: 64
	up_block_types: ['UpDecoderBlock2D', 'UpDecoderBlock2D', 'UpDecoderBlock2D', 'UpDecoderBlock2D']
	norm_num_groups: 32
	layers_per_block: 2
	out_channels: 1