# @package __global__
defaults:
  - _self_
  - /model/lm/model_scale: base # prefer this group to set model scale instead of transformer_lm keys directly
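
# Illustrative note (assuming the usual audiocraft scale groups such as small / medium / large
# defined under /model/lm/model_scale): selecting one of those groups overrides dim, num_heads
# and num_layers below, which is why editing the transformer_lm keys directly is discouraged.
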
lm_model: transformer_lm
codebooks_pattern:
  modeling: parallel
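
# Note: 'parallel' predicts all n_q codebooks for the same time step at once; the released
# MusicGen models typically override this with the 'delay' interleaving pattern in their
# solver configs (a general observation, not guaranteed for every config in the repo).
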
transformer_lm:
  dim: 512
  num_heads: 8
  num_layers: 8
  hidden_scale: 4
  n_q: 8 # number of streams to model
  card: 1024
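  # Quick sanity check on the base values above (assuming card is the per-codebook vocabulary
  # size and hidden_scale multiplies dim for the feed-forward layer): per-head dim = 512 / 8 = 64,
  # feed-forward width = 512 * 4 = 2048, and the LM models n_q = 8 parallel token streams,
  # each over a 1024-entry codebook.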
  dropout: 0.
  emb_lr: null
  activation: gelu
  norm_first: false # if true, use pre-norm instead of post-norm
  bias_ff: true # use bias for the feedforward
  bias_attn: true # use bias for the attention
  bias_proj: true # use bias for the output projections
  past_context: null
  causal: true
  custom: false # use custom MHA implementation
  memory_efficient: false # use flash attention
  attention_as_float32: false # use float32 for the attention part,
                              # recommended at the moment when memory_efficient is True.
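  # (Assumption: memory_efficient relies on xFormers' memory-efficient / flash attention
  # kernels when that backend is installed; leave it false if xFormers is unavailable.)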
  layer_scale: null
  positional_embedding: sin # positional embedding strategy (sin, rope, or sin_rope).
  xpos: false # apply xpos decay (rope only).
  checkpointing: none # layer checkpointing method, can be none, torch, xformers_default.
                      # torch is the slowest but uses the least memory,
                      # xformers_default is somewhere in between.
  weight_init: null # weight initialization (null, gaussian or uniform)
  depthwise_init: null # perform depthwise initialization (null, current, global)
  zero_bias_init: false # initialize bias to zero if bias in linears and
                        # if a weight_init method is used.
  norm: layer_norm # normalization method to use in transformer.
  cross_attention: false
  qk_layer_norm: false
  qk_layer_norm_cross: false
  attention_dropout: null
  kv_repeat: 1
  two_step_cfg: false # whether to do true two-step CFG (separate conditional and unconditional passes), potentially resolving some padding issues.
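
# Example of overriding these keys from the command line (a sketch, assuming the usual
# Hydra/dora entry point used for MusicGen training; names and flags are illustrative):
#   dora run solver=musicgen/musicgen_base_32khz model/lm/model_scale=small \
#       transformer_lm.memory_efficient=true codebooks_pattern.modeling=delay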