_target_: fish_speech.models.text2semantic.llama.NaiveTransformer
config:
  _target_: fish_speech.models.text2semantic.llama.NaiveModelArgs
  max_seq_len: ${max_length}
  vocab_size: 36408
  n_layer: 12
  n_head: 12
  dim: 768
  rope_base: 10000
  norm_eps: 1e-5
  num_codebooks: 2  # number of input/output codebooks
  codebook_size: 1032  # codebook size 1024 + 2 special tokens
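
# Usage sketch (comments only, not part of the config): a minimal, assumed
# example of how a Hydra-style config like this one is consumed. It presumes
# Hydra/OmegaConf loading and that ${max_length} is normally interpolated from
# a parent config; the file path and the value 2048 below are hypothetical.
#
#   from hydra.utils import instantiate
#   from omegaconf import OmegaConf
#
#   cfg = OmegaConf.load("model/naive.yaml")  # hypothetical path to this file
#   cfg.config.max_seq_len = 2048             # stand-in for the ${max_length} interpolation
#   model = instantiate(cfg)                  # builds NaiveTransformer(config=NaiveModelArgs(...))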