---
# Model configuration for a Lina speech/token model (Lightning-CLI style:
# each `class_path` names the class to instantiate, `init_args` its kwargs).
model:
  class_path: model.lina.Lina
  init_args:
    n_warmup_steps: 500
    # NOTE: written as 5.0e-4 (not 5e-4) — YAML 1.1 parsers such as PyYAML
    # resolve exponent forms without a decimal point as *strings*, not floats.
    learning_rate: 5.0e-4
    n_codebook: 1024
    n_special_token_in: 3
    n_special_token_out: 3
    n_txt_vocab: 256
    d_context: 768
    d_model: 768
    # Codebook/quantizer layers to model; flow style is fine for a short leaf list.
    quant_layer: [0, 1, 2, 3]
    # Text-side encoder sub-module (presumably a transformer; confirm against
    # model.encoder.TextEncoder's signature).
    txt_encoder:
      class_path: model.encoder.TextEncoder
      init_args:
        dim: 768
        heads: 8
        n_layers: 9
        dropout: 0.1
    # Attentive gated-linear-attention decoder sub-module.
    attentive_rnn:
      class_path: model.gla.AttentiveGLA
      init_args:
        d_model: 768
        d_context: 768
        heads: 4
        dropout_att: 0.2
        dropout: 0.0
        n_layer: 6
        blind: true
        d_blind: 128