{ "architectures": [ "PerceiverMaskedLanguageModel" ], "model_config": { "activation_checkpointing": true, "activation_offloading": false, "decoder": { "cross_attention_residual": false, "cross_attention_widening_factor": 1, "dropout": 0.1, "freeze": false, "init_scale": 0.02, "max_seq_len": 2048, "num_cross_attention_heads": 8, "num_cross_attention_qk_channels": 256, "num_cross_attention_v_channels": 768, "num_output_query_channels": null, "vocab_size": 262 }, "encoder": { "cross_attention_widening_factor": 1, "dropout": 0.1, "first_cross_attention_layer_shared": false, "first_self_attention_block_shared": true, "freeze": false, "init_scale": 0.02, "max_seq_len": 2048, "num_cross_attention_heads": 8, "num_cross_attention_layers": 1, "num_cross_attention_qk_channels": 256, "num_cross_attention_v_channels": 1280, "num_input_channels": 768, "num_self_attention_blocks": 1, "num_self_attention_heads": 8, "num_self_attention_layers_per_block": 26, "num_self_attention_qk_channels": 256, "num_self_attention_v_channels": 1280, "params": null, "self_attention_widening_factor": 1, "vocab_size": 262 }, "num_latent_channels": 1280, "num_latents": 256 }, "model_type": "perceiver-io-masked-language-model", "tokenizer_class": "PerceiverTokenizer", "torch_dtype": "float32", "transformers_version": "4.28.0" }