{ "architectures": [ "E2TextToSemanticWLenSMPL" ], "bos_token_id": 1, "eos_token_id": 2, "hidden_size": 512, "main_encoder_args": { "attn_dropout": 0.0, "attn_flash": true, "conv_dropout": 0.0, "conv_kernel_size": 5, "depth": 8, "dim": 512, "dim_head": 32, "ff_dropout": 0.0, "ff_mult": 4, "heads": 16 }, "model_type": "e2_text_to_semantic_w_length_smpl", "pad_token_id": 0, "pre_text_encoder_args": { "attn_dropout": 0.0, "attn_flash": true, "conv_dropout": 0.0, "conv_kernel_size": 5, "depth": 4, "dim": 512, "dim_head": 32, "ff_dropout": 0.0, "ff_mult": 4, "heads": 16 }, "semantic_vocab_size": 1024, "special_tokens": { "mask": 6, "pad": 0, "prompt_end": 3, "prompt_start": 2, "sep": 5, "speech": 4, "text": 1 }, "text_vocab_size": 256, "torch_dtype": "bfloat16", "transformers_version": "4.42.3" }