{ "architectures": [ "UltravoxModel" ], "audio_config": { "_name_or_path": "facebook/wav2vec2-base-960h", "architectures": [ "Wav2Vec2ForCTC" ], "feat_extract_dropout": 0.0, "feat_proj_dropout": 0.1, "gradient_checkpointing": false, "hidden_dropout_prob": 0.1, "model_type": "wav2vec2" }, "audio_model_id": "facebook/wav2vec2-base-960h", "audio_model_lora_config": { "lora_alpha": 8, "r": 0, "target_modules": [ "k_proj", "q_proj", "linear_k", "linear_q" ] }, "audio_token_index": 32000, "hidden_size": 4096, "ignore_index": -100, "initializer_range": 0.02, "model_type": "ultravox", "norm_init": 0.4, "projector_act": "swiglu", "stack_factor": 8, "text_config": { "_name_or_path": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "architectures": [ "LlamaForCausalLM" ], "hidden_size": 2048, "intermediate_size": 5632, "model_type": "llama", "num_hidden_layers": 22, "num_key_value_heads": 4, "rms_norm_eps": 1e-05, "torch_dtype": "bfloat16" }, "text_model_id": "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "text_model_lora_config": { "lora_alpha": 8, "r": 0, "target_modules": [ "k_proj", "q_proj", "linear_k", "linear_q" ] }, "torch_dtype": "float32", "transformers_version": "4.41.2", "vocab_size": 32000 }