{ "_name_or_path": "hf-tiny-model-private/tiny-random-ClapModel", "architectures": [ "ClapModel" ], "audio_config": { "attention_dropout": 0.1, "dropout": 0.1, "freq_ratio": 2, "hidden_size": 256, "image_size": 60, "initializer_range": 0.02, "intermediate_size": 37, "model_type": "clap_audio_model", "num_attention_heads": [ 2, 2, 2, 2 ], "num_channels": 3, "num_mel_bins": 16, "patch_embeds_hidden_size": 32, "patch_size": 2, "patch_stride": 2, "projection_dim": 64, "spec_size": 64, "window_size": 4 }, "bos_token_id": 0, "eos_token_id": 2, "hidden_size": 32, "initializer_factor": 1.0, "logit_scale_init_value": 14.285714285714285, "model_type": "clap", "num_hidden_layers": 9, "pad_token_id": 1, "projection_dim": 64, "projection_hidden_act": "relu", "text_config": { "attention_dropout": 0.1, "classifier_dropout": null, "dropout": 0.1, "hidden_size": 32, "initializer_range": 0.02, "intermediate_size": 37, "max_position_embeddings": 512, "model_type": "clap_text_model", "num_attention_heads": 4, "num_hidden_layers": 5, "projection_dim": 64, "vocab_size": 1024 }, "transformers_version": "4.36.0.dev0" }