hahunavth
/

emofs2-base

Inference Endpoints

Model card Files Files and versions Metrics Training metrics Community

emofs2-base / config.json

hahunavth's picture

Upload model

d7ff923 6 months ago

raw history blame contribute delete

No virus

2.35 kB

	{
	"_name_or_path": "hahunavth/emofs2-base",
	"architectures": [
	"ESSModelForPretraining"
	],
	"freeze": [],
	"model_config": {
	"conformer": {
	"attention_dropout_p": 0.2,
	"conv_dropout_p": 0.2,
	"conv_expansion_factor": 2,
	"conv_kernel_size": 7,
	"decoder_dim": 256,
	"encoder_dim": 256,
	"feed_forward_dropout_p": 0.2,
	"feed_forward_expansion_factor": 4,
	"half_step_residual": true,
	"num_attention_heads": 2,
	"num_decode_layers": 6,
	"num_encode_layers": 4
	},
	"max_seq_len": 1000,
	"mode": "train",
	"num_emotion": 5,
	"reference_encoder": {
	"dropout": 0.2,
	"encoder_dim": 128
	},
	"variance_embedding": {
	"energy_quantization": "linear",
	"n_bins": 256,
	"pitch_quantization": "linear"
	},
	"variance_predictor": {
	"dropout": 0.5,
	"filter_size": 256,
	"kernel_size": 3
	},
	"vocoder": {
	"model": "HiFi-GAN",
	"speaker": "tth"
	}
	},
	"model_type": "emofs2",
	"preprocess_config": {
	"dataset": "vlsp2023emo",
	"emotion2id": {
	"angry": 3,
	"happy": 1,
	"neutral": 0,
	"sad": 2,
	"surprise": 4
	},
	"id2emotion": {
	"0": "neutral",
	"1": "happy",
	"2": "sad",
	"3": "angry",
	"4": "surprise"
	},
	"path": {
	"corpus_path": "./data/pretrained_tts_dataset/tuyendv.dict",
	"lexicon_path": "../datasets/ess-vlsp2023-lexicon/lexicon.dict",
	"preprocessed_path": "../datasets/ess-vlsp2023-emo-processed-phoneme-level",
	"raw_path": "./data/pretrained_tts_dataset_raw"
	},
	"preprocessing": {
	"audio": {
	"max_wav_value": 32768.0,
	"sampling_rate": 22050
	},
	"energy": {
	"feature": "phoneme_level",
	"normalization": true
	},
	"mel": {
	"mel_fmax": 8000,
	"mel_fmin": 0,
	"n_mel_channels": 80
	},
	"pitch": {
	"feature": "phoneme_level",
	"normalization": true
	},
	"stft": {
	"filter_length": 1024,
	"hop_length": 256,
	"win_length": 1024
	},
	"text": {
	"language": "en",
	"text_cleaners": []
	},
	"val_size": 512
	},
	"smoothing_label": 0.1
	},
	"torch_dtype": "float32",
	"transformers_version": "4.35.2"
	}