{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from fairseq.checkpoint_utils import load_model_ensemble_and_task_from_hf_hub\n",
"from fairseq.models.text_to_speech.hub_interface import TTSHubInterface\n",
"import IPython.display as ipd"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Load the `facebook/fastspeech2-en-ljspeech` checkpoint through fairseq's HF Hub helper.\n",
"# The overrides set the vocoder option to `hifigan` and `fp16` to False.\n",
"models, cfg, task = load_model_ensemble_and_task_from_hf_hub(\n",
"    \"facebook/fastspeech2-en-ljspeech\",\n",
"    arg_overrides=dict(vocoder=\"hifigan\", fp16=False),\n",
")\n",
"\n",
"# Keep a handle on the first entry of the returned list; it is the model\n",
"# handed to get_prediction in the synthesis cell.\n",
"model = models[0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Update `cfg` from the task's data config before the generator is built.\n",
"# NOTE(review): the return value is not used, so this presumably modifies `cfg` in place - confirm.\n",
"TTSHubInterface.update_cfg_with_data_cfg(\n",
"    cfg,\n",
"    task.data_cfg,\n",
")\n",
"\n",
"# The generator is built from the whole `models` list (not the single `model`) plus `cfg`.\n",
"generator = task.build_generator(models, cfg)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"# Sentence to synthesize.\n",
"text = \"I am your master teacher Huahua\"\n",
"\n",
"# Convert the raw text into the input structure expected by get_prediction.\n",
"sample = TTSHubInterface.get_model_input(task, text)\n",
"\n",
"# Run the prediction; the result unpacks into `wav` and `rate`,\n",
"# which the playback cell passes to ipd.Audio.\n",
"wav, rate = TTSHubInterface.get_prediction(\n",
"    task, model, generator, sample\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
" \n",
" "
],
"text/plain": [
""
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Left as the last expression of the cell so the audio widget becomes the cell output.\n",
"ipd.Audio(data=wav, rate=rate)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}