{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# FastSpeech 2 text-to-speech demo (fairseq)\n",
    "\n",
    "Loads the `facebook/fastspeech2-en-ljspeech` checkpoint through fairseq's Hugging Face Hub loader, synthesizes one sentence, and plays the result inline.\n",
    "\n",
    "Run the cells top to bottom on a fresh kernel (Restart Kernel, then Run All). `fairseq` must be installed; the Hub loader presumably needs network access the first time it fetches the checkpoint."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from fairseq.checkpoint_utils import load_model_ensemble_and_task_from_hf_hub\n",
    "from fairseq.models.text_to_speech.hub_interface import TTSHubInterface\n",
    "import IPython.display as ipd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Configuration\n",
    "\n",
    "Everything a reader is likely to change lives here."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "MODEL_ID = \"facebook/fastspeech2-en-ljspeech\"  # Hugging Face Hub model identifier\n",
    "TEXT = \"I am your master teacher Huahua\"  # sentence to synthesize"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load the model\n",
    "\n",
    "The overrides select the `hifigan` vocoder and turn `fp16` off."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "models, cfg, task = load_model_ensemble_and_task_from_hf_hub(\n",
    "    MODEL_ID,\n",
    "    arg_overrides={\"vocoder\": \"hifigan\", \"fp16\": False},\n",
    ")\n",
    "model = models[0]  # use the first model of the returned ensemble"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Build the generator\n",
    "\n",
    "The task's data config is merged into `cfg` before the generator is built from it."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "TTSHubInterface.update_cfg_with_data_cfg(cfg, task.data_cfg)\n",
    "generator = task.build_generator(models, cfg)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Synthesize speech\n",
    "\n",
    "Convert `TEXT` into a model input, then run the prediction to get the waveform and its sample rate."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sample = TTSHubInterface.get_model_input(task, TEXT)\n",
    "wav, rate = TTSHubInterface.get_prediction(task, model, generator, sample)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Listen\n",
    "\n",
    "The audio player below is rendered when the cell is executed."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ipd.Audio(wav, rate=rate)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.6"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}