{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2023-04-08 16:17:42.666716: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n",
"To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
"2023-04-08 16:17:51.862385: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n",
"2023-04-08 16:17:51.862490: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n",
"2023-04-08 16:17:51.862498: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n"
]
}
],
"source": [
"from fairseq.checkpoint_utils import load_model_ensemble_and_task_from_hf_hub\n",
"from fairseq.models.text_to_speech.hub_interface import TTSHubInterface\n",
"import IPython.display as ipd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"application/json": {
"ascii": false,
"bar_format": null,
"colour": null,
"elapsed": 0.025561809539794922,
"initial": 0,
"n": 0,
"ncols": null,
"nrows": null,
"postfix": null,
"prefix": "Fetching 9 files",
"rate": null,
"total": 9,
"unit": "it",
"unit_divisor": 1000,
"unit_scale": false
},
"application/vnd.jupyter.widget-view+json": {
"model_id": "a09031153d4945eb892d626679cfc9ec",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Fetching 9 files: 0%| | 0/9 [00:00, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"models, cfg, task = load_model_ensemble_and_task_from_hf_hub(\n",
" \"facebook/fastspeech2-en-ljspeech\",\n",
" arg_overrides={\"vocoder\": \"hifigan\", \"fp16\": False}\n",
")\n",
"\n",
"\n",
"model = models[0]"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"TTSHubInterface.update_cfg_with_data_cfg(cfg, task.data_cfg)\n",
"generator = task.build_generator(models, cfg)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"text = \"Homoscedasticity and heteroscedasticity\"\n",
"\n",
"sample = TTSHubInterface.get_model_input(task, text)\n",
"wav, rate = TTSHubInterface.get_prediction(task, model, generator, sample)"
]
},
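{
"cell_type": "markdown",
"metadata": {},
"source": [
"Optionally, persist the synthesized audio to disk before playing it. A minimal sketch, assuming the `soundfile` package is installed, that `wav` is a 1-D torch tensor, and using an example output filename:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch: write the waveform to a WAV file.\n",
"# Assumes soundfile is installed; \"fastspeech2_sample.wav\" is just an example filename.\n",
"import soundfile as sf\n",
"\n",
"sf.write(\"fastspeech2_sample.wav\", wav.cpu().numpy(), rate)"
]
},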
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
" \n",
" "
],
"text/plain": [
""
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ipd.Audio(wav, rate=rate)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}