{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Indic-TTS: Hindi speech synthesis demo\n",
"\n",
"Clones [AI4Bharat/Indic-TTS](https://github.com/AI4Bharat/Indic-TTS), installs the modified `Trainer` and `TTS` forks, synthesizes one Hindi sentence with the FastPitch model and the HiFi-GAN vocoder, and plays the result.\n",
"\n",
"**Prerequisite:** the Hindi checkpoints must already exist under `Indic-TTS/models/v1/hi/` (`fastpitch/` and `hifigan/`, each containing `best_model.pth` and `config.json`). This notebook does not download them.\n",
"\n",
"Run the cells top to bottom on a fresh kernel."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import sys\n",
"\n",
"from IPython.display import Audio, display\n",
"\n",
"# Remember the directory the kernel started in. Later cells change the working\n",
"# directory, so re-running this cell must not overwrite the original value.\n",
"if \"NOTEBOOK_ROOT\" not in globals():\n",
"    NOTEBOOK_ROOT = os.getcwd()\n",
"\n",
"REPO_DIR = os.path.join(NOTEBOOK_ROOT, \"Indic-TTS\")\n",
"\n",
"# Path is relative to REPO_DIR. The synthesizer saves WAV audio, and IPython's\n",
"# Audio guesses the MIME type from the file extension, so the name must end in .wav.\n",
"OUTPUT_WAV = \"output.wav\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Setup: clone and install"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%capture setup_log\n",
"# All output of this cell is captured into `setup_log`.\n",
"# If an install fails, run `setup_log.show()` in a new cell to read the log.\n",
"\n",
"# Start from the notebook root so that re-running this cell never nests clones.\n",
"os.chdir(NOTEBOOK_ROOT)\n",
"!test -d Indic-TTS || git clone https://github.com/AI4Bharat/Indic-TTS\n",
"os.chdir(REPO_DIR)\n",
"!echo \"[1/4] Cloned repository.\"\n",
"\n",
"# Install modified coqui-ai/Trainer\n",
"# (%pip installs into the kernel's environment; the extras spec is quoted so the shell cannot glob it)\n",
"!test -d Trainer || git clone https://github.com/gokulkarthik/Trainer\n",
"%pip install -q -e \"./Trainer[all]\"\n",
"!echo \"[2/4] Cloned Trainer.\"\n",
"\n",
"# Install modified coqui-ai/TTS\n",
"!test -d TTS || git clone https://github.com/gokulkarthik/TTS\n",
"%pip install -q -e \"./TTS[all]\"\n",
"!echo \"[3/4] Cloned TTS.\"\n",
"\n",
"# Install dependencies\n",
"%pip install -q -r requirements.txt\n",
"!echo \"[4/4] Installed requirements.\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Confirm that the editable TTS and Trainer installs are visible to the kernel's pip.\n",
"%pip list | grep -E \"^(TTS|trainer) \""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Synthesize speech"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Run from the repository root so the relative model paths below resolve.\n",
"os.chdir(REPO_DIR)\n",
"\n",
"# Use the kernel's own interpreter so the TTS package installed by %pip is the one that runs.\n",
"!\"{sys.executable}\" -m TTS.bin.synthesize --text \"बधाई हो! स्पीच जनरेशन मॉडल चलने लगा।\" \\\n",
" --model_path models/v1/hi/fastpitch/best_model.pth \\\n",
" --config_path models/v1/hi/fastpitch/config.json \\\n",
" --vocoder_path models/v1/hi/hifigan/best_model.pth \\\n",
" --vocoder_config_path models/v1/hi/hifigan/config.json \\\n",
" --speaker_idx \"female\" \\\n",
" --out_path \"{OUTPUT_WAV}\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Play the result"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Build the path from REPO_DIR so playback does not depend on the current working directory.\n",
"display(Audio(filename=os.path.join(REPO_DIR, OUTPUT_WAV)))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "py37_env",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.12"
}
},
"nbformat": 4,
"nbformat_minor": 2
}