{"cells":[{"cell_type":"markdown","metadata":{"id":"AvxXfBluyZvb"},"source":["## 1. 구글 드라이브 마운트\n","\n","음성합성을 위해 학습한 모델이 있는 구글 드라이브를 마운트합니다. \n","마운트할 구글 드라이브 내에 다음 파일들이 존재하는지 꼭 확인해주세요.\n","\n","- `/Colab Notebooks/data/glowtts-v2/model_file.pth.tar`\n","- `/Colab Notebooks/data/glowtts-v2/config.json`\n","- `/Colab Notebooks/data/hifigan-v2/model_file.pth.tar`\n","- `/Colab Notebooks/data/hifigan-v2/config.json`\n","\n","\n","(존재하지 않는다면, [glowtts-v2.zip](https://drive.google.com/file/d/1DMKLdfZ_gzc_z0qDod6_G8fEXj0zCHvC/view?usp=sharing), [hifigan-v2.zip](https://drive.google.com/file/d/1vRxp1RH-U7gSzWgyxnKY4h_7pB3tjPmU/view?usp=sharing)을 내려받아 준비해주세요.)\n","\n","만약 아래에 `Enter your authorization code:`과 같은 메시지가 출력될 경우, \n","같이 출력된 링크에 접속하여, 마운트할 구글 계정을 선택하신 후, 인증 코드를 복사하여 입력해주세요."]},{"cell_type":"code","execution_count":1,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":230434,"status":"ok","timestamp":1707413202236,"user":{"displayName":"Cream Ice","userId":"02668969734157440879"},"user_tz":-540},"id":"4U2wrDOthrsF","outputId":"1a56442b-2686-468a-bb32-0ba7608ef988"},"outputs":[{"name":"stdout","output_type":"stream","text":["Mounted at /content/drive\n"]}],"source":["from google.colab import drive\n","drive.mount('/content/drive')"]},{"cell_type":"markdown","metadata":{"id":"8erClGSnzwge"},"source":["## 2. 필수 라이브러리 및 함수 불러오기\n","\n","실행에 필요한 라이브러리 및 함수를 불러옵니다.\n","\n","이 과정은 약 10분 정도 소요될 수 있습니다."]},{"cell_type":"code","execution_count":2,"metadata":{"executionInfo":{"elapsed":18,"status":"ok","timestamp":1707413202238,"user":{"displayName":"Cream Ice","userId":"02668969734157440879"},"user_tz":-540},"id":"jYCym6hXge2_"},"outputs":[],"source":["import os\n","import sys\n","from pathlib import Path"]},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"background_save":true,"base_uri":"https://localhost:8080/"},"id":"JkWG-L13gReB"},"outputs":[{"name":"stdout","output_type":"stream","text":["/content\n","fatal: destination path 'TTS' already exists and is not an empty directory.\n","fatal: destination path 'g2pK' already exists and is not an empty directory.\n","/content/TTS\n"," \u001b[1;31merror\u001b[0m: \u001b[1msubprocess-exited-with-error\u001b[0m\n"," \n"," \u001b[31m×\u001b[0m \u001b[32mpip subprocess to install build dependencies\u001b[0m did not run successfully.\n"," \u001b[31m│\u001b[0m exit code: \u001b[1;36m1\u001b[0m\n"," \u001b[31m╰─\u003e\u001b[0m See above for output.\n"," \n"," \u001b[1;35mnote\u001b[0m: This error originates from a subprocess, and is likely not a problem with pip.\n"," Installing build dependencies ... \u001b[?25l\u001b[?25herror\n","\u001b[1;31merror\u001b[0m: \u001b[1msubprocess-exited-with-error\u001b[0m\n","\n","\u001b[31m×\u001b[0m \u001b[32mpip subprocess to install build dependencies\u001b[0m did not run successfully.\n","\u001b[31m│\u001b[0m exit code: \u001b[1;36m1\u001b[0m\n","\u001b[31m╰─\u003e\u001b[0m See above for output.\n","\n","\u001b[1;35mnote\u001b[0m: This error originates from a subprocess, and is likely not a problem with pip.\n","/content/g2pK\n","\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m71.1/71.1 kB\u001b[0m \u001b[31m2.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n","\u001b[?25h Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n"]}],"source":["%cd /content\n","!git clone --depth 1 https://github.com/sce-tts/TTS.git -b sce-tts\n","!git clone --depth 1 https://github.com/sce-tts/g2pK.git\n","%cd /content/TTS\n","!pip install -q --no-cache-dir -e .\n","%cd /content/g2pK\n","!pip install -q --no-cache-dir \"pysbd\" \"konlpy\" \"jamo\" \"nltk\" \"python-mecab-ko\"\n","!pip install -q --no-cache-dir -e ."]},{"cell_type":"code","execution_count":4,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":2893,"status":"ok","timestamp":1707413252303,"user":{"displayName":"Cream Ice","userId":"02668969734157440879"},"user_tz":-540},"id":"FUD8SfIxSY8j","outputId":"3b984ef6-c788-4694-84b1-2675d82ed0c3"},"outputs":[{"name":"stdout","output_type":"stream","text":["/content/g2pK\n"]},{"name":"stderr","output_type":"stream","text":["[nltk_data] Downloading package cmudict to /root/nltk_data...\n","[nltk_data] Unzipping corpora/cmudict.zip.\n"]}],"source":["%cd /content/g2pK\n","import g2pk\n","g2p = g2pk.G2p()"]},{"cell_type":"code","execution_count":5,"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":524},"executionInfo":{"elapsed":15,"status":"error","timestamp":1707413252303,"user":{"displayName":"Cream Ice","userId":"02668969734157440879"},"user_tz":-540},"id":"Lt9bLLZ8I4GH","outputId":"d0eba754-a986-47ee-d154-7233c2ef24c6"},"outputs":[{"name":"stdout","output_type":"stream","text":["/content/TTS\n"]},{"ename":"ModuleNotFoundError","evalue":"No module named 'pysbd'","output_type":"error","traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)","\u001b[0;32m\u003cipython-input-5-73a1c4bbdabf\u003e\u001b[0m in \u001b[0;36m\u003ccell line: 7\u003e\u001b[0;34m()\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mIPython\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----\u003e 7\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mTTS\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mutils\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msynthesizer\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mSynthesizer\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 8\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mnormalize_text\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtext\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;32m/content/TTS/TTS/utils/synthesizer.py\u001b[0m in \u001b[0;36m\u003cmodule\u003e\u001b[0;34m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mnumpy\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----\u003e 5\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0mpysbd\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 6\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n","\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'pysbd'","","\u001b[0;31m---------------------------------------------------------------------------\u001b[0;32m\nNOTE: If your import is failing due to a missing package, you can\nmanually install dependencies using either !pip or !apt.\n\nTo view examples of installing some common dependencies, click the\n\"Open Examples\" button below.\n\u001b[0;31m---------------------------------------------------------------------------\u001b[0m\n"]}],"source":["%cd /content/TTS\n","import re\n","import sys\n","from unicodedata import normalize\n","import IPython\n","\n","from TTS.utils.synthesizer import Synthesizer\n","\n","def normalize_text(text):\n"," text = text.strip()\n","\n"," for c in \",;:\":\n"," text = text.replace(c, \".\")\n"," text = remove_duplicated_punctuations(text)\n","\n"," text = jamo_text(text)\n","\n"," text = g2p.idioms(text)\n"," text = g2pk.english.convert_eng(text, g2p.cmu)\n"," text = g2pk.utils.annotate(text, g2p.mecab)\n"," text = g2pk.numerals.convert_num(text)\n"," text = re.sub(\"/[PJEB]\", \"\", text)\n","\n"," text = alphabet_text(text)\n","\n"," # remove unreadable characters\n"," text = normalize(\"NFD\", text)\n"," text = \"\".join(c for c in text if c in symbols)\n"," text = normalize(\"NFC\", text)\n","\n"," text = text.strip()\n"," if len(text) == 0:\n"," return \"\"\n","\n"," # only single punctuation\n"," if text in '.!?':\n"," return punctuation_text(text)\n","\n"," # append punctuation if there is no punctuation at the end of the text\n"," if text[-1] not in '.!?':\n"," text += '.'\n","\n"," return text\n","\n","\n","def remove_duplicated_punctuations(text):\n"," text = re.sub(r\"[.?!]+\\?\", \"?\", text)\n"," text = re.sub(r\"[.?!]+!\", \"!\", text)\n"," text = re.sub(r\"[.?!]+\\.\", \".\", text)\n"," return text\n","\n","\n","def split_text(text):\n"," text = remove_duplicated_punctuations(text)\n","\n"," texts = []\n"," for subtext in re.findall(r'[^.!?\\n]*[.!?\\n]', text):\n"," texts.append(subtext.strip())\n","\n"," return texts\n","\n","\n","def alphabet_text(text):\n"," text = re.sub(r\"(a|A)\", \"에이\", text)\n"," text = re.sub(r\"(b|B)\", \"비\", text)\n"," text = re.sub(r\"(c|C)\", \"씨\", text)\n"," text = re.sub(r\"(d|D)\", \"디\", text)\n"," text = re.sub(r\"(e|E)\", \"이\", text)\n"," text = re.sub(r\"(f|F)\", \"에프\", text)\n"," text = re.sub(r\"(g|G)\", \"쥐\", text)\n"," text = re.sub(r\"(h|H)\", \"에이치\", text)\n"," text = re.sub(r\"(i|I)\", \"아이\", text)\n"," text = re.sub(r\"(j|J)\", \"제이\", text)\n"," text = re.sub(r\"(k|K)\", \"케이\", text)\n"," text = re.sub(r\"(l|L)\", \"엘\", text)\n"," text = re.sub(r\"(m|M)\", \"엠\", text)\n"," text = re.sub(r\"(n|N)\", \"엔\", text)\n"," text = re.sub(r\"(o|O)\", \"오\", text)\n"," text = re.sub(r\"(p|P)\", \"피\", text)\n"," text = re.sub(r\"(q|Q)\", \"큐\", text)\n"," text = re.sub(r\"(r|R)\", \"알\", text)\n"," text = re.sub(r\"(s|S)\", \"에스\", text)\n"," text = re.sub(r\"(t|T)\", \"티\", text)\n"," text = re.sub(r\"(u|U)\", \"유\", text)\n"," text = re.sub(r\"(v|V)\", \"브이\", text)\n"," text = re.sub(r\"(w|W)\", \"더블유\", text)\n"," text = re.sub(r\"(x|X)\", \"엑스\", text)\n"," text = re.sub(r\"(y|Y)\", \"와이\", text)\n"," text = re.sub(r\"(z|Z)\", \"지\", text)\n","\n"," return text\n","\n","\n","def punctuation_text(text):\n"," # 문장부호\n"," text = re.sub(r\"!\", \"느낌표\", text)\n"," text = re.sub(r\"\\?\", \"물음표\", text)\n"," text = re.sub(r\"\\.\", \"마침표\", text)\n","\n"," return text\n","\n","\n","def jamo_text(text):\n"," # 기본 자모음\n"," text = re.sub(r\"ㄱ\", \"기역\", text)\n"," text = re.sub(r\"ㄴ\", \"니은\", text)\n"," text = re.sub(r\"ㄷ\", \"디귿\", text)\n"," text = re.sub(r\"ㄹ\", \"리을\", text)\n"," text = re.sub(r\"ㅁ\", \"미음\", text)\n"," text = re.sub(r\"ㅂ\", \"비읍\", text)\n"," text = re.sub(r\"ㅅ\", \"시옷\", text)\n"," text = re.sub(r\"ㅇ\", \"이응\", text)\n"," text = re.sub(r\"ㅈ\", \"지읒\", text)\n"," text = re.sub(r\"ㅊ\", \"치읓\", text)\n"," text = re.sub(r\"ㅋ\", \"키읔\", text)\n"," text = re.sub(r\"ㅌ\", \"티읕\", text)\n"," text = re.sub(r\"ㅍ\", \"피읖\", text)\n"," text = re.sub(r\"ㅎ\", \"히읗\", text)\n"," text = re.sub(r\"ㄲ\", \"쌍기역\", text)\n"," text = re.sub(r\"ㄸ\", \"쌍디귿\", text)\n"," text = re.sub(r\"ㅃ\", \"쌍비읍\", text)\n"," text = re.sub(r\"ㅆ\", \"쌍시옷\", text)\n"," text = re.sub(r\"ㅉ\", \"쌍지읒\", text)\n"," text = re.sub(r\"ㄳ\", \"기역시옷\", text)\n"," text = re.sub(r\"ㄵ\", \"니은지읒\", text)\n"," text = re.sub(r\"ㄶ\", \"니은히읗\", text)\n"," text = re.sub(r\"ㄺ\", \"리을기역\", text)\n"," text = re.sub(r\"ㄻ\", \"리을미음\", text)\n"," text = re.sub(r\"ㄼ\", \"리을비읍\", text)\n"," text = re.sub(r\"ㄽ\", \"리을시옷\", text)\n"," text = re.sub(r\"ㄾ\", \"리을티읕\", text)\n"," text = re.sub(r\"ㄿ\", \"리을피읍\", text)\n"," text = re.sub(r\"ㅀ\", \"리을히읗\", text)\n"," text = re.sub(r\"ㅄ\", \"비읍시옷\", text)\n"," text = re.sub(r\"ㅏ\", \"아\", text)\n"," text = re.sub(r\"ㅑ\", \"야\", text)\n"," text = re.sub(r\"ㅓ\", \"어\", text)\n"," text = re.sub(r\"ㅕ\", \"여\", text)\n"," text = re.sub(r\"ㅗ\", \"오\", text)\n"," text = re.sub(r\"ㅛ\", \"요\", text)\n"," text = re.sub(r\"ㅜ\", \"우\", text)\n"," text = re.sub(r\"ㅠ\", \"유\", text)\n"," text = re.sub(r\"ㅡ\", \"으\", text)\n"," text = re.sub(r\"ㅣ\", \"이\", text)\n"," text = re.sub(r\"ㅐ\", \"애\", text)\n"," text = re.sub(r\"ㅒ\", \"얘\", text)\n"," text = re.sub(r\"ㅔ\", \"에\", text)\n"," text = re.sub(r\"ㅖ\", \"예\", text)\n"," text = re.sub(r\"ㅘ\", \"와\", text)\n"," text = re.sub(r\"ㅙ\", \"왜\", text)\n"," text = re.sub(r\"ㅚ\", \"외\", text)\n"," text = re.sub(r\"ㅝ\", \"워\", text)\n"," text = re.sub(r\"ㅞ\", \"웨\", text)\n"," text = re.sub(r\"ㅟ\", \"위\", text)\n"," text = re.sub(r\"ㅢ\", \"의\", text)\n","\n"," return text\n","\n","\n","def normalize_multiline_text(long_text):\n"," texts = split_text(long_text)\n"," normalized_texts = [normalize_text(text).strip() for text in texts]\n"," return [text for text in normalized_texts if len(text) \u003e 0]\n","\n","def synthesize(text):\n"," wavs = synthesizer.tts(text, None, None)\n"," return wavs"]},{"cell_type":"markdown","metadata":{"id":"SbPRQfl8z28u"},"source":["## 3. 학습한 모델 불러오기\n","\n","학습한 Glow-TTS와 HiFi-GAN 모델을 불러옵니다.\n","\n","만약 다른 체크포인트에서 불러오시려면 아래 코드에서 경로를 아래와 같이 적절하게 수정합니다.\n","\n","```python\n","synthesizer = Synthesizer(\n"," \"/content/drive/My Drive/Colab Notebooks/data/glowtts-v2/glowtts-v2-May-31-2021_08+17AM-d897f2e/best_model.pth.tar\",\n"," \"/content/drive/My Drive/Colab Notebooks/data/glowtts-v2/glowtts-v2-May-31-2021_08+17AM-d897f2e/config.json\",\n"," None,\n"," \"/content/drive/My Drive/Colab Notebooks/data/hifigan-v2/hifigan-v2-May-31-2021_08+26AM-d897f2e/checkpoint_300000.pth.tar\",\n"," \"/content/drive/My Drive/Colab Notebooks/data/hifigan-v2/hifigan-v2-May-31-2021_08+26AM-d897f2e/config.json\",\n"," None,\n"," None,\n"," False,\n",")\n","```"]},{"cell_type":"code","execution_count":null,"metadata":{"executionInfo":{"elapsed":10,"status":"aborted","timestamp":1707413252304,"user":{"displayName":"Cream Ice","userId":"02668969734157440879"},"user_tz":-540},"id":"zwROk8zUHgUn"},"outputs":[],"source":["synthesizer = Synthesizer(\n"," \"/content/drive/My Drive/Colab Notebooks/data/glowtts-v2/glowtts-v2-April-17-2022_04+46AM-3aa165a/best_model.pth.tar\",\n"," \"/content/drive/My Drive/Colab Notebooks/data/glowtts-v2/glowtts-v2-April-17-2022_04+46AM-3aa165a/config.json\",\n"," None,\n"," \"/content/drive/My Drive/Colab Notebooks/data/hifigan-v2/hifigan-v2-April-16-2022_11+57AM-3aa165a/best_model.pth.tar\",\n"," \"/content/drive/My Drive/Colab Notebooks/data/hifigan-v2/hifigan-v2-April-16-2022_11+57AM-3aa165a/config.json\",\n"," None,\n"," None,\n"," False,\n",")\n","symbols = synthesizer.tts_config.characters.characters"]},{"cell_type":"markdown","metadata":{"id":"tmjT_BrV0XYD"},"source":["## 4. 음성 합성\n","\n","실제 음성 합성을 수행합니다."]},{"cell_type":"code","execution_count":null,"metadata":{"executionInfo":{"elapsed":11,"status":"aborted","timestamp":1707413252305,"user":{"displayName":"Cream Ice","userId":"02668969734157440879"},"user_tz":-540},"id":"XSnF1D48F1tx"},"outputs":[],"source":["texts = \"\"\"\n","\n","\"\"\"\n","for text in normalize_multiline_text(texts):\n"," wav = synthesizer.tts(text, None, None)\n"," IPython.display.display(IPython.display.Audio(wav, rate=22050))"]}],"metadata":{"colab":{"collapsed_sections":["8erClGSnzwge"],"name":"","provenance":[{"file_id":"1YkxjzBz3V4eXoAaEgcFNEUg8ZyWV40x9","timestamp":1650109450650},{"file_id":"13pqat2mWsMha7Vn_-Q5_Ih8MDkvz3q5a","timestamp":1622375316346},{"file_id":"1IlZt42ETvNHthRFXfwNSSH-ftWthxzqr","timestamp":1596336131977},{"file_id":"1UinTd1Kp1ytwPQ4QWA610ZKOVfmPDdn5","timestamp":1596300568469}],"version":""},"kernelspec":{"display_name":"Python 3","name":"python3"}},"nbformat":4,"nbformat_minor":0}