Spaces:

Lycoris53
/

VITS-TTS-Japanese-Only-Amitaro

Running

File size: 5,337 Bytes

029074a

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "8775d691",
   "metadata": {},
   "outputs": [],
   "source": [
    "import librosa\n",
    "import os\n",
    "import soundfile\n",
    "from tqdm import tqdm, tqdm_notebook"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "bcd1f6dc",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      " 26%|█████████████████████████████████████████████▊                                                                                                                                  | 2606/10000 [01:01<02:54, 42.35it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Total audio file written : 2606\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "# Source corpus and the folder that receives the resampled copies.\n",
    "base_dir = \"../data/amitaro\"\n",
    "output_dir = \"../data/amitaro_22050hz\"\n",
    "\n",
    "# Entries directly under base_dir that are not regular files.\n",
    "all_dir = [\n",
    "    entry\n",
    "    for entry in os.listdir(base_dir)\n",
    "    if not os.path.isfile(os.path.join(base_dir, entry))\n",
    "]\n",
    "\n",
    "# Filled by the conversion pass with the name of each file it writes.\n",
    "file_list = []\n",
    "\n",
    "# Running count of files written by the conversion pass.\n",
    "total_file_write = 0\n",
    "\n",
    "# Directory-name fragments that are left out of the conversion.\n",
    "skip_dir = [\n",
    "    \"301_dousa\",\n",
    "    \"801_eng_suuji\",\n",
    "    \"801_eng_jikan\",\n",
    "    \"803_eng_others\",\n",
    "    \"912_alphabet\",\n",
    "    \"912_alphabet2\",\n",
    "    \"913_web\",\n",
    "    \"sample\",\n",
    "]\n",
    "\n",
    "def recursive_til_audio_file_found(path):\n",
    "    \"\"\"Walk ``path`` recursively and resample every audio file to 22050 Hz.\n",
    "\n",
    "    Every ``.wav``/``.mp3`` file (extension compared case-insensitively) is\n",
    "    loaded with librosa at 22050 Hz and written as 16-bit PCM WAV straight\n",
    "    into ``output_dir`` under its original file name; the source directory\n",
    "    layout is not reproduced. The file name is appended to ``file_list``,\n",
    "    ``total_file_write`` is incremented and ``pbar`` is advanced by one.\n",
    "\n",
    "    Sub-directories whose name contains any entry of ``skip_dir`` are not\n",
    "    visited.\n",
    "\n",
    "    Args:\n",
    "        path: Directory to scan.\n",
    "\n",
    "    Relies on the module-level names ``skip_dir``, ``output_dir``,\n",
    "    ``file_list``, ``total_file_write`` and ``pbar``.\n",
    "    \"\"\"\n",
    "    global total_file_write\n",
    "\n",
    "    # The destination has to exist before soundfile can write into it.\n",
    "    os.makedirs(output_dir, exist_ok=True)\n",
    "\n",
    "    # Sorted so that the processing order (and file_list) is reproducible.\n",
    "    for entry in sorted(os.listdir(path)):\n",
    "        entry_path = os.path.join(path, entry)\n",
    "\n",
    "        # Classify each entry on its own: a folder may mix files and\n",
    "        # sub-directories, so the first entry says nothing about the rest.\n",
    "        if os.path.isdir(entry_path):\n",
    "            # Match against the entry name only, so a parent folder in the\n",
    "            # path cannot cause everything below it to be skipped.\n",
    "            if any(skip in entry for skip in skip_dir):\n",
    "                continue\n",
    "            recursive_til_audio_file_found(entry_path)\n",
    "            continue\n",
    "\n",
    "        # Not an audio file: decide by the real extension, not a substring.\n",
    "        if os.path.splitext(entry)[1].lower() not in (\".wav\", \".mp3\"):\n",
    "            continue\n",
    "\n",
    "        audio, rate = librosa.load(entry_path, sr=22050)\n",
    "        # NOTE: the original file name is kept, so an .mp3 input produces a\n",
    "        # WAV-encoded file that still carries the .mp3 name.\n",
    "        output_path = os.path.join(output_dir, entry)\n",
    "        soundfile.write(output_path, audio, rate, format='wav', subtype=\"PCM_16\")\n",
    "        file_list.append(entry)\n",
    "\n",
    "        total_file_write += 1\n",
    "        pbar.update(1)\n",
    "\n",
    "# No total is given: the number of audio files is not known in advance, and\n",
    "# a guessed total makes the bar report a misleading percentage.\n",
    "with tqdm(desc=\"Converting\", unit=\" files\") as pbar:\n",
    "    recursive_til_audio_file_found(base_dir)\n",
    "print(f\"Total audio file written : {total_file_write}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "7efe2fec",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "base_dir = \"../data/amitaro_22050hz\"\n",
    "output_dir = \"./custom_character_voice/amitaro\"\n",
    "# os.rename fails when the destination folder is missing, so create it first.\n",
    "os.makedirs(output_dir, exist_ok=True)\n",
    "# os.listdir returns entries in arbitrary order; sort them so that the\n",
    "# amitaro_<i>.wav numbering is the same on every machine and every run.\n",
    "listed_dir = sorted(os.listdir(base_dir))\n",
    "# Move each file into output_dir under a sequential amitaro_<i>.wav name.\n",
    "for i, val in enumerate(listed_dir):\n",
    "    os.rename(os.path.join(base_dir, val), os.path.join(output_dir, f\"amitaro_{i}.wav\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "34c1fd46",
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "\n",
    "# One record per name in file_list, keyed by that name; \"kana\" starts empty.\n",
    "out_json = {name: {\"path\": name, \"kana\": \"\"} for name in file_list}\n",
    "\n",
    "# Serialise the manifest next to the notebook.\n",
    "with open(\"./amitaro.json\", \"w\") as outfile:\n",
    "    json.dump(out_json, outfile)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}