{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'イエーイ'" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "formal_to_informal = {\n", " \n", " \n", "\n", " 'ワタクシ': 'わたし',\n", " 'チカコ':'しゅうこ',\n", " \"タノヒト\":\"ほかのひと\",\n", "\n", " # Add more mappings as needed\n", "}\n", "\n", "formal_to_informal2 = {\n", "\n", " \"たのひと\":\"ほかのひと\",\n", " \"すうは\": \"かずは\",\n", "\n", "\n", " # Add more mappings as needed\n", "}\n", "\n", "formal_to_informal3 = {\n", "\n", " \"%\":\"%\",\n", " \"@\": \"あっとさいん\",\n", " \"$\":\"どる\",\n", " \"#\":\"はっしゅたぐ\",\n", " \"$\":\"どる\",\n", " \"#\":\"はっしゅたぐ\",\n", " \"何が\":\"なにが\",\n", "\n", " \"何も\":\"なにも\",\n", " \"何か\":\"なにか\",\n", " # \"奏\":\"かなで\",\n", " \"何は\":\"なにが\",\n", " \"お父様\":\"おとうさま\",\n", " \"お兄様\":\"おにいさま\",\n", " \"何を\":\"なにを\",\n", " \"良い\":\"いい\",\n", " \"李衣菜\":\"りいな\",\n", " \"志希\":\"しき\",\n", " \"種\":\"たね\",\n", " \"方々\":\"かたがた\",\n", " \"颯\":\"はやて\",\n", " \"茄子さん\":\"かこさん\",\n", " \"茄子ちゃん\":\"かこちゃん\",\n", " \"涼ちゃん\":\"りょうちゃん\",\n", " \"涼さん\":\"りょうさん\",\n", " \"紗枝\":\"さえ\",\n", " \"文香\":\"ふみか\",\n", " \"私\":\"わたし\",\n", " \"周子\":\"しゅうこ\",\n", " \"イェ\":\"いえ\",\n", " \"可憐\":\"かれん\",\n", " \"加蓮\":\"かれん\",\n", " \"・\":\".\",\n", " \"方の\":\"かたの\",\n", " \"気に\":\"きに\",\n", " \"唯さん\":\"ゆいさん\",\n", " \"唯ちゃん\":\"ゆいちゃん\",\n", " \"聖ちゃん\":\"ひじりちゃん\",\n", " \"他の\":\"ほかの\",\n", " \"他に\":\"ほかに\",\n", " \"一生懸命\":\"いっしょうけんめい\",\n", " \"楓さん\":\"かえでさん\",\n", " \"楓ちゃん\":\"かえでちゃん\",\n", " \"内から\":\"ないから\",\n", " \"の下で\":\"のしたで\",\n", "\n", "}\n", "\n", "\n", "mapper = dict([\n", "\n", " (\"仕方\",\"しかた\"),\n", " (\"明日\",\"あした\"),\n", " ('私',\"わたし\"),\n", " (\"従妹\",\"いとこ\"),\n", " \n", " (\"1人\",\"ひとり\"),\n", " (\"2人\",\"ふたり\"),\n", " \n", " (\"一期\",\"いちご\"),\n", " (\"一会\",\"いちえ\"),\n", " \n", " (\"♪\",\"!\"),\n", " (\"?\",\"?\"),\n", "\n", " (\"どんな方\",\"どんなかた\"),\n", " (\"ふたり暮らし\",\"ふたりぐらし\"),\n", "\n", " (\"新年\",\"しんねん\"),\n", " (\"来年\",\"らいねん\"),\n", " (\"去年\",\"きょねん\"),\n", " (\"壮年\",\"そうねん\"),\n", " (\"今年\",\"ことし\"),\n", "\n", " (\"昨年\",\"さくねん\"),\n", " (\"本年\",\"ほんねん\"),\n", " (\"平年\",\"へいねん\"),\n", " (\"閏年\",\"うるうどし\"),\n", " (\"初年\",\"しょねん\"),\n", " (\"少年\",\"しょうねん\"),\n", " (\"多年\",\"たねん\"),\n", " (\"青年\",\"せいねん\"),\n", " (\"中年\",\"ちゅうねん\"),\n", " (\"老年\",\"ろうねん\"),\n", " (\"成年\",\"せいねん\"),\n", " (\"幼年\",\"ようねん\"),\n", " (\"前年\",\"ぜんねん\"),\n", " (\"元年\",\"がんねん\"),\n", " (\"経年\",\"けいねん\"),\n", " (\"当年\",\"とうねん\"),\n", "\n", " (\"明年\",\"みょうねん\"),\n", " (\"歳年\",\"さいねん\"),\n", " (\"数年\",\"すうねん\"),\n", " (\"半年\",\"はんとし\"),\n", " (\"後年\",\"こうねん\"),\n", " (\"実年\",\"じつねん\"),\n", " (\"年年\",\"ねんねん\"),\n", " (\"連年\",\"れんねん\"),\n", " (\"暦年\",\"れきねん\"),\n", " (\"各年\",\"かくねん\"),\n", " (\"全年\",\"ぜんねん\"),\n", "\n", " (\"年を\",\"としを\"),\n", " (\"年が\",\"としが\"),\n", " (\"年も\",\"としも\"),\n", " (\"年は\",\"としは\"),\n", "\n", "\n", " (\"奏ちゃん\",\"かなでちゃん\"),\n", " (\"負けず嫌い\",\"まけずぎらい\"),\n", " (\"貴方\",\"あなた\"),\n", " (\"貴女\",\"あなた\"),\n", " (\"貴男\",\"あなた\"),\n", "\n", " (\"その節\",\"そのせつ\"),\n", "\n", " (\"何し\",\"なにし\"),\n", " (\"何する\",\"なにする\"),\n", "\n", " (\"心さん\",\"しんさん\"),\n", " (\"心ちゃん\",\"しんちゃん\"),\n", "\n", " (\"乃々\",\"のの\"),\n", "\n", " (\"身体の\",\"からだの\"),\n", " (\"身体が\",\"からだが\"),\n", " (\"身体を\",\"からだを\"),\n", " (\"身体は\",\"からだは\"),\n", " (\"身体に\",\"からだに\"),\n", " (\"正念場\",\"しょうねんば\"),\n", " (\"言う\",\"いう\"),\n", " \n", " \n", " (\"一回\",\"いっかい\"),\n", " (\"一曲\",\"いっきょく\"),\n", " (\"一日\",\"いちにち\"),\n", " (\"一言\",\"ひとこと\"),\n", " (\"一杯\",\"いっぱい\"),\n", " \n", " \n", " (\"方が\",\"ほうが\"),\n", " (\"縦輪城\",\"じゅうりんしろ\"),\n", " (\"深息\",\"しんそく\"),\n", " (\"家人\",\"かじん\"),\n", " (\"お返し\",\"おかえし\"),\n", " (\"化物語\",\"ばけものがたり\"),\n", " (\"阿良々木暦\",\"あららぎこよみ\"),\n", " (\"何より\",\"なにより\")\n", "\n", "\n", "])\n", "\n", "\n", "# Merge all dictionaries into one\n", "all_transformations = {**formal_to_informal, **formal_to_informal2, **formal_to_informal3, **mapper}\n", "\n", "def apply_transformations(text, transformations = all_transformations):\n", " for key, value in transformations.items():\n", " text = text.replace(key, value)\n", " return text\n", "apply_transformations('イエーイ')\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'さん人'" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import re\n", "\n", "def number_to_japanese(num):\n", " if not isinstance(num, int) or num < 0 or num > 9999:\n", " return \"Invalid input\"\n", "\n", " digits = [\"\", \"いち\", \"に\", \"さん\", \"よん\", \"ご\", \"ろく\", \"なな\", \"はち\", \"きゅう\"]\n", " tens = [\"\", \"じゅう\", \"にじゅう\", \"さんじゅう\", \"よんじゅう\", \"ごじゅう\", \"ろくじゅう\", \"ななじゅう\", \"はちじゅう\", \"きゅうじゅう\"]\n", " hundreds = [\"\", \"ひゃく\", \"にひゃく\", \"さんびゃく\", \"よんひゃく\", \"ごひゃく\", \"ろっぴゃく\", \"ななひゃく\", \"はっぴゃく\", \"きゅうひゃく\"]\n", " thousands = [\"\", \"せん\", \"にせん\", \"さんぜん\", \"よんせん\", \"ごせん\", \"ろくせん\", \"ななせん\", \"はっせん\", \"きゅうせん\"]\n", "\n", " if num == 0:\n", " return \"ゼロ\"\n", "\n", " result = \"\"\n", " if num >= 1000:\n", " result += thousands[num // 1000]\n", " num %= 1000\n", " if num >= 100:\n", " result += hundreds[num // 100]\n", " num %= 100\n", " if num >= 10:\n", " result += tens[num // 10]\n", " num %= 10\n", " if num > 0:\n", " result += digits[num]\n", "\n", " return result\n", "\n", "def convert_numbers_in_string(input_string):\n", " # Regular expression to find numbers in the string\n", " number_pattern = re.compile(r'\\d+')\n", "\n", " # Function to replace numbers with their Japanese pronunciation\n", " def replace_with_japanese(match):\n", " num = int(match.group())\n", " return number_to_japanese(num)\n", "\n", " # Replace all occurrences of numbers in the string\n", " converted_string = number_pattern.sub(replace_with_japanese, input_string)\n", " return converted_string\n", "\n", "convert_numbers_in_string(\"3人\")\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "\n", "roma_mapper = dict([\n", " \n", " ################################\n", "\n", " (\"my\",\"mʲ\"),\n", " (\"by\",\"bʲ\"),\n", " (\"ny\",\"nʲ\"),\n", " (\"ry\",\"rʲ\"),\n", " (\"si\",\"sʲ\"),\n", " (\"ky\",\"kʲ\"),\n", " (\"gy\",\"gʲ\"),\n", " (\"dy\",\"dʲ\"),\n", " (\"di\",\"dʲ\"),\n", " (\"fi\",\"fʲ\"),\n", " (\"fy\",\"fʲ\"),\n", " (\"ch\",\"tɕ\"),\n", " (\"sh\",\"ɕ\"),\n", " \n", " ################################\n", "\n", " (\"a\",\"a\"),\n", " (\"i\",\"i\"),\n", " (\"u\",\"ɯ\"),\n", " (\"e\",\"e\"),\n", " (\"o\",\"o\"),\n", " (\"ka\",\"ka\"),\n", " (\"ki\",\"ki\"),\n", " (\"ku\",\"kɯ\"),\n", " (\"ke\",\"ke\"),\n", " (\"ko\",\"ko\"),\n", " (\"sa\",\"sa\"),\n", " (\"shi\",\"ɕi\"),\n", " (\"su\",\"sɯ\"),\n", " (\"se\",\"se\"),\n", " (\"so\",\"so\"),\n", " (\"ta\",\"ta\"),\n", " (\"chi\",\"tɕi\"),\n", " (\"tsu\",\"tsɯ\"),\n", " (\"te\",\"te\"),\n", " (\"to\",\"to\"),\n", " (\"na\",\"na\"),\n", " (\"ni\",\"ni\"),\n", " (\"nu\",\"nɯ\"),\n", " (\"ne\",\"ne\"),\n", " (\"no\",\"no\"),\n", " (\"ha\",\"ha\"),\n", " (\"hi\",\"çi\"),\n", " (\"fu\",\"ɸɯ\"),\n", " (\"he\",\"he\"),\n", " (\"ho\",\"ho\"),\n", " (\"ma\",\"ma\"),\n", " (\"mi\",\"mi\"),\n", " (\"mu\",\"mɯ\"),\n", " (\"me\",\"me\"),\n", " (\"mo\",\"mo\"),\n", " (\"ra\",\"ɽa\"),\n", " (\"ri\",\"ɽi\"),\n", " (\"ru\",\"ɽɯ\"),\n", " (\"re\",\"ɽe\"),\n", " (\"ro\",\"ɽo\"),\n", " (\"ga\",\"ga\"),\n", " (\"gi\",\"gi\"),\n", " (\"gu\",\"gɯ\"),\n", " (\"ge\",\"ge\"),\n", " (\"go\",\"go\"),\n", " (\"za\",\"za\"),\n", " (\"ji\",\"dʑi\"),\n", " (\"zu\",\"zɯ\"),\n", " (\"ze\",\"ze\"),\n", " (\"zo\",\"zo\"),\n", " (\"da\",\"da\"),\n", " \n", "\n", " (\"zu\",\"zɯ\"),\n", " (\"de\",\"de\"),\n", " (\"do\",\"do\"),\n", " (\"ba\",\"ba\"),\n", " (\"bi\",\"bi\"),\n", " (\"bu\",\"bɯ\"),\n", " (\"be\",\"be\"),\n", " (\"bo\",\"bo\"),\n", " (\"pa\",\"pa\"),\n", " (\"pi\",\"pi\"),\n", " (\"pu\",\"pɯ\"),\n", " (\"pe\",\"pe\"),\n", " (\"po\",\"po\"),\n", " (\"ya\",\"ja\"),\n", " (\"yu\",\"jɯ\"),\n", " (\"yo\",\"jo\"),\n", " (\"wa\",\"wa\"),\n", " \n", "\n", " \n", "\n", " (\"a\",\"a\"),\n", " (\"i\",\"i\"),\n", " (\"u\",\"ɯ\"),\n", " (\"e\",\"e\"),\n", " (\"o\",\"o\"),\n", " (\"wa\",\"wa\"),\n", " (\"o\",\"o\"),\n", "\n", "\n", " (\"wo\",\"o\")])\n", "\n", "nasal_sound = dict([\n", " # before m, p, b\n", " (\"ɴm\",\"mm\"),\n", " (\"ɴb\", \"mb\"),\n", " (\"ɴp\", \"mp\"),\n", " \n", " # before k, g\n", " (\"ɴk\",\"ŋk\"),\n", " (\"ɴg\", \"ŋg\"),\n", " \n", " # before t, d, n, s, z, ɽ\n", " (\"ɴt\",\"nt\"),\n", " (\"ɴd\", \"nd\"),\n", " (\"ɴn\",\"nn\"),\n", " (\"ɴs\", \"ns\"),\n", " (\"ɴz\",\"nz\"),\n", " (\"ɴɽ\", \"nɽ\"),\n", " \n", " (\"ɴɲ\", \"ɲɲ\"),\n", " \n", "])\n", "\n", "def Roma2IPA(text):\n", " orig = text\n", "\n", " for k, v in roma_mapper.items():\n", " text = text.replace(k, v)\n", " \n", " return text\n", "\n", "def nasal_mapper(text):\n", " orig = text\n", "\n", "\n", " for k, v in nasal_sound.items():\n", " text = text.replace(k, v)\n", " \n", " return text\n", "\n", "def alphabetreading(text):\n", " alphabet_dict = {\"A\": \"エイ\",\n", " \"B\": \"ビー\",\n", " \"C\": \"シー\",\n", " \"D\": \"ディー\",\n", " \"E\": \"イー\",\n", " \"F\": \"エフ\",\n", " \"G\": \"ジー\",\n", " \"H\": \"エイチ\",\n", " \"I\":\"アイ\",\n", " \"J\":\"ジェイ\",\n", " \"K\":\"ケイ\",\n", " \"L\":\"エル\",\n", " \"M\":\"エム\",\n", " \"N\":\"エヌ\",\n", " \"O\":\"オー\",\n", " \"P\":\"ピー\",\n", " \"Q\":\"キュー\",\n", " \"R\":\"アール\",\n", " \"S\":\"エス\",\n", " \"T\":\"ティー\",\n", " \"U\":\"ユー\",\n", " \"V\":\"ヴィー\",\n", " \"W\":\"ダブリュー\",\n", " \"X\":\"エックス\",\n", " \"Y\":\"ワイ\",\n", " \"Z\":\"ゼッド\"}\n", " text = text.upper()\n", " text_ret = \"\"\n", " for t in text:\n", " if t in alphabet_dict:\n", " text_ret += alphabet_dict[t]\n", " else:\n", " text_ret += t\n", " return text_ret" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'sekawɴdo'" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import re\n", "import cutlet\n", "\n", "roma_mapper_plus_2 = {\n", " \n", "\"bjo\":'bʲo',\n", "\"rjo\":\"rʲo\",\n", "\"kjo\":\"kʲo\",\n", "\"kyu\":\"kʲu\",\n", " \n", "}\n", "\n", "def replace_repeated_chars(input_string):\n", " result = []\n", " i = 0\n", " while i < len(input_string):\n", " if i + 1 < len(input_string) and input_string[i] == input_string[i + 1] and input_string[i] in 'aiueo':\n", " result.append(input_string[i] + 'ː')\n", " i += 2 \n", " else:\n", " result.append(input_string[i])\n", " i += 1\n", " return ''.join(result)\n", "\n", "\n", "def replace_chars_2(text, mapping=roma_mapper_plus_2):\n", " \n", "\n", " sorted_keys = sorted(mapping.keys(), key=len, reverse=True)\n", "\n", " pattern = '|'.join(re.escape(key) for key in sorted_keys)\n", " \n", "\n", " def replace(match):\n", " key = match.group(0)\n", " return mapping.get(key, key)\n", "\n", " return re.sub(pattern, replace, text)\n", "\n", "\n", "def replace_tashdid_2(s):\n", " vowels = 'aiueoɯ0123456789.?!_。؟?!...@@##$$%%^^&&**()()_+=[「」]>= 3:\n", "# return pattern + \"~~~\"\n", "# return match.group(0)\n", "\n", "# # Pattern for space-separated repeats\n", "# pattern1 = r'((?:\\S+\\s+){1,5}?)(?:\\1){2,}'\n", "# # Pattern for continuous repeats without spaces\n", "# pattern2 = r'(.+?)\\1{2,}'\n", "\n", "# text = re.sub(pattern1, replace_repeats, text)\n", "# text = re.sub(pattern2, replace_repeats, text)\n", "# return text\n", "\n", "\n", "def replace_repeating_a(output):\n", " # Define patterns and their replacements\n", " patterns = [\n", " (r'(aː)\\s*\\1+\\s*', r'\\1~'), # Replace repeating \"aː\" with \"aː~~\"\n", " (r'(aːa)\\s*aː', r'\\1~'), # Replace \"aːa aː\" with \"aː~~\"\n", " (r'aːa', r'aː~'), # Replace \"aːa\" with \"aː~\"\n", " (r'naː\\s*aː', r'naː~'), # Replace \"naː aː\" with \"naː~\"\n", " (r'(oː)\\s*\\1+\\s*', r'\\1~'), # Replace repeating \"oː\" with \"oː~~\"\n", " (r'(oːo)\\s*oː', r'\\1~'), # Replace \"oːo oː\" with \"oː~~\"\n", " (r'oːo', r'oː~'), # Replace \"oːo\" with \"oː~\"\n", " (r'(eː)\\s*\\1+\\s*', r'\\1~'), \n", " (r'(e)\\s*\\1+\\s*', r'\\1~'), \n", " (r'(eːe)\\s*eː', r'\\1~'), \n", " (r'eːe', r'eː~'), \n", " (r'neː\\s*eː', r'neː~'), \n", " ]\n", "\n", " \n", " # Apply each pattern to the output\n", " for pattern, replacement in patterns:\n", " output = re.sub(pattern, replacement, output)\n", " \n", " return output\n", "\n", "def phonemize(text):\n", " \n", " # if \"っ\" in text:\n", " # text = text.replace(\"っ\",\"ʔ\")\n", " \n", " output = post_fix(process_japanese_text(text))\n", " #output = text\n", " \n", " if \" ɴ\" in output:\n", " output = output.replace(\" ɴ\", \"ɴ\")\n", " if \"y\" in output:\n", " output = output.replace(\"y\", \"j\")\n", " if \"ɯa\" in output:\n", " output = output.replace(\"ɯa\", \"wa\")\n", " \n", " if \"a aː\" in output:\n", " output = output.replace(\"a aː\",\"a~\")\n", " if \"a a\" in output:\n", " output = output.replace(\"a a\",\"a~\")\n", "\n", "\n", "\n", " \n", " \n", " output = replace_repeating_a((output))\n", " output = re.sub(r'\\s+~', '~', output)\n", " \n", " if \"oː~o oː~ o\" in output:\n", " output = output.replace(\"oː~o oː~ o\",\"oː~~~~~~\")\n", " if \"aː~aː\" in output:\n", " output = output.replace(\"aː~aː\",\"aː~~~\")\n", " if \"oɴ naː\" in output:\n", " output = output.replace(\"oɴ naː\",\"onnaː\")\n", " if \"aː~~ aː\" in output:\n", " output = output.replace(\"aː~~ aː\",\"aː~~~~\")\n", " if \"oː~o\" in output:\n", " output = output.replace(\"oː~o\",\"oː~~\")\n", " if \"oː~~o o\" in output:\n", " output = output.replace(\"oː~~o o\",\"oː~~~~\") # yeah I'm too tired to learn regex how did you know\n", "\n", " output = random_space_fix(output)\n", " output = random_sym_fix(output) # fixing some symbols, if they have a specific white space such as miku& sakura -> miku ando sakura\n", " output = random_sym_fix_no_space(output) # same as above but for those without white space such as miku&sakura -> miku ando sakura\n", " # if \"ɯ\" in output:\n", " # output = output.replace(\"ɯ\",\"U\")ss\n", " # if \"ʔ\" in output:\n", " # output = output.replace(\"ʔ\",\"!\")\n", " \n", " return output.lstrip()\n", "# def process_row(row):\n", "# return {'phonemes': [phonemize(word) for word in row['phonemes']]}\n", "\n", "\n", "# Example usage\n", "text = \"\"\"日本には、美しい自然と豊かな文化があります。特に、四季の変化は日本の魅力の一つです。春には桜の花が咲き、多くの人々が花見に出かけます。桜の下で家族や友人と一緒にお弁当を食べたり、お酒を飲んだりして楽しむのが一般的です。\n", "\n", "夏には、各地で花火大会が行われます。夜空に打ち上げられる花火はとても美しく、多くの人々が浴衣を着て参加します。また、夏は海やプールで泳ぐのも楽しいです。海辺では、サンドイッチやアイスクリームを食べながら、波と戯れることもできます。\n", "\n", "秋には、紅葉が美しく、多くの人々が紅葉狩りに出かけます。特に京都の嵐山や奈良の奈良公園は、紅葉が有名です。紅葉の下を散歩しながら、自然の美しさを感じることができます。また、秋はおいしい果物や野菜がたくさん取れる季節でもあります。りんごやさつまいも、そして栗など、秋の味覚を楽しむことができます。\n", "\n", "冬には、雪が降り、特に北海道や東北地方では雪祭りが開催されます。雪像や氷の彫刻が展示され、夜にはライトアップされてとても幻想的です。また、温泉に入るのも冬の楽しみの一つです。暖かいお湯に浸かりながら、外の雪景色を眺めるのは最高の贅沢です。\n", "\n", "日本には、このように四季ごとに違った魅力があります。どの季節に訪れても、新しい発見や感動があることでしょう。日本の自然と文化を体験して、心豊かな旅を楽しんでください。\"\"\"\n", "result = phonemize(text)\n", "print(result)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ref_texts = {}\n", "ref_texts['Happy'] = \"neː neː, iʔɕo ni kako e no tabi niːkanai? dʑiɴrɯi ga tsɯkɯʔta tɕoɯsɯgoi tatemono to ka mi niːkerɯɴ da jo!\"\n", "ref_texts['Sad'] = \"zanneɴ naɴ dakedo, keiki to ɕinɽai o toɽimodosoɯ to ɕiteta no ni, kanaɽi koɯtai ɕitɕaʔtaɴ da.\"\n", "ref_texts['Angry'] = \"temmoŋgakɯ nante bakagete irɯ! sono ɽiɽoɴ wa keʔkandaɽake no kansatsɯ to katajoʔta kaiɕakɯ ni motozɯite irɯɴ da!\"\n", "ref_texts['Surprised'] = \"ɕindʑiɽaɽenai! kono ike de ataɽaɕi tane no bakɯteɽia o haʔkeɴ ɕitaʔte hontoɯ na no?\"" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "\n", "df3.to_csv(\"/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/imas_split/shiki/shiki_fine/shiki_finetune.csv\", index=False, sep=\"|\", header=None)" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "8efe9d5252f64d799ced920633af72e0", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Map: 0%| | 0/2233 [00:00 CN_031.wav\n", "Renamed: CN_010.mp3(1).wav -> CN_010.wav\n", "Renamed: CN_021.mp3(1).wav -> CN_021.wav\n", "Renamed: CN_030.mp3(1).wav -> CN_030.wav\n", "Renamed: CN_011.mp3(1).wav -> CN_011.wav\n", "Renamed: CN_007.mp3(1).wav -> CN_007.wav\n", "Renamed: CN_006.mp3(1).wav -> CN_006.wav\n", "Renamed: CN_028.mp3(1).wav -> CN_028.wav\n", "Renamed: CN_022.mp3(1).wav -> CN_022.wav\n", "Renamed: CN_027.mp3(1).wav -> CN_027.wav\n", "Renamed: CN_029.mp3(1).wav -> CN_029.wav\n", "Renamed: CN_036.mp3(1).wav -> CN_036.wav\n", "Renamed: CN_033.mp3(1).wav -> CN_033.wav\n", "Renamed: CN_025.mp3(1).wav -> CN_025.wav\n", "Renamed: CN_026.mp3(1).wav -> CN_026.wav\n", "Renamed: CN_023.mp3(1).wav -> CN_023.wav\n", "Renamed: CN_042.mp3(1).wav -> CN_042.wav\n", "Renamed: CN_005.mp3(1).wav -> CN_005.wav\n", "Renamed: CN_013.mp3(1).wav -> CN_013.wav\n", "Renamed: CN_019.mp3(1).wav -> CN_019.wav\n", "Renamed: CN_002.mp3(1).wav -> CN_002.wav\n", "Renamed: CN_037.mp3(1).wav -> CN_037.wav\n", "Renamed: CN_020.mp3(1).wav -> CN_020.wav\n", "Renamed: CN_018.mp3(1).wav -> CN_018.wav\n", "Renamed: CN_004.mp3(1).wav -> CN_004.wav\n", "Renamed: CN_008.mp3(1).wav -> CN_008.wav\n", "Renamed: CN_024.mp3(1).wav -> CN_024.wav\n", "Renamed: CN_014.mp3(1).wav -> CN_014.wav\n", "Renamed: CN_032.mp3(1).wav -> CN_032.wav\n", "Renamed: CN_012.mp3(1).wav -> CN_012.wav\n", "Renamed: CN_017.mp3(1).wav -> CN_017.wav\n", "Renamed: CN_003.mp3(1).wav -> CN_003.wav\n" ] } ], "source": [ "import os\n", "\n", "# Define the directory path\n", "directory = \"/home/austin/disk1/stts-zs_cleaning/data/moe_soshy/Japanese/saori\"\n", "\n", "# Iterate over all files in the directory\n", "for filename in os.listdir(directory):\n", " # Check if the file name contains \".mp3(1)\"\n", " if \".mp3(1)\" in filename:\n", " # Construct the new file name by removing \".mp3(1)\"\n", " new_filename = filename.replace(\".mp3(1)\", \"\")\n", " \n", " # Construct the full file paths\n", " old_file_path = os.path.join(directory, filename)\n", " new_file_path = os.path.join(directory, new_filename)\n", " \n", " # Rename the file\n", " os.rename(old_file_path, new_file_path)\n", " print(f\"Renamed: {filename} -> {new_filename}\")" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "93.4 μs ± 9.95 μs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)\n" ] } ], "source": [ "%%timeit\n", "class stuff:\n", " def __init__(self):\n", " self.phon = phonemize\n", " \n", " def __call__(self,text):\n", " ps = self.phon(text)\n", " return ps\n", " \n", "lol = stuff()\n", "\n", "lol(\"少年\")\n", "\n", "\n" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [], "source": [ "import os\n", "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"\n" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "MOLADEALD ALALD ALALD WALLD.\n" ] } ], "source": [ "\n", "# pip install vllm\n", "\n", "from openai import OpenAI\n", "\n", "\n", "openai_api_key = \"EMPTY\"\n", "openai_api_base = \"http://localhost:8000/v1\"\n", "\n", "client = OpenAI(\n", " api_key=openai_api_key,\n", " base_url=openai_api_base,\n", ")\n", "\n", "model_name = \"Respair/Test_QwJP\"\n", "\n", "\n", "def p2g(param):\n", "\n", " chat_response = client.chat.completions.create(\n", "\n", " model=model_name,\n", " max_tokens=512,\n", "\n", "\n", " messages=[\n", " \n", " {\"role\": \"user\", \"content\": f\"{param}\"}]\n", " ) \n", " \n", " result = chat_response.choices[0].message.content\n", " if \"  \" in result:\n", " result = result.replace(\"  \",\" \")\n", "\n", " return result.lstrip()\n", "\n", "\n", "prompt= f\"\"\"Turn IPA to Japanese: mendoɯ dakaɽa.\"\"\"\n", "\n", "result= p2g(prompt)\n", "\n", "print(result)\n" ] }, { "cell_type": "code", "execution_count": 81, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "646f07f3a8f14b3da091d085b5989df0", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Map: 0%| | 0/13 [00:00\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
textIPA2
0@@@@
1......
2!!
3??
4####
\n", "" ], "text/plain": [ " text IPA2\n", "0 @@ @@\n", "1 ... ...\n", "2 ! !\n", "3 ? ?\n", "4 ## ##" ] }, "execution_count": 79, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\n", "# List of symbols\n", "symbols = [\"@@\", \"...\", \"!\", \"?\", \"##\", \"%%\", \"&&\",\"ᵻᵻᵻ\",\"ᵻᵻᵻᵻᵻᵻᵻᵻ!\",\"~\",\"«...!!»\",\"«...»\",\"?...!?\"]\n", "\n", "# Initialize an empty DataFrame\n", "df2 = pd.DataFrame()\n", "\n", "# Populate the DataFrame with symbols\n", "df2['text'] = symbols\n", "df2['IPA2'] = symbols\n", "df2.head()" ] }, { "cell_type": "code", "execution_count": 90, "metadata": {}, "outputs": [], "source": [ "df.to_json(\"/home/austin/disk2/llmvcs/tt/jp_p2g.jsonl\",lines=True, orient='records', index=False)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!huggingface-cli login --token hf_xxx\n", "dataset.push_to_hub('Respair/Japanese_Phoneme_to_Grapheme_DS_PLUS_Upgraded')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "ckpt path or config path does not exist! Downloading the model from the Hugging Face Hub...\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "67889d301c494f8b9c21184a014a8f85", "version_major": 2, "version_minor": 0 }, "text/plain": [ ".gitattributes: 0%| | 0.00/1.52k [00:00 39\u001b[0m \u001b[43mprocess_directory\u001b[49m\u001b[43m(\u001b[49m\u001b[43mroot_directory\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 40\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mProcessing complete.\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 41\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n", "Cell \u001b[0;32mIn[4], line 34\u001b[0m, in \u001b[0;36mprocess_directory\u001b[0;34m(root_dir)\u001b[0m\n\u001b[1;32m 31\u001b[0m wav_files\u001b[38;5;241m.\u001b[39mappend(os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mjoin(dirpath, filename))\n\u001b[1;32m 33\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m file_path \u001b[38;5;129;01min\u001b[39;00m tqdm(wav_files, desc\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mProcessing WAV files\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[0;32m---> 34\u001b[0m \u001b[43mencode_wav_file\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfile_path\u001b[49m\u001b[43m)\u001b[49m\n", "Cell \u001b[0;32mIn[4], line 19\u001b[0m, in \u001b[0;36mencode_wav_file\u001b[0;34m(file_path)\u001b[0m\n\u001b[1;32m 16\u001b[0m encoded, _ \u001b[38;5;241m=\u001b[39m model\u001b[38;5;241m.\u001b[39mencode_wav(y, \u001b[38;5;241m44100\u001b[39m, [\u001b[38;5;241m123\u001b[39m, \u001b[38;5;241m234\u001b[39m, \u001b[38;5;241m111\u001b[39m, \u001b[38;5;241m222\u001b[39m, \u001b[38;5;241m11\u001b[39m], message_sdr\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m47\u001b[39m)\n\u001b[1;32m 18\u001b[0m \u001b[38;5;66;03m# Resample back to 24kHz\u001b[39;00m\n\u001b[0;32m---> 19\u001b[0m encoded_24k \u001b[38;5;241m=\u001b[39m \u001b[43mresampy\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresample\u001b[49m\u001b[43m(\u001b[49m\u001b[43mencoded\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m44100\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m24000\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 21\u001b[0m \u001b[38;5;66;03m# Save\u001b[39;00m\n\u001b[1;32m 22\u001b[0m sf\u001b[38;5;241m.\u001b[39mwrite(file_path, encoded_24k, \u001b[38;5;241m24000\u001b[39m)\n", "File \u001b[0;32m~/disk2/micromamba/envs/decypher/lib/python3.11/site-packages/resampy/core.py:168\u001b[0m, in \u001b[0;36mresample\u001b[0;34m(x, sr_orig, sr_new, axis, filter, parallel, **kwargs)\u001b[0m\n\u001b[1;32m 158\u001b[0m resample_f_s(\n\u001b[1;32m 159\u001b[0m x\u001b[38;5;241m.\u001b[39mswapaxes(\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m, axis),\n\u001b[1;32m 160\u001b[0m t_out,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 165\u001b[0m y\u001b[38;5;241m.\u001b[39mswapaxes(\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m, axis),\n\u001b[1;32m 166\u001b[0m )\n\u001b[1;32m 167\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 168\u001b[0m \u001b[43mresample_f_s\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 169\u001b[0m \u001b[43m \u001b[49m\u001b[43mx\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mswapaxes\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m-\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 170\u001b[0m \u001b[43m \u001b[49m\u001b[43mt_out\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 171\u001b[0m \u001b[43m \u001b[49m\u001b[43minterp_win\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 172\u001b[0m \u001b[43m \u001b[49m\u001b[43minterp_delta\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 173\u001b[0m \u001b[43m \u001b[49m\u001b[43mprecision\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 174\u001b[0m \u001b[43m \u001b[49m\u001b[43mscale\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 175\u001b[0m \u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mswapaxes\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m-\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 176\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 178\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m y\n", "File \u001b[0;32m~/disk2/micromamba/envs/decypher/lib/python3.11/site-packages/numba/np/ufunc/gufunc.py:279\u001b[0m, in \u001b[0;36mGUFunc.__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 276\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39madd(sig)\n\u001b[1;32m 277\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbuild_ufunc()\n\u001b[0;32m--> 279\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mufunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", "\u001b[0;31mKeyboardInterrupt\u001b[0m: " ] } ], "source": [ "import os\n", "import librosa\n", "import silentcipher\n", "import soundfile as sf\n", "from tqdm.notebook import tqdm\n", "import resampy\n", "\n", "def encode_wav_file(file_path):\n", " try:\n", " # Load and resample to 44.1kHz\n", " y, sr = librosa.load(file_path, sr=None)\n", " if sr != 44100:\n", " y = resampy.resample(y, sr, 44100)\n", " \n", " # Encode\n", " encoded, _ = model.encode_wav(y, 44100, [123, 234, 111, 222, 11], message_sdr=47)\n", " \n", " # Resample back to 24kHz\n", " encoded_24k = resampy.resample(encoded, 44100, 24000)\n", " \n", " # Save\n", " sf.write(file_path, encoded_24k, 24000)\n", " except Exception as e:\n", " raise RuntimeError(f\"Failed to process {file_path}: {str(e)}\")\n", "\n", "def process_directory(root_dir):\n", " wav_files = []\n", " for dirpath, dirnames, filenames in os.walk(root_dir):\n", " for filename in filenames:\n", " if filename.lower().endswith('.wav'):\n", " wav_files.append(os.path.join(dirpath, filename))\n", " \n", " for file_path in tqdm(wav_files, desc=\"Processing WAV files\"):\n", " encode_wav_file(file_path)\n", " \n", " \n", "root_directory = \"/home/austin/disk2/llmvcs/tt/stylekan/Data/Japanese\"\n", "try:\n", " process_directory(root_directory)\n", " print(\"Processing complete.\")\n", "except Exception as e:\n", " print(f\"Processing stopped due to an error: {str(e)}\")\n", " raise" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import resampy\n", "\n", "y, sr = librosa.load('/home/austin/disk2/llmvcs/tt/cleaning/decypher.wav', sr=None)\n", "y = resampy.resample(y, sr, 44100)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "\n", "# The message should be in the form of five 8-bit characters, giving a total message capacity of 40 bits \n", "\n", "# encoded, sdr = model.encode_wav(y, sr, [123, 234, 111, 222, 11])\n", "\n", "# # You can specify the message SDR (in dB) along with the encode_wav function. But this may result in unexpected detection accuracy\n", "# # encoded, sdr = model.encode_wav(y, sr, [123, 234, 111, 222, 11], message_sdr=47)\n", "\n", "# # You should set phase_shift_decoding to True when you want the decoder to be robust to audio crops.\n", "# # !Warning, this can increase the decode time quite drastically.\n", "\n", "result = model.decode_wav(y, sr, phase_shift_decoding=False)\n", "\n" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "False\n" ] }, { "ename": "IndexError", "evalue": "list index out of range", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[4], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28mprint\u001b[39m(result[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mstatus\u001b[39m\u001b[38;5;124m'\u001b[39m])\n\u001b[0;32m----> 2\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[43mresult\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mmessages\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m \u001b[38;5;241m==\u001b[39m [\u001b[38;5;241m123\u001b[39m, \u001b[38;5;241m234\u001b[39m, \u001b[38;5;241m111\u001b[39m, \u001b[38;5;241m222\u001b[39m, \u001b[38;5;241m11\u001b[39m])\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28mprint\u001b[39m(result[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mconfidences\u001b[39m\u001b[38;5;124m'\u001b[39m][\u001b[38;5;241m0\u001b[39m])\n", "\u001b[0;31mIndexError\u001b[0m: list index out of range" ] } ], "source": [ "\n", "print(result['status'])\n", "print(result['messages'][0] == [123, 234, 111, 222, 11])\n", "print(result['confidences'][0])" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.9" } }, "nbformat": 4, "nbformat_minor": 4 }