Upload 10 files

Browse files

Updated some features

Files changed (7) hide show

README.md +3 -16
WarBot.py +132 -0
WarBot_test.ipynb +129 -24
WarClient.py +14 -0
WarServer.py +25 -0
latest_silero_models.yml +563 -0
requirements.txt +2 -0

README.md CHANGED Viewed

@@ -1,16 +1,3 @@
----
-license: apache-2.0
-datasets:
-- kertser/WarOnline
-language:
-- ru
-tags:
-- chatbot
-- WarOnline
-- NLP
-- GPT2
----
-This is a GPT-style model, based on "Kirili4ik/ruDialoGpt3-medium-finetuned-telegram" model and fine-tuned on the WarOnline Dataset.<br>
-https://huggingface.co/Kirili4ik/ruDialoGpt3-medium-finetuned-telegram<br>
-The model operates as a chat-bot by means of "Quote" -> "Response"<br>
-The model fine-tuned with a single GPU (locally)

+# WarOnline_Bot
+This is a GPT-based chatbot trained on WarOnline conversations.<br>
+It is not ready yet.

WarBot.py ADDED Viewed

	@@ -0,0 +1,132 @@

+from transformers import AutoTokenizer ,AutoModelForCausalLM
+import re
+# Speller and punctuation:
+import os
+import yaml
+import torch
+from torch import package
+# not very necessary
+import textwrap
+from textwrap3 import wrap
+# util function to get expected len after tokenizing
+def get_length_param(text: str, tokenizer) -> str:
+    tokens_count = len(tokenizer.encode(text))
+    if tokens_count <= 15:
+        len_param = '1'
+    elif tokens_count <= 50:
+        len_param = '2'
+    elif tokens_count <= 256:
+        len_param = '3'
+    else:
+        len_param = '-'
+    return len_param
+def remove_duplicates(S):
+    S = re.sub(r'[a-zA-Z]+', '', S) #Remove english
+    S = S.split()
+    result = ""
+    for subst in S:
+        if subst not in result:
+            result += subst+" "
+    return result.rstrip()
+def removeSigns(S):
+    last_index = max(S.rfind("."), S.rfind("!"))
+    if last_index >= 0:
+        S = S[:last_index+1]
+    return S
+def prepare_punct():
+    torch.hub.download_url_to_file('https://raw.githubusercontent.com/snakers4/silero-models/master/models.yml',
+                                   'latest_silero_models.yml',
+                                   progress=False)
+    with open('latest_silero_models.yml', 'r') as yaml_file:
+        models = yaml.load(yaml_file, Loader=yaml.SafeLoader)
+    model_conf = models.get('te_models').get('latest')
+    # Prepare punctuation fix
+    model_url = model_conf.get('package')
+    model_dir = "downloaded_model"
+    os.makedirs(model_dir, exist_ok=True)
+    model_path = os.path.join(model_dir, os.path.basename(model_url))
+    if not os.path.isfile(model_path):
+        torch.hub.download_url_to_file(model_url,
+                                       model_path,
+                                       progress=True)
+    imp = package.PackageImporter(model_path)
+    model_punct = imp.load_pickle("te_model", "model")
+    return model_punct
+def initialize():
+    """ Loading the model """
+    fit_checkpoint = "WarBot"
+    tokenizer = AutoTokenizer.from_pretrained(fit_checkpoint)
+    model = AutoModelForCausalLM.from_pretrained(fit_checkpoint)
+    model_punсt = prepare_punct()
+    return (model,tokenizer,model_punсt)
+def split_string(string,n=256):
+    return [string[i:i+n] for i in range(0, len(string), n)]
+def get_response(quote:str,model,tokenizer,model_punct):
+    # encode the input, add the eos_token and return a tensor in Pytorch
+    user_inpit_ids = tokenizer.encode(f"|0|{get_length_param(quote, tokenizer)}|" \
+                                                  + quote + tokenizer.eos_token, return_tensors="pt")
+    chat_history_ids = user_inpit_ids # To be changed
+    tokens_count = len(tokenizer.encode(quote))
+    if tokens_count < 15:
+        no_repeat_ngram_size = 2
+    else:
+        no_repeat_ngram_size = 1
+    output_id = model.generate(
+                chat_history_ids,
+                num_return_sequences=1, # use for more variants, but have to print [i]
+                max_length=200, #512
+                no_repeat_ngram_size=no_repeat_ngram_size, #3
+                do_sample=True, #True
+                top_k=50,#50
+                top_p=0.9, #0.9
+                temperature = 0.4, # was 0.6, 0 for greedy
+                #mask_token_id=tokenizer.mask_token_id,
+                eos_token_id=tokenizer.eos_token_id,
+                #unk_token_id=tokenizer.unk_token_id,
+                pad_token_id=tokenizer.pad_token_id,
+                #pad_token_id=tokenizer.eos_token_id,
+                #device='cpu'
+            )
+    response = tokenizer.decode(output_id[0], skip_special_tokens=True)
+    response = removeSigns(response)
+    response = response.split(quote)[-1]  # Remove the Quote
+    response = re.sub(r'[^0-9А-Яа-яЁёa-zA-z;., !()/\-+:?]', '',
+                      response)  # Clear the response, remains only alpha-numerical values
+    response = remove_duplicates(re.sub(r"\d{4,}", "", response))  # Remove the consequent numbers with 4 or more digits
+    response = re.sub(r'\.\.+', '', response) # Remove the "....." thing
+    if len(response)>200:
+        resps = wrap(response,200)
+        for i in range(len(resps)):
+            resps[i] = model_punct.enhance_text(resps[i], lan='ru')
+            response = ''.join(resps)
+    else:
+        response = model_punct.enhance_text(response, lan='ru')
+    response = re.sub(r'[UNK]', '', response)  # Remove the [UNK] thing
+    return response
+#if __name__ == '__main__':
+    #model,tokenizer,model_punct = initialize()
+    #quote = "Это хорошо, но глядя на ролик, когда ефиопские толпы в Израиле громят машины и нападают на улице на израильтян - задумаешься, куда все движется"
+    #print('please wait...')
+    #response = wrap(get_response(quote,model,tokenizer,model_punct),60)
+    #for phrase in response:
+    #    print(phrase)

WarBot_test.ipynb CHANGED Viewed

@@ -9,11 +9,73 @@
    "outputs": [],
    "source": [
     "from transformers import AutoTokenizer ,AutoModelForCausalLM\n",
-    "import torch\n",
-    "import re\n",
-    "from sklearn.utils import shuffle"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 2,
@@ -38,7 +100,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
    "outputs": [],
    "source": [
     "def remove_duplicates(S):\n",
@@ -56,7 +118,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
    "outputs": [],
    "source": [
     "fit_checkpoint = \"WarBot\"\n",
@@ -69,10 +131,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
    "outputs": [],
    "source": [
-    "quote = \"Однажды мы проснёмся и поймём, что бригада Нахаль наваляла десантникам по самые помидоры\""
    ],
    "metadata": {
     "collapsed": false
@@ -80,26 +143,38 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 72,
-   "outputs": [],
    "source": [
     "# encode the input, add the eos_token and return a tensor in Pytorch\n",
     "user_inpit_ids = tokenizer.encode(f\"|0|{get_length_param(quote, tokenizer)}|\" \\\n",
     "                                              + quote + tokenizer.eos_token, return_tensors=\"pt\")\n",
     "\n",
-    "#chat_history_ids = torch.cat([chat_history_ids, user_inpit_ids], dim=-1)\n",
-    "\n",
     "chat_history_ids = user_inpit_ids # To be changed\n",
     "\n",
     "output_id = model.generate(\n",
     "            chat_history_ids,\n",
-    "            num_return_sequences=1, # use for more variants, but have to print [i]\n",
     "            max_length=300, #512\n",
-    "            no_repeat_ngram_size=1, #3\n",
     "            do_sample=True, #True\n",
     "            top_k=50,#50\n",
     "            top_p=0.9, #0.9\n",
-    "            temperature = 0.45, # was 0.6, 0 for greedy\n",
     "            #mask_token_id=tokenizer.mask_token_id,\n",
     "            eos_token_id=tokenizer.eos_token_id,\n",
     "            #unk_token_id=tokenizer.unk_token_id,\n",
@@ -114,7 +189,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 59,
    "outputs": [],
    "source": [
     "def removeSigns(S):\n",
@@ -135,10 +210,10 @@
     "def getResponce():\n",
     "    response = tokenizer.decode(output_id[0], skip_special_tokens=True)\n",
     "    response = removeSigns(response)\n",
-    "    #response = re.sub(r'[^а-яА-Я;.,!?]', '', response) # Clear the response, remains only russian\n",
-    "    response_с = response.split(quote)[-1] #Remove the Quote\n",
-    "    clean_response = remove_duplicates(re.sub(r\"\\d{4,}\", \"\", response_с)) # Remove the consequent numbers with 4 or more digits\n",
-    "    return clean_response"
    ],
    "metadata": {
     "collapsed": false
@@ -146,13 +221,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 73,
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Response: в бригаде есть несколько батальонов \"йерихон\". они основном для того чтобы отражать атаки хезов. батальонный уровень это ротные минометы на джипах прицепами (на уровне батальона). если них будет достаточно ракет могут даже накрыть батарею 120мм минометной установки или из состава бригады может быть хуже чем батареи 122 м-109 которые находятся под управлением роты/бат аля рейнджеры... опять все зависит ситуации например после первой ливанской артиллеристы стали очень сильно нервничать когда обстреливали израильские бпла типа 28, как приходилось отвечать свои задачи. теперь вот примеру американцы решили полностью перевести всю бригаду второй эшелон : 1) сократив количество артдивизионов 4х; 3 пехотных батальонах(м113); 5 танковых + отдельный армейский который сможет прикрывать танки непосредственно перед атакой противника.. правда нужно еще иметь возможность поддерживать свой штатную авиацию огнем своего штатного места без необходимости перебрасывать туда часть танков.... короче говоря вся эта система должна работать вместе /при условии полного отсутствия взаимозачетчиков между ними...но тут надо смотреть кто первый окажется дежурным батареей техасских коптеров..и итп....\n"
      ]
     }
    ],
@@ -163,6 +238,36 @@
     "collapsed": false
    }
   },
   {
    "cell_type": "markdown",
    "source": [
@@ -174,7 +279,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 74,
    "outputs": [],
    "source": [
     "from autocorrect import Speller\n",
@@ -187,13 +292,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 75,
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "в бригаде есть несколько батальонов \"рихон\". они основном для того чтобы отражать атаки уезов. батальонный уровень это ротные минометы на джипах прицепами (на уровне батальона). если них будет достаточно ракет могут даже накрыть батарею 120мм минометной установки или из состава бригады может быть хуже чем батареи 122 м-109 которые находятся под управлением роты/бат аля рейнджеры... опять все зависит ситуации например после первой ливанской артиллеристы стали очень сильно нервничать когда обстреливали израильские была типа 28, как приходилось отвечать свои задачи. теперь вот примеру американцы решили полностью перевести всю бригаду второй эшелон : 1) сократив количество артдивизионов 4х; 3 пехотных батальонах(м113); 5 танковых + отдельный армейский который сможет прикрывать танки непосредственно перед атакой противника.. правда нужно еще иметь возможность поддерживать свой штатную авиацию огнем своего штатного места без необходимости перебрасывать туда часть танков.... короче говоря вся эта система должна работать вместе /при условии полного отсутствия взаимозачетчиков между ними...но тут надо смотреть кто первый окажется дежурным батареей техасских коптеров..и итп....\n"
      ]
     }
    ],

    "outputs": [],
    "source": [
     "from transformers import AutoTokenizer ,AutoModelForCausalLM\n",
+    "import re"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 56,
+   "outputs": [],
+   "source": [
+    "# Speller and punctuation\n",
+    "\n",
+    "import os\n",
+    "import yaml\n",
+    "import torch\n",
+    "from torch import package\n",
+    "\n",
+    "torch.hub.download_url_to_file('https://raw.githubusercontent.com/snakers4/silero-models/master/models.yml',\n",
+    "                               'latest_silero_models.yml',\n",
+    "                               progress=False)\n",
+    "\n",
+    "with open('latest_silero_models.yml', 'r') as yaml_file:\n",
+    "    models = yaml.load(yaml_file, Loader=yaml.SafeLoader)\n",
+    "model_conf = models.get('te_models').get('latest')"
+   ],
+   "metadata": {
+    "collapsed": false
+   }
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 57,
+   "outputs": [
+    {
+     "data": {
+      "text/plain": "  0%|          | 0.00/87.5M [00:00<?, ?B/s]",
+      "application/vnd.jupyter.widget-view+json": {
+       "version_major": 2,
+       "version_minor": 0,
+       "model_id": "2c9c9ff9721046ad89665fbf4f6dd275"
+      }
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# Prepare punctuation fix for test\n",
+    "model_url = model_conf.get('package')\n",
+    "\n",
+    "model_dir = \"downloaded_model\"\n",
+    "os.makedirs(model_dir, exist_ok=True)\n",
+    "model_path = os.path.join(model_dir, os.path.basename(model_url))\n",
+    "\n",
+    "if not os.path.isfile(model_path):\n",
+    "    torch.hub.download_url_to_file(model_url,\n",
+    "                                   model_path,\n",
+    "                                   progress=True)\n",
+    "\n",
+    "imp = package.PackageImporter(model_path)\n",
+    "model = imp.load_pickle(\"te_model\", \"model\")\n",
+    "\n",
+    "def apply_te(text, lan='ru'):\n",
+    "    return model.enhance_text(text, lan)"
+   ],
+   "metadata": {
+    "collapsed": false
+   }
+  },
   {
    "cell_type": "code",
    "execution_count": 2,
   },
   {
    "cell_type": "code",
+   "execution_count": 3,
    "outputs": [],
    "source": [
     "def remove_duplicates(S):\n",
   },
   {
    "cell_type": "code",
+   "execution_count": 4,
    "outputs": [],
    "source": [
     "fit_checkpoint = \"WarBot\"\n",
   },
   {
    "cell_type": "code",
+   "execution_count": 36,
    "outputs": [],
    "source": [
+    "quote = \"Это хорошо, но глядя на ролик, когда ефиопские толпы в Израиле громят машины и нападают на улице на израильтян - задумаешься, куда все движется.\\\n",
+    "Особенно запомнилась картина, когда на проезжающий авто набрасывается штук десять негров и бьют его камнями, запрыгивают на капот и крышу, пытаются через лобовик достать парня-водителя. Жутковато. И им это сошло с рук.\""
    ],
    "metadata": {
     "collapsed": false
   },
   {
    "cell_type": "code",
+   "execution_count": 37,
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "90\n"
+     ]
+    }
+   ],
    "source": [
     "# encode the input, add the eos_token and return a tensor in Pytorch\n",
     "user_inpit_ids = tokenizer.encode(f\"|0|{get_length_param(quote, tokenizer)}|\" \\\n",
     "                                              + quote + tokenizer.eos_token, return_tensors=\"pt\")\n",
     "\n",
     "chat_history_ids = user_inpit_ids # To be changed\n",
     "\n",
+    "tokens_count = len(tokenizer.encode(quote))\n",
+    "if tokens_count < 15:\n",
+    "    no_repeat_ngram_size = 2\n",
+    "else:\n",
+    "    no_repeat_ngram_size = 1\n",
+    "\n",
     "output_id = model.generate(\n",
     "            chat_history_ids,\n",
+    "            num_return_sequences=2, # use for more variants, but have to print [i]\n",
     "            max_length=300, #512\n",
+    "            no_repeat_ngram_size=no_repeat_ngram_size, #3\n",
     "            do_sample=True, #True\n",
     "            top_k=50,#50\n",
     "            top_p=0.9, #0.9\n",
+    "            temperature = 0.4, # was 0.6, 0 for greedy\n",
     "            #mask_token_id=tokenizer.mask_token_id,\n",
     "            eos_token_id=tokenizer.eos_token_id,\n",
     "            #unk_token_id=tokenizer.unk_token_id,\n",
   },
   {
    "cell_type": "code",
+   "execution_count": 8,
    "outputs": [],
    "source": [
     "def removeSigns(S):\n",
     "def getResponce():\n",
     "    response = tokenizer.decode(output_id[0], skip_special_tokens=True)\n",
     "    response = removeSigns(response)\n",
+    "    response = response.split(quote)[-1] #Remove the Quote\n",
+    "    response = re.sub(r'[^0-9А-Яа-яЁёa-zA-z;., !()-+:?]', '', response) # Clear the response, remains only alpha-numerical values\n",
+    "    response = remove_duplicates(re.sub(r\"\\d{4,}\", \"\", response)) # Remove the consequent numbers with 4 or more digits\n",
+    "    return response"
    ],
    "metadata": {
     "collapsed": false
   },
   {
    "cell_type": "code",
+   "execution_count": 64,
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "Response: я не знаю как там было у вас...но вот вам видео из ливана: эти граждане (в плохом смысле слова) просто тупо ломанулись внутрь! сожалению они были безоружны еще вооружены до зубов....и их всех убили..так что лучше сразу стреляли мне хотябы пытались зайти комне под видом друзей..... готов стрелять хоть сейчас....а потом думал может быть таки стоит подумать где мой пистолет дома если вдруг придется применить...как так..подумайте сами господа...... зызыы для тех кто тут оружие, вы поняли меня правильно! спасибо большое. буду знать теперь обязательно кого нибудь пристрелят случае ))) псы!!! вообщем говоря ситуация такая 1 группа была сама хамасниками поэтому должныватся 2 другая часть которые хотели жить вместе(ну скажем каждый себе злобный буратино).\n"
      ]
     }
    ],
     "collapsed": false
    }
   },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "Punctuation:"
+   ],
+   "metadata": {
+    "collapsed": false
+   }
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 65,
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Я не знаю, как там было у вас...но вот вам видео из [UNK] Эти граждане (в плохом смысле слова) просто тупо ломанулись внутрь!. Сожалению, они были безоружны еще вооружены до зубов....и их всех убили..так, что лучше сразу стреляли мне, хотябы пытались зайти комне под видом друзей..... готов стрелять хоть сейчас....а Потом думал может быть таки стоит подумать, где мой пистолет дома, если вдруг придется применить...как так..подумайте сами господа...... зызыы для тех, кто тут оружие, вы поняли меня правильно.! спасибо большое. буду знать теперь обязательно кого-нибудь пристрелят случае ))) псы!!! вообщем говоря ситуация. Такая 1. Группа была сама хамасниками, поэтому должныватся 2. Другая часть, которые хотели жить вместе(ну скажем каждый себе злобный буратино)..\n"
+     ]
+    }
+   ],
+   "source": [
+    "input_text = getResponce()\n",
+    "output_text = apply_te(input_text, lan='ru')\n",
+    "print(output_text)"
+   ],
+   "metadata": {
+    "collapsed": false
+   }
+  },
   {
    "cell_type": "markdown",
    "source": [
   },
   {
    "cell_type": "code",
+   "execution_count": 41,
    "outputs": [],
    "source": [
     "from autocorrect import Speller\n",
   },
   {
    "cell_type": "code",
+   "execution_count": 42,
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "я не знаю как там было у вас...но вот вам видео из лимана эти граждане плохом смысле слова просто тупо ломанулись внутрь! сожалению они были безоружны еще вооружены до зубов....и их всех убили..так что лучше сразу стре��яли мне хотябы пытались зайти камне под видом друзей..... готов стрелять хоть сейчас....а потом думал может быть таки стоит подумать где мой пистолет дома если вдруг придется применить...как так..подумайте сами господа...... кызы для тех кто тут оружие, вы поняли меня правильно! спасибо большое. буду знать теперь обязательно кого нибудь пристрелят случае псы!!! вообщем говоря ситуация такая 1 группа была сама хамасниками поэтому должныватся 2 другая часть которые хотели жить вместе скажем каждый себе злобный буратино.\n"
      ]
     }
    ],

WarClient.py ADDED Viewed

	@@ -0,0 +1,14 @@

+import socket
+HOST = 'localhost'
+PORT = 5000
+message = "Это хорошо, но глядя на ролик, когда ефиопские толпы в Израиле громят машины и нападают на улице на израильтян - задумаешься, куда все движется"
+with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as client_socket:
+    client_socket.connect((HOST, PORT))
+    client_socket.sendall(message.encode())
+    print('Wait...')
+    data = client_socket.recv(1024)
+    received_string = data.decode('utf-8')
+    print(f'Received string from server: {received_string}')

WarServer.py ADDED Viewed

	@@ -0,0 +1,25 @@

+import socket
+import WarBot
+model,tokenizer,model_punct = WarBot.initialize()
+HOST = 'localhost'
+PORT = 5000
+with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as server_socket:
+    server_socket.bind((HOST, PORT))
+    server_socket.listen()
+    print(f'Server is listening on port {PORT}')
+    while True:
+        conn, addr = server_socket.accept()
+        with conn:
+            print(f'Connected by {addr}')
+            data = conn.recv(1024)
+            received_string = data.decode()
+            print(f'Received string from client: {received_string}')
+            response = WarBot.get_response(received_string, model, tokenizer, model_punct)
+            response_string = response
+            conn.sendall(response_string.encode())
+            conn.close()

latest_silero_models.yml ADDED Viewed

	@@ -0,0 +1,563 @@

+# pre-trained STT models
+stt_models:
+  en:
+    latest:
+      meta:
+        name: "en_v6"
+        sample: "https://models.silero.ai/examples/en_sample.wav"
+      labels: "https://models.silero.ai/models/en/en_v1_labels.json"
+      jit: "https://models.silero.ai/models/en/en_v6.jit"
+      onnx: "https://models.silero.ai/models/en/en_v5.onnx"
+      jit_q: "https://models.silero.ai/models/en/en_v6_q.jit"
+      jit_xlarge: "https://models.silero.ai/models/en/en_v6_xlarge.jit"
+      onnx_xlarge: "https://models.silero.ai/models/en/en_v6_xlarge.onnx"
+    v6:
+      meta:
+        name: "en_v6"
+        sample: "https://models.silero.ai/examples/en_sample.wav"
+      labels: "https://models.silero.ai/models/en/en_v1_labels.json"
+      jit: "https://models.silero.ai/models/en/en_v6.jit"
+      onnx: "https://models.silero.ai/models/en/en_v5.onnx"
+      jit_q: "https://models.silero.ai/models/en/en_v6_q.jit"
+      jit_xlarge: "https://models.silero.ai/models/en/en_v6_xlarge.jit"
+      onnx_xlarge: "https://models.silero.ai/models/en/en_v6_xlarge.onnx"
+    v5:
+      meta:
+        name: "en_v5"
+        sample: "https://models.silero.ai/examples/en_sample.wav"
+      labels: "https://models.silero.ai/models/en/en_v1_labels.json"
+      jit: "https://models.silero.ai/models/en/en_v5.jit"
+      onnx: "https://models.silero.ai/models/en/en_v5.onnx"
+      onnx_q: "https://models.silero.ai/models/en/en_v5_q.onnx"
+      jit_q: "https://models.silero.ai/models/en/en_v5_q.jit"
+      jit_xlarge: "https://models.silero.ai/models/en/en_v5_xlarge.jit"
+      onnx_xlarge: "https://models.silero.ai/models/en/en_v5_xlarge.onnx"
+    v4_0:
+      meta:
+        name: "en_v4_0"
+        sample: "https://models.silero.ai/examples/en_sample.wav"
+      labels: "https://models.silero.ai/models/en/en_v1_labels.json"
+      jit_large: "https://models.silero.ai/models/en/en_v4_0_jit_large.model"
+      onnx_large: "https://models.silero.ai/models/en/en_v4_0_large.onnx"
+    v3:
+      meta:
+        name: "en_v3"
+        sample: "https://models.silero.ai/examples/en_sample.wav"
+      labels: "https://models.silero.ai/models/en/en_v1_labels.json"
+      jit: "https://models.silero.ai/models/en/en_v3_jit.model"
+      onnx: "https://models.silero.ai/models/en/en_v3.onnx"
+      jit_q: "https://models.silero.ai/models/en/en_v3_jit_q.model"
+      jit_skip: "https://models.silero.ai/models/en/en_v3_jit_skips.model"
+      jit_large: "https://models.silero.ai/models/en/en_v3_jit_large.model"
+      onnx_large: "https://models.silero.ai/models/en/en_v3_large.onnx"
+      jit_xsmall: "https://models.silero.ai/models/en/en_v3_jit_xsmall.model"
+      jit_q_xsmall: "https://models.silero.ai/models/en/en_v3_jit_q_xsmall.model"
+      onnx_xsmall: "https://models.silero.ai/models/en/en_v3_xsmall.onnx"
+    v2:
+      meta:
+        name: "en_v2"
+        sample: "https://models.silero.ai/examples/en_sample.wav"
+      labels: "https://models.silero.ai/models/en/en_v1_labels.json"
+      jit: "https://models.silero.ai/models/en/en_v2_jit.model"
+      onnx: "https://models.silero.ai/models/en/en_v2.onnx"
+      tf: "https://models.silero.ai/models/en/en_v2_tf.tar.gz"
+    v1:
+      meta:
+        name: "en_v1"
+        sample: "https://models.silero.ai/examples/en_sample.wav"
+      labels: "https://models.silero.ai/models/en/en_v1_labels.json"
+      jit: "https://models.silero.ai/models/en/en_v1_jit.model"
+      onnx: "https://models.silero.ai/models/en/en_v1.onnx"
+      tf: "https://models.silero.ai/models/en/en_v1_tf.tar.gz"
+  de:
+    latest:
+      meta:
+        name: "de_v1"
+        sample: "https://models.silero.ai/examples/de_sample.wav"
+      labels: "https://models.silero.ai/models/de/de_v1_labels.json"
+      jit: "https://models.silero.ai/models/de/de_v1_jit.model"
+      onnx: "https://models.silero.ai/models/de/de_v1.onnx"
+      tf: "https://models.silero.ai/models/de/de_v1_tf.tar.gz"
+    v1:
+      meta:
+        name: "de_v1"
+        sample: "https://models.silero.ai/examples/de_sample.wav"
+      labels: "https://models.silero.ai/models/de/de_v1_labels.json"
+      jit_large: "https://models.silero.ai/models/de/de_v1_jit.model"
+      onnx: "https://models.silero.ai/models/de/de_v1.onnx"
+      tf: "https://models.silero.ai/models/de/de_v1_tf.tar.gz"
+    v3:
+      meta:
+        name: "de_v3"
+        sample: "https://models.silero.ai/examples/de_sample.wav"
+      labels: "https://models.silero.ai/models/de/de_v1_labels.json"
+      jit_large: "https://models.silero.ai/models/de/de_v3_large.jit"
+    v4:
+      meta:
+        name: "de_v4"
+        sample: "https://models.silero.ai/examples/de_sample.wav"
+      labels: "https://models.silero.ai/models/de/de_v1_labels.json"
+      jit_large: "https://models.silero.ai/models/de/de_v4_large.jit"
+      onnx_large: "https://models.silero.ai/models/de/de_v4_large.onnx"
+  es:
+    latest:
+      meta:
+        name: "es_v1"
+        sample: "https://models.silero.ai/examples/es_sample.wav"
+      labels: "https://models.silero.ai/models/es/es_v1_labels.json"
+      jit: "https://models.silero.ai/models/es/es_v1_jit.model"
+      onnx: "https://models.silero.ai/models/es/es_v1.onnx"
+      tf: "https://models.silero.ai/models/es/es_v1_tf.tar.gz"
+  ua:
+    latest:
+      meta:
+        name: "ua_v3"
+        sample: "https://models.silero.ai/examples/ua_sample.wav"
+        credits:
+          datasets:
+            speech-recognition-uk: https://github.com/egorsmkv/speech-recognition-uk
+      labels: "https://models.silero.ai/models/ua/ua_v1_labels.json"
+      jit: "https://models.silero.ai/models/ua/ua_v3_jit.model"
+      jit_q: "https://models.silero.ai/models/ua/ua_v3_jit_q.model"
+      onnx: "https://models.silero.ai/models/ua/ua_v3.onnx"
+    v3:
+      meta:
+        name: "ua_v3"
+        sample: "https://models.silero.ai/examples/ua_sample.wav"
+        credits:
+          datasets:
+            speech-recognition-uk: https://github.com/egorsmkv/speech-recognition-uk
+      labels: "https://models.silero.ai/models/ua/ua_v1_labels.json"
+      jit: "https://models.silero.ai/models/ua/ua_v3_jit.model"
+      jit_q: "https://models.silero.ai/models/ua/ua_v3_jit_q.model"
+      onnx: "https://models.silero.ai/models/ua/ua_v3.onnx"
+    v1:
+      meta:
+        name: "ua_v1"
+        sample: "https://models.silero.ai/examples/ua_sample.wav"
+        credits:
+          datasets:
+            speech-recognition-uk: https://github.com/egorsmkv/speech-recognition-uk
+      labels: "https://models.silero.ai/models/ua/ua_v1_labels.json"
+      jit: "https://models.silero.ai/models/ua/ua_v1_jit.model"
+      jit_q: "https://models.silero.ai/models/ua/ua_v1_jit_q.model"
+tts_models:
+  ru:
+    v3_1_ru:
+      latest:
+        example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
+        package: 'https://models.silero.ai/models/tts/ru/v3_1_ru.pt'
+        sample_rate: [8000, 24000, 48000]
+    ru_v3:
+      latest:
+        example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
+        package: 'https://models.silero.ai/models/tts/ru/ru_v3.pt'
+        sample_rate: [8000, 24000, 48000]
+    aidar_v2:
+      latest:
+        example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
+        package: 'https://models.silero.ai/models/tts/ru/v2_aidar.pt'
+        sample_rate: [8000, 16000]
+    aidar_8khz:
+      latest:
+        tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
+        example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
+        jit: 'https://models.silero.ai/models/tts/ru/v1_aidar_8000.jit'
+        sample_rate: 8000
+      v1:
+        tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
+        example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
+        jit: 'https://models.silero.ai/models/tts/ru/v1_aidar_8000.jit'
+        sample_rate: 8000
+    aidar_16khz:
+      latest:
+        tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
+        example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
+        jit: 'https://models.silero.ai/models/tts/ru/v1_aidar_16000.jit'
+        sample_rate: 16000
+      v1:
+        tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
+        example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
+        jit: 'https://models.silero.ai/models/tts/ru/v1_aidar_16000.jit'
+        sample_rate: 16000
+    baya_v2:
+      latest:
+        example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
+        package: 'https://models.silero.ai/models/tts/ru/v2_baya.pt'
+        sample_rate: [8000, 16000]
+    baya_8khz:
+      latest:
+        tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
+        example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
+        jit: 'https://models.silero.ai/models/tts/ru/v1_baya_8000.jit'
+        sample_rate: 8000
+      v1:
+        tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
+        example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
+        jit: 'https://models.silero.ai/models/tts/ru/v1_baya_8000.jit'
+        sample_rate: 8000
+    baya_16khz:
+      latest:
+        tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
+        example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
+        jit: 'https://models.silero.ai/models/tts/ru/v1_baya_16000.jit'
+        sample_rate: 16000
+      v1:
+        tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
+        example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
+        jit: 'https://models.silero.ai/models/tts/ru/v1_baya_16000.jit'
+        sample_rate: 16000
+    irina_v2:
+      latest:
+        example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
+        package: 'https://models.silero.ai/models/tts/ru/v2_irina.pt'
+        sample_rate: [8000, 16000]
+    irina_8khz:
+      latest:
+        tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
+        example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
+        jit: 'https://models.silero.ai/models/tts/ru/v1_irina_8000.jit'
+        sample_rate: 8000
+      v1:
+        tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
+        example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
+        jit: 'https://models.silero.ai/models/tts/ru/v1_irina_8000.jit'
+        sample_rate: 8000
+    irina_16khz:
+      latest:
+        tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
+        example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
+        jit: 'https://models.silero.ai/models/tts/ru/v1_irina_16000.jit'
+        sample_rate: 16000
+      v1:
+        tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
+        example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
+        jit: 'https://models.silero.ai/models/tts/ru/v1_irina_16000.jit'
+        sample_rate: 16000
+    kseniya_v2:
+      latest:
+        example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
+        package: 'https://models.silero.ai/models/tts/ru/v2_kseniya.pt'
+        sample_rate: [8000, 16000]
+    kseniya_8khz:
+      latest:
+        tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
+        example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
+        jit: 'https://models.silero.ai/models/tts/ru/v1_kseniya_8000.jit'
+        sample_rate: 8000
+      v1:
+        tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
+        example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
+        jit: 'https://models.silero.ai/models/tts/ru/v1_kseniya_8000.jit'
+        sample_rate: 8000
+    kseniya_16khz:
+      latest:
+        tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
+        example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
+        jit: 'https://models.silero.ai/models/tts/ru/v1_kseniya_16000.jit'
+        sample_rate: 16000
+      v1:
+        tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
+        example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
+        jit: 'https://models.silero.ai/models/tts/ru/v1_kseniya_16000.jit'
+        sample_rate: 16000
+    natasha_v2:
+      latest:
+        example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
+        package: 'https://models.silero.ai/models/tts/ru/v2_natasha.pt'
+        sample_rate: [8000, 16000]
+    natasha_8khz:
+      latest:
+        tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
+        example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
+        jit: 'https://models.silero.ai/models/tts/ru/v1_natasha_8000.jit'
+        sample_rate: 8000
+      v1:
+        tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
+        example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
+        jit: 'https://models.silero.ai/models/tts/ru/v1_natasha_8000.jit'
+        sample_rate: 8000
+    natasha_16khz:
+      latest:
+        tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
+        example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
+        jit: 'https://models.silero.ai/models/tts/ru/v1_natasha_16000.jit'
+        sample_rate: 16000
+      v1:
+        tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
+        example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
+        jit: 'https://models.silero.ai/models/tts/ru/v1_natasha_16000.jit'
+        sample_rate: 16000
+    ruslan_v2:
+      latest:
+        example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
+        package: 'https://models.silero.ai/models/tts/ru/v2_ruslan.pt'
+        sample_rate: [8000, 16000]
+    ruslan_8khz:
+      latest:
+        tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
+        example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
+        jit: 'https://models.silero.ai/models/tts/ru/v1_ruslan_8000.jit'
+        sample_rate: 8000
+      v1:
+        tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
+        example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
+        jit: 'https://models.silero.ai/models/tts/ru/v1_ruslan_8000.jit'
+        sample_rate: 8000
+    ruslan_16khz:
+      latest:
+        tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
+        example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
+        jit: 'https://models.silero.ai/models/tts/ru/v1_ruslan_16000.jit'
+        sample_rate: 16000
+      v1:
+        tokenset: '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'
+        example: 'В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.'
+        jit: 'https://models.silero.ai/models/tts/ru/v1_ruslan_16000.jit'
+        sample_rate: 16000
+  en:
+    v3_en:
+      latest:
+        example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
+        package: 'https://models.silero.ai/models/tts/en/v3_en.pt'
+        sample_rate: [8000, 24000, 48000]
+    v3_en_indic:
+      latest:
+        example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
+        package: 'https://models.silero.ai/models/tts/en/v3_en_indic.pt'
+        sample_rate: [8000, 24000, 48000]
+    lj_v2:
+      latest:
+        example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
+        package: 'https://models.silero.ai/models/tts/en/v2_lj.pt'
+        sample_rate: [8000, 16000]
+    lj_8khz:
+      latest:
+        tokenset: '_~abcdefghijklmnopqrstuvwxyz .,!?…:;–'
+        example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
+        jit: 'https://models.silero.ai/models/tts/en/v1_lj_8000.jit'
+        sample_rate: 8000
+      v1:
+        tokenset: '_~abcdefghijklmnopqrstuvwxyz .,!?…:;–'
+        example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
+        jit: 'https://models.silero.ai/models/tts/en/v1_lj_8000.jit'
+        sample_rate: 8000
+    lj_16khz:
+      latest:
+        tokenset: '_~abcdefghijklmnopqrstuvwxyz .,!?…:;–'
+        example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
+        jit: 'https://models.silero.ai/models/tts/en/v1_lj_16000.jit'
+        sample_rate: 16000
+      v1:
+        tokenset: '_~abcdefghijklmnopqrstuvwxyz .,!?…:;–'
+        example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
+        jit: 'https://models.silero.ai/models/tts/en/v1_lj_16000.jit'
+        sample_rate: 16000
+  de:
+    v3_de:
+      latest:
+        example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
+        package: 'https://models.silero.ai/models/tts/de/v3_de.pt'
+        sample_rate: [8000, 24000, 48000]
+    thorsten_v2:
+      latest:
+        example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
+        package: 'https://models.silero.ai/models/tts/de/v2_thorsten.pt'
+        sample_rate: [8000, 16000]
+    thorsten_8khz:
+      latest:
+        tokenset: '_~abcdefghijklmnopqrstuvwxyzäöüß .,!?…:;–'
+        example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
+        jit: 'https://models.silero.ai/models/tts/de/v1_thorsten_8000.jit'
+        sample_rate: 8000
+      v1:
+        tokenset: '_~abcdefghijklmnopqrstuvwxyzäöüß .,!?…:;–'
+        example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
+        jit: 'https://models.silero.ai/models/tts/de/v1_thorsten_8000.jit'
+        sample_rate: 8000
+    thorsten_16khz:
+      latest:
+        tokenset: '_~abcdefghijklmnopqrstuvwxyzäöüß .,!?…:;–'
+        example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
+        jit: 'https://models.silero.ai/models/tts/de/v1_thorsten_16000.jit'
+        sample_rate: 16000
+      v1:
+        tokenset: '_~abcdefghijklmnopqrstuvwxyzäöüß .,!?…:;–'
+        example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
+        jit: 'https://models.silero.ai/models/tts/de/v1_thorsten_16000.jit'
+        sample_rate: 16000
+  es:
+    v3_es:
+      latest:
+        example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
+        package: 'https://models.silero.ai/models/tts/es/v3_es.pt'
+        sample_rate: [8000, 24000, 48000]
+    tux_v2:
+      latest:
+        example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
+        package: 'https://models.silero.ai/models/tts/es/v2_tux.pt'
+        sample_rate: [8000, 16000]
+    tux_8khz:
+      latest:
+        tokenset: '_~abcdefghijklmnopqrstuvwxyzáéíñóú .,!?…:;–¡¿'
+        example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
+        jit: 'https://models.silero.ai/models/tts/es/v1_tux_8000.jit'
+        sample_rate: 8000
+      v1:
+        tokenset: '_~abcdefghijklmnopqrstuvwxyzáéíñóú .,!?…:;–¡¿'
+        example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
+        jit: 'https://models.silero.ai/models/tts/es/v1_tux_8000.jit'
+        sample_rate: 8000
+    tux_16khz:
+      latest:
+        tokenset: '_~abcdefghijklmnopqrstuvwxyzáéíñóú .,!?…:;–¡¿'
+        example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
+        jit: 'https://models.silero.ai/models/tts/es/v1_tux_16000.jit'
+        sample_rate: 16000
+      v1:
+        tokenset: '_~abcdefghijklmnopqrstuvwxyzáéíñóú .,!?…:;–¡¿'
+        example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
+        jit: 'https://models.silero.ai/models/tts/es/v1_tux_16000.jit'
+        sample_rate: 16000
+  fr:
+    v3_fr:
+      latest:
+        example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.'
+        package: 'https://models.silero.ai/models/tts/fr/v3_fr.pt'
+        sample_rate: [8000, 24000, 48000]
+    gilles_v2:
+      latest:
+        example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.'
+        package: 'https://models.silero.ai/models/tts/fr/v2_gilles.pt'
+        sample_rate: [8000, 16000]
+    gilles_8khz:
+      latest:
+        tokenset: '_~abcdefghijklmnopqrstuvwxyzéàèùâêîôûç .,!?…:;–'
+        example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.'
+        jit: 'https://models.silero.ai/models/tts/fr/v1_gilles_8000.jit'
+        sample_rate: 8000
+      v1:
+        tokenset: '_~abcdefghijklmnopqrstuvwxyzéàèùâêîôûç .,!?…:;–'
+        example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.'
+        jit: 'https://models.silero.ai/models/tts/fr/v1_gilles_8000.jit'
+        sample_rate: 8000
+    gilles_16khz:
+      latest:
+        tokenset: '_~abcdefghijklmnopqrstuvwxyzéàèùâêîôûç .,!?…:;–'
+        example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.'
+        jit: 'https://models.silero.ai/models/tts/fr/v1_gilles_16000.jit'
+        sample_rate: 16000
+      v1:
+        tokenset: '_~abcdefghijklmnopqrstuvwxyzéàèùâêîôûç .,!?…:;–'
+        example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.'
+        jit: 'https://models.silero.ai/models/tts/fr/v1_gilles_16000.jit'
+        sample_rate: 16000
+  ba:
+    aigul_v2:
+      latest:
+        example: 'Салауат Юлаевтың тормошо һәм яҙмышы хаҡындағы документтарҙың һәм шиғри әҫәрҙәренең бик аҙ өлөшө генә һаҡланған.'
+        package: 'https://models.silero.ai/models/tts/ba/v2_aigul.pt'
+        sample_rate: [8000, 16000]
+        language_name: 'bashkir'
+  xal:
+    v3_xal:
+      latest:
+        example: 'Һорвн, дөрвн күн ирәд, һазань чиңгнв. Байн Цецн хаана һорвн көвүн күүндҗәнә.'
+        package: 'https://models.silero.ai/models/tts/xal/v3_xal.pt'
+        sample_rate: [8000, 24000, 48000]
+    erdni_v2:
+      latest:
+        example: 'Һорвн, дөрвн күн ирәд, һазань чиңгнв. Байн Цецн хаана һорвн көвүн күүндҗәнә.'
+        package: 'https://models.silero.ai/models/tts/xal/v2_erdni.pt'
+        sample_rate: [8000, 16000]
+        language_name: 'kalmyk'
+  tt:
+    v3_tt:
+      latest:
+        example: 'Исәнмесез, саумысез, нишләп кәҗәгезне саумыйсыз, әтәчегез күкәй салган, нишләп чыгып алмыйсыз.'
+        package: 'https://models.silero.ai/models/tts/tt/v3_tt.pt'
+        sample_rate: [8000, 24000, 48000]
+    dilyara_v2:
+      latest:
+        example: 'Ис+әнмесез, с+аумысез, нишл+әп кәҗәгезн+е с+аумыйсыз, әтәчег+ез күк+әй салг+ан, нишл+әп чыг+ып +алмыйсыз.'
+        package: 'https://models.silero.ai/models/tts/tt/v2_dilyara.pt'
+        sample_rate: [8000, 16000]
+        language_name: 'tatar'
+  uz:
+    v3_uz:
+      latest:
+        example: 'Tanishganimdan xursandman.'
+        package: 'https://models.silero.ai/models/tts/uz/v3_uz.pt'
+        sample_rate: [8000, 24000, 48000]
+    dilnavoz_v2:
+      latest:
+        example: 'Tanishganimdan xursandman.'
+        package: 'https://models.silero.ai/models/tts/uz/v2_dilnavoz.pt'
+        sample_rate: [8000, 16000]
+        language_name: 'uzbek'
+  ua:
+    v3_ua:
+      latest:
+        example: 'К+отики - пухн+асті жив+отики.'
+        package: 'https://models.silero.ai/models/tts/ua/v3_ua.pt'
+        sample_rate: [8000, 24000, 48000]
+    mykyta_v2:
+      latest:
+        example: 'К+отики - пухн+асті жив+отики.'
+        package: 'https://models.silero.ai/models/tts/ua/v22_mykyta_48k.pt'
+        sample_rate: [8000, 24000, 48000]
+        language_name: 'ukrainian'
+  indic:
+    v3_indic:
+      latest:
+        example: 'prasidda kabīra adhyētā, puruṣōttama agravāla kā yaha śōdha ālēkha, usa rāmānaṁda kī khōja karatā hai'
+        package: 'https://models.silero.ai/models/tts/indic/v3_indic.pt'
+        sample_rate: [8000, 24000, 48000]
+  multi:
+    multi_v2:
+      latest:
+        package: 'https://models.silero.ai/models/tts/multi/v2_multi.pt'
+        sample_rate: [8000, 16000]
+        speakers:
+          aidar:
+            lang: 'ru'
+            example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.'
+          baya:
+            lang: 'ru'
+            example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.'
+          kseniya:
+            lang: 'ru'
+            example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.'
+          irina:
+            lang: 'ru'
+            example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.'
+          ruslan:
+            lang: 'ru'
+            example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.'
+          natasha:
+            lang: 'ru'
+            example: 'Съ+ешьте ещ+ё +этих м+ягких франц+узских б+улочек, д+а в+ыпейте ч+аю.'
+          thorsten:
+            lang: 'de'
+            example: 'Fischers Fritze fischt frische Fische, Frische Fische fischt Fischers Fritze.'
+          tux:
+            lang: 'es'
+            example: 'Hoy ya es ayer y ayer ya es hoy, ya llegó el día, y hoy es hoy.'
+          gilles:
+            lang: 'fr'
+            example: 'Je suis ce que je suis, et si je suis ce que je suis, qu’est ce que je suis.'
+          lj:
+            lang: 'en'
+            example: 'Can you can a canned can into an un-canned can like a canner can can a canned can into an un-canned can?'
+          dilyara:
+            lang: 'tt'
+            example: 'Пес+и пес+и песик+әй, борыннар+ы бәләк+әй.'
+te_models:
+  latest:
+    package: "https://models.silero.ai/te_models/v2_4lang_q.pt"
+    languages: ['en', 'de', 'ru', 'es']
+    punct: '.,-!?—'
+  v2:
+    package: "https://models.silero.ai/te_models/v2_4lang_q.pt"
+    languages: ['en', 'de', 'ru', 'es']
+    punct: '.,-!?—'

requirements.txt CHANGED Viewed

@@ -6,6 +6,8 @@ scikit-learn
 tensorboardX
 sentencepiece # summarization
 autocorrect # spelling
 # pip install git+https://github.com/RussianNLP/russian_paraphrasers@master
 #pip install torch==1.12.1+cu116 torchvision==0.13.1+cu116 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu116
 #pip install torch==1.13.1+cu117 --extra-index-url https://download.pytorch.org/whl/cu117

 tensorboardX
 sentencepiece # summarization
 autocorrect # spelling
+normalizer
+textwrap3 # imported by WarBot.py (wrap); marked there as not strictly necessary
 # pip install git+https://github.com/RussianNLP/russian_paraphrasers@master
 #pip install torch==1.12.1+cu116 torchvision==0.13.1+cu116 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu116
 #pip install torch==1.13.1+cu117 --extra-index-url https://download.pytorch.org/whl/cu117