Yurii Paniv committed on
Commit
f9e5028
1 Parent(s): a63a536

Add automatic stress support

Browse files
Files changed (4) hide show
  1. .gitmodules +3 -0
  2. app.py +19 -8
  3. stress.py +60 -0
  4. ukrainian-accentor +1 -0
.gitmodules ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ [submodule "ukrainian-accentor"]
2
+ path = ukrainian-accentor
3
+ url = https://github.com/egorsmkv/ukrainian-accentor.git
app.py CHANGED
@@ -8,6 +8,12 @@ import requests
8
  from os.path import exists
9
  from formatter import preprocess_text
10
  from datetime import datetime
 
 
 
 
 
 
11
 
12
  MODEL_NAMES = [
13
  "uk/mykyta/vits-tts"
@@ -30,7 +36,7 @@ def download(url, file_name):
30
  for MODEL_NAME in MODEL_NAMES:
31
  print(f"downloading {MODEL_NAME}")
32
  release_number = "v2.0.0-beta"
33
- model_link = f"https://github.com/robinhad/ukrainian-tts/releases/download/{release_number}/model.pth"
34
  config_link = f"https://github.com/robinhad/ukrainian-tts/releases/download/{release_number}/config.json"
35
 
36
  model_path = "model.pth"
@@ -43,13 +49,14 @@ for MODEL_NAME in MODEL_NAMES:
43
  #MODELS[MODEL_NAME] = synthesizer
44
 
45
 
46
- def tts(text: str):
47
  synthesizer = Synthesizer(
48
  model_path, config_path, None, None, None,
49
  )
50
  text = preprocess_text(text)
51
- text_limit = 100
52
  text = text if len(text) < text_limit else text[0:text_limit] # mitigate crashes on hf space
 
53
  print(text, datetime.utcnow())
54
  if synthesizer is None:
55
  raise NameError("model not found")
@@ -68,10 +75,10 @@ iface = gr.Interface(
68
  label="Input",
69
  default="Введ+іть, б+удь л+аска, сво+є р+ечення.",
70
  ),
71
- #gr.inputs.Radio(
72
- # label="Виберіть TTS модель",
73
- # choices=MODEL_NAMES,
74
- #),
75
  ],
76
  outputs=gr.outputs.Audio(label="Output"),
77
  title="🐸💬🇺🇦 - Coqui TTS",
@@ -79,5 +86,9 @@ iface = gr.Interface(
79
  description="Україномовний🇺🇦 TTS за допомогою Coqui TTS (для наголосу використовуйте + перед голосною)",
80
  article="Якщо вам подобається, підтримайте за посиланням: [SUPPORT LINK](https://send.monobank.ua/jar/48iHq4xAXm), " +
81
  "Github: [https://github.com/robinhad/ukrainian-tts](https://github.com/robinhad/ukrainian-tts)",
 
 
 
 
82
  )
83
- iface.launch(enable_queue=True)
8
  from os.path import exists
9
  from formatter import preprocess_text
10
  from datetime import datetime
11
+ from stress import sentence_to_stress
12
+ from enum import Enum
13
+
14
class StressOption(Enum):
    """Stress-handling modes the user can pick in the interface.

    Each member's value is the Ukrainian label listed as a radio choice; the
    same string is what ``tts`` receives as ``stress`` and compares against
    ``StressOption.AutomaticStress.value``.
    """

    # Text is synthesized as typed; the user marks stress with "+" manually.
    ManualStress = "Наголоси вручну"

    # Text is passed through ``sentence_to_stress`` before synthesis.
    AutomaticStress = "Автоматичні наголоси (Beta)"
17
 
18
  MODEL_NAMES = [
19
  "uk/mykyta/vits-tts"
36
  for MODEL_NAME in MODEL_NAMES:
37
  print(f"downloading {MODEL_NAME}")
38
  release_number = "v2.0.0-beta"
39
+ model_link = f"https://github.com/robinhad/ukrainian-tts/releases/download/{release_number}/model-inference.pth"
40
  config_link = f"https://github.com/robinhad/ukrainian-tts/releases/download/{release_number}/config.json"
41
 
42
  model_path = "model.pth"
49
  #MODELS[MODEL_NAME] = synthesizer
50
 
51
 
52
+ def tts(text: str, stress: str):
53
  synthesizer = Synthesizer(
54
  model_path, config_path, None, None, None,
55
  )
56
  text = preprocess_text(text)
57
+ text_limit = 150
58
  text = text if len(text) < text_limit else text[0:text_limit] # mitigate crashes on hf space
59
+ text = sentence_to_stress(text) if stress == StressOption.AutomaticStress.value else text
60
  print(text, datetime.utcnow())
61
  if synthesizer is None:
62
  raise NameError("model not found")
75
  label="Input",
76
  default="Введ+іть, б+удь л+аска, сво+є р+ечення.",
77
  ),
78
+ gr.inputs.Radio(
79
+ label="Опції",
80
+ choices=[option.value for option in StressOption],
81
+ ),
82
  ],
83
  outputs=gr.outputs.Audio(label="Output"),
84
  title="🐸💬🇺🇦 - Coqui TTS",
86
  description="Україномовний🇺🇦 TTS за допомогою Coqui TTS (для наголосу використовуйте + перед голосною)",
87
  article="Якщо вам подобається, підтримайте за посиланням: [SUPPORT LINK](https://send.monobank.ua/jar/48iHq4xAXm), " +
88
  "Github: [https://github.com/robinhad/ukrainian-tts](https://github.com/robinhad/ukrainian-tts)",
89
+ examples=[
90
+ ["Введ+іть, б+удь л+аска, сво+є р+ечення.", StressOption.ManualStress.value],
91
+ ["Привіт, як тебе звати?", StressOption.AutomaticStress.value]
92
+ ]
93
  )
94
+ iface.launch(enable_queue=True, prevent_thread_lock=True)
stress.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from unittest import skip
2
+ from gruut import sentences
3
+ import torch
4
+
5
# Location of the packaged accentor model inside the ``ukrainian-accentor``
# git submodule. The path is relative, so it depends on the working directory.
_ACCENTOR_PACKAGE_PATH = "ukrainian-accentor/accentor-lite.pt"
# Package name under which both pickled resources are stored in the archive.
_ACCENTOR_RESOURCE_PACKAGE = "uk-accentor"

# Loaded once at import time; everything below reuses these objects.
importer = torch.package.PackageImporter(_ACCENTOR_PACKAGE_PATH)
# ``accentor`` exposes ``predict(words, mode='stress')``; ``replace_accents``
# is a callable applied to each predicted word (see ``accent_word``).
accentor, replace_accents = (
    importer.load_pickle(_ACCENTOR_RESOURCE_PACKAGE, resource_name)
    for resource_name in ("model", "replace_accents")
)
8
+
9
+ # Using GPU
10
+ # accentor.cuda()
11
+ # Back to CPU
12
+ # accentor.cpu()
13
+
14
# Lowercase letters of the Ukrainian alphabet (33 letters). Characters outside
# this set are treated as separators by ``sentence_to_stress``, so a missing
# letter makes every word containing it get split in the middle. The letter
# "в" was previously absent from this string.
alphabet = "абвгґдеєжзиіїйклмнопрстуфхцчшщьюя"
15
+
16
def accent_word(word):
    """Return the accentor's stressed form of a single word.

    The word is sent to the module-level ``accentor`` as a one-item batch
    with ``mode='stress'``. Every prediction is then converted with
    ``replace_accents`` (judging by the demo output label "With pluses",
    into "+" markup) and the first converted item is returned.
    """
    predictions = accentor.predict([word], mode='stress')
    converted = list(map(replace_accents, predictions))
    return converted[0]
20
+
21
def sentence_to_stress(sentence):
    """Add stress marks to every Ukrainian word fragment of ``sentence``.

    The sentence is split on whitespace. Within each word, maximal runs of
    characters whose lowercase form is in ``alphabet`` are passed to
    ``accent_word``; every other character (digits, punctuation, apostrophes,
    hyphens, foreign letters) is copied through unchanged and acts as a
    separator between runs.

    Args:
        sentence: Text to process.

    Returns:
        The processed text. Whitespace between words is normalized to a
        single space and every word, including the last one, is followed by
        a space, so non-empty results end with a trailing space. Empty or
        whitespace-only input yields ``""``.
    """
    words = sentence.split()
    if not words:
        return ""

    letters = frozenset(alphabet)
    pieces = []
    for word in words:
        run = []  # characters of the alphabetic run currently being collected
        for char in word:
            # Lowercase one character at a time: lowercasing a whole word can
            # change its length (e.g. "İ" becomes two code points), which
            # would misalign any per-letter mask with the original word and
            # lead to an IndexError when indexing back into it.
            if char.lower() in letters:
                run.append(char)
                continue
            if run:
                pieces.append(accent_word("".join(run)))
                run = []
            pieces.append(char)
        if run:
            pieces.append(accent_word("".join(run)))
        pieces.append(" ")

    return "".join(pieces)
49
+
50
+
51
if __name__ == "__main__":
    # Manual smoke test: run a full sentence containing apostrophes, hyphens
    # and punctuation through the sentence-level pipeline.
    print(
        sentence_to_stress(
            "Кам'янець-Подільський - місто в Хмельницькій області України, центр Кам'янець-Подільської міської об'єднаної територіальної громади і Кам'янець-Подільського району."
        )
    )

    # Then show the raw accentor output for one word next to its converted
    # form, to make the effect of ``replace_accents`` visible.
    raw_predictions = accentor.predict(["привіт"], mode='stress')
    converted_predictions = list(map(replace_accents, raw_predictions))

    print('With stress:', raw_predictions)
    print('With pluses:', converted_predictions)
ukrainian-accentor ADDED
@@ -0,0 +1 @@
 
1
+ Subproject commit a3dd2cf9341db200853cfd19df142224a47749b2