Yurii Paniv committed on
Commit
1a75086
1 Parent(s): 77f184e

Migrate to ukrainian-word-stress

Browse files
Files changed (3) hide show
  1. requirements.txt +1 -0
  2. stress.py +21 -61
  3. ukrainian-accentor +0 -1
requirements.txt CHANGED
@@ -1 +1,2 @@
1
  TTS==0.7.1
 
 
1
  TTS==0.7.1
2
+ ukrainian-word-stress==1.0.0
stress.py CHANGED
@@ -1,62 +1,25 @@
1
- from unittest import skip
2
- from gruut import sentences
3
- import torch
4
 
5
- importer = torch.package.PackageImporter("ukrainian-accentor/accentor-lite.pt")
6
- accentor = importer.load_pickle("uk-accentor", "model")
7
- replace_accents = importer.load_pickle("uk-accentor", "replace_accents")
8
 
9
- # Using GPU
10
- # accentor.cuda()
11
- # Back to CPU
12
- # accentor.cpu()
13
-
14
- vowels = "аеєиіїоуюя"
15
- consonants = "бвгґджзйклмнпрстфхцчшщь"
16
- special = "'"
17
- alphabet = vowels + consonants + special
18
-
19
- def accent_word(word):
20
- with torch.no_grad():
21
- stressed_words = accentor.predict([word], mode='stress')
22
- plused_words = [replace_accents(x) for x in stressed_words]
23
- return plused_words[0]
24
 
25
  def sentence_to_stress(sentence):
26
- words = sentence.lower().split()
27
- words = sum([[word, " "] for word in words], start=[])
28
- new_list = []
29
- for word in words:
30
- first_word_sep = list(map(lambda letter: letter in alphabet, word.lower()))
31
- if all(first_word_sep):
32
- new_list.append(word)
 
 
 
 
 
33
  else:
34
- current_index = 0
35
- past_index = 0
36
- for letter in first_word_sep:
37
- if letter == False:
38
- new_list.append(word[past_index:current_index])
39
- new_list.append(word[current_index])
40
- past_index = current_index + 1
41
- current_index += 1
42
- new_list.append(word[past_index:current_index])
43
- #print(list(filter(lambda x: len(x) > 0, new_list)))
44
- for word_index in range(0, len(new_list)):
45
- element = new_list[word_index]
46
- first_word_sep = list(map(lambda letter: letter in alphabet, element.lower()))
47
- if not all(first_word_sep) or len(element) == 0:
48
- continue
49
- else:
50
- vowels_in_words = list(map(lambda letter: letter in vowels, new_list[word_index]))
51
- if vowels_in_words.count(True) == 0:
52
- continue
53
- elif vowels_in_words.count(True) == 1:
54
- vowel_index = vowels_in_words.index(True)
55
- new_list[word_index] = new_list[word_index][0:vowel_index] + "+" + new_list[word_index][vowel_index::]
56
- else:
57
- new_list[word_index] = accent_word(new_list[word_index])
58
-
59
- return "".join(new_list)
60
 
61
 
62
  if __name__ == "__main__":
@@ -68,10 +31,7 @@ if __name__ == "__main__":
68
  print(sentence_to_stress(sentence))
69
  sentence = "Не тільки в Україні таке може бути."
70
  print(sentence_to_stress(sentence))
71
- #test_words1 = ["словотворення", "архаїчний", "програма", "а-ля-фуршет"]
72
-
73
- stressed_words = accentor.predict(["привіт"], mode='stress')
74
- plused_words = [replace_accents(x) for x in stressed_words]
75
-
76
- print('With stress:', stressed_words)
77
- print('With pluses:', plused_words)
 
1
+ from ukrainian_word_stress import Stressifier, StressSymbol
 
 
2
 
3
+ stressify = Stressifier(stress_symbol=StressSymbol.CombiningAcuteAccent)
 
 
4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
  def sentence_to_stress(sentence):
7
+ stressed = stressify(sentence).replace(StressSymbol.CombiningAcuteAccent, "+")
8
+ new_stressed = ""
9
+ start = 0
10
+ last = 0
11
+ while True:
12
+ plus_position = stressed.find("+", start)
13
+ if plus_position != -1:
14
+ new_stressed += (
15
+ stressed[last : plus_position - 1] + "+" + stressed[plus_position - 1]
16
+ )
17
+ start = plus_position + 1
18
+ last = start
19
  else:
20
+ new_stressed += stressed[last:]
21
+ break
22
+ return new_stressed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
 
25
  if __name__ == "__main__":
 
31
  print(sentence_to_stress(sentence))
32
  sentence = "Не тільки в Україні таке може бути."
33
  print(sentence_to_stress(sentence))
34
+ sentence = "Н тльк в крн тк мж бт."
35
+ print(sentence_to_stress(sentence))
36
+ sentence = "Н тльк в крн тк мж бт."
37
+ print(sentence_to_stress(sentence))
 
 
 
ukrainian-accentor DELETED
@@ -1 +0,0 @@
1
- Subproject commit a3dd2cf9341db200853cfd19df142224a47749b2