Commit 17873e4 by Yurii Paniv
Parent(s): 6180416

Fix formatter

tests/test_formatter.py CHANGED
@@ -1,6 +1,7 @@
 from ukrainian_tts.formatter import preprocess_text
 import pytest
 
+
 @pytest.mark.parametrize(
     "text,expected",
     [
@@ -40,6 +41,7 @@ import pytest
 def test_formatter(text, expected):
     assert preprocess_text(text) == expected
 
+
 # Purpose of these tests: to have a clearly separate list of issues
 # in the conversion. Once fixed, these cases should move to test_formatter.
 # We still want to make sure that no changes happen there, as any regressions
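
For reference, a minimal runnable sketch of the parametrize pattern these tests use. The sample case is hypothetical, not taken from the test suite: lowercasing is the only preprocess_text behaviour visible in this diff, so the expected value is an assumption.

import pytest
from ukrainian_tts.formatter import preprocess_text

@pytest.mark.parametrize(
    "text,expected",
    [
        ("Привіт", "привіт"),  # assumed: only lowercasing applies to this input
    ],
)
def test_formatter_sketch(text, expected):
    assert preprocess_text(text) == expected
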
ukrainian_tts/formatter.py CHANGED
@@ -29,6 +29,7 @@ def replace_currency_with_words(text, currency, num_form):
     text = text.replace("€", CURRENCY[currency][num_form])
     return text
 
+
 def find_any_char(text: str, find: str, start: int):
     result = -1
     for c in find:
@@ -38,11 +39,12 @@ def find_any_char(text: str, find: str, start: int):
 
     return result
 
+
 # Have to check if I can use https://github.com/lang-uk/tokenize-uk
 def simple_tokenizer(text: str):
     start = 0
     index = find_any_char(text, " ,", start)
-    while (index >= 0):
+    while index >= 0:
         word = text[start:index]
         yield word
         separator = text[index]
@@ -52,6 +54,7 @@ def simple_tokenizer(text: str):
 
     yield text[start:]
 
+
 def preprocess_text(text):
     text = text.lower()
     # currencies
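
To see what the while index >= 0: loop is doing, here is a self-contained sketch of the tokenizer logic from the hunks above. The body of find_any_char between for c in find: and return result, and the lines that consume separator and advance start, fall outside the diff, so those parts are assumptions.

def find_any_char(text: str, find: str, start: int):
    # Assumed body: return the smallest index >= start of any character
    # in `find`, or -1 if none occurs (matches the visible -1 sentinel).
    result = -1
    for c in find:
        i = text.find(c, start)
        if i >= 0 and (result < 0 or i < result):
            result = i
    return result


def simple_tokenizer(text: str):
    start = 0
    index = find_any_char(text, " ,", start)
    while index >= 0:
        word = text[start:index]
        yield word
        separator = text[index]
        yield separator            # assumed: separators are emitted too
        start = index + 1          # assumed: advance past the separator
        index = find_any_char(text, " ,", start)
    yield text[start:]             # trailing word after the last separator


# Under these assumptions:
# list(simple_tokenizer("привіт, світ")) == ["привіт", ",", "", " ", "світ"]

The empty token between "," and " " in that example hints at why the comment above considers swapping in tokenize-uk.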