txya900619 committed on
Commit
fdeff58
1 Parent(s): 1280207

fix: error when "?" and "!" can't be parsed to IPA, and add ignore_comma arg for future model

Browse files
Files changed (1) hide show
  1. ipa/ipa.py +10 -3
ipa/ipa.py CHANGED
@@ -1,3 +1,6 @@
 
 
 
1
  def can_form_string(x, symbol_dict):
2
  def helper(x, symbol_dict, matched_parts):
3
  if not x:
@@ -16,15 +19,19 @@ def can_form_string(x, symbol_dict):
16
  return helper(x, symbol_dict, [])
17
 
18
 
19
- def text_to_ipa(text, lang_tag, g2p):
20
  ipa = []
21
- words = text.lower().replace(".", "").replace("'", "’").split() # change in future
 
 
 
22
 
23
  print(words)
24
 
25
  for word in words:
26
  ipa_parts = ""
27
- result, matched_parts = can_form_string(word, g2p[lang_tag])
 
28
 
29
  if result is False:
30
  print(word)
 
1
+ import re
2
+
3
+
4
  def can_form_string(x, symbol_dict):
5
  def helper(x, symbol_dict, matched_parts):
6
  if not x:
 
19
  return helper(x, symbol_dict, [])
20
 
21
 
22
+ def text_to_ipa(text, lang_tag, g2p, ignore_comma=True):
23
  ipa = []
24
+ text = text.lower()
25
+ text = re.sub(r"[.?!]", "", text)
26
+ text = text.replace("'", "’")
27
+ words = text.split() # change in future
28
 
29
  print(words)
30
 
31
  for word in words:
32
  ipa_parts = ""
33
+ extended_g2p = {**g2p[lang_tag], ",": "" if ignore_comma else ","}
34
+ result, matched_parts = can_form_string(word, extended_g2p)
35
 
36
  if result is False:
37
  print(word)