seanghay committed on
Commit
d1c3c7d
•
1 Parent(s): 664d5a5
Files changed (2) hide show
  1. README.md +2 -2
  2. app.py +27 -33
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
- title: Khmer G2p Ipa
3
- emoji: 📉
4
  colorFrom: purple
5
  colorTo: gray
6
  sdk: gradio
 
1
  ---
2
+ title: Khmer Phonemizer
3
+ emoji: 🇰🇭
4
  colorFrom: purple
5
  colorTo: gray
6
  sdk: gradio
app.py CHANGED
@@ -1,45 +1,39 @@
1
  import gradio as gr
2
  import phonetisaurus
3
- from collections import namedtuple
4
-
5
- model = phonetisaurus.Phonetisaurus(model="./model.fst")
6
-
7
- def Phoneticize (args) :
8
-
9
- results = model.Phoneticize (
10
- args["token"],
11
- args["nbest"],
12
- args["beam"],
13
- args["thresh"],
14
- args["write_fsts"],
15
- args["accumulate"],
16
- args["pmass"]
17
- )
18
-
19
- for result in results :
20
- uniques = [model.FindOsym (u) for u in result.Uniques]
21
- return "".join(uniques)
22
-
23
- return ""
24
-
25
 
26
  def phonemizer(text: str):
27
 
28
  if not text:
29
  return ""
30
 
31
- args = {
32
- "token": text,
33
- "word": "word",
34
- "nbest": 1,
35
- "thresh": 10.0,
36
- "write_fsts": False,
37
- "accumulate": False,
38
- "pmass": 0.0,
39
- "beam": 500
40
- }
 
 
 
 
 
 
 
 
 
 
 
 
41
 
42
- return f"/{Phoneticize(args)}/"
43
 
44
  iface = gr.Interface(
45
  title="Khmer Phonemizer",
 
1
  import gradio as gr
2
  import phonetisaurus
3
+ from collections import namedtuple
4
+ import re
5
+ import shlex
6
+ import tempfile
7
+ import subprocess
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  def phonemizer(text: str):
10
 
11
  if not text:
12
  return ""
13
 
14
+ env = phonetisaurus.guess_environment()
15
+ model_path = "./model.fst"
16
+
17
+ with tempfile.NamedTemporaryFile(suffix=".txt", mode="w+") as temp_file:
18
+ print(text, file=temp_file)
19
+
20
+ temp_file.seek(0)
21
+
22
+ phonetisaurus_cmd = [
23
+ "phonetisaurus-apply",
24
+ "--model",
25
+ shlex.quote(str(model_path)),
26
+ "--word_list",
27
+ shlex.quote(str(temp_file.name)),
28
+ "--nbest",
29
+ str(1),
30
+ ]
31
+
32
+ result_str = subprocess.check_output(phonetisaurus_cmd, env=env, universal_newlines=True)
33
+ phoneme = result_str.split('\t')[1].strip()
34
+ phoneme = re.sub(r'\s', "", phoneme)
35
+ return f"/{phoneme}/"
36
 
 
37
 
38
  iface = gr.Interface(
39
  title="Khmer Phonemizer",