Irpan committed on
Commit
8e57d14
·
1 Parent(s): ff91a06
Files changed (3) hide show
  1. app.py +39 -13
  2. asr.py +2 -2
  3. util.py +21 -18
app.py CHANGED
@@ -8,6 +8,7 @@ with gr.Blocks() as app:
8
  with gr.Row():
9
  # Input Column
10
  with gr.Column(scale=1):
 
11
  with gr.Row():
12
  script_choice = gr.Dropdown(
13
  choices=["Uyghur Arabic", "Uyghur Latin"],
@@ -15,6 +16,7 @@ with gr.Blocks() as app:
15
  value="Uyghur Arabic",
16
  interactive=True
17
  )
 
18
  with gr.Group():
19
  with gr.Row():
20
  input_text = gr.Textbox(
@@ -25,15 +27,34 @@ with gr.Blocks() as app:
25
  with gr.Row():
26
  generate_short_btn = gr.Button("Generate Short Text")
27
  generate_long_btn = gr.Button("Generate Long Text")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  with gr.Group():
29
  with gr.Row():
30
- example_audio = gr.Audio(label="3. (Optional) Generate AI Pronunciation for Text")
31
  with gr.Row():
32
  tts_btn = gr.Button("Generate AI Pronunciation")
 
33
  with gr.Group():
34
  with gr.Row():
35
  user_audio = gr.Audio(
36
- label="4. Record or Upload Your Pronunciation",
37
  sources=["microphone", "upload"],
38
  type="filepath",
39
  )
@@ -42,20 +63,19 @@ with gr.Blocks() as app:
42
 
43
  # Output Column
44
  with gr.Column(scale=1):
45
- # Group transcripts together
46
  with gr.Group():
47
  with gr.Row():
48
  transcript_ugArab_box = gr.Textbox(
49
- label="User Transcript (Uyghur Arabic)",
50
  placeholder="ASR transcription of user audio..."
51
  )
52
  with gr.Row():
53
  transcript_ugLatn_box = gr.Textbox(
54
- label="User Transcript (Uyghur Latin)",
55
  placeholder="ASR transcription of user audio..."
56
  )
57
-
58
- # Group AI and user pronunciation
59
  with gr.Group():
60
  with gr.Row():
61
  correct_phoneme_box = gr.Textbox(
@@ -67,19 +87,19 @@ with gr.Blocks() as app:
67
  label="User Phonemes",
68
  placeholder="IPA representation of the user pronunciation..."
69
  )
70
-
71
  with gr.Group():
72
- with gr.Row():
73
- score_box = gr.Textbox(
74
- label="Phonetic Score",
75
- placeholder="Your pronunciation score as a percentage..."
76
- )
77
  with gr.Row():
78
  match_box = gr.Markdown(
79
  """<h4>Pronunciation Feedback</h4>\n
80
  Matching and mismatched characters will be visualized here...
81
  """
82
  )
 
 
 
 
 
83
 
84
  # Bind functions to buttons
85
  generate_short_btn.click(
@@ -93,6 +113,12 @@ with gr.Blocks() as app:
93
  inputs=[script_choice],
94
  outputs=[input_text]
95
  )
 
 
 
 
 
 
96
 
97
  tts_btn.click(
98
  tts.generate_audio,
 
8
  with gr.Row():
9
  # Input Column
10
  with gr.Column(scale=1):
11
+ # Script
12
  with gr.Row():
13
  script_choice = gr.Dropdown(
14
  choices=["Uyghur Arabic", "Uyghur Latin"],
 
16
  value="Uyghur Arabic",
17
  interactive=True
18
  )
19
+ # Text
20
  with gr.Group():
21
  with gr.Row():
22
  input_text = gr.Textbox(
 
27
  with gr.Row():
28
  generate_short_btn = gr.Button("Generate Short Text")
29
  generate_long_btn = gr.Button("Generate Long Text")
30
+ # Translations
31
+ with gr.Group():
32
+ with gr.Row():
33
+ translation_text = gr.Textbox(
34
+ label="(Optional) Translate Uyghur Text",
35
+ placeholder="Translated text here...",
36
+ )
37
+ # Add a dropdown for the target language and a button to run the translation
38
+ with gr.Row():
39
+ with gr.Column():
40
+ translate_choice = gr.Dropdown(
41
+ choices=util.translation_choices,
42
+ label="Select Translation Langauge",
43
+ value="english",
44
+ interactive=True
45
+ )
46
+ translate_btn = gr.Button("Translate")
47
+ # TTS
48
  with gr.Group():
49
  with gr.Row():
50
+ example_audio = gr.Audio(label="(Optional) Generate AI Pronunciation for Text")
51
  with gr.Row():
52
  tts_btn = gr.Button("Generate AI Pronunciation")
53
+ # ASR
54
  with gr.Group():
55
  with gr.Row():
56
  user_audio = gr.Audio(
57
+ label="3. Record or Upload Your Pronunciation",
58
  sources=["microphone", "upload"],
59
  type="filepath",
60
  )
 
63
 
64
  # Output Column
65
  with gr.Column(scale=1):
66
+ # ASR Transcripts
67
  with gr.Group():
68
  with gr.Row():
69
  transcript_ugArab_box = gr.Textbox(
70
+ label="User Transcript (Arabic Script)",
71
  placeholder="ASR transcription of user audio..."
72
  )
73
  with gr.Row():
74
  transcript_ugLatn_box = gr.Textbox(
75
+ label="User Transcript (Latin Script)",
76
  placeholder="ASR transcription of user audio..."
77
  )
78
+ # IPA
 
79
  with gr.Group():
80
  with gr.Row():
81
  correct_phoneme_box = gr.Textbox(
 
87
  label="User Phonemes",
88
  placeholder="IPA representation of the user pronunciation..."
89
  )
90
+ # Feedback
91
  with gr.Group():
 
 
 
 
 
92
  with gr.Row():
93
  match_box = gr.Markdown(
94
  """<h4>Pronunciation Feedback</h4>\n
95
  Matching and mismatched characters will be visualized here...
96
  """
97
  )
98
+ with gr.Row():
99
+ score_box = gr.Textbox(
100
+ label="Phonetic Score",
101
+ placeholder="Your pronunciation score as a percentage..."
102
+ )
103
 
104
  # Bind functions to buttons
105
  generate_short_btn.click(
 
113
  inputs=[script_choice],
114
  outputs=[input_text]
115
  )
116
+
117
+ translate_btn.click(
118
+ util.translate_text,
119
+ inputs=[input_text, script_choice, translate_choice],
120
+ outputs=[translation_text]
121
+ )
122
 
123
  tts_btn.click(
124
  tts.generate_audio,
asr.py CHANGED
@@ -42,7 +42,7 @@ def asr(audio_data, target_rate = 16000):
42
  return transcript
43
 
44
 
45
- def check_pronunciation(input_text, script, user_audio):
46
  # Transcripts from user input audio
47
  transcript_ugLatn_box = asr(user_audio)
48
  transcript_ugArab_box = util.ug_latn_to_arab(transcript_ugLatn_box)
@@ -51,6 +51,6 @@ def check_pronunciation(input_text, script, user_audio):
51
  correct_phoneme, user_phoneme, pronunciation_match, pronunciation_score = util.calculate_pronunciation_accuracy(
52
  reference_text = input_text,
53
  output_text = transcript_ugArab_box,
54
- script=script)
55
 
56
  return transcript_ugArab_box, transcript_ugLatn_box, correct_phoneme, user_phoneme, pronunciation_match, pronunciation_score
 
42
  return transcript
43
 
44
 
45
+ def check_pronunciation(input_text, script_choice, user_audio):
46
  # Transcripts from user input audio
47
  transcript_ugLatn_box = asr(user_audio)
48
  transcript_ugArab_box = util.ug_latn_to_arab(transcript_ugLatn_box)
 
51
  correct_phoneme, user_phoneme, pronunciation_match, pronunciation_score = util.calculate_pronunciation_accuracy(
52
  reference_text = input_text,
53
  output_text = transcript_ugArab_box,
54
+ script_choice=script_choice)
55
 
56
  return transcript_ugArab_box, transcript_ugLatn_box, correct_phoneme, user_phoneme, pronunciation_match, pronunciation_score
util.py CHANGED
@@ -1,11 +1,13 @@
1
  import random
2
  from umsc import UgMultiScriptConverter
 
3
  import string
4
  import epitran
5
  from difflib import SequenceMatcher
6
 
7
- # import httpcore
8
- # setattr(httpcore, 'SyncHTTPTransport', 'AsyncHTTPProxy')
 
9
 
10
  ## Global Vars
11
  # Lists of Uyghur short and long texts
@@ -19,6 +21,10 @@ long_texts = [
19
  "قىلىچ قان تامغۇزسا، بەگ ئەل ئالىدۇ؛ قەلەمدىن سىياھتانسا، ئالتۇن كېلىدۇ."
20
  ]
21
 
 
 
 
 
22
  # Initialize uyghur script converter
23
  ug_arab_to_latn = UgMultiScriptConverter('UAS', 'ULS')
24
  ug_latn_to_arab = UgMultiScriptConverter('ULS', 'UAS')
@@ -26,7 +32,6 @@ ug_latn_to_arab = UgMultiScriptConverter('ULS', 'UAS')
26
  # Initialize Epitran for Uyghur (Arabic script)
27
  ipa_converter = epitran.Epitran('uig-Arab')
28
 
29
-
30
  ## Front-End Utils
31
  def generate_short_text(script_choice):
32
  """Generate a random Uyghur short text based on the type."""
@@ -38,11 +43,19 @@ def generate_long_text(script_choice):
38
  text = random.choice(long_texts)
39
  return ug_arab_to_latn(text) if script_choice == "Uyghur Latin" else text
40
 
 
 
 
 
 
 
 
 
41
 
42
  ## ASR Utils
43
  def remove_punctuation(text):
44
  """Helper function to remove punctuation from text."""
45
- extra_punctuation = "–؛;،؟?«»‹›−—¬”“" # Add your additional custom punctuation from the training set here
46
  all_punctuation = string.punctuation + extra_punctuation
47
 
48
  return text.translate(str.maketrans('', '', all_punctuation))
@@ -65,23 +78,13 @@ def remove_punctuation(text):
65
 
66
  # return audio_input, target_rate
67
 
68
- def calculate_pronunciation_accuracy(reference_text, output_text, script):
69
  """
70
  Calculate pronunciation accuracy between reference and ASR output text using Epitran.
71
-
72
- Args:
73
- reference_text (str): The ground truth text in Uyghur (Arabic script).
74
- output_text (str): The ASR output text in Uyghur (Arabic script).
75
- language_code (str): Epitran language code (default is 'uig-Arab' for Uyghur).
76
-
77
- Returns:
78
- float: Pronunciation accuracy as a percentage.
79
- str: IPA transliteration of the reference text.
80
- str: IPA transliteration of the output text.
81
  """
82
 
83
  # make sure input text is arabic script for IPA conversion
84
- if script == 'Uyghur Latin':
85
  reference_text = ug_latn_to_arab(reference_text)
86
 
87
  # Remove punctuation from both texts
@@ -100,7 +103,7 @@ def calculate_pronunciation_accuracy(reference_text, output_text, script):
100
  pronunciation_accuracy = match_ratio * 100
101
 
102
  # Convert reference back to original script for feedback output
103
- if script == 'Uyghur Latin':
104
  reference_text_clean = ug_arab_to_latn(reference_text_clean)
105
  # Generate Markdown-compatible styled text
106
  comparison_md = "<h4>Pronunciation Feedback</h4>\n" # Small header
@@ -110,7 +113,7 @@ def calculate_pronunciation_accuracy(reference_text, output_text, script):
110
  out_segment = output_text_clean[j1:j2]
111
 
112
  if opcode == 'equal': # Matching characters
113
- comparison_md += f'<span style="color: blue; font-size: 20px;">{ref_segment}</span>'
114
  elif opcode in ['replace', 'delete', 'insert']: # Mismatched or missing
115
  comparison_md += f'<span style="color: red; font-size: 20px;">{ref_segment}</span>'
116
  comparison_md += "</div>"
 
1
  import random
2
  from umsc import UgMultiScriptConverter
3
+ from googletrans import Translator, LANGCODES
4
  import string
5
  import epitran
6
  from difflib import SequenceMatcher
7
 
8
+ # For googletrans 4.0.0-rc1
9
+ import httpcore
10
+ setattr(httpcore, 'SyncHTTPTransport', 'AsyncHTTPProxy')
11
 
12
  ## Global Vars
13
  # Lists of Uyghur short and long texts
 
21
  "قىلىچ قان تامغۇزسا، بەگ ئەل ئالىدۇ؛ قەلەمدىن سىياھتانسا، ئالتۇن كېلىدۇ."
22
  ]
23
 
24
+ # Initialize the translator
25
+ translator = Translator()
26
+ translation_choices = [L for L in LANGCODES]
27
+
28
  # Initialize uyghur script converter
29
  ug_arab_to_latn = UgMultiScriptConverter('UAS', 'ULS')
30
  ug_latn_to_arab = UgMultiScriptConverter('ULS', 'UAS')
 
32
  # Initialize Epitran for Uyghur (Arabic script)
33
  ipa_converter = epitran.Epitran('uig-Arab')
34
 
 
35
  ## Front-End Utils
36
  def generate_short_text(script_choice):
37
  """Generate a random Uyghur short text based on the type."""
 
43
  text = random.choice(long_texts)
44
  return ug_arab_to_latn(text) if script_choice == "Uyghur Latin" else text
45
 
46
+ def translate_text(input_text, script_choice, target_language):
47
+ """
48
+ Translate Uyghur text to the target language
49
+ """
50
+ if script_choice == 'Uyghur Latin':
51
+ input_text = ug_latn_to_arab(input_text)
52
+ translated_text = translator.translate(input_text, src="ug", dest=LANGCODES[target_language])
53
+ return translated_text.text
54
 
55
  ## ASR Utils
56
  def remove_punctuation(text):
57
  """Helper function to remove punctuation from text."""
58
+ extra_punctuation = "–؛;،؟?«»‹›−—¬”“" # Additional custom uyghur punctuation
59
  all_punctuation = string.punctuation + extra_punctuation
60
 
61
  return text.translate(str.maketrans('', '', all_punctuation))
 
78
 
79
  # return audio_input, target_rate
80
 
81
+ def calculate_pronunciation_accuracy(reference_text, output_text, script_choice):
82
  """
83
  Calculate pronunciation accuracy between reference and ASR output text using Epitran.
 
 
 
 
 
 
 
 
 
 
84
  """
85
 
86
  # make sure input text is arabic script for IPA conversion
87
+ if script_choice == 'Uyghur Latin':
88
  reference_text = ug_latn_to_arab(reference_text)
89
 
90
  # Remove punctuation from both texts
 
103
  pronunciation_accuracy = match_ratio * 100
104
 
105
  # Convert reference back to original script for feedback output
106
+ if script_choice == 'Uyghur Latin':
107
  reference_text_clean = ug_arab_to_latn(reference_text_clean)
108
  # Generate Markdown-compatible styled text
109
  comparison_md = "<h4>Pronunciation Feedback</h4>\n" # Small header
 
113
  out_segment = output_text_clean[j1:j2]
114
 
115
  if opcode == 'equal': # Matching characters
116
+ comparison_md += f'<span style="color: green; font-size: 20px;">{ref_segment}</span>'
117
  elif opcode in ['replace', 'delete', 'insert']: # Mismatched or missing
118
  comparison_md += f'<span style="color: red; font-size: 20px;">{ref_segment}</span>'
119
  comparison_md += "</div>"