Spaces:

ixxan
/

uyghur-pronunciation-checker

Running

App Files Files Community

Irpan committed on Dec 11, 2024

Commit

8e57d14

1 Parent(s): ff91a06

app

Browse files

Files changed (3) hide show

app.py +39 -13
asr.py +2 -2
util.py +21 -18

app.py CHANGED Viewed

@@ -8,6 +8,7 @@ with gr.Blocks() as app:
     with gr.Row():
         # Input Column
         with gr.Column(scale=1):
             with gr.Row():
                 script_choice = gr.Dropdown(
                     choices=["Uyghur Arabic", "Uyghur Latin"],
@@ -15,6 +16,7 @@ with gr.Blocks() as app:
                     value="Uyghur Arabic",
                     interactive=True
                 )
             with gr.Group():
                 with gr.Row():
                     input_text = gr.Textbox(
@@ -25,15 +27,34 @@ with gr.Blocks() as app:
                 with gr.Row():
                     generate_short_btn = gr.Button("Generate Short Text")
                     generate_long_btn = gr.Button("Generate Long Text")
             with gr.Group():
                 with gr.Row():
-                    example_audio = gr.Audio(label="3. (Optional) Generate AI Pronunciation for Text")
                 with gr.Row():
                     tts_btn = gr.Button("Generate AI Pronunciation")
             with gr.Group():
                 with gr.Row():
                     user_audio = gr.Audio(
-                        label="4. Record or Upload Your Pronunciation",
                         sources=["microphone", "upload"],
                         type="filepath",
                     )
@@ -42,20 +63,19 @@ with gr.Blocks() as app:
         # Output Column
         with gr.Column(scale=1):
-            # Group transcripts together
             with gr.Group():
                 with gr.Row():
                     transcript_ugArab_box = gr.Textbox(
-                        label="User Transcript (Uyghur Arabic)",
                         placeholder="ASR transcription of user audio..."
                     )
                 with gr.Row():
                     transcript_ugLatn_box = gr.Textbox(
-                        label="User Transcript (Uyghur Latin)",
                         placeholder="ASR transcription of user audio..."
                     )
-            # Group AI and user pronunciation
             with gr.Group():
                 with gr.Row():
                     correct_phoneme_box = gr.Textbox(
@@ -67,19 +87,19 @@ with gr.Blocks() as app:
                         label="User Phonemes",
                         placeholder="IPA representation of the user pronunciation..."
                     )
             with gr.Group():
-                with gr.Row():
-                    score_box = gr.Textbox(
-                        label="Phonetic Score",
-                        placeholder="Your pronunciation score as a percentage..."
-                    )
                 with gr.Row():
                     match_box = gr.Markdown(
                         """<h4>Pronunciation Feedback</h4>\n
                         Matching and mismatched characters will be visualized here...
                         """
                     )
     # Bind functions to buttons
     generate_short_btn.click(
@@ -93,6 +113,12 @@ with gr.Blocks() as app:
         inputs=[script_choice],
         outputs=[input_text]
     )
     tts_btn.click(
         tts.generate_audio,

     with gr.Row():
         # Input Column
         with gr.Column(scale=1):
+            # Script
             with gr.Row():
                 script_choice = gr.Dropdown(
                     choices=["Uyghur Arabic", "Uyghur Latin"],
                     value="Uyghur Arabic",
                     interactive=True
                 )
+            # Text
             with gr.Group():
                 with gr.Row():
                     input_text = gr.Textbox(
                 with gr.Row():
                     generate_short_btn = gr.Button("Generate Short Text")
                     generate_long_btn = gr.Button("Generate Long Text")
+            # Translations
+            with gr.Group():
+                # Output box for the translated text (target of translate_btn.click)
+                with gr.Row():
+                    translation_text = gr.Textbox(
+                        label="(Optional) Translate Uyghur Text",
+                        placeholder="Translated text here...",
+                    )
+                # Target-language selector and the button that triggers translation
+                with gr.Row():
+                    with gr.Column():
+                        translate_choice = gr.Dropdown(
+                            choices=util.translation_choices,
+                            label="Select Translation Language",
+                            value="english",
+                            interactive=True
+                        )
+                        translate_btn = gr.Button("Translate")
+            # TTS
             with gr.Group():
                 with gr.Row():
+                    example_audio = gr.Audio(label="(Optional) Generate AI Pronunciation for Text")
                 with gr.Row():
                     tts_btn = gr.Button("Generate AI Pronunciation")
+            # ASR
             with gr.Group():
                 with gr.Row():
                     user_audio = gr.Audio(
+                        label="3. Record or Upload Your Pronunciation",
                         sources=["microphone", "upload"],
                         type="filepath",
                     )
         # Output Column
         with gr.Column(scale=1):
+            # ASR Transcripts
             with gr.Group():
                 with gr.Row():
                     transcript_ugArab_box = gr.Textbox(
+                        label="User Transcript (Arabic Script)",
                         placeholder="ASR transcription of user audio..."
                     )
                 with gr.Row():
                     transcript_ugLatn_box = gr.Textbox(
+                        label="User Transcript (Latin Script)",
                         placeholder="ASR transcription of user audio..."
                     )
+            # IPA
             with gr.Group():
                 with gr.Row():
                     correct_phoneme_box = gr.Textbox(
                         label="User Phonemes",
                         placeholder="IPA representation of the user pronunciation..."
                     )
+            # Feedback
             with gr.Group():
                 with gr.Row():
                     match_box = gr.Markdown(
                         """<h4>Pronunciation Feedback</h4>\n
                         Matching and mismatched characters will be visualized here...
                         """
                     )
+                with gr.Row():
+                    score_box = gr.Textbox(
+                        label="Phonetic Score",
+                        placeholder="Your pronunciation score as a percentage..."
+                    )
     # Bind functions to buttons
     generate_short_btn.click(
         inputs=[script_choice],
         outputs=[input_text]
     )
+    translate_btn.click(
+        util.translate_text,
+        inputs=[input_text, script_choice, translate_choice],
+        outputs=[translation_text]
+    )
     tts_btn.click(
         tts.generate_audio,

asr.py CHANGED Viewed

@@ -42,7 +42,7 @@ def asr(audio_data, target_rate = 16000):
     return transcript
-def check_pronunciation(input_text, script, user_audio):
     # Transcripts from user input audio
     transcript_ugLatn_box = asr(user_audio)
     transcript_ugArab_box = util.ug_latn_to_arab(transcript_ugLatn_box)
@@ -51,6 +51,6 @@ def check_pronunciation(input_text, script, user_audio):
     correct_phoneme, user_phoneme, pronunciation_match, pronunciation_score = util.calculate_pronunciation_accuracy(
         reference_text = input_text,
         output_text = transcript_ugArab_box,
-        script=script)
     return transcript_ugArab_box, transcript_ugLatn_box, correct_phoneme, user_phoneme, pronunciation_match, pronunciation_score

     return transcript
+def check_pronunciation(input_text, script_choice, user_audio):
     # Transcripts from user input audio
     transcript_ugLatn_box = asr(user_audio)
     transcript_ugArab_box = util.ug_latn_to_arab(transcript_ugLatn_box)
     correct_phoneme, user_phoneme, pronunciation_match, pronunciation_score = util.calculate_pronunciation_accuracy(
         reference_text = input_text,
         output_text = transcript_ugArab_box,
+        script_choice=script_choice)
     return transcript_ugArab_box, transcript_ugLatn_box, correct_phoneme, user_phoneme, pronunciation_match, pronunciation_score

util.py CHANGED Viewed

@@ -1,11 +1,13 @@
 import random
 from umsc import UgMultiScriptConverter
 import string
 import epitran
 from difflib import SequenceMatcher
-# import httpcore
-# setattr(httpcore, 'SyncHTTPTransport', 'AsyncHTTPProxy')
 ## Global Vars
 # Lists of Uyghur short and long texts
@@ -19,6 +21,10 @@ long_texts = [
     "قىلىچ قان تامغۇزسا، بەگ ئەل ئالىدۇ؛ قەلەمدىن سىياھتانسا، ئالتۇن كېلىدۇ."
 ]
 # Initialize uyghur script converter
 ug_arab_to_latn = UgMultiScriptConverter('UAS', 'ULS')
 ug_latn_to_arab = UgMultiScriptConverter('ULS', 'UAS')
@@ -26,7 +32,6 @@ ug_latn_to_arab = UgMultiScriptConverter('ULS', 'UAS')
 # Initialize Epitran for Uyghur (Arabic script)
 ipa_converter = epitran.Epitran('uig-Arab')
 ## Front-End Utils
 def generate_short_text(script_choice):
     """Generate a random Uyghur short text based on the type."""
@@ -38,11 +43,19 @@ def generate_long_text(script_choice):
     text = random.choice(long_texts)
     return ug_arab_to_latn(text) if script_choice == "Uyghur Latin" else text
 ## ASR Utils
 def remove_punctuation(text):
   """Helper function to remove punctuation from text."""
-  extra_punctuation = "–؛;،؟?«»‹›−—¬”“"  # Add your additional custom punctuation from the training set here
   all_punctuation = string.punctuation + extra_punctuation
   return text.translate(str.maketrans('', '', all_punctuation))
@@ -65,23 +78,13 @@ def remove_punctuation(text):
 #     return audio_input, target_rate
-def calculate_pronunciation_accuracy(reference_text, output_text, script):
     """
     Calculate pronunciation accuracy between reference and ASR output text using Epitran.
-    Args:
-        reference_text (str): The ground truth text in Uyghur (Arabic script).
-        output_text (str): The ASR output text in Uyghur (Arabic script).
-        language_code (str): Epitran language code (default is 'uig-Arab' for Uyghur).
-    Returns:
-        float: Pronunciation accuracy as a percentage.
-        str: IPA transliteration of the reference text.
-        str: IPA transliteration of the output text.
     """
     # make sure input text is arabic script for IPA conversion
-    if script == 'Uyghur Latin':
         reference_text = ug_latn_to_arab(reference_text)
     # Remove punctuation from both texts
@@ -100,7 +103,7 @@ def calculate_pronunciation_accuracy(reference_text, output_text, script):
     pronunciation_accuracy = match_ratio * 100
     # Convert reference back to original script for feedback output
-    if script == 'Uyghur Latin':
         reference_text_clean = ug_arab_to_latn(reference_text_clean)
      # Generate Markdown-compatible styled text
     comparison_md = "<h4>Pronunciation Feedback</h4>\n"  # Small header
@@ -110,7 +113,7 @@ def calculate_pronunciation_accuracy(reference_text, output_text, script):
         out_segment = output_text_clean[j1:j2]
         if opcode == 'equal':  # Matching characters
-            comparison_md += f'<span style="color: blue; font-size: 20px;">{ref_segment}</span>'
         elif opcode in ['replace', 'delete', 'insert']:  # Mismatched or missing
             comparison_md += f'<span style="color: red; font-size: 20px;">{ref_segment}</span>'
     comparison_md += "</div>"

 import random
 from umsc import UgMultiScriptConverter
+from googletrans import Translator, LANGCODES
 import string
 import epitran
 from difflib import SequenceMatcher
+# For googletrans 4.0.0-rc1
+import httpcore
+setattr(httpcore, 'SyncHTTPTransport', 'AsyncHTTPProxy')
 ## Global Vars
 # Lists of Uyghur short and long texts
     "قىلىچ قان تامغۇزسا، بەگ ئەل ئالىدۇ؛ قەلەمدىن سىياھتانسا، ئالتۇن كېلىدۇ."
 ]
+# Initialize the translator
+translator = Translator()
+translation_choices = [L for L in LANGCODES]
 # Initialize uyghur script converter
 ug_arab_to_latn = UgMultiScriptConverter('UAS', 'ULS')
 ug_latn_to_arab = UgMultiScriptConverter('ULS', 'UAS')
 # Initialize Epitran for Uyghur (Arabic script)
 ipa_converter = epitran.Epitran('uig-Arab')
 ## Front-End Utils
 def generate_short_text(script_choice):
     """Generate a random Uyghur short text based on the type."""
     text = random.choice(long_texts)
     return ug_arab_to_latn(text) if script_choice == "Uyghur Latin" else text
+def translate_text(input_text, script_choice, target_language):
+    """
+    Translate Uyghur text to the target language
+    """
+    if script_choice == 'Uyghur Latin':  # Latin-script input is run through ug_latn_to_arab before translating; any other script_choice leaves the text untouched
+        input_text = ug_latn_to_arab(input_text)
+    translated_text = translator.translate(input_text, src="ug", dest=LANGCODES[target_language])  # source is always "ug"; target_language must be a LANGCODES key (e.g. "english") - presumably a KeyError otherwise, TODO confirm
+    return translated_text.text  # only the .text of the translator result is returned
 ## ASR Utils
 def remove_punctuation(text):
   """Helper function to remove punctuation from text."""
+  extra_punctuation = "–؛;،؟?«»‹›−—¬”“"  # Additional custom uyghur punctuation
   all_punctuation = string.punctuation + extra_punctuation
   return text.translate(str.maketrans('', '', all_punctuation))
 #     return audio_input, target_rate
+def calculate_pronunciation_accuracy(reference_text, output_text, script_choice):
     """
     Calculate pronunciation accuracy between reference and ASR output text using Epitran.
     """
     # make sure input text is arabic script for IPA conversion
+    if script_choice == 'Uyghur Latin':
         reference_text = ug_latn_to_arab(reference_text)
     # Remove punctuation from both texts
     pronunciation_accuracy = match_ratio * 100
     # Convert reference back to original script for feedback output
+    if script_choice == 'Uyghur Latin':
         reference_text_clean = ug_arab_to_latn(reference_text_clean)
      # Generate Markdown-compatible styled text
     comparison_md = "<h4>Pronunciation Feedback</h4>\n"  # Small header
         out_segment = output_text_clean[j1:j2]
         if opcode == 'equal':  # Matching characters
+            comparison_md += f'<span style="color: green; font-size: 20px;">{ref_segment}</span>'
         elif opcode in ['replace', 'delete', 'insert']:  # Mismatched or missing
             comparison_md += f'<span style="color: red; font-size: 20px;">{ref_segment}</span>'
     comparison_md += "</div>"