Spaces:

ixxan
/

uyghur-pronunciation-checker

Sleeping

App Files Community

Abdurahman committed on Dec 11, 2024

Commit

f0e249a

1 Parent(s): f4d4d8e

app

Browse files

Files changed (3) hide show

asr.py +2 -7
tts.py +2 -3
util.py +23 -11

asr.py CHANGED Viewed

@@ -2,7 +2,6 @@ import numpy as np
 from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
 import torch
 import torchaudio
-from umsc import UgMultiScriptConverter
 import util
 # Model ID and setup
@@ -46,16 +45,12 @@ def asr(audio_data, target_rate = 16000):
 def check_pronunciation(input_text, script, user_audio):
     # Transcripts from user input audio
     transcript_ugLatn_box = asr(user_audio)
-    ug_latn_to_arab = UgMultiScriptConverter('ULS', 'UAS')
-    transcript_ugArab_box = ug_latn_to_arab(transcript_ugLatn_box)
-    if script == 'Uyghur Latin':
-        input_text = ug_latn_to_arab(input_text) # make sure input text is arabic script for IPA conversion
     # Get IPA and Pronunciation Feedback
     machine_pronunciation, user_pronunciation, pronunciation_match, pronunciation_score = util.calculate_pronunciation_accuracy(
         reference_text = input_text,
         output_text = transcript_ugArab_box,
-        language_code='uig-Arab')
     return transcript_ugArab_box, transcript_ugLatn_box, machine_pronunciation, user_pronunciation, pronunciation_match, pronunciation_score

 from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
 import torch
 import torchaudio
 import util
 # Model ID and setup
 def check_pronunciation(input_text, script, user_audio):
     # Transcripts from user input audio
     transcript_ugLatn_box = asr(user_audio)
+    transcript_ugArab_box = util.ug_latn_to_arab(transcript_ugLatn_box)
     # Get IPA and Pronunciation Feedback
     machine_pronunciation, user_pronunciation, pronunciation_match, pronunciation_score = util.calculate_pronunciation_accuracy(
         reference_text = input_text,
         output_text = transcript_ugArab_box,
+        script=script)
     return transcript_ugArab_box, transcript_ugLatn_box, machine_pronunciation, user_pronunciation, pronunciation_match, pronunciation_score

tts.py CHANGED Viewed

@@ -1,7 +1,7 @@
 from transformers import VitsModel, AutoTokenizer
 import torch
-from umsc import UgMultiScriptConverter
 import scipy.io.wavfile
 # Model ID and setup
 model_id = "facebook/mms-tts-uig-script_arabic"
@@ -17,9 +17,8 @@ def generate_audio(input_text, script):
     Generate audio for the given input text and script
     """
     # Convert text to Uyghur Arabic if needed
-    ug_latn_to_arab = UgMultiScriptConverter('ULS', 'UAS')
     if script != "Uyghur Arabic":
-        input_text = ug_latn_to_arab(input_text)
     # Tokenize and move inputs to the same device as the model
     tts_inputs = tts_tokenizer(input_text, return_tensors="pt").to(device)

 from transformers import VitsModel, AutoTokenizer
 import torch
 import scipy.io.wavfile
+import util
 # Model ID and setup
 model_id = "facebook/mms-tts-uig-script_arabic"
     Generate audio for the given input text and script
     """
     # Convert text to Uyghur Arabic if needed
     if script != "Uyghur Arabic":
+        input_text = util.ug_latn_to_arab(input_text)
     # Tokenize and move inputs to the same device as the model
     tts_inputs = tts_tokenizer(input_text, return_tensors="pt").to(device)

util.py CHANGED Viewed

@@ -4,6 +4,7 @@ import string
 import epitran
 from difflib import SequenceMatcher
 # Lists of Uyghur short and long texts
 short_texts = [
     "سالام", "رەھمەت", "ياخشىمۇسىز", "خۇش كېپسىز", "خەيرلىك كۈن", "خەير خوش"
@@ -15,8 +16,15 @@ long_texts = [
     "قىلىچ قان تامغۇزسا، بەگ ئەل ئالىدۇ؛ قەلەمدىن سىياھتانسا، ئالتۇن كېلىدۇ."
 ]
-# Front-End Utils
 ug_arab_to_latn = UgMultiScriptConverter('UAS', 'ULS')
 def generate_short_text(script_choice):
     """Generate a random Uyghur short text based on the type."""
     text = random.choice(short_texts)
@@ -27,7 +35,15 @@ def generate_long_text(script_choice):
     text = random.choice(long_texts)
     return ug_arab_to_latn(text) if script_choice == "Uyghur Latin" else text
-# ASR Utils
 # def load_and_resample_audio(audio_data, target_rate):
 #     """Load audio and resample based on target sample rate"""
 #     if isinstance(audio_data, tuple):
@@ -46,7 +62,7 @@ def generate_long_text(script_choice):
 #     return audio_input, target_rate
-def calculate_pronunciation_accuracy(reference_text, output_text, language_code='uig-Arab'):
     """
     Calculate pronunciation accuracy between reference and ASR output text using Epitran.
@@ -60,8 +76,10 @@ def calculate_pronunciation_accuracy(reference_text, output_text, language_code=
         str: IPA transliteration of the reference text.
         str: IPA transliteration of the output text.
     """
-    # Initialize Epitran for Uyghur (Arabic script)
-    ipa_converter = epitran.Epitran(language_code)
     # Remove punctuation from both texts
     reference_text_clean = remove_punctuation(reference_text)
@@ -93,9 +111,3 @@ def calculate_pronunciation_accuracy(reference_text, output_text, language_code=
     return reference_ipa, output_ipa, comparison_md, pronunciation_accuracy
-def remove_punctuation(text):
-  """Helper function to remove punctuation from text."""
-  extra_punctuation = "–؛;،؟?«»‹›−—¬”“"  # Add your additional custom punctuation from the training set here
-  all_punctuation = string.punctuation + extra_punctuation
-  return text.translate(str.maketrans('', '', all_punctuation))

 import epitran
 from difflib import SequenceMatcher
+## Global Vars
 # Lists of Uyghur short and long texts
 short_texts = [
     "سالام", "رەھمەت", "ياخشىمۇسىز", "خۇش كېپسىز", "خەيرلىك كۈن", "خەير خوش"
     "قىلىچ قان تامغۇزسا، بەگ ئەل ئالىدۇ؛ قەلەمدىن سىياھتانسا، ئالتۇن كېلىدۇ."
 ]
+# Initialize uyghur script converter
 ug_arab_to_latn = UgMultiScriptConverter('UAS', 'ULS')
+ug_latn_to_arab = UgMultiScriptConverter('ULS', 'UAS')
+# Initialize Epitran for Uyghur (Arabic script)
+ipa_converter = epitran.Epitran(language_code='uig-Arab')
+## Front-End Utils
 def generate_short_text(script_choice):
     """Generate a random Uyghur short text based on the type."""
     text = random.choice(short_texts)
     text = random.choice(long_texts)
     return ug_arab_to_latn(text) if script_choice == "Uyghur Latin" else text
+## ASR Utils
+def remove_punctuation(text):
+  """Helper function to remove punctuation from text."""
+  extra_punctuation = "–؛;،؟?«»‹›−—¬”“"  # Add your additional custom punctuation from the training set here
+  all_punctuation = string.punctuation + extra_punctuation
+  return text.translate(str.maketrans('', '', all_punctuation))
 # def load_and_resample_audio(audio_data, target_rate):
 #     """Load audio and resample based on target sample rate"""
 #     if isinstance(audio_data, tuple):
 #     return audio_input, target_rate
+def calculate_pronunciation_accuracy(reference_text, output_text, script):
     """
     Calculate pronunciation accuracy between reference and ASR output text using Epitran.
         str: IPA transliteration of the reference text.
         str: IPA transliteration of the output text.
     """
+    if script == 'Uyghur Latin':
+        reference_text = ug_latn_to_arab(reference_text) # make sure input text is arabic script for IPA conversion
     # Remove punctuation from both texts
     reference_text_clean = remove_punctuation(reference_text)
     return reference_ipa, output_ipa, comparison_md, pronunciation_accuracy