import random
import string
from difflib import SequenceMatcher

import epitran
from umsc import UgMultiScriptConverter

## Global Vars

# Lists of Uyghur short and long texts
short_texts = [
    "سالام",
    "رەھمەت",
    "ياخشىمۇسىز",
    "خۇش كېپسىز",
    "خەيرلىك كۈن",
    "خەير خوش"
]

long_texts = [
    "مەكتەپكە بارغاندا تېخىمۇ بىلىملىك بولۇمەن.",
    "يېزا مەنزىرىسى ھەقىقەتەن گۈزەل.",
    "بىزنىڭ ئۆيدە تۆت تەكچە، تۆتىلىسى تەك-تەكچە",
    "قىلىچ قان تامغۇزسا، بەگ ئەل ئالىدۇ؛ قەلەمدىن سىياھتانسا، ئالتۇن كېلىدۇ."
]

# Initialize uyghur script converter
ug_arab_to_latn = UgMultiScriptConverter('UAS', 'ULS')
ug_latn_to_arab = UgMultiScriptConverter('ULS', 'UAS')

# Initialize Epitran for Uyghur (Arabic script)
ipa_converter = epitran.Epitran(language_code='uig-Arab')

# Characters stripped by remove_punctuation(), in addition to string.punctuation.
_EXTRA_PUNCTUATION = "–؛;،؟?«»‹›−—¬”“"  # Add your additional custom punctuation from the training set here

# Deletion table built once at import time so remove_punctuation() does not
# rebuild the same mapping on every call.
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation + _EXTRA_PUNCTUATION)


## Front-End Utils

def _pick_text(candidates, script_choice):
    """Pick a random entry of *candidates*, converted to Latin script on request."""
    text = random.choice(candidates)
    return ug_arab_to_latn(text) if script_choice == "Uyghur Latin" else text


def generate_short_text(script_choice):
    """Return a random entry of ``short_texts``.

    The entry is converted with ``ug_arab_to_latn`` when ``script_choice`` is
    exactly "Uyghur Latin"; for any other value it is returned as stored.
    """
    return _pick_text(short_texts, script_choice)


def generate_long_text(script_choice):
    """Return a random entry of ``long_texts``.

    The entry is converted with ``ug_arab_to_latn`` when ``script_choice`` is
    exactly "Uyghur Latin"; for any other value it is returned as stored.
    """
    return _pick_text(long_texts, script_choice)


## ASR Utils

def remove_punctuation(text):
    """Return *text* with ASCII and the extra listed punctuation characters deleted.

    The removed set is ``string.punctuation`` plus ``_EXTRA_PUNCTUATION``;
    every other character, including whitespace, is kept unchanged.
    """
    return text.translate(_PUNCTUATION_TABLE)
def calculate_pronunciation_accuracy(reference_text, output_text, script):
    """
    Score an ASR transcript against a reference text.

    Both texts are stripped of punctuation and transliterated to IPA with
    Epitran. The similarity score and the comparison string are computed on
    the punctuation-free texts themselves, not on their IPA transliterations;
    the IPA strings are only returned alongside them.

    Args:
        reference_text (str): The ground truth text. It is converted with
            ``ug_latn_to_arab`` first when ``script`` is 'Uyghur Latin'.
        output_text (str): The ASR output text. It is never script-converted
            here, so it is presumably already in Arabic script
            (NOTE(review): confirm against the caller).
        script (str): Script of ``reference_text``. Only the exact value
            'Uyghur Latin' triggers the conversion.

    Returns:
        tuple: ``(reference_ipa, output_ipa, comparison_md, pronunciation_accuracy)``

        - reference_ipa (str): IPA transliteration of the cleaned reference.
        - output_ipa (str): IPA transliteration of the cleaned ASR output.
        - comparison_md (str): The reference segments from the diff, joined
          and wrapped in a leading and trailing newline.
        - pronunciation_accuracy (float): ``SequenceMatcher.ratio()`` of the
          two cleaned texts, multiplied by 100.
    """
    if script == 'Uyghur Latin':
        # make sure input text is arabic script for IPA conversion
        reference_text = ug_latn_to_arab(reference_text)

    # Remove punctuation from both texts
    reference_text_clean = remove_punctuation(reference_text)
    output_text_clean = remove_punctuation(output_text)

    # Transliterate both texts to IPA
    reference_ipa = ipa_converter.transliterate(reference_text_clean)
    output_ipa = ipa_converter.transliterate(output_text_clean)

    # Character-level similarity of the cleaned texts, as a percentage
    matcher = SequenceMatcher(None, reference_text_clean, output_text_clean)
    pronunciation_accuracy = matcher.ratio() * 100

    # Collect the reference side of every diff operation. An 'insert' has an
    # empty reference slice, so characters present only in the ASR output
    # contribute nothing to the comparison string.
    segments = []
    for opcode, i1, i2, _j1, _j2 in matcher.get_opcodes():
        ref_segment = reference_text_clean[i1:i2]
        if opcode == 'equal':
            # Matching characters
            segments.append(f'{ref_segment}')
        elif opcode in ('replace', 'delete', 'insert'):
            # Mismatched or missing
            # NOTE(review): this branch emits the same text as the 'equal'
            # branch; any distinguishing markup is not present in this file.
            segments.append(f'{ref_segment}')

    # Explicit escapes: a single-quoted f-string may not span physical lines.
    joined_segments = "".join(segments)
    comparison_md = f"\n{joined_segments}\n"

    return reference_ipa, output_ipa, comparison_md, pronunciation_accuracy