Spaces:
Sleeping
Sleeping
import random
import string
import unicodedata
from difflib import SequenceMatcher

import epitran
import torchaudio
from umsc import UgMultiScriptConverter
# Sample Uyghur texts (Arabic script): short phrases and long sentences
# Short prompts: single words / greetings.
short_texts = [
    "سالام",
    "رەھمەت",
    "ياخشىمۇسىز",
]

# Long prompts: full sentences.
long_texts = [
    "مەكتەپكە بارغاندا تېخىمۇ بىلىملىك بولۇمەن.",
    "يېزا مەنزىرىسى ھەقىقەتەن گۈزەل.",
    "بىزنىڭ ئۆيدەپ تۆت تەكچە تۆتىلىسى تەكتەكچە",
]
# Front-End Utils
def generate_short_text(script_choice):
    """Pick a random entry from ``short_texts`` in the requested script.

    Args:
        script_choice (str): When equal to "Uyghur Latin", the chosen text is
            passed through ``UgMultiScriptConverter('UAS', 'ULS')`` before
            being returned. Any other value returns the text unchanged.

    Returns:
        The randomly chosen text, converted or as stored.
    """
    to_latin = UgMultiScriptConverter('UAS', 'ULS')
    chosen = random.choice(short_texts)
    return to_latin(chosen) if script_choice == "Uyghur Latin" else chosen
def generate_long_text(script_choice):
    """Pick a random entry from ``long_texts`` in the requested script.

    Args:
        script_choice (str): When equal to "Uyghur Latin", the chosen text is
            passed through ``UgMultiScriptConverter('UAS', 'ULS')`` before
            being returned. Any other value returns the text unchanged.

    Returns:
        The randomly chosen text, converted or as stored.
    """
    to_latin = UgMultiScriptConverter('UAS', 'ULS')
    chosen = random.choice(long_texts)
    return to_latin(chosen) if script_choice == "Uyghur Latin" else chosen
# ASR Utils
def load_and_resample_audio(file_path, target_rate):
    """Load an audio file and bring it to ``target_rate`` if necessary.

    Args:
        file_path: Path handed to ``torchaudio.load``.
        target_rate: Desired sample rate.

    Returns:
        tuple: ``(audio, target_rate)`` where ``audio`` is the loaded audio,
        passed through ``torchaudio.transforms.Resample`` only when the file's
        own sample rate differs from ``target_rate``.
    """
    waveform, source_rate = torchaudio.load(file_path)

    # Already at the requested rate: hand the audio back untouched.
    if source_rate == target_rate:
        return waveform, target_rate

    to_target_rate = torchaudio.transforms.Resample(source_rate, target_rate)
    return to_target_rate(waveform), target_rate
def calculate_pronunciation_accuracy(reference_text, output_text, language_code='uig-Arab'):
    """
    Compare a reference text with ASR output at the IPA level using Epitran.

    Both texts are stripped of punctuation, transliterated to IPA with
    Epitran, and aligned character by character with
    ``difflib.SequenceMatcher``.

    Args:
        reference_text (str): The ground truth text.
        output_text (str): The ASR output text.
        language_code (str): Epitran language code (default is 'uig-Arab',
            i.e. Uyghur in Arabic script).

    Returns:
        tuple: ``(reference_ipa, output_ipa, comparison_html,
        pronunciation_accuracy)`` where

        - reference_ipa (str): IPA transliteration of the reference text.
        - output_ipa (str): IPA transliteration of the output text.
        - comparison_html (str): The alignment rendered as ``<span>``
          elements. Matching reference segments are green; replaced or
          deleted reference segments and inserted output segments are red.
        - pronunciation_accuracy (float): ``SequenceMatcher.ratio()`` scaled
          to a percentage (0-100).
    """
    ipa_converter = epitran.Epitran(language_code)

    # Punctuation carries no pronunciation, so drop it before transliterating.
    reference_ipa = ipa_converter.transliterate(remove_punctuation(reference_text))
    output_ipa = ipa_converter.transliterate(remove_punctuation(output_text))

    # autojunk=False: the default heuristic treats frequently occurring
    # characters as junk once the second sequence reaches 200 items, which
    # distorts character-level scores on long utterances.
    matcher = SequenceMatcher(None, reference_ipa, output_ipa, autojunk=False)
    pronunciation_accuracy = matcher.ratio() * 100

    # Render the alignment; collect the pieces and join once at the end.
    html_parts = []
    for opcode, i1, i2, j1, j2 in matcher.get_opcodes():
        if opcode == 'equal':
            color, segment = "green", reference_ipa[i1:i2]
        elif opcode == 'insert':
            # Present only in the ASR output.
            color, segment = "red", output_ipa[j1:j2]
        else:
            # 'replace' or 'delete': show the reference segment that was missed.
            color, segment = "red", reference_ipa[i1:i2]
        html_parts.append(f'<span style="color: {color}">{segment}</span>')
    comparison_html = "".join(html_parts)

    return reference_ipa, output_ipa, comparison_html, pronunciation_accuracy
def remove_punctuation(text):
    """Strip punctuation from ``text``, including non-ASCII punctuation.

    Removes every character in ``string.punctuation`` plus any character
    whose Unicode general category starts with "P". This covers
    Arabic-script marks such as "،", "؛" and "؟", which the ASCII-only
    ``string.punctuation`` table does not contain.

    Args:
        text (str): Input text.

    Returns:
        str: ``text`` with punctuation characters removed. All other
        characters, whitespace included, are kept in their original order.
    """
    ascii_punctuation = set(string.punctuation)
    return "".join(
        ch
        for ch in text
        if ch not in ascii_punctuation
        and not unicodedata.category(ch).startswith("P")
    )