import random
import string
import unicodedata
from difflib import SequenceMatcher

import epitran
import torchaudio
from umsc import UgMultiScriptConverter


# Sample Uyghur prompts written in Arabic script. The text generators below
# pick one entry at random from the matching list.

# Single-word prompts.
short_texts = [
    "سالام",
    "رەھمەت",
    "ياخشىمۇسىز",
]

# Sentence-length prompts.
long_texts = [
    "مەكتەپكە بارغاندا تېخىمۇ بىلىملىك بولۇمەن.",
    "يېزا مەنزىرىسى ھەقىقەتەن گۈزەل.",
    "بىزنىڭ ئۆيدەپ تۆت تەكچە تۆتىلىسى تەكتەكچە",
]


def generate_short_text(script_choice):
    """Return a randomly chosen entry of ``short_texts``.

    Args:
        script_choice (str): Output script selector. Only the exact value
            "Uyghur Latin" triggers a conversion; any other value returns
            the entry exactly as stored in ``short_texts``.

    Returns:
        str: The chosen text, passed through
        ``UgMultiScriptConverter('UAS', 'ULS')`` when "Uyghur Latin" was
        requested (presumably Arabic-script to Latin-script conversion;
        confirm against the umsc documentation).
    """
    text = random.choice(short_texts)
    if script_choice != "Uyghur Latin":
        return text

    # Build the converter only on the branch that needs it, so requests for
    # the stored script do not pay for an unused converter.
    ug_arab_to_latn = UgMultiScriptConverter('UAS', 'ULS')
    return ug_arab_to_latn(text)


def generate_long_text(script_choice):
    """Return a randomly chosen entry of ``long_texts``.

    Args:
        script_choice (str): Output script selector. Only the exact value
            "Uyghur Latin" triggers a conversion; any other value returns
            the entry exactly as stored in ``long_texts``.

    Returns:
        str: The chosen text, passed through
        ``UgMultiScriptConverter('UAS', 'ULS')`` when "Uyghur Latin" was
        requested (presumably Arabic-script to Latin-script conversion;
        confirm against the umsc documentation).
    """
    text = random.choice(long_texts)
    if script_choice != "Uyghur Latin":
        return text

    # Build the converter only on the branch that needs it, so requests for
    # the stored script do not pay for an unused converter.
    ug_arab_to_latn = UgMultiScriptConverter('UAS', 'ULS')
    return ug_arab_to_latn(text)


def load_and_resample_audio(file_path, target_rate):
    """Load an audio file and bring it to the requested sample rate.

    Args:
        file_path: Location of the audio file, handed to ``torchaudio.load``.
        target_rate: Desired sample rate.

    Returns:
        tuple: ``(audio, target_rate)``. ``audio`` is what
        ``torchaudio.load`` produced, run through
        ``torchaudio.transforms.Resample`` only when the file's own rate
        differs from ``target_rate``.
    """
    waveform, source_rate = torchaudio.load(file_path)

    # Already at the requested rate: hand the data back untouched.
    if source_rate == target_rate:
        return waveform, target_rate

    to_target_rate = torchaudio.transforms.Resample(source_rate, target_rate)
    return to_target_rate(waveform), target_rate


def calculate_pronunciation_accuracy(reference_text, output_text, language_code='uig-Arab'):
    """Compare reference and ASR output text at the IPA level using Epitran.

    Both texts are stripped of punctuation, transliterated with Epitran, and
    the two resulting strings are aligned character by character with
    ``difflib.SequenceMatcher``.

    Args:
        reference_text (str): The ground truth text.
        output_text (str): The ASR output text.
        language_code (str): Epitran language code (default is 'uig-Arab').

    Returns:
        tuple: ``(reference_ipa, output_ipa, comparison_html,
        pronunciation_accuracy)`` where

        * reference_ipa (str): transliteration of the reference text.
        * output_ipa (str): transliteration of the output text.
        * comparison_html (str): ``<span>`` fragments; matching segments are
          green, everything else is red. Replaced and deleted segments show
          the reference side, inserted segments show the output side.
        * pronunciation_accuracy (float): ``SequenceMatcher.ratio()`` scaled
          to 0-100.
    """
    ipa_converter = epitran.Epitran(language_code)

    reference_ipa = ipa_converter.transliterate(remove_punctuation(reference_text))
    output_ipa = ipa_converter.transliterate(remove_punctuation(output_text))

    # autojunk must be off: with sequences of 200+ items difflib's heuristic
    # treats frequently occurring items as junk, and in character-level text
    # almost every character is "frequent", which distorts the alignment.
    matcher = SequenceMatcher(None, reference_ipa, output_ipa, autojunk=False)
    pronunciation_accuracy = matcher.ratio() * 100

    html_parts = []
    for opcode, i1, i2, j1, j2 in matcher.get_opcodes():
        if opcode == 'equal':
            html_parts.append(
                f'<span style="color: green">{reference_ipa[i1:i2]}</span>'
            )
        elif opcode == 'insert':
            # Only the output side has content for an insertion.
            html_parts.append(
                f'<span style="color: red">{output_ipa[j1:j2]}</span>'
            )
        elif opcode in ('replace', 'delete'):
            html_parts.append(
                f'<span style="color: red">{reference_ipa[i1:i2]}</span>'
            )
    comparison_html = "".join(html_parts)

    return reference_ipa, output_ipa, comparison_html, pronunciation_accuracy


def remove_punctuation(text):
    """Return ``text`` with punctuation characters removed.

    Removes every character in ``string.punctuation`` and, in addition, every
    character whose Unicode general category starts with "P". The second rule
    covers non-ASCII marks such as the Arabic comma, semicolon and question
    mark, which ``string.punctuation`` does not contain.

    Args:
        text (str): Input text in any script.

    Returns:
        str: The text without punctuation; all other characters, including
        whitespace, are kept in their original order.
    """
    # string.punctuation also holds ASCII symbols (e.g. "+", "$") that are
    # not in a "P" category; keep removing them as well.
    ascii_punctuation = set(string.punctuation)
    return "".join(
        ch
        for ch in text
        if ch not in ascii_punctuation
        and not unicodedata.category(ch).startswith("P")
    )