Irpan
committed on
Commit
·
8e57d14
1
Parent(s):
ff91a06
app
Browse files
app.py
CHANGED
@@ -8,6 +8,7 @@ with gr.Blocks() as app:
|
|
8 |
with gr.Row():
|
9 |
# Input Column
|
10 |
with gr.Column(scale=1):
|
|
|
11 |
with gr.Row():
|
12 |
script_choice = gr.Dropdown(
|
13 |
choices=["Uyghur Arabic", "Uyghur Latin"],
|
@@ -15,6 +16,7 @@ with gr.Blocks() as app:
|
|
15 |
value="Uyghur Arabic",
|
16 |
interactive=True
|
17 |
)
|
|
|
18 |
with gr.Group():
|
19 |
with gr.Row():
|
20 |
input_text = gr.Textbox(
|
@@ -25,15 +27,34 @@ with gr.Blocks() as app:
|
|
25 |
with gr.Row():
|
26 |
generate_short_btn = gr.Button("Generate Short Text")
|
27 |
generate_long_btn = gr.Button("Generate Long Text")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
with gr.Group():
|
29 |
with gr.Row():
|
30 |
-
example_audio = gr.Audio(label="
|
31 |
with gr.Row():
|
32 |
tts_btn = gr.Button("Generate AI Pronunciation")
|
|
|
33 |
with gr.Group():
|
34 |
with gr.Row():
|
35 |
user_audio = gr.Audio(
|
36 |
-
label="
|
37 |
sources=["microphone", "upload"],
|
38 |
type="filepath",
|
39 |
)
|
@@ -42,20 +63,19 @@ with gr.Blocks() as app:
|
|
42 |
|
43 |
# Output Column
|
44 |
with gr.Column(scale=1):
|
45 |
-
#
|
46 |
with gr.Group():
|
47 |
with gr.Row():
|
48 |
transcript_ugArab_box = gr.Textbox(
|
49 |
-
label="User Transcript (
|
50 |
placeholder="ASR transcription of user audio..."
|
51 |
)
|
52 |
with gr.Row():
|
53 |
transcript_ugLatn_box = gr.Textbox(
|
54 |
-
label="User Transcript (
|
55 |
placeholder="ASR transcription of user audio..."
|
56 |
)
|
57 |
-
|
58 |
-
# Group AI and user pronunciation
|
59 |
with gr.Group():
|
60 |
with gr.Row():
|
61 |
correct_phoneme_box = gr.Textbox(
|
@@ -67,19 +87,19 @@ with gr.Blocks() as app:
|
|
67 |
label="User Phonemes",
|
68 |
placeholder="IPA representation of the user pronunciation..."
|
69 |
)
|
70 |
-
|
71 |
with gr.Group():
|
72 |
-
with gr.Row():
|
73 |
-
score_box = gr.Textbox(
|
74 |
-
label="Phonetic Score",
|
75 |
-
placeholder="Your pronunciation score as a percentage..."
|
76 |
-
)
|
77 |
with gr.Row():
|
78 |
match_box = gr.Markdown(
|
79 |
"""<h4>Pronunciation Feedback</h4>\n
|
80 |
Matching and mismatched characters will be visualized here...
|
81 |
"""
|
82 |
)
|
|
|
|
|
|
|
|
|
|
|
83 |
|
84 |
# Bind functions to buttons
|
85 |
generate_short_btn.click(
|
@@ -93,6 +113,12 @@ with gr.Blocks() as app:
|
|
93 |
inputs=[script_choice],
|
94 |
outputs=[input_text]
|
95 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
96 |
|
97 |
tts_btn.click(
|
98 |
tts.generate_audio,
|
|
|
8 |
with gr.Row():
|
9 |
# Input Column
|
10 |
with gr.Column(scale=1):
|
11 |
+
# Script
|
12 |
with gr.Row():
|
13 |
script_choice = gr.Dropdown(
|
14 |
choices=["Uyghur Arabic", "Uyghur Latin"],
|
|
|
16 |
value="Uyghur Arabic",
|
17 |
interactive=True
|
18 |
)
|
19 |
+
# Text
|
20 |
with gr.Group():
|
21 |
with gr.Row():
|
22 |
input_text = gr.Textbox(
|
|
|
27 |
with gr.Row():
|
28 |
generate_short_btn = gr.Button("Generate Short Text")
|
29 |
generate_long_btn = gr.Button("Generate Long Text")
|
30 |
+
# Translations
|
31 |
+
with gr.Group():
|
32 |
+
with gr.Row():
|
33 |
+
translation_text = gr.Textbox(
|
34 |
+
label="(Optional) Translate Uyghur Text",
|
35 |
+
placeholder="Translated text here...",
|
36 |
+
)
|
37 |
+
# Add the translation language dropdown and the translate button
|
38 |
+
with gr.Row():
|
39 |
+
with gr.Column():
|
40 |
+
translate_choice = gr.Dropdown(
|
41 |
+
choices=util.translation_choices,
|
42 |
+
label="Select Translation Langauge",
|
43 |
+
value="english",
|
44 |
+
interactive=True
|
45 |
+
)
|
46 |
+
translate_btn = gr.Button("Translate")
|
47 |
+
# TTS
|
48 |
with gr.Group():
|
49 |
with gr.Row():
|
50 |
+
example_audio = gr.Audio(label="(Optional) Generate AI Pronunciation for Text")
|
51 |
with gr.Row():
|
52 |
tts_btn = gr.Button("Generate AI Pronunciation")
|
53 |
+
# ASR
|
54 |
with gr.Group():
|
55 |
with gr.Row():
|
56 |
user_audio = gr.Audio(
|
57 |
+
label="3. Record or Upload Your Pronunciation",
|
58 |
sources=["microphone", "upload"],
|
59 |
type="filepath",
|
60 |
)
|
|
|
63 |
|
64 |
# Output Column
|
65 |
with gr.Column(scale=1):
|
66 |
+
# ASR Transcripts
|
67 |
with gr.Group():
|
68 |
with gr.Row():
|
69 |
transcript_ugArab_box = gr.Textbox(
|
70 |
+
label="User Transcript (Arabic Script)",
|
71 |
placeholder="ASR transcription of user audio..."
|
72 |
)
|
73 |
with gr.Row():
|
74 |
transcript_ugLatn_box = gr.Textbox(
|
75 |
+
label="User Transcript (Latin Script)",
|
76 |
placeholder="ASR transcription of user audio..."
|
77 |
)
|
78 |
+
# IPA
|
|
|
79 |
with gr.Group():
|
80 |
with gr.Row():
|
81 |
correct_phoneme_box = gr.Textbox(
|
|
|
87 |
label="User Phonemes",
|
88 |
placeholder="IPA representation of the user pronunciation..."
|
89 |
)
|
90 |
+
# Feedback
|
91 |
with gr.Group():
|
|
|
|
|
|
|
|
|
|
|
92 |
with gr.Row():
|
93 |
match_box = gr.Markdown(
|
94 |
"""<h4>Pronunciation Feedback</h4>\n
|
95 |
Matching and mismatched characters will be visualized here...
|
96 |
"""
|
97 |
)
|
98 |
+
with gr.Row():
|
99 |
+
score_box = gr.Textbox(
|
100 |
+
label="Phonetic Score",
|
101 |
+
placeholder="Your pronunciation score as a percentage..."
|
102 |
+
)
|
103 |
|
104 |
# Bind functions to buttons
|
105 |
generate_short_btn.click(
|
|
|
113 |
inputs=[script_choice],
|
114 |
outputs=[input_text]
|
115 |
)
|
116 |
+
|
117 |
+
translate_btn.click(
|
118 |
+
util.translate_text,
|
119 |
+
inputs=[input_text, script_choice, translate_choice],
|
120 |
+
outputs=[translation_text]
|
121 |
+
)
|
122 |
|
123 |
tts_btn.click(
|
124 |
tts.generate_audio,
|
asr.py
CHANGED
@@ -42,7 +42,7 @@ def asr(audio_data, target_rate = 16000):
|
|
42 |
return transcript
|
43 |
|
44 |
|
45 |
-
def check_pronunciation(input_text,
|
46 |
# Transcripts from user input audio
|
47 |
transcript_ugLatn_box = asr(user_audio)
|
48 |
transcript_ugArab_box = util.ug_latn_to_arab(transcript_ugLatn_box)
|
@@ -51,6 +51,6 @@ def check_pronunciation(input_text, script, user_audio):
|
|
51 |
correct_phoneme, user_phoneme, pronunciation_match, pronunciation_score = util.calculate_pronunciation_accuracy(
|
52 |
reference_text = input_text,
|
53 |
output_text = transcript_ugArab_box,
|
54 |
-
|
55 |
|
56 |
return transcript_ugArab_box, transcript_ugLatn_box, correct_phoneme, user_phoneme, pronunciation_match, pronunciation_score
|
|
|
42 |
return transcript
|
43 |
|
44 |
|
45 |
+
def check_pronunciation(input_text, script_choice, user_audio):
|
46 |
# Transcripts from user input audio
|
47 |
transcript_ugLatn_box = asr(user_audio)
|
48 |
transcript_ugArab_box = util.ug_latn_to_arab(transcript_ugLatn_box)
|
|
|
51 |
correct_phoneme, user_phoneme, pronunciation_match, pronunciation_score = util.calculate_pronunciation_accuracy(
|
52 |
reference_text = input_text,
|
53 |
output_text = transcript_ugArab_box,
|
54 |
+
script_choice=script_choice)
|
55 |
|
56 |
return transcript_ugArab_box, transcript_ugLatn_box, correct_phoneme, user_phoneme, pronunciation_match, pronunciation_score
|
util.py
CHANGED
@@ -1,11 +1,13 @@
|
|
1 |
import random
|
2 |
from umsc import UgMultiScriptConverter
|
|
|
3 |
import string
|
4 |
import epitran
|
5 |
from difflib import SequenceMatcher
|
6 |
|
7 |
-
#
|
8 |
-
|
|
|
9 |
|
10 |
## Global Vars
|
11 |
# Lists of Uyghur short and long texts
|
@@ -19,6 +21,10 @@ long_texts = [
|
|
19 |
"قىلىچ قان تامغۇزسا، بەگ ئەل ئالىدۇ؛ قەلەمدىن سىياھتانسا، ئالتۇن كېلىدۇ."
|
20 |
]
|
21 |
|
|
|
|
|
|
|
|
|
22 |
# Initialize uyghur script converter
|
23 |
ug_arab_to_latn = UgMultiScriptConverter('UAS', 'ULS')
|
24 |
ug_latn_to_arab = UgMultiScriptConverter('ULS', 'UAS')
|
@@ -26,7 +32,6 @@ ug_latn_to_arab = UgMultiScriptConverter('ULS', 'UAS')
|
|
26 |
# Initialize Epitran for Uyghur (Arabic script)
|
27 |
ipa_converter = epitran.Epitran('uig-Arab')
|
28 |
|
29 |
-
|
30 |
## Front-End Utils
|
31 |
def generate_short_text(script_choice):
|
32 |
"""Generate a random Uyghur short text based on the type."""
|
@@ -38,11 +43,19 @@ def generate_long_text(script_choice):
|
|
38 |
text = random.choice(long_texts)
|
39 |
return ug_arab_to_latn(text) if script_choice == "Uyghur Latin" else text
|
40 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
|
42 |
## ASR Utils
|
43 |
def remove_punctuation(text):
|
44 |
"""Helper function to remove punctuation from text."""
|
45 |
-
extra_punctuation = "–؛;،؟?«»‹›−—¬”“" #
|
46 |
all_punctuation = string.punctuation + extra_punctuation
|
47 |
|
48 |
return text.translate(str.maketrans('', '', all_punctuation))
|
@@ -65,23 +78,13 @@ def remove_punctuation(text):
|
|
65 |
|
66 |
# return audio_input, target_rate
|
67 |
|
68 |
-
def calculate_pronunciation_accuracy(reference_text, output_text,
|
69 |
"""
|
70 |
Calculate pronunciation accuracy between reference and ASR output text using Epitran.
|
71 |
-
|
72 |
-
Args:
|
73 |
-
reference_text (str): The ground truth text in Uyghur (Arabic script).
|
74 |
-
output_text (str): The ASR output text in Uyghur (Arabic script).
|
75 |
-
language_code (str): Epitran language code (default is 'uig-Arab' for Uyghur).
|
76 |
-
|
77 |
-
Returns:
|
78 |
-
float: Pronunciation accuracy as a percentage.
|
79 |
-
str: IPA transliteration of the reference text.
|
80 |
-
str: IPA transliteration of the output text.
|
81 |
"""
|
82 |
|
83 |
# make sure input text is arabic script for IPA conversion
|
84 |
-
if
|
85 |
reference_text = ug_latn_to_arab(reference_text)
|
86 |
|
87 |
# Remove punctuation from both texts
|
@@ -100,7 +103,7 @@ def calculate_pronunciation_accuracy(reference_text, output_text, script):
|
|
100 |
pronunciation_accuracy = match_ratio * 100
|
101 |
|
102 |
# Convert reference back to original script for feedback output
|
103 |
-
if
|
104 |
reference_text_clean = ug_arab_to_latn(reference_text_clean)
|
105 |
# Generate Markdown-compatible styled text
|
106 |
comparison_md = "<h4>Pronunciation Feedback</h4>\n" # Small header
|
@@ -110,7 +113,7 @@ def calculate_pronunciation_accuracy(reference_text, output_text, script):
|
|
110 |
out_segment = output_text_clean[j1:j2]
|
111 |
|
112 |
if opcode == 'equal': # Matching characters
|
113 |
-
comparison_md += f'<span style="color:
|
114 |
elif opcode in ['replace', 'delete', 'insert']: # Mismatched or missing
|
115 |
comparison_md += f'<span style="color: red; font-size: 20px;">{ref_segment}</span>'
|
116 |
comparison_md += "</div>"
|
|
|
1 |
import random
from umsc import UgMultiScriptConverter
import string
import epitran
from difflib import SequenceMatcher

# For googletrans 4.0.0-rc1
# NOTE(review): googletrans 4.0.0-rc1 appears to look up
# httpcore.SyncHTTPTransport while the package is being imported, and newer
# httpcore releases no longer provide that name. The placeholder therefore has
# to be installed BEFORE googletrans is imported; installing it afterwards is
# too late to prevent the AttributeError.
import httpcore

# Only add the placeholder when the attribute is really missing, so an older
# httpcore that still ships the real class is left untouched.
if not hasattr(httpcore, 'SyncHTTPTransport'):
    setattr(httpcore, 'SyncHTTPTransport', 'AsyncHTTPProxy')

from googletrans import Translator, LANGCODES  # noqa: E402  (must follow the patch above)
|
11 |
|
12 |
## Global Vars
|
13 |
# Lists of Uyghur short and long texts
|
|
|
21 |
"قىلىچ قان تامغۇزسا، بەگ ئەل ئالىدۇ؛ قەلەمدىن سىياھتانسا، ئالتۇن كېلىدۇ."
|
22 |
]
|
23 |
|
24 |
+
# Initialize the translator
|
25 |
+
translator = Translator()
|
26 |
+
translation_choices = [L for L in LANGCODES]
|
27 |
+
|
28 |
# Initialize uyghur script converter
|
29 |
ug_arab_to_latn = UgMultiScriptConverter('UAS', 'ULS')
|
30 |
ug_latn_to_arab = UgMultiScriptConverter('ULS', 'UAS')
|
|
|
32 |
# Initialize Epitran for Uyghur (Arabic script)
|
33 |
ipa_converter = epitran.Epitran('uig-Arab')
|
34 |
|
|
|
35 |
## Front-End Utils
|
36 |
def generate_short_text(script_choice):
|
37 |
"""Generate a random Uyghur short text based on the type."""
|
|
|
43 |
text = random.choice(long_texts)
|
44 |
return ug_arab_to_latn(text) if script_choice == "Uyghur Latin" else text
|
45 |
|
46 |
+
def translate_text(input_text, script_choice, target_language):
    """
    Translate Uyghur text to the target language.

    Args:
        input_text (str): Uyghur text, written in the script named by
            ``script_choice``.
        script_choice (str): When equal to ``'Uyghur Latin'`` the text is
            converted to Arabic script before it is sent for translation;
            any other value leaves the text as-is.
        target_language (str): Language name used as a key into
            ``LANGCODES`` (for example ``"english"``).

    Returns:
        str: The translated text, or an empty string when ``input_text`` is
        empty, ``None`` or whitespace-only.

    Raises:
        KeyError: If ``target_language`` is not a key of ``LANGCODES``.
    """
    # Nothing to translate: skip the script conversion and the remote call.
    if not input_text or not input_text.strip():
        return ""

    # Resolve the language first so a bad or cleared dropdown selection fails
    # with a readable message instead of a bare KeyError.
    try:
        dest_code = LANGCODES[target_language]
    except KeyError:
        raise KeyError(
            f"Unsupported translation language: {target_language!r}"
        ) from None

    if script_choice == 'Uyghur Latin':
        input_text = ug_latn_to_arab(input_text)

    translated_text = translator.translate(input_text, src="ug", dest=dest_code)
    return translated_text.text
|
54 |
|
55 |
## ASR Utils
|
56 |
# Translation table that deletes ASCII punctuation plus additional custom
# Uyghur punctuation. Built once at import time because it never changes
# between calls.
_PUNCTUATION_TABLE = str.maketrans(
    '', '', string.punctuation + "–؛;،؟?«»‹›−—¬”“"
)


def remove_punctuation(text):
    """Helper function to remove punctuation from text.

    Args:
        text (str): Text to clean.

    Returns:
        str: ``text`` with every character from ``string.punctuation`` and
        the additional custom Uyghur punctuation set removed. All other
        characters, including whitespace, are kept unchanged.
    """
    return text.translate(_PUNCTUATION_TABLE)
|
|
|
78 |
|
79 |
# return audio_input, target_rate
|
80 |
|
81 |
+
def calculate_pronunciation_accuracy(reference_text, output_text, script_choice):
|
82 |
"""
|
83 |
Calculate pronunciation accuracy between reference and ASR output text using Epitran.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
84 |
"""
|
85 |
|
86 |
# make sure input text is arabic script for IPA conversion
|
87 |
+
if script_choice == 'Uyghur Latin':
|
88 |
reference_text = ug_latn_to_arab(reference_text)
|
89 |
|
90 |
# Remove punctuation from both texts
|
|
|
103 |
pronunciation_accuracy = match_ratio * 100
|
104 |
|
105 |
# Convert reference back to original script for feedback output
|
106 |
+
if script_choice == 'Uyghur Latin':
|
107 |
reference_text_clean = ug_arab_to_latn(reference_text_clean)
|
108 |
# Generate Markdown-compatible styled text
|
109 |
comparison_md = "<h4>Pronunciation Feedback</h4>\n" # Small header
|
|
|
113 |
out_segment = output_text_clean[j1:j2]
|
114 |
|
115 |
if opcode == 'equal': # Matching characters
|
116 |
+
comparison_md += f'<span style="color: green; font-size: 20px;">{ref_segment}</span>'
|
117 |
elif opcode in ['replace', 'delete', 'insert']: # Mismatched or missing
|
118 |
comparison_md += f'<span style="color: red; font-size: 20px;">{ref_segment}</span>'
|
119 |
comparison_md += "</div>"
|