Irpan committed on
Commit
9db718b
·
1 Parent(s): 422a095
Files changed (2) hide show
  1. app.py +71 -41
  2. util.py +3 -3
app.py CHANGED
@@ -3,64 +3,94 @@ import asr
3
  import tts
4
  import util
5
 
6
- mms_transcribe = gr.Interface(
7
- fn=asr.transcribe,
8
- inputs=[
9
- gr.Audio(
 
10
  label="Record or Upload Uyghur Audio",
11
  sources=["microphone", "upload"],
12
  type="filepath",
13
- ),
14
- gr.Dropdown(
15
  choices=[model for model in asr.models_info],
16
  label="Select a Model",
17
  value="ixxan/wav2vec2-large-mms-1b-uyghur-latin",
18
  interactive=True
19
- ),
20
- ],
21
- outputs=[
22
- gr.Textbox(label="Uyghur Arabic Transcription"),
23
- gr.Textbox(label="Uyghur Latin Transcription"),
24
- ],
25
- examples=util.asr_examples,
26
- title="Speech-To-Text",
27
- description=(
28
- "Transcribe Uyghur speech audio from a microphone or input file."
29
- ),
30
- allow_flagging="never",
31
- )
 
 
 
32
 
33
- mms_synthesize = gr.Interface(
34
- fn=tts.synthesize,
35
- inputs=[
36
- gr.Text(label="Input text"),
37
- gr.Dropdown(
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  choices=[model for model in tts.models_info],
39
  label="Select a Model",
40
  value="Meta-MMS",
41
  interactive=True
42
  )
43
- ],
44
- outputs=[
45
- gr.Audio(label="Generated Audio"),
46
- ],
47
- examples=util.tts_examples,
48
- title="Text-To-Speech",
49
- description=(
50
- "Generate audio from input Uyghur text."
51
- ),
52
- allow_flagging="never",
53
- )
 
 
 
54
 
55
- tabbed_interface = gr.TabbedInterface(
56
- [mms_transcribe, mms_synthesize],
57
- ["Speech-To-Text", "Text-To-Speech"],
58
- )
 
 
 
 
 
 
 
59
 
 
60
  with gr.Blocks() as demo:
61
- gr.Markdown("Comparision of STT and TTS models for Uyghur language.")
62
- tabbed_interface.render()
 
63
 
 
64
  if __name__ == "__main__":
65
  demo.queue()
66
  demo.launch()
 
3
  import tts
4
  import util
5
 
6
+ # Define the Speech-to-Text tab
7
+ with gr.Blocks() as mms_transcribe:
8
+ gr.Markdown("### Speech-To-Text")
9
+ with gr.Row():
10
+ audio_input = gr.Audio(
11
  label="Record or Upload Uyghur Audio",
12
  sources=["microphone", "upload"],
13
  type="filepath",
14
+ )
15
+ model_selection_stt = gr.Dropdown(
16
  choices=[model for model in asr.models_info],
17
  label="Select a Model",
18
  value="ixxan/wav2vec2-large-mms-1b-uyghur-latin",
19
  interactive=True
20
+ )
21
+ with gr.Row():
22
+ arabic_output = gr.Textbox(label="Uyghur Arabic Transcription", interactive=False)
23
+ latin_output = gr.Textbox(label="Uyghur Latin Transcription", interactive=False)
24
+ with gr.Row():
25
+ stt_submit_btn = gr.Button("Submit")
26
+ stt_clear_btn = gr.Button("Clear")
27
+
28
+ # Example button to load examples
29
+ with gr.Row():
30
+ stt_examples = gr.Examples(
31
+ examples=util.asr_examples,
32
+ inputs=[audio_input, model_selection_stt],
33
+ outputs=[arabic_output, latin_output],
34
+ label="Examples"
35
+ )
36
 
37
+ # Define button functionality
38
+ stt_submit_btn.click(
39
+ asr.transcribe,
40
+ inputs=[audio_input, model_selection_stt],
41
+ outputs=[arabic_output, latin_output]
42
+ )
43
+ stt_clear_btn.click(
44
+ lambda: (None, None, None), # Clear inputs and outputs
45
+ inputs=[],
46
+ outputs=[audio_input, arabic_output, latin_output]
47
+ )
48
+
49
+ # Define the Text-to-Speech tab
50
+ with gr.Blocks() as mms_synthesize:
51
+ gr.Markdown("### Text-To-Speech")
52
+ with gr.Row():
53
+ input_text = gr.Text(label="Input text")
54
+ model_selection_tts = gr.Dropdown(
55
  choices=[model for model in tts.models_info],
56
  label="Select a Model",
57
  value="Meta-MMS",
58
  interactive=True
59
  )
60
+ with gr.Row():
61
+ generated_audio = gr.Audio(label="Generated Audio", interactive=False)
62
+ with gr.Row():
63
+ tts_submit_btn = gr.Button("Submit")
64
+ tts_clear_btn = gr.Button("Clear")
65
+
66
+ # Example button to load examples
67
+ with gr.Row():
68
+ tts_examples = gr.Examples(
69
+ examples=util.tts_examples,
70
+ inputs=[input_text, model_selection_tts],
71
+ outputs=[generated_audio],
72
+ label="Examples"
73
+ )
74
 
75
+ # Define button functionality
76
+ tts_submit_btn.click(
77
+ tts.synthesize,
78
+ inputs=[input_text, model_selection_tts],
79
+ outputs=[generated_audio]
80
+ )
81
+ tts_clear_btn.click(
82
+ lambda: (None, None), # Clear inputs and outputs
83
+ inputs=[],
84
+ outputs=[input_text, generated_audio]
85
+ )
86
 
87
+ # Combine tabs into a Tabbed Interface
88
  with gr.Blocks() as demo:
89
+ gr.Markdown("### Uyghur Language Tools: STT and TTS")
90
+ with gr.TabbedInterface([mms_transcribe, mms_synthesize], ["Speech-To-Text", "Text-To-Speech"]):
91
+ pass
92
 
93
+ # Run the app
94
  if __name__ == "__main__":
95
  demo.queue()
96
  demo.launch()
util.py CHANGED
@@ -8,10 +8,10 @@ asr_examples = [['examples/1.wav', 'ixxan/wav2vec2-large-mms-1b-uyghur-latin'],
8
  ['examples/2.wav', 'ixxan/wav2vec2-large-mms-1b-uyghur-latin']]
9
 
10
  tts_examples = [
11
- ["مەكتەپكە بارغاندا تېخىمۇ بىلىملىك بولۇمەن.", "Meta-MMS"],
12
  ["قىلىچ قان تامغۇزسا، بەگ ئەل ئالىدۇ؛ قەلەمدىن سىياھتانسا، ئالتۇن كېلىدۇ.", "Meta-MMS"],
13
- ["مېنىڭ قەلبىمنى كەڭ قىلغىن", "Meta-MMS"],
14
  ["Bu putbol musabiqisining axirlishishi", "Meta-MMS"],
 
15
  ["Yaxshimusiz?", "Meta-MMS"],
16
- ["Rehmet sizge!", "Meta-MMS"]
17
  ]
 
8
  ['examples/2.wav', 'ixxan/wav2vec2-large-mms-1b-uyghur-latin']]
9
 
10
  tts_examples = [
 
11
  ["قىلىچ قان تامغۇزسا، بەگ ئەل ئالىدۇ؛ قەلەمدىن سىياھتانسا، ئالتۇن كېلىدۇ.", "Meta-MMS"],
12
+ ["قىلىچ قان تامغۇزسا، بەگ ئەل ئالىدۇ؛ قەلەمدىن سىياھتانسا، ئالتۇن كېلىدۇ.", "IS2AI-TurkicTTS"],
13
  ["Bu putbol musabiqisining axirlishishi", "Meta-MMS"],
14
+ ["Bu putbol musabiqisining axirlishishi", "IS2AI-TurkicTTS"],
15
  ["Yaxshimusiz?", "Meta-MMS"],
16
+ ["Yaxshimusiz?", "IS2AI-TurkicTTS"]
17
  ]