ANYANTUDRE
committed on
Commit
·
f0dafb8
1
Parent(s):
5966f2d
fixed small bugs
Browse files
- app.py +7 -4
- goai_helpers/goai_stt2.py +1 -1
- goai_helpers/goai_tts.py +1 -1
- goai_helpers/goai_ttt_tts_pipeline.py +1 -1
app.py
CHANGED
@@ -11,7 +11,7 @@ login(token=auth_token)
|
|
11 |
|
12 |
# list all files in the ./audios directory for the dropdown
|
13 |
AUDIO_FILES = [f for f in os.listdir('./exples_voix') if os.path.isfile(os.path.join('./exples_voix', f))]
|
14 |
-
MODELES_TTS = ["ArissBandoss/coqui-tts-moore-V1", "ArissBandoss/mms-tts-mos-
|
15 |
MODELES_ASR = ["ArissBandoss/whisper-small-mos", "openai/whisper-large-v3-turbo"]
|
16 |
LANGUAGES = ["Automatic Detection"]
|
17 |
|
@@ -113,7 +113,7 @@ goai_ttt_tts_pipeline_if = gr.Interface(
|
|
113 |
["Ils achetèrent des troupeaux, firent construire des cases, parcoururent tout le pays pour offrir à leur mère et à leurs femmes les plus beaux bijoux, les plus belles étoffes.", "exple_voix_feminine.wav", None]
|
114 |
],
|
115 |
cache_examples=False,
|
116 |
-
title="
|
117 |
description=DESCRIPTION,
|
118 |
)
|
119 |
|
@@ -137,14 +137,17 @@ goai_stt_ttt_pipeline_if = gr.Interface(
|
|
137 |
gr.Slider(label="Chunk Length (s)", minimum=1, maximum=60, value=17.5, step=0.1),
|
138 |
gr.Slider(label="Stride Length (s)", minimum=1, maximum=30, value=1, step=0.1),
|
139 |
],
|
140 |
-
outputs=[
|
|
|
|
|
|
|
141 |
examples=[["./audios/example1.mp3", "a ye ligdi"],
|
142 |
["./audios/example2.mp3", "zoe nimbãanega"],
|
143 |
["./audios/example3.mp3", "zãng-zãnga"],
|
144 |
["./audios/example4.mp3", "yõk foto"]
|
145 |
],
|
146 |
cache_examples=False,
|
147 |
-
title="Mooré ASR",
|
148 |
description=DESCRIPTION,
|
149 |
flagging_mode="auto",
|
150 |
)
|
|
|
11 |
|
12 |
# list all files in the ./audios directory for the dropdown
|
13 |
AUDIO_FILES = [f for f in os.listdir('./exples_voix') if os.path.isfile(os.path.join('./exples_voix', f))]
|
14 |
+
MODELES_TTS = ["ArissBandoss/coqui-tts-moore-V1", "ArissBandoss/mms-tts-mos-male-17-V5"]
|
15 |
MODELES_ASR = ["ArissBandoss/whisper-small-mos", "openai/whisper-large-v3-turbo"]
|
16 |
LANGUAGES = ["Automatic Detection"]
|
17 |
|
|
|
113 |
["Ils achetèrent des troupeaux, firent construire des cases, parcoururent tout le pays pour offrir à leur mère et à leurs femmes les plus beaux bijoux, les plus belles étoffes.", "exple_voix_feminine.wav", None]
|
114 |
],
|
115 |
cache_examples=False,
|
116 |
+
title="Mooré TTS & Traduction",
|
117 |
description=DESCRIPTION,
|
118 |
)
|
119 |
|
|
|
137 |
gr.Slider(label="Chunk Length (s)", minimum=1, maximum=60, value=17.5, step=0.1),
|
138 |
gr.Slider(label="Stride Length (s)", minimum=1, maximum=30, value=1, step=0.1),
|
139 |
],
|
140 |
+
outputs=[
|
141 |
+
gr.Textbox(label="Texte Mooré"),
|
142 |
+
gr.Textbox(label="Texte Francais"),
|
143 |
+
],
|
144 |
examples=[["./audios/example1.mp3", "a ye ligdi"],
|
145 |
["./audios/example2.mp3", "zoe nimbãanega"],
|
146 |
["./audios/example3.mp3", "zãng-zãnga"],
|
147 |
["./audios/example4.mp3", "yõk foto"]
|
148 |
],
|
149 |
cache_examples=False,
|
150 |
+
title="Mooré ASR & Traduction",
|
151 |
description=DESCRIPTION,
|
152 |
flagging_mode="auto",
|
153 |
)
|
goai_helpers/goai_stt2.py
CHANGED
@@ -51,7 +51,7 @@ def transcribe(
|
|
51 |
if forced_decoder_ids:
|
52 |
generate_kwargs["forced_decoder_ids"] = forced_decoder_ids
|
53 |
|
54 |
-
output = pipe(inputs, batch_size=batch_size, **generate_kwargs)
|
55 |
|
56 |
transcription_text = output['text']
|
57 |
|
|
|
51 |
if forced_decoder_ids:
|
52 |
generate_kwargs["forced_decoder_ids"] = forced_decoder_ids
|
53 |
|
54 |
+
output = pipe(inputs, batch_size=batch_size, padding=True, truncation=True, **generate_kwargs)
|
55 |
|
56 |
transcription_text = output['text']
|
57 |
|
goai_helpers/goai_tts.py
CHANGED
@@ -32,7 +32,7 @@ def goai_tts(texte):
|
|
32 |
start_time = time.time()
|
33 |
|
34 |
# Charger le modèle TTS avec le token d'authentification
|
35 |
-
model_id = "ArissBandoss/mms-tts-mos-
|
36 |
synthesiser = pipeline("text-to-speech", model_id, device=device)
|
37 |
|
38 |
# Inférence
|
|
|
32 |
start_time = time.time()
|
33 |
|
34 |
# Charger le modèle TTS avec le token d'authentification
|
35 |
+
model_id = "ArissBandoss/mms-tts-mos-male-17-V5"
|
36 |
synthesiser = pipeline("text-to-speech", model_id, device=device)
|
37 |
|
38 |
# Inférence
|
goai_helpers/goai_ttt_tts_pipeline.py
CHANGED
@@ -34,7 +34,7 @@ def goai_many_tts(
|
|
34 |
|
35 |
return sampling_rate, audio_array.numpy()
|
36 |
|
37 |
-
elif tts_model == "ArissBandoss/mms-tts-mos-
|
38 |
sample_rate, audio_data = goai_tts(text)
|
39 |
return sample_rate, audio_data
|
40 |
|
|
|
34 |
|
35 |
return sampling_rate, audio_array.numpy()
|
36 |
|
37 |
+
elif tts_model == "ArissBandoss/mms-tts-mos-male-17-V5":
|
38 |
sample_rate, audio_data = goai_tts(text)
|
39 |
return sample_rate, audio_data
|
40 |
|