Spaces:
Paused
Paused
rick
committed on
Amélioration des lectures TTS en mode traduction à sélections multiples.
Browse files
app.py
CHANGED
|
@@ -6,6 +6,7 @@ import tempfile
|
|
| 6 |
import base64
|
| 7 |
from pydub import AudioSegment
|
| 8 |
import os
|
|
|
|
| 9 |
|
| 10 |
# Configuration du client OpenAI avec la clé API
|
| 11 |
client = OpenAI(api_key=getenv("OPENAI_API_KEY"))
|
|
@@ -89,6 +90,10 @@ def language_detection(input_text, temperature=0.01):
|
|
| 89 |
)
|
| 90 |
return response.choices[0].message.content
|
| 91 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
# Fonction pour convertir du texte en parole
|
| 93 |
def text_to_speech(text):
|
| 94 |
response = client.audio.speech.create(
|
|
@@ -104,11 +109,35 @@ def text_to_speech(text):
|
|
| 104 |
# Lire le contenu du fichier audio
|
| 105 |
with open(temp_audio.name, "rb") as audio_file:
|
| 106 |
audio_bytes = audio_file.read()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
|
| 108 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
|
| 110 |
# Fonction pour traiter les messages de l'utilisateur et générer une réponse
|
| 111 |
-
def process_message(message,
|
|
|
|
|
|
|
| 112 |
payload_content = f'{operation_prompt} :\n\"\"\"\n{message}\n\"\"\"'
|
| 113 |
|
| 114 |
st.session_state.messages.append({"role": "user", "content": payload_content})
|
|
@@ -131,8 +160,10 @@ def process_message(message, operation_prompt="", tts_enabled=False):
|
|
| 131 |
st.session_state.messages.append({"role": "assistant", "content": full_response})
|
| 132 |
|
| 133 |
if tts_enabled:
|
| 134 |
-
tts_audio = text_to_speech(full_response)
|
| 135 |
-
|
|
|
|
|
|
|
| 136 |
|
| 137 |
|
| 138 |
# Classe pour stocker les prompts système globaux
|
|
@@ -234,6 +265,7 @@ def main():
|
|
| 234 |
if None == st.session_state.language_detected:
|
| 235 |
st.session_state.language_detected = language_detection(input_text=user_input, temperature=0.01)
|
| 236 |
|
|
|
|
| 237 |
for cursor_selected_lang in st.session_state.selected_languages:
|
| 238 |
st.session_state.target_language = cursor_selected_lang["iso-639-1"]
|
| 239 |
|
|
@@ -241,9 +273,17 @@ def main():
|
|
| 241 |
init_process_mode()
|
| 242 |
|
| 243 |
# Traitement du message de l'utilisateur pour la langue cible actuelle
|
| 244 |
-
process_message(user_input,
|
| 245 |
operation_prompt=f"{OP_PROMPT}",
|
| 246 |
tts_enabled=st.session_state.enable_tts_for_input_from_text_field)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 247 |
|
| 248 |
# #################################################################
|
| 249 |
# Affichage de l'historique des messages (sauf le message système)
|
|
|
|
| 6 |
import base64
|
| 7 |
from pydub import AudioSegment
|
| 8 |
import os
|
| 9 |
+
import io
|
| 10 |
|
| 11 |
# Configuration du client OpenAI avec la clé API
|
| 12 |
client = OpenAI(api_key=getenv("OPENAI_API_KEY"))
|
|
|
|
| 90 |
)
|
| 91 |
return response.choices[0].message.content
|
| 92 |
|
| 93 |
+
def get_duration_pydub(audio_file):
    """Return the duration, in seconds, of the given audio file.

    Args:
        audio_file: Value passed straight to ``AudioSegment.from_file``
            (the caller in this file passes a temporary file's path).

    Returns:
        The ``duration_seconds`` attribute of the decoded segment.
    """
    return AudioSegment.from_file(audio_file).duration_seconds
|
| 96 |
+
|
| 97 |
# Fonction pour convertir du texte en parole
|
| 98 |
def text_to_speech(text):
|
| 99 |
response = client.audio.speech.create(
|
|
|
|
| 109 |
# Lire le contenu du fichier audio
|
| 110 |
with open(temp_audio.name, "rb") as audio_file:
|
| 111 |
audio_bytes = audio_file.read()
|
| 112 |
+
|
| 113 |
+
# Lire la durée de l'audio en secondes
|
| 114 |
+
audio_duration = get_duration_pydub(temp_audio.name)
|
| 115 |
+
|
| 116 |
+
return audio_bytes, audio_duration
|
| 117 |
+
|
| 118 |
+
def concatenate_audio_files(audio_list, silence_ms=5000):
    """Join several MP3 clips into one MP3, with a pause after each clip.

    Args:
        audio_list: Iterable of ``(audio_bytes, duration)`` pairs, where
            ``audio_bytes`` is MP3-encoded data. The second element of
            each pair is ignored by this function.
        silence_ms: Length, in milliseconds, of the silence appended
            after every clip (including the last one). Defaults to 5000,
            i.e. 5 seconds, which was the previously hard-coded value.

    Returns:
        bytes: The concatenated audio, exported as MP3.
    """
    # Build the pause once; the same segment is reused after every clip.
    pause = AudioSegment.silent(duration=silence_ms)

    # Start from an empty segment and grow it clip by clip.
    combined = AudioSegment.empty()
    for clip_bytes, _ in audio_list:
        # Decode the in-memory MP3 bytes into an audio segment.
        clip = AudioSegment.from_mp3(io.BytesIO(clip_bytes))
        combined += clip + pause

    # Export the final segment to an in-memory buffer and return its bytes.
    out_buffer = io.BytesIO()
    combined.export(out_buffer, format="mp3")
    return out_buffer.getvalue()
|
| 136 |
|
| 137 |
# Fonction pour traiter les messages de l'utilisateur et générer une réponse
|
| 138 |
+
def process_message(message,
|
| 139 |
+
operation_prompt="",
|
| 140 |
+
tts_enabled=False):
|
| 141 |
payload_content = f'{operation_prompt} :\n\"\"\"\n{message}\n\"\"\"'
|
| 142 |
|
| 143 |
st.session_state.messages.append({"role": "user", "content": payload_content})
|
|
|
|
| 160 |
st.session_state.messages.append({"role": "assistant", "content": full_response})
|
| 161 |
|
| 162 |
if tts_enabled:
|
| 163 |
+
tts_audio, tts_duration = text_to_speech(full_response)
|
| 164 |
+
return tts_audio, tts_duration
|
| 165 |
+
return None, None
|
| 166 |
+
#st.audio(tts_audio, format="audio/mp3", autoplay=True)
|
| 167 |
|
| 168 |
|
| 169 |
# Classe pour stocker les prompts système globaux
|
|
|
|
| 265 |
if None == st.session_state.language_detected:
|
| 266 |
st.session_state.language_detected = language_detection(input_text=user_input, temperature=0.01)
|
| 267 |
|
| 268 |
+
audio_list = []
|
| 269 |
for cursor_selected_lang in st.session_state.selected_languages:
|
| 270 |
st.session_state.target_language = cursor_selected_lang["iso-639-1"]
|
| 271 |
|
|
|
|
| 273 |
init_process_mode()
|
| 274 |
|
| 275 |
# Traitement du message de l'utilisateur pour la langue cible actuelle
|
| 276 |
+
tts_audio, tts_duration = process_message(user_input,
|
| 277 |
operation_prompt=f"{OP_PROMPT}",
|
| 278 |
tts_enabled=st.session_state.enable_tts_for_input_from_text_field)
|
| 279 |
+
if tts_audio is not None:
|
| 280 |
+
audio_list.append((tts_audio, tts_duration))
|
| 281 |
+
|
| 282 |
+
if audio_list:
|
| 283 |
+
final_audio = concatenate_audio_files(audio_list)
|
| 284 |
+
st.audio(final_audio,
|
| 285 |
+
format="audio/mp3",
|
| 286 |
+
autoplay=True)
|
| 287 |
|
| 288 |
# #################################################################
|
| 289 |
# Affichage de l'historique des messages (sauf le message système)
|