rick committed on
Commit
dacaa94
·
unverified ·
1 Parent(s): 01f16b6

bug fixed now... not done...

Browse files
Files changed (1) hide show
  1. pages/main.py +175 -159
pages/main.py CHANGED
@@ -21,7 +21,9 @@ import streamlit as st
21
  from audiorecorder import audiorecorder
22
  from openai import OpenAI
23
  from pydub import AudioSegment
24
-
 
 
25
 
26
  __version__ = "1.2.4"
27
 
@@ -154,26 +156,33 @@ def split_audio(audio_file: str,
154
  print(f"Une erreur inattendue s'est produite : {e}")
155
  return []
156
 
157
- # Fonction modifiée pour transcrire l'audio en texte
158
- def transcribe_audio(audio_file: IO, language: Optional[str] = None) -> str:
159
  """
160
  Transcrit un fichier audio en texte.
161
 
162
  Args:
163
- audio_file (IO): Le fichier audio à transcrire.
164
  language (Optional[str]): La langue de l'audio. Par défaut None.
165
 
166
  Returns:
167
  str: Le texte transcrit.
168
  """
169
  max_size_mb = 25
170
- file_size_mb = os.path.getsize(audio_file.name) / (1024 * 1024)
171
 
172
  try:
173
  with st.status("Transcription de l'audio en cours...") as status:
 
 
 
 
 
 
 
 
174
  if file_size_mb > max_size_mb:
175
  status.update(label="Découpage de l'audio en segments...")
176
- segments = split_audio(audio_file.name, max_size_mb)
177
  full_transcript = ""
178
  for i, segment in enumerate(segments):
179
  status.update(label=f"Transcription du segment {i+1}/{len(segments)}...")
@@ -189,7 +198,7 @@ def transcribe_audio(audio_file: IO, language: Optional[str] = None) -> str:
189
  return full_transcript.strip()
190
  else:
191
  status.update(label="Transcription de l'audio...")
192
- with open(audio_file.name, "rb") as audio_file:
193
  transcript = client.audio.transcriptions.create(
194
  model="whisper-1",
195
  file=audio_file,
@@ -204,9 +213,8 @@ def transcribe_audio(audio_file: IO, language: Optional[str] = None) -> str:
204
  except Exception as e:
205
  st.error(f"Erreur lors de la transcription : {e}")
206
  return ""
207
- ## def transcribe_audio(audio_file: IO, language: Optional[str] = None) -> str:
208
 
209
- # Fonction pour détecter la langue d'un texte donné
210
  def detect_language(input_text: str, temperature: float = 0.01) -> str:
211
  """
212
  Détecte la langue d'un texte donné.
@@ -288,6 +296,11 @@ def text_to_speech(text: str) -> Tuple[Optional[bytes], float]:
288
  except Exception as e:
289
  st.error(f"Erreur lors de la conversion texte-parole : {str(e)}")
290
  return None, 0.0
 
 
 
 
 
291
 
292
  def get_duration_pydub(audio_file: str) -> float:
293
  """
@@ -569,6 +582,7 @@ def init_process_mode(
569
  return "", ""
570
 
571
 
 
572
  def main_page():
573
  """Page principale de l'application."""
574
 
@@ -719,185 +733,187 @@ def main_page():
719
  # ##
720
  audio_status.update(label=f"{get_translation('erreur_concatenation_audio')} : {str(e)}", state="error", expanded=True)
721
 
722
-
723
  # Interface utilisateur pour l'enregistrement audio
724
  # st.write(f"🗣️ {get_translation('enregistrez_message')}")
725
  elif st.session_state.audio:
726
  # Traitement de l'entrée audio de l'utilisateur
727
  if len(st.session_state.audio) > 0:
728
- with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
729
- st.session_state.audio.export(temp_audio.name, format="wav")
 
730
  st.write(f"Frame rate: {st.session_state.audio.frame_rate}, Frame width: {st.session_state.audio.frame_width}, Duration: {st.session_state.audio.duration_seconds} seconds")
731
 
732
  # Transcrire l'audio en texte
733
- st.session_state.transcription = transcribe_audio(temp_audio, language=st.session_state.language_detected)
734
- # Detecter la langue du texte transcrit (si la langue source n'est pas détectée)
735
- if st.session_state.language_detected is None:
736
- st.session_state.language_detected = detect_language(
737
- input_text=st.session_state.transcription, temperature=0.01
738
- )
739
- st.markdown(
740
- f"- {get_translation('langue_detectee')}".format(
741
- f"{convert_iso6391_to_language_name(st.session_state.language_detected)}"
742
- )
743
- )
744
 
 
 
 
 
 
 
745
  st.markdown(
746
- f"🎤 {get_translation('transcription_audio')}".format(
747
- f"{st.session_state.transcription}"
748
  )
749
  )
750
 
751
-
752
- st.session_state.audio_list = []
753
- for cursor_selected_lang in st.session_state.selected_languages:
754
- st.session_state.target_language = cursor_selected_lang["iso-639-1"]
755
- st.session_state.full_response = ""
756
-
757
- # Initialisation du mode de traitement pour la langue cible actuelle
758
- st.session_state.system_prompt, st.session_state.operation_prompt = init_process_mode(from_lang=
759
- (
760
- st.session_state.language_detected if "language_detected" in st.session_state.language_detected else convert_language_name_to_iso6391(
761
- st.session_state.interface_language
762
- )
763
- ),
764
- to_lang=st.session_state.target_language
765
- )
766
 
767
- with st.chat_message("assistant", avatar="👻"):
768
- message_placeholder = st.empty()
769
- st.session_state.response_generator = process_message(
770
- st.session_state.transcription,
771
- st.session_state.operation_prompt,
772
- st.session_state.enable_tts_for_input_from_audio_record,
773
- st.session_state.system_prompt
 
 
 
 
774
  )
 
 
 
775
 
776
- for response_chunk in st.session_state.response_generator:
777
- message_placeholder.markdown(response_chunk)
778
- st.session_state.end_response = st.session_state.response_generator.close()
779
- if st.session_state.full_response != "":
780
- message_placeholder.markdown(st.session_state.full_response)
 
 
 
781
 
782
- if st.session_state.enable_tts_for_input_from_audio_record:
783
- st.session_state.tts_audio, st.session_state.tts_duration = process_tts_message(st.session_state.full_response)
784
-
785
- if st.session_state.tts_audio:
786
- st.session_state.audio_list.append(
787
- ( st.session_state.tts_audio,
788
- st.session_state.tts_duration )
789
- )
790
- else:
791
- pass
792
 
793
- if st.session_state.audio_list:
794
- st.session_state.final_audio = concatenate_audio_files(st.session_state.audio_list)
795
-
796
- with st.container(border=True):
797
-
798
- # Générer un nom de fichier unique
799
- st.session_state.timestamp = time.strftime("%Y%m%d-%H%M%S")
800
- st.session_state.langues = "_".join([lang["iso-639-1"] for lang in st.session_state.selected_languages])
801
- st.session_state.nom_fichier = f"reponse_audio_{st.session_state.langues}_{st.session_state.timestamp}.mp3"
802
-
803
- st.audio(st.session_state.final_audio, format="audio/mp3", autoplay=st.session_state.autoplay_tts)
804
-
805
- st.download_button(
806
- label=f"📥 {get_translation('telecharger_audio')}",
807
- data=st.session_state.final_audio,
808
- file_name=st.session_state.nom_fichier,
809
- mime="audio/mp3",
810
- use_container_width=True,
811
- type="primary",
812
- key=f"download_button_{st.session_state.langues}_{st.session_state.timestamp}",
813
- )
814
 
815
- def clear_inputs_garbages(sessions_state_list: Optional[list] =
816
- [ 'transcription', 'operation_prompt', 'system_prompt',
817
- 'audio_list', 'full_response', 'tts_audio',
818
- 'tts_duration', 'timestamp', 'langues',
819
- 'nom_fichier', 'final_audio', 'response_generator',
820
- 'end_response', 'messages', 'audio', 'user_input' ]
821
- ):
822
- def delete_session_state_var(var_name: str):
823
- if f"{var_name}" in st.session_state:
824
- del st.session_state[f"{var_name}"]
 
 
 
 
 
 
 
 
 
 
 
825
 
826
- for it_var_name in sessions_state_list:
827
- delete_session_state_var(it_var_name)
 
 
 
 
 
 
 
 
828
 
 
 
829
 
830
- clear_inputs_garbages()
831
 
832
- def on_languages_change() -> None:
833
- clear_inputs_garbages()
834
- """Fonction de rappel pour le changement de langue(s) de destination."""
835
- selected_language_names: List[str] = st.session_state.language_selector
836
- st.session_state.selected_languages = [
837
- {"language": lang, "iso-639-1": convert_language_name_to_iso6391(lang)}
838
- for lang in selected_language_names
839
- ]
840
-
841
 
 
 
 
 
 
 
 
 
 
842
 
843
- # Configuration de la barre latérale
844
- with st.sidebar:
845
- st.logo("img/logo_2.png", icon_image="img/logo_2.png")
846
- st.header(get_translation("sidebar_titre"))
847
 
848
- with st.expander(f"{get_translation('a_propos')}",
849
- expanded=False,
850
- icon="ℹ️"):
851
- st.subheader(f"version: {__version__}")
852
- st.info(get_translation("info_app"))
853
-
854
 
 
 
 
 
 
 
855
 
856
- with st.expander(f"{get_translation('selection_langue')}",
857
- expanded=True,
858
- icon="🌐"):
859
- # Conteneur pour la sélection de langue
860
-
861
- # Sélection multiple des langues de destination
862
- st.multiselect(
863
- label=get_translation("langues_destination"),
864
- placeholder=get_translation("placeholder_langues"),
865
- options=SUPPORTED_LANGUAGES,
866
- default=["English"],
867
- key="language_selector",
868
- max_selections=4,
869
- on_change=on_languages_change,
870
- format_func=lambda lang: f"{LANGUAGES_EMOJI.get(lang, '')} {lang}"
871
- )
872
 
873
- with st.expander(f"{get_translation('parametres_tts')}",
874
- expanded=True,
875
- icon="🔊"):
876
- st.selectbox(
877
- get_translation("choix_voix_tts"),
878
- options=["alloy", "echo", "fable", "onyx", "nova", "shimmer"],
879
- index=3, # "onyx" est à l'index 3
880
- key="tts_voice",
881
- on_change=clear_inputs_garbages
882
- )
883
- st.checkbox(
884
- get_translation("activer_tts_texte"),
885
- key="enable_tts_for_input_from_text_field",
886
- value=True,
887
- on_change=clear_inputs_garbages
888
- )
889
- st.checkbox(
890
- get_translation("activer_tts_audio"),
891
- key="enable_tts_for_input_from_audio_record",
892
- value=True,
893
- on_change=clear_inputs_garbages
894
- )
895
- st.checkbox(
896
- get_translation("lecture_auto_tts"),
897
- key="autoplay_tts",
898
- value=True,
899
- on_change=clear_inputs_garbages
900
- )
901
 
902
 
903
 
 
21
  from audiorecorder import audiorecorder
22
  from openai import OpenAI
23
  from pydub import AudioSegment
24
+ import warnings
25
+ # Ignore DeprecationWarning
26
+ warnings.filterwarnings("ignore", category=DeprecationWarning)
27
 
28
  __version__ = "1.2.4"
29
 
 
156
  print(f"Une erreur inattendue s'est produite : {e}")
157
  return []
158
 
159
+
160
+ def transcribe_audio(audio_file: Union[str, IO], language: Optional[str] = None) -> str:
161
  """
162
  Transcrit un fichier audio en texte.
163
 
164
  Args:
165
+ audio_file (Union[str, IO]): Le chemin du fichier audio ou un objet fichier ouvert.
166
  language (Optional[str]): La langue de l'audio. Par défaut None.
167
 
168
  Returns:
169
  str: Le texte transcrit.
170
  """
171
  max_size_mb = 25
 
172
 
173
  try:
174
  with st.status("Transcription de l'audio en cours...") as status:
175
+ # Si audio_file est une chaîne, on l'ouvre comme un fichier
176
+ if isinstance(audio_file, str):
177
+ file_size_mb = os.path.getsize(audio_file) / (1024 * 1024)
178
+ audio_file_path = audio_file
179
+ else:
180
+ file_size_mb = os.path.getsize(audio_file.name) / (1024 * 1024)
181
+ audio_file_path = audio_file.name
182
+
183
  if file_size_mb > max_size_mb:
184
  status.update(label="Découpage de l'audio en segments...")
185
+ segments = split_audio(audio_file_path, max_size_mb)
186
  full_transcript = ""
187
  for i, segment in enumerate(segments):
188
  status.update(label=f"Transcription du segment {i+1}/{len(segments)}...")
 
198
  return full_transcript.strip()
199
  else:
200
  status.update(label="Transcription de l'audio...")
201
+ with open(audio_file_path, "rb") as audio_file:
202
  transcript = client.audio.transcriptions.create(
203
  model="whisper-1",
204
  file=audio_file,
 
213
  except Exception as e:
214
  st.error(f"Erreur lors de la transcription : {e}")
215
  return ""
 
216
 
217
+
218
  def detect_language(input_text: str, temperature: float = 0.01) -> str:
219
  """
220
  Détecte la langue d'un texte donné.
 
296
  except Exception as e:
297
  st.error(f"Erreur lors de la conversion texte-parole : {str(e)}")
298
  return None, 0.0
299
+ finally:
300
+ if temp_audio_path and os.path.exists(temp_audio_path):
301
+ os.remove(temp_audio_path)
302
+ if temp_dir and os.path.exists(temp_dir):
303
+ os.rmdir(temp_dir)
304
 
305
  def get_duration_pydub(audio_file: str) -> float:
306
  """
 
582
  return "", ""
583
 
584
 
585
+
586
  def main_page():
587
  """Page principale de l'application."""
588
 
 
733
  # ##
734
  audio_status.update(label=f"{get_translation('erreur_concatenation_audio')} : {str(e)}", state="error", expanded=True)
735
 
736
+
737
  # Interface utilisateur pour l'enregistrement audio
738
  # st.write(f"🗣️ {get_translation('enregistrez_message')}")
739
  elif st.session_state.audio:
740
  # Traitement de l'entrée audio de l'utilisateur
741
  if len(st.session_state.audio) > 0:
742
+ with tempfile.TemporaryDirectory() as temp_dir:
743
+ temp_audio_path = os.path.join(temp_dir, "temp_audio.wav")
744
+ st.session_state.audio.export(temp_audio_path, format="wav")
745
  st.write(f"Frame rate: {st.session_state.audio.frame_rate}, Frame width: {st.session_state.audio.frame_width}, Duration: {st.session_state.audio.duration_seconds} seconds")
746
 
747
  # Transcrire l'audio en texte
748
+ st.session_state.transcription = transcribe_audio(temp_audio_path, language=st.session_state.language_detected)
 
 
 
 
 
 
 
 
 
 
749
 
750
+ #os.remove(temp_audio_path)
751
+ # Detecter la langue du texte transcrit (si la langue source n'est pas détectée)
752
+ if st.session_state.language_detected is None:
753
+ st.session_state.language_detected = detect_language(
754
+ input_text=st.session_state.transcription, temperature=0.01
755
+ )
756
  st.markdown(
757
+ f"- {get_translation('langue_detectee')}".format(
758
+ f"{convert_iso6391_to_language_name(st.session_state.language_detected)}"
759
  )
760
  )
761
 
762
+ st.markdown(
763
+ f"🎤 {get_translation('transcription_audio')}".format(
764
+ f"{st.session_state.transcription}"
765
+ )
766
+ )
 
 
 
 
 
 
 
 
 
 
767
 
768
+
769
+ st.session_state.audio_list = []
770
+ for cursor_selected_lang in st.session_state.selected_languages:
771
+ st.session_state.target_language = cursor_selected_lang["iso-639-1"]
772
+ st.session_state.full_response = ""
773
+
774
+ # Initialisation du mode de traitement pour la langue cible actuelle
775
+ st.session_state.system_prompt, st.session_state.operation_prompt = init_process_mode(from_lang=
776
+ (
777
+ st.session_state.language_detected if "language_detected" in st.session_state.language_detected else convert_language_name_to_iso6391(
778
+ st.session_state.interface_language
779
  )
780
+ ),
781
+ to_lang=st.session_state.target_language
782
+ )
783
 
784
+ with st.chat_message("assistant", avatar="👻"):
785
+ message_placeholder = st.empty()
786
+ st.session_state.response_generator = process_message(
787
+ st.session_state.transcription,
788
+ st.session_state.operation_prompt,
789
+ st.session_state.enable_tts_for_input_from_audio_record,
790
+ st.session_state.system_prompt
791
+ )
792
 
793
+ for response_chunk in st.session_state.response_generator:
794
+ message_placeholder.markdown(response_chunk)
795
+ st.session_state.end_response = st.session_state.response_generator.close()
796
+ if st.session_state.full_response != "":
797
+ message_placeholder.markdown(st.session_state.full_response)
 
 
 
 
 
798
 
799
+ if st.session_state.enable_tts_for_input_from_audio_record:
800
+ st.session_state.tts_audio, st.session_state.tts_duration = process_tts_message(st.session_state.full_response)
801
+
802
+ if st.session_state.tts_audio:
803
+ st.session_state.audio_list.append(
804
+ ( st.session_state.tts_audio,
805
+ st.session_state.tts_duration )
806
+ )
807
+ else:
808
+ pass
 
 
 
 
 
 
 
 
 
 
 
809
 
810
+ if st.session_state.audio_list:
811
+ st.session_state.final_audio = concatenate_audio_files(st.session_state.audio_list)
812
+
813
+ with st.container(border=True):
814
+
815
+ # Générer un nom de fichier unique
816
+ st.session_state.timestamp = time.strftime("%Y%m%d-%H%M%S")
817
+ st.session_state.langues = "_".join([lang["iso-639-1"] for lang in st.session_state.selected_languages])
818
+ st.session_state.nom_fichier = f"reponse_audio_{st.session_state.langues}_{st.session_state.timestamp}.mp3"
819
+
820
+ st.audio(st.session_state.final_audio, format="audio/mp3", autoplay=st.session_state.autoplay_tts)
821
+
822
+ st.download_button(
823
+ label=f"📥 {get_translation('telecharger_audio')}",
824
+ data=st.session_state.final_audio,
825
+ file_name=st.session_state.nom_fichier,
826
+ mime="audio/mp3",
827
+ use_container_width=True,
828
+ type="primary",
829
+ key=f"download_button_{st.session_state.langues}_{st.session_state.timestamp}",
830
+ )
831
 
832
+ def clear_inputs_garbages(sessions_state_list: Optional[list] =
833
+ [ 'transcription', 'operation_prompt', 'system_prompt',
834
+ 'audio_list', 'full_response', 'tts_audio',
835
+ 'tts_duration', 'timestamp', 'langues',
836
+ 'nom_fichier', 'final_audio', 'response_generator',
837
+ 'end_response', 'messages', 'audio', 'user_input' ]
838
+ ):
839
+ def delete_session_state_var(var_name: str):
840
+ if f"{var_name}" in st.session_state:
841
+ del st.session_state[f"{var_name}"]
842
 
843
+ for it_var_name in sessions_state_list:
844
+ delete_session_state_var(it_var_name)
845
 
 
846
 
847
+ clear_inputs_garbages()
 
 
 
 
 
 
 
 
848
 
849
+ def on_languages_change() -> None:
850
+ clear_inputs_garbages()
851
+ """Fonction de rappel pour le changement de langue(s) de destination."""
852
+ selected_language_names: List[str] = st.session_state.language_selector
853
+ st.session_state.selected_languages = [
854
+ {"language": lang, "iso-639-1": convert_language_name_to_iso6391(lang)}
855
+ for lang in selected_language_names
856
+ ]
857
+
858
 
 
 
 
 
859
 
860
+ # Configuration de la barre latérale
861
+ with st.sidebar:
862
+ st.logo("img/logo_2.png", icon_image="img/logo_2.png")
863
+ st.header(get_translation("sidebar_titre"))
 
 
864
 
865
+ with st.expander(f"{get_translation('a_propos')}",
866
+ expanded=False,
867
+ icon="ℹ️"):
868
+ st.subheader(f"version: {__version__}")
869
+ st.info(get_translation("info_app"))
870
+
871
 
872
+ with st.expander(f"{get_translation('selection_langue')}",
873
+ expanded=True,
874
+ icon="🌐"):
875
+ # Conteneur pour la sélection de langue
876
+
877
+ # Sélection multiple des langues de destination
878
+ st.multiselect(
879
+ label=get_translation("langues_destination"),
880
+ placeholder=get_translation("placeholder_langues"),
881
+ options=SUPPORTED_LANGUAGES,
882
+ default=["English"],
883
+ key="language_selector",
884
+ max_selections=4,
885
+ on_change=on_languages_change,
886
+ format_func=lambda lang: f"{LANGUAGES_EMOJI.get(lang, '')} {lang}"
887
+ )
888
 
889
+ with st.expander(f"{get_translation('parametres_tts')}",
890
+ expanded=True,
891
+ icon="🔊"):
892
+ st.selectbox(
893
+ get_translation("choix_voix_tts"),
894
+ options=["alloy", "echo", "fable", "onyx", "nova", "shimmer"],
895
+ index=3, # "onyx" est à l'index 3
896
+ key="tts_voice",
897
+ on_change=clear_inputs_garbages
898
+ )
899
+ st.checkbox(
900
+ get_translation("activer_tts_texte"),
901
+ key="enable_tts_for_input_from_text_field",
902
+ value=True,
903
+ on_change=clear_inputs_garbages
904
+ )
905
+ st.checkbox(
906
+ get_translation("activer_tts_audio"),
907
+ key="enable_tts_for_input_from_audio_record",
908
+ value=True,
909
+ on_change=clear_inputs_garbages
910
+ )
911
+ st.checkbox(
912
+ get_translation("lecture_auto_tts"),
913
+ key="autoplay_tts",
914
+ value=True,
915
+ on_change=clear_inputs_garbages
916
+ )
917
 
918
 
919