Demosthene-OR committed
Commit a545669
1 Parent(s): d3cbfff

Spoken-language identification using Whisper only

Files changed (1)
  1. tabs/modelisation_seq2seq_tab.py +20 -13
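
The change drops the speech_recognition / Google Speech path and relies on Whisper alone to both identify the spoken language and transcribe it. As background, a minimal sketch of language identification with the stock openai-whisper package, following its documented API (the model size and file name below are placeholders, not values from this repo):

import whisper

model = whisper.load_model("base")  # placeholder model size

# load_audio decodes and resamples to the 16 kHz mono float32 signal Whisper expects;
# pad_or_trim fits it to the model's 30-second analysis window
audio = whisper.pad_or_trim(whisper.load_audio("sample.wav"))

# build a log-Mel spectrogram and ask the model which language it hears
mel = whisper.log_mel_spectrogram(audio).to(model.device)
_, probs = model.detect_language(mel)
print("Detected language:", max(probs, key=probs.get))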
tabs/modelisation_seq2seq_tab.py CHANGED
@@ -7,7 +7,7 @@ from transformers import pipeline
 # from translate import Translator
 from deep_translator import GoogleTranslator
 from audio_recorder_streamlit import audio_recorder
-import speech_recognition as sr
+# import speech_recognition as sr
 import whisper
 import io
 import wavio
@@ -488,32 +488,39 @@ def run():
         st.write("## **"+tr("Résultats")+" :**\n")
         st.audio(audio_bytes, format="audio/wav")
         try:
-            if detection:
-                # Create a BytesIO object from the audio stream
-                audio_stream_bytesio = io.BytesIO(audio_bytes)
+            # if detection:
+            # Create a BytesIO object from the audio stream
+            audio_stream_bytesio = io.BytesIO(audio_bytes)
 
-                # Read the WAV stream using wavio
-                wav = wavio.read(audio_stream_bytesio)
+            # Read the WAV stream using wavio
+            wav = wavio.read(audio_stream_bytesio)
 
-                # Extract the audio data from the wavio.Wav object
-                audio_data = wav.data
+            # Extract the audio data from the wavio.Wav object
+            audio_data = wav.data
 
-                # Convert the audio data to a NumPy array
-                audio_input = np.array(audio_data, dtype=np.float32)
-                audio_input = np.mean(audio_input, axis=1)/32768
-
+            # Convert the audio data to a NumPy array
+            audio_input = np.array(audio_data, dtype=np.float32)
+            audio_input = np.mean(audio_input, axis=1)/32768
+            if detection:
                 result = model_speech.transcribe(audio_input)
                 st.write(tr("Langue détectée")+" : "+result["language"])
                 Lang_detected = result["language"]
                 # Transcription Whisper (si result a été préalablement calculé)
                 custom_sentence = result["text"]
+            else:
+                Lang_detected = l_src
+                model_speech.set_language(Lang_detected)
+                result = model_speech.transcribe(audio_input)
+                custom_sentence = result["text"]
+                model_speech.reset_language()
+            """
             else:
                 Lang_detected = l_src
                 # Transcription google
                 audio_stream = sr.AudioData(audio_bytes, 32000, 2)
                 r = sr.Recognizer()
                 custom_sentence = r.recognize_google(audio_stream, language = Lang_detected)
-
+            """
             if custom_sentence!="":
                 # Lang_detected = lang_classifier (custom_sentence)[0]['label']
                 #st.write('Langue détectée : **'+Lang_detected+'**')
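
Note that set_language() / reset_language() are not methods of a stock openai-whisper model object (which exposes transcribe(), decode(), detect_language(), ...), so model_speech is presumably a project-specific wrapper here. With the plain package, the forced-language branch can be sketched by passing the language code directly to transcribe(). Assumptions in this sketch: audio_input is 16 kHz mono float32 in [-1, 1] (transcribe() presumes that rate when handed a NumPy array, so the recorder's sample rate matters) and l_src holds a code such as "fr":

import numpy as np
import whisper

model = whisper.load_model("base")  # placeholder model size

def speech_to_text(audio_input: np.ndarray, detection: bool, l_src: str):
    # With language=None, transcribe() runs Whisper's own language identification;
    # with an explicit code, detection is skipped and decoding is forced to that language.
    result = model.transcribe(audio_input, language=None if detection else l_src)
    return result["language"], result["text"]

# Example, mirroring the two branches in the diff:
# Lang_detected, custom_sentence = speech_to_text(audio_input, detection, l_src)

Keeping one transcribe() call for both modes avoids mutating shared model state between Streamlit reruns, which is what the set_language()/reset_language() pair in the committed code otherwise has to undo.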