Demosthene-OR committed
Commit a545669
1 Parent(s): d3cbfff

Spoken-language identification using Whisper only

Files changed (1)
  1. tabs/modelisation_seq2seq_tab.py +20 -13
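
The change drops the speech_recognition / Google Speech path and relies on Whisper alone to both identify the spoken language and transcribe it. As background, a minimal sketch of language identification with the stock openai-whisper package, following its documented API (the model size and file name below are placeholders, not values from this repo):

import whisper

model = whisper.load_model("base")  # placeholder model size

# load_audio decodes and resamples to the 16 kHz mono float32 signal Whisper expects;
# pad_or_trim fits it to the model's 30-second analysis window
audio = whisper.pad_or_trim(whisper.load_audio("sample.wav"))

# build a log-Mel spectrogram and ask the model which language it hears
mel = whisper.log_mel_spectrogram(audio).to(model.device)
_, probs = model.detect_language(mel)
print("Detected language:", max(probs, key=probs.get))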
tabs/modelisation_seq2seq_tab.py CHANGED
@@ -7,7 +7,7 @@ from transformers import pipeline
 # from translate import Translator
 from deep_translator import GoogleTranslator
 from audio_recorder_streamlit import audio_recorder
-import speech_recognition as sr
+# import speech_recognition as sr
 import whisper
 import io
 import wavio
@@ -488,32 +488,39 @@ def run():
         st.write("## **"+tr("Résultats")+" :**\n")
         st.audio(audio_bytes, format="audio/wav")
         try:
-            if detection:
-                # Create a BytesIO object from the audio stream
-                audio_stream_bytesio = io.BytesIO(audio_bytes)
+            # if detection:
+            # Create a BytesIO object from the audio stream
+            audio_stream_bytesio = io.BytesIO(audio_bytes)
 
-                # Read the WAV stream using wavio
-                wav = wavio.read(audio_stream_bytesio)
+            # Read the WAV stream using wavio
+            wav = wavio.read(audio_stream_bytesio)
 
-                # Extract the audio data from the wavio.Wav object
-                audio_data = wav.data
+            # Extract the audio data from the wavio.Wav object
+            audio_data = wav.data
 
-                # Convert the audio data to a NumPy array
-                audio_input = np.array(audio_data, dtype=np.float32)
-                audio_input = np.mean(audio_input, axis=1)/32768
-
+            # Convert the audio data to a NumPy array
+            audio_input = np.array(audio_data, dtype=np.float32)
+            audio_input = np.mean(audio_input, axis=1)/32768
+            if detection:
                 result = model_speech.transcribe(audio_input)
                 st.write(tr("Langue détectée")+" : "+result["language"])
                 Lang_detected = result["language"]
                 # Transcription Whisper (si result a été préalablement calculé)
                 custom_sentence = result["text"]
+            else:
+                Lang_detected = l_src
+                model_speech.set_language(Lang_detected)
+                result = model_speech.transcribe(audio_input)
+                custom_sentence = result["text"]
+                model_speech.reset_language()
+            """
             else:
                 Lang_detected = l_src
                 # Transcription google
                 audio_stream = sr.AudioData(audio_bytes, 32000, 2)
                 r = sr.Recognizer()
                 custom_sentence = r.recognize_google(audio_stream, language = Lang_detected)
-
+            """
             if custom_sentence!="":
                 # Lang_detected = lang_classifier (custom_sentence)[0]['label']
                 #st.write('Langue détectée : **'+Lang_detected+'**')
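
Note that set_language() / reset_language() are not methods of a stock openai-whisper model object (which exposes transcribe(), decode(), detect_language(), ...), so model_speech is presumably a project-specific wrapper here. With the plain package, the forced-language branch can be sketched by passing the language code directly to transcribe(). Assumptions in this sketch: audio_input is 16 kHz mono float32 in [-1, 1] (transcribe() presumes that rate when handed a NumPy array, so the recorder's sample rate matters) and l_src holds a code such as "fr":

import numpy as np
import whisper

model = whisper.load_model("base")  # placeholder model size

def speech_to_text(audio_input: np.ndarray, detection: bool, l_src: str):
    # With language=None, transcribe() runs Whisper's own language identification;
    # with an explicit code, detection is skipped and decoding is forced to that language.
    result = model.transcribe(audio_input, language=None if detection else l_src)
    return result["language"], result["text"]

# Example, mirroring the two branches in the diff:
# Lang_detected, custom_sentence = speech_to_text(audio_input, detection, l_src)

Keeping one transcribe() call for both modes avoids mutating shared model state between Streamlit reruns, which is what the set_language()/reset_language() pair in the committed code otherwise has to undo.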