Demosthene-OR committed on
Commit
ec595bb
1 Parent(s): 14f4ac2

Retour en arrière sur requirements, et seq2seq

Browse files
requirements.txt CHANGED
@@ -21,6 +21,7 @@ tensorflow==2.12.0
21
  sentencepiece==0.1.99
22
  openai-whisper==20231117
23
  torch==2.2.0
 
24
  audio_recorder_streamlit==0.0.8
25
  whisper==1.1.10
26
  wavio==0.0.8
 
21
  sentencepiece==0.1.99
22
  openai-whisper==20231117
23
  torch==2.2.0
24
+ speechrecognition==3.10.1
25
  audio_recorder_streamlit==0.0.8
26
  whisper==1.1.10
27
  wavio==0.0.8
tabs/exploration_tab.py CHANGED
@@ -7,6 +7,7 @@ from nltk import download
7
  from ast import literal_eval
8
  from translate_app import tr
9
  if st.session_state.Cloud == 0:
 
10
  import contextlib
11
  import re
12
  from nltk.corpus import stopwords
 
7
  from ast import literal_eval
8
  from translate_app import tr
9
  if st.session_state.Cloud == 0:
10
+ # import nltk
11
  import contextlib
12
  import re
13
  from nltk.corpus import stopwords
tabs/modelisation_seq2seq_tab.py CHANGED
@@ -7,7 +7,7 @@ from transformers import pipeline
7
  # from translate import Translator
8
  from deep_translator import GoogleTranslator
9
  from audio_recorder_streamlit import audio_recorder
10
- # import speech_recognition as sr
11
  import whisper
12
  import io
13
  import wavio
@@ -19,6 +19,7 @@ from tensorflow import keras
19
  from keras_nlp.layers import TransformerEncoder
20
  from tensorflow.keras import layers
21
  from tensorflow.keras.utils import plot_model
 
22
  from gtts import gTTS
23
  from extra_streamlit_components import tab_bar, TabBarItemData
24
  from translate_app import tr
@@ -488,7 +489,6 @@ def run():
488
  st.write("## **"+tr("Résultats")+" :**\n")
489
  st.audio(audio_bytes, format="audio/wav")
490
  try:
491
- # if detection:
492
  # Create a BytesIO object from the audio stream
493
  audio_stream_bytesio = io.BytesIO(audio_bytes)
494
 
@@ -501,16 +501,28 @@ def run():
501
  # Convert the audio data to a NumPy array
502
  audio_input = np.array(audio_data, dtype=np.float32)
503
  audio_input = np.mean(audio_input, axis=1)/32768
504
- if detection:
 
505
  result = model_speech.transcribe(audio_input)
506
  st.write(tr("Langue détectée")+" : "+result["language"])
507
  Lang_detected = result["language"]
508
  # Transcription Whisper (si result a été préalablement calculé)
509
  custom_sentence = result["text"]
510
  else:
 
 
 
 
 
 
 
 
 
511
  Lang_detected = l_src
512
  result = model_speech.transcribe(audio_input, language=Lang_detected)
513
  custom_sentence = result["text"]
 
 
514
  if custom_sentence!="":
515
  # Lang_detected = lang_classifier (custom_sentence)[0]['label']
516
  #st.write('Langue détectée : **'+Lang_detected+'**')
 
7
  # from translate import Translator
8
  from deep_translator import GoogleTranslator
9
  from audio_recorder_streamlit import audio_recorder
10
+ import speech_recognition as sr
11
  import whisper
12
  import io
13
  import wavio
 
19
  from keras_nlp.layers import TransformerEncoder
20
  from tensorflow.keras import layers
21
  from tensorflow.keras.utils import plot_model
22
+ # from PIL import Image
23
  from gtts import gTTS
24
  from extra_streamlit_components import tab_bar, TabBarItemData
25
  from translate_app import tr
 
489
  st.write("## **"+tr("Résultats")+" :**\n")
490
  st.audio(audio_bytes, format="audio/wav")
491
  try:
 
492
  # Create a BytesIO object from the audio stream
493
  audio_stream_bytesio = io.BytesIO(audio_bytes)
494
 
 
501
  # Convert the audio data to a NumPy array
502
  audio_input = np.array(audio_data, dtype=np.float32)
503
  audio_input = np.mean(audio_input, axis=1)/32768
504
+
505
+ if detection:
506
  result = model_speech.transcribe(audio_input)
507
  st.write(tr("Langue détectée")+" : "+result["language"])
508
  Lang_detected = result["language"]
509
  # Transcription Whisper (si result a été préalablement calculé)
510
  custom_sentence = result["text"]
511
  else:
512
+ # Avec l'aide de la bibliothèque speech_recognition de Google
513
+ Lang_detected = l_src
514
+ # Transcription google
515
+ audio_stream = sr.AudioData(audio_bytes, 32000, 2)
516
+ r = sr.Recognizer()
517
+ custom_sentence = r.recognize_google(audio_stream, language = Lang_detected)
518
+
519
+ # Sans la bibliothèque speech_recognition, uniquement avec Whisper
520
+ '''
521
  Lang_detected = l_src
522
  result = model_speech.transcribe(audio_input, language=Lang_detected)
523
  custom_sentence = result["text"]
524
+ '''
525
+
526
  if custom_sentence!="":
527
  # Lang_detected = lang_classifier (custom_sentence)[0]['label']
528
  #st.write('Langue détectée : **'+Lang_detected+'**')