Demosthene-OR committed on
Commit
b49c7c6
1 Parent(s): 7adca4e

Allegements

Browse files
requirements.txt CHANGED
@@ -6,16 +6,13 @@ numpy==1.23.5
6
  seaborn==0.13.2
7
  nltk==3.8.1
8
  scikit-learn==1.1.3
9
- scikit-learn-extra==0.3.0
10
  gensim==4.3.2
11
  sacrebleu==2.4.0
12
- pyspellchecker==0.8.1
13
  spacy==3.6.0
14
  https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.6.0/en_core_web_sm-3.6.0.tar.gz
15
  https://github.com/explosion/spacy-models/releases/download/fr_core_news_sm-3.6.0/fr_core_news_sm-3.6.0.tar.gz
16
  pillow==9.5.0
17
  wordcloud==1.9.3
18
- pathlib==1.0.1
19
  networkx==2.7.0
20
  transformers==4.37.2
21
  keras-nlp==0.6.1
@@ -23,13 +20,9 @@ keras==2.12.0
23
  tensorflow==2.12.0
24
  sentencepiece==0.1.99
25
  openai-whisper==20231117
26
- sounddevice==0.4.6
27
  torch==2.2.0
28
- xformers==0.0.24
29
- translate==3.6.1
30
  speechrecognition==3.10.1
31
  audio_recorder_streamlit==0.0.8
32
- wave==0.0.2
33
  whisper==1.1.10
34
  wavio==0.0.8
35
  filesplit==4.0.1
@@ -39,7 +32,4 @@ graphviz==0.20.1
39
  gTTS==2.5.1
40
  https://files.pythonhosted.org/packages/cc/58/96aff0e5cb8b59c06232ea7e249ed902d04ec89f52636f5be06ceb0855fe/extra_streamlit_components-0.1.60-py3-none-any.whl
41
  streamlit-option-menu==0.3.12
42
- plotly==5.18.0
43
- bokeh==3.3.4
44
- shap==0.44.1
45
  deep-translator==1.11.4
 
6
  seaborn==0.13.2
7
  nltk==3.8.1
8
  scikit-learn==1.1.3
 
9
  gensim==4.3.2
10
  sacrebleu==2.4.0
 
11
  spacy==3.6.0
12
  https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.6.0/en_core_web_sm-3.6.0.tar.gz
13
  https://github.com/explosion/spacy-models/releases/download/fr_core_news_sm-3.6.0/fr_core_news_sm-3.6.0.tar.gz
14
  pillow==9.5.0
15
  wordcloud==1.9.3
 
16
  networkx==2.7.0
17
  transformers==4.37.2
18
  keras-nlp==0.6.1
 
20
  tensorflow==2.12.0
21
  sentencepiece==0.1.99
22
  openai-whisper==20231117
 
23
  torch==2.2.0
 
 
24
  speechrecognition==3.10.1
25
  audio_recorder_streamlit==0.0.8
 
26
  whisper==1.1.10
27
  wavio==0.0.8
28
  filesplit==4.0.1
 
32
  gTTS==2.5.1
33
  https://files.pythonhosted.org/packages/cc/58/96aff0e5cb8b59c06232ea7e249ed902d04ec89f52636f5be06ceb0855fe/extra_streamlit_components-0.1.60-py3-none-any.whl
34
  streamlit-option-menu==0.3.12
 
 
 
35
  deep-translator==1.11.4
tabs/data_viz_tab.py CHANGED
@@ -7,9 +7,6 @@ import numpy as np
7
  import pandas as pd
8
  import matplotlib.pyplot as plt
9
  import seaborn as sns
10
- import plotly.express as px
11
- import plotly.graph_objects as go
12
- import plotly.figure_factory as ff
13
  from wordcloud import WordCloud
14
  import nltk
15
  from nltk.corpus import stopwords
 
7
  import pandas as pd
8
  import matplotlib.pyplot as plt
9
  import seaborn as sns
 
 
 
10
  from wordcloud import WordCloud
11
  import nltk
12
  from nltk.corpus import stopwords
tabs/exploration_tab.py CHANGED
@@ -1,6 +1,5 @@
1
  import streamlit as st
2
  import os
3
- import numpy as np
4
  import pandas as pd
5
  import collections
6
  from nltk.tokenize import word_tokenize
@@ -8,7 +7,7 @@ from nltk import download
8
  from ast import literal_eval
9
  from translate_app import tr
10
  if st.session_state.Cloud == 0:
11
- import nltk
12
  import contextlib
13
  import re
14
  from nltk.corpus import stopwords
 
1
  import streamlit as st
2
  import os
 
3
  import pandas as pd
4
  import collections
5
  from nltk.tokenize import word_tokenize
 
7
  from ast import literal_eval
8
  from translate_app import tr
9
  if st.session_state.Cloud == 0:
10
+ # import nltk
11
  import contextlib
12
  import re
13
  from nltk.corpus import stopwords
tabs/id_lang_tab.py CHANGED
@@ -1,10 +1,8 @@
1
  import streamlit as st
2
  import pandas as pd
3
  import numpy as np
4
- import os
5
  import matplotlib.pyplot as plt
6
  import tiktoken
7
- import random
8
  import joblib
9
  import json
10
  import csv
@@ -12,7 +10,6 @@ from transformers import pipeline
12
  import keras
13
  from tensorflow.keras.preprocessing.sequence import pad_sequences
14
  from sklearn.preprocessing import LabelEncoder
15
- from sklearn.feature_extraction.text import CountVectorizer
16
  from tensorflow.keras.utils import plot_model
17
  from filesplit.merge import Merge
18
  from extra_streamlit_components import tab_bar, TabBarItemData
 
1
  import streamlit as st
2
  import pandas as pd
3
  import numpy as np
 
4
  import matplotlib.pyplot as plt
5
  import tiktoken
 
6
  import joblib
7
  import json
8
  import csv
 
10
  import keras
11
  from tensorflow.keras.preprocessing.sequence import pad_sequences
12
  from sklearn.preprocessing import LabelEncoder
 
13
  from tensorflow.keras.utils import plot_model
14
  from filesplit.merge import Merge
15
  from extra_streamlit_components import tab_bar, TabBarItemData
tabs/modelisation_dict_tab.py CHANGED
@@ -1,6 +1,5 @@
1
  import streamlit as st
2
  import pandas as pd
3
- import numpy as np
4
  import os
5
  from sacrebleu import corpus_bleu
6
  if st.session_state.Cloud == 0:
 
1
  import streamlit as st
2
  import pandas as pd
 
3
  import os
4
  from sacrebleu import corpus_bleu
5
  if st.session_state.Cloud == 0:
tabs/modelisation_seq2seq_tab.py CHANGED
@@ -4,12 +4,12 @@ import numpy as np
4
  import os
5
  from sacrebleu import corpus_bleu
6
  from transformers import pipeline
7
- from translate import Translator
 
8
  from audio_recorder_streamlit import audio_recorder
9
  import speech_recognition as sr
10
  import whisper
11
  import io
12
- # import wave
13
  import wavio
14
  from filesplit.merge import Merge
15
  import tensorflow as tf
@@ -19,7 +19,7 @@ from tensorflow import keras
19
  from keras_nlp.layers import TransformerEncoder
20
  from tensorflow.keras import layers
21
  from tensorflow.keras.utils import plot_model
22
- from PIL import Image
23
  from gtts import gTTS
24
  from extra_streamlit_components import tab_bar, TabBarItemData
25
  from translate_app import tr
@@ -463,7 +463,8 @@ def run():
463
  with col2:
464
  st.write(":red[**Trad. Google Translate**]")
465
  try:
466
- translator = Translator(to_lang=l_tgt, from_lang=Lang_detected)
 
467
  if custom_sentence!="":
468
  translation = translator.translate(custom_sentence)
469
  st.write("**"+l_tgt+" :** "+translation)
@@ -488,31 +489,39 @@ def run():
488
  st.write("## **"+tr("Résultats")+" :**\n")
489
  st.audio(audio_bytes, format="audio/wav")
490
  try:
491
- if detection:
492
- # Create a BytesIO object from the audio stream
493
- audio_stream_bytesio = io.BytesIO(audio_bytes)
494
 
495
- # Read the WAV stream using wavio
496
- wav = wavio.read(audio_stream_bytesio)
497
 
498
- # Extract the audio data from the wavio.Wav object
499
- audio_data = wav.data
500
 
501
- # Convert the audio data to a NumPy array
502
- audio_input = np.array(audio_data, dtype=np.float32)
503
- audio_input = np.mean(audio_input, axis=1)/32768
504
-
 
505
  result = model_speech.transcribe(audio_input)
506
  st.write(tr("Langue détectée")+" : "+result["language"])
507
  Lang_detected = result["language"]
508
  # Transcription Whisper (si result a été préalablement calculé)
509
  custom_sentence = result["text"]
510
  else:
 
511
  Lang_detected = l_src
512
  # Transcription google
513
  audio_stream = sr.AudioData(audio_bytes, 32000, 2)
514
  r = sr.Recognizer()
515
  custom_sentence = r.recognize_google(audio_stream, language = Lang_detected)
 
 
 
 
 
 
 
516
 
517
  if custom_sentence!="":
518
  # Lang_detected = lang_classifier (custom_sentence)[0]['label']
@@ -520,7 +529,8 @@ def run():
520
  st.write("")
521
  st.write("**"+Lang_detected+" :** :blue["+custom_sentence+"]")
522
  st.write("")
523
- translator = Translator(to_lang=l_tgt, from_lang=Lang_detected)
 
524
  translation = translator.translate(custom_sentence)
525
  st.write("**"+l_tgt+" :** "+translation)
526
  st.write("")
 
4
  import os
5
  from sacrebleu import corpus_bleu
6
  from transformers import pipeline
7
+ # from translate import Translator
8
+ from deep_translator import GoogleTranslator
9
  from audio_recorder_streamlit import audio_recorder
10
  import speech_recognition as sr
11
  import whisper
12
  import io
 
13
  import wavio
14
  from filesplit.merge import Merge
15
  import tensorflow as tf
 
19
  from keras_nlp.layers import TransformerEncoder
20
  from tensorflow.keras import layers
21
  from tensorflow.keras.utils import plot_model
22
+ # from PIL import Image
23
  from gtts import gTTS
24
  from extra_streamlit_components import tab_bar, TabBarItemData
25
  from translate_app import tr
 
463
  with col2:
464
  st.write(":red[**Trad. Google Translate**]")
465
  try:
466
+ # translator = Translator(to_lang=l_tgt, from_lang=Lang_detected)
467
+ translator = GoogleTranslator(source=Lang_detected, target=l_tgt)
468
  if custom_sentence!="":
469
  translation = translator.translate(custom_sentence)
470
  st.write("**"+l_tgt+" :** "+translation)
 
489
  st.write("## **"+tr("Résultats")+" :**\n")
490
  st.audio(audio_bytes, format="audio/wav")
491
  try:
492
+ # Create a BytesIO object from the audio stream
493
+ audio_stream_bytesio = io.BytesIO(audio_bytes)
 
494
 
495
+ # Read the WAV stream using wavio
496
+ wav = wavio.read(audio_stream_bytesio)
497
 
498
+ # Extract the audio data from the wavio.Wav object
499
+ audio_data = wav.data
500
 
501
+ # Convert the audio data to a NumPy array
502
+ audio_input = np.array(audio_data, dtype=np.float32)
503
+ audio_input = np.mean(audio_input, axis=1)/32768
504
+
505
+ if detection:
506
  result = model_speech.transcribe(audio_input)
507
  st.write(tr("Langue détectée")+" : "+result["language"])
508
  Lang_detected = result["language"]
509
  # Transcription Whisper (si result a été préalablement calculé)
510
  custom_sentence = result["text"]
511
  else:
512
+ # Avec l'aide de la bibliothèque speech_recognition de Google
513
  Lang_detected = l_src
514
  # Transcription google
515
  audio_stream = sr.AudioData(audio_bytes, 32000, 2)
516
  r = sr.Recognizer()
517
  custom_sentence = r.recognize_google(audio_stream, language = Lang_detected)
518
+
519
+ # Sans la bibliothèque speech_recognition, uniquement avec Whisper
520
+ '''
521
+ Lang_detected = l_src
522
+ result = model_speech.transcribe(audio_input, language=Lang_detected)
523
+ custom_sentence = result["text"]
524
+ '''
525
 
526
  if custom_sentence!="":
527
  # Lang_detected = lang_classifier (custom_sentence)[0]['label']
 
529
  st.write("")
530
  st.write("**"+Lang_detected+" :** :blue["+custom_sentence+"]")
531
  st.write("")
532
+ # translator = Translator(to_lang=l_tgt, from_lang=Lang_detected)
533
+ translator = GoogleTranslator(source=Lang_detected, target=l_tgt)
534
  translation = translator.translate(custom_sentence)
535
  st.write("**"+l_tgt+" :** "+translation)
536
  st.write("")