yvankob committed
Commit 8b7db2a · 1 parent: e5df979

Update app.py

Files changed (1)
  1. app.py +16 -139
app.py CHANGED
@@ -8,8 +8,6 @@ import tempfile
 from transformers.pipelines.audio_utils import ffmpeg_read
 from gradio.components import Audio, Dropdown, Radio, Textbox
 import os
-import numpy as np
-import soundfile as sf
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
 
 
@@ -47,66 +45,33 @@ def load_models():
 
 load_models()
 
-model_size = "large-v2"
-model = WhisperModel(model_size)
-
 # Function for transcription
 def transcribe_audio(audio_file):
-    # model_size = "large-v2"
-    # model = WhisperModel(model_size)
+    model_size = "large-v2"
+    model = WhisperModel(model_size)
     # model = WhisperModel(model_size, device=device, compute_type="int8")
-    global model
     segments, _ = model.transcribe(audio_file, beam_size=1)
     transcriptions = [("[%.2fs -> %.2fs]" % (seg.start, seg.end), seg.text) for seg in segments]
     return transcriptions
 
-# Function for translation
 # Function for translation
 def traduction(text, source_lang, target_lang):
-    # Check that the language codes are in flores_codes
-    if source_lang not in flores_codes or target_lang not in flores_codes:
-        print(f"Language code not found: {source_lang} or {target_lang}")
-        return ""
-
-    src_code = flores_codes[source_lang]
-    tgt_code = flores_codes[target_lang]
-
     model_name = "nllb-distilled-600M"
     model = model_dict[model_name + "_model"]
     tokenizer = model_dict[model_name + "_tokenizer"]
    translator = pipeline("translation", model=model, tokenizer=tokenizer)
-
-    return translator(text, src_lang=src_code, tgt_lang=tgt_code)[0]["translation_text"]
-
+    return translator(text, src_lang=flores_codes[source_lang], tgt_lang=flores_codes[target_lang])[0]["translation_text"]
 
 # Main function
-def full_transcription_and_translation(audio_input, source_lang, target_lang):
-    # If audio_input is a URL
-    if isinstance(audio_input, str) and audio_input.startswith("http"):
-        audio_file = download_yt_audio(audio_input)
-    # If audio_input is a dict containing audio data
-    elif isinstance(audio_input, dict) and "array" in audio_input and "sampling_rate" in audio_input:
-        audio_array = audio_input["array"]
-        sampling_rate = audio_input["sampling_rate"]
-        # Write the NumPy array to a temporary WAV file
-        with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as f:
-            sf.write(f, audio_array, sampling_rate)
-            audio_file = f.name
-    else:
-        # Assume it is a file path
-        audio_file = audio_input
-
+def full_transcription_and_translation(audio_file, source_lang, target_lang):
+    if audio_file.startswith("http"):
+        audio_file = download_yt_audio(audio_file)
     transcriptions = transcribe_audio(audio_file)
     translations = [(timestamp, traduction(text, source_lang, target_lang)) for timestamp, text in transcriptions]
-
-    # Remove the temporary file if one was created
-    if isinstance(audio_input, dict):
-        os.remove(audio_file)
-
     return transcriptions, translations
 
 # YouTube audio download
-"""def download_yt_audio(yt_url):
+def download_yt_audio(yt_url):
     with tempfile.NamedTemporaryFile(suffix='.mp3') as f:
         ydl_opts = {
             'format': 'bestaudio/best',
@@ -119,7 +84,7 @@ def full_transcription_and_translation(audio_input, source_lang, target_lang):
         }
         with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            ydl.download([yt_url])
-        return f.name"""
+        return f.name
 
 lang_codes = list(flores_codes.keys())
 
@@ -132,105 +97,17 @@ def gradio_interface(audio_file, source_lang, target_lang):
     translated_text = '\n'.join([f"{timestamp}: {text}" for timestamp, text in translations])
     return transcribed_text, translated_text
 
-
-def _return_yt_html_embed(yt_url):
-    video_id = yt_url.split("?v=")[-1]
-    HTML_str = (
-        f'<center> <iframe width="500" height="320" src="https://www.youtube.com/embed/{video_id}"> </iframe>'
-        " </center>"
-    )
-    return HTML_str
-
-def download_yt_audio(yt_url, filename):
-    info_loader = youtube_dl.YoutubeDL()
-
-    try:
-        info = info_loader.extract_info(yt_url, download=False)
-    except youtube_dl.utils.DownloadError as err:
-        raise gr.Error(str(err))
-
-    file_length = info["duration_string"]
-    file_h_m_s = file_length.split(":")
-    file_h_m_s = [int(sub_length) for sub_length in file_h_m_s]
-
-    if len(file_h_m_s) == 1:
-        file_h_m_s.insert(0, 0)
-    if len(file_h_m_s) == 2:
-        file_h_m_s.insert(0, 0)
-    file_length_s = file_h_m_s[0] * 3600 + file_h_m_s[1] * 60 + file_h_m_s[2]
-
-    if file_length_s > YT_LENGTH_LIMIT_S:
-        yt_length_limit_hms = time.strftime("%HH:%MM:%SS", time.gmtime(YT_LENGTH_LIMIT_S))
-        file_length_hms = time.strftime("%HH:%MM:%SS", time.gmtime(file_length_s))
-        raise gr.Error(f"Maximum YouTube length is {yt_length_limit_hms}, got {file_length_hms} YouTube video.")
-
-    ydl_opts = {"outtmpl": filename, "format": "worstvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best"}
-
-    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
-        try:
-            ydl.download([yt_url])
-        except youtube_dl.utils.ExtractorError as err:
-            raise gr.Error(str(err))
-
-
-def yt_transcribe(yt_url, task, max_filesize=75.0):
-    html_embed_str = _return_yt_html_embed(yt_url)
-    global model  # make sure model is accessible
-
-    with tempfile.TemporaryDirectory() as tmpdirname:
-        filepath = os.path.join(tmpdirname, "video.mp4")
-        download_yt_audio(yt_url, filepath)
-        with open(filepath, "rb") as f:
-            inputs = f.read()
-
-    inputs = ffmpeg_read(inputs, model.feature_extractor.sampling_rate)
-    inputs = {"array": inputs, "sampling_rate": model.feature_extractor.sampling_rate}
-
-    transcriptions, translations = full_transcription_and_translation(inputs, source_lang, target_lang)
-    transcribed_text = '\n'.join([f"{timestamp}: {text}" for timestamp, text in transcriptions])
-    translated_text = '\n'.join([f"{timestamp}: {text}" for timestamp, text in translations])
-    return html_embed_str, transcribed_text, translated_text
-
-
-# Interfaces
-demo = gr.Blocks()
-
-mf_transcribe = gr.Interface(
+iface = gr.Interface(
     fn=gradio_interface,
     inputs=[
-        gr.Audio(sources=["microphone"], type="filepath"),
-        gr.Dropdown(lang_codes, value='French', label='Source Language'),
-        gr.Dropdown(lang_codes, value='English', label='Target Language')
+        gr.Audio(type="filepath"),
+        gr.Dropdown(lang_codes, value='French', label='Source Language'),
+        gr.Dropdown(lang_codes, value='English', label='Target Language'),
     ],
     outputs=[
-        gr.Textbox(label="Transcribed Text"),
-        gr.Textbox(label="Translated Text")]
+        gr.Textbox(label="Transcribed Text"),
+        gr.Textbox(label="Translated Text")
+    ]
 )
 
-file_transcribe = gr.Interface(
-    fn=gradio_interface,
-    inputs=[
-        gr.Audio(type="filepath", label="Audio file"),
-        gr.Dropdown(lang_codes, value='French', label='Source Language'),
-        gr.Dropdown(lang_codes, value='English', label='Target Language')
-    ],
-    outputs=[
-        gr.Textbox(label="Transcribed Text"),
-        gr.Textbox(label="Translated Text")]
-)
-
-yt_transcribe = gr.Interface(
-    fn=yt_transcribe,
-    inputs=[
-        gr.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL"),
-        gr.Dropdown(lang_codes, value='French', label='Source Language'),
-        gr.Dropdown(lang_codes, value='English', label='Target Language')
-    ],
-    outputs=["html", gr.Textbox(label="Transcribed Text"), gr.Textbox(label="Translated Text")]
-)
-
-with demo:
-    gr.TabbedInterface([mf_transcribe, file_transcribe, yt_transcribe], ["Microphone", "Audio file", "YouTube"])
-
-
-demo.launch()
+iface.launch()
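One consequence of this change is that transcribe_audio now constructs WhisperModel("large-v2") inside the function body, so the weights are reloaded on every call. Below is a minimal sketch of a cached loader that keeps the call site unchanged, assuming the WhisperModel imported by app.py is faster-whisper's (whose transcribe returns a segment iterator with start, end and text attributes):

from functools import lru_cache
from faster_whisper import WhisperModel  # assumption: the WhisperModel used by app.py

@lru_cache(maxsize=1)
def get_whisper_model(model_size="large-v2"):
    # Loads the checkpoint once on first use; later calls reuse the same object.
    return WhisperModel(model_size)

def transcribe_audio(audio_file):
    model = get_whisper_model()
    segments, _ = model.transcribe(audio_file, beam_size=1)
    return [("[%.2fs -> %.2fs]" % (seg.start, seg.end), seg.text) for seg in segments]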
 
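The simplified traduction passes FLORES-200 codes straight to the transformers translation pipeline. A standalone sketch of that step, under the assumption that the checkpoint behind model_dict is facebook/nllb-200-distilled-600M and that flores_codes maps display names to FLORES-200 codes (the two-entry dict below is illustrative, not the app's full table):

from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline

checkpoint = "facebook/nllb-200-distilled-600M"  # assumed checkpoint
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
translator = pipeline("translation", model=model, tokenizer=tokenizer)

flores_codes = {"French": "fra_Latn", "English": "eng_Latn"}  # illustrative subset
result = translator(
    "Bonjour tout le monde",
    src_lang=flores_codes["French"],
    tgt_lang=flores_codes["English"],
)
print(result[0]["translation_text"])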
 
 
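Note that the re-enabled download_yt_audio returns f.name from inside a NamedTemporaryFile context; with the default delete=True the file is removed as soon as the with block exits, and yt-dlp writes to its own output path rather than to the open handle. A safer variant might look like the sketch below (an assumption-laden illustration, where youtube_dl stands for whichever yt-dlp/youtube-dl module app.py imports):

import os
import tempfile
import yt_dlp as youtube_dl  # assumption: app.py uses yt-dlp under this alias

def download_yt_audio(yt_url):
    # Reserve a unique path, then hand it to yt-dlp as the output template.
    fd, path = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)
    os.remove(path)  # yt-dlp skips the download if the target already exists
    ydl_opts = {"format": "bestaudio/best", "outtmpl": path}
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        ydl.download([yt_url])
    return path  # the caller is responsible for deleting this file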
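The commit also collapses the three tabbed interfaces (microphone, file, YouTube) into a single gr.Interface. If the tabbed layout were ever wanted back, gr.TabbedInterface composes interfaces directly, without an explicit gr.Blocks wrapper; a minimal self-contained sketch with stand-in interfaces:

import gradio as gr

# Stand-ins for mf_transcribe, file_transcribe and yt_transcribe from the removed code.
mf_transcribe = gr.Interface(fn=lambda x: x, inputs="text", outputs="text")
file_transcribe = gr.Interface(fn=lambda x: x, inputs="text", outputs="text")
yt_transcribe = gr.Interface(fn=lambda x: x, inputs="text", outputs="text")

demo = gr.TabbedInterface(
    [mf_transcribe, file_transcribe, yt_transcribe],
    ["Microphone", "Audio file", "YouTube"],
)
demo.launch()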