Update app.py
app.py CHANGED
@@ -90,17 +90,26 @@ def parse_simplified_diarization(simplified_text):
     matches = re.findall(pattern, simplified_text, re.MULTILINE)
     return [(speaker, text.strip()) for speaker, text in matches]
 
+def count_unique_speakers(speaker_transcription):
+    if isinstance(speaker_transcription, str):
+        speaker_transcription = parse_simplified_diarization(speaker_transcription)
+    return len(set(speaker for speaker, _ in speaker_transcription))
+
 def process_transcription(*args):
     generator = transcribe_and_diarize(*args)
     for progress_message, raw_text, speaker_transcription in generator:
         pass  # Consume the generator to completion
     simplified_diarization = simplify_diarization_output(speaker_transcription)
-    return progress_message, raw_text, simplified_diarization
+    num_speakers = count_unique_speakers(speaker_transcription)
+    speaker_names = [{"Speaker ID": f"SPEAKER_{i:02d}", "Nom personnalisé": f"SPEAKER_{i:02d}"} for i in range(num_speakers)]
+    return progress_message, raw_text, simplified_diarization, num_speakers, speaker_names
 
 def process_yt_transcription(*args):
     html_embed, raw_text, speaker_transcription = yt_transcribe(*args)
     simplified_diarization = simplify_diarization_output(speaker_transcription)
-    return html_embed, raw_text, simplified_diarization
+    num_speakers = count_unique_speakers(speaker_transcription)
+    speaker_names = [{"Speaker ID": f"SPEAKER_{i:02d}", "Nom personnalisé": f"SPEAKER_{i:02d}"} for i in range(num_speakers)]
+    return html_embed, raw_text, simplified_diarization, num_speakers, speaker_names
 
 
 # New functions for progress indicator
@@ -168,15 +177,21 @@ def transcribe_and_diarize(file_path, task, progress=gr.Progress()):
     progress(1.0, desc="Terminé!")
     return "Transcription terminée!", transcription_result['text'], speaker_transcription
 
-def format_to_markdown(transcription_text, speaker_transcription, audio_duration=None, location=None, speaker_age=None, context=None):
+def format_to_markdown(transcription_text, speaker_transcription, audio_duration=None, location=None, speaker_age=None, context=None, num_speakers=None, speaker_names=None):
     metadata = {
         "Date de traitement": datetime.now().strftime('%d/%m/%Y %H:%M'),
         "Durée de l'audio": f"{audio_duration} secondes" if audio_duration else "[à remplir]",
         "Lieu": location if location else "[non spécifié]",
+        "Nombre d'interlocuteurs": num_speakers if num_speakers else "[non spécifié]",
         "Âge de l'intervenant": f"{speaker_age} ans" if speaker_age else "[non spécifié]",
         "Contexte": context if context else "[non spécifié]"
     }
 
+    # Add the speakers' custom names to the metadata
+    if speaker_names is not None:
+        for row in speaker_names:
+            metadata[row["Speaker ID"]] = row["Nom personnalisé"]
+
     metadata_text = "\n".join([f"- **{key}** : '{value}'" for key, value in metadata.items()])
 
     try:
@@ -186,7 +201,8 @@ def format_to_markdown(transcription_text, speaker_transcription, audio_duration
     if isinstance(speaker_transcription, list) and all(isinstance(item, tuple) and len(item) == 2 for item in speaker_transcription):
         formatted_transcription = []
         for speaker, text in speaker_transcription:
-            formatted_transcription.append(f"**{speaker}**: {text}")
+            custom_name = next((row["Nom personnalisé"] for row in speaker_names if row["Speaker ID"] == speaker), speaker)
+            formatted_transcription.append(f"**{custom_name}**: {text}")
         transcription_text = "\n\n".join(formatted_transcription)
     else:
         raise ValueError("Invalid speaker transcription format")
@@ -368,6 +384,13 @@ with demo:
     with gr.Accordion("Métadonnées (optionnel) 📌", open=False):
         audio_duration = gr.Textbox(label="⏱️ Durée de l'audio (mm:ss)")
         location = gr.Textbox(label="📍 Lieu de l'enregistrement")
+        num_speakers = gr.Number(label="Nombre d'interlocuteurs", interactive=False)
+        speaker_names = gr.Dataframe(
+            headers=["Speaker ID", "Nom personnalisé"],
+            datatype=["str", "str"],
+            col_count=(2, "fixed"),
+            label="Noms des interlocuteurs"
+        )
         speaker_age = gr.Number(label="👤 Âge de l'intervenant principal")
         context = gr.Textbox(label="📝 Contexte de l'enregistrement")
 
@@ -389,6 +412,13 @@ with demo:
     with gr.Accordion("Métadonnées (optionnel) 📌", open=False):
         mic_audio_duration = gr.Textbox(label="⏱️ Durée de l'enregistrement (mm:ss)")
         mic_location = gr.Textbox(label="📍 Lieu de l'enregistrement")
+        mic_num_speakers = gr.Number(label="Nombre d'interlocuteurs", interactive=False)
+        mic_speaker_names = gr.Dataframe(
+            headers=["Speaker ID", "Nom personnalisé"],
+            datatype=["str", "str"],
+            col_count=(2, "fixed"),
+            label="Noms des interlocuteurs"
+        )
         mic_speaker_age = gr.Number(label="👤 Âge de l'intervenant principal")
         mic_context = gr.Textbox(label="📝 Contexte de l'enregistrement")
 
@@ -411,6 +441,13 @@ with demo:
     with gr.Accordion("Métadonnées (optionnel) 📌", open=False):
         yt_audio_duration = gr.Textbox(label="⏱️ Durée de la vidéo (mm:ss)")
         yt_channel = gr.Textbox(label="📺 Nom de la chaîne YouTube")
+        yt_num_speakers = gr.Number(label="Nombre d'interlocuteurs", interactive=False)
+        yt_speaker_names = gr.Dataframe(
+            headers=["Speaker ID", "Nom personnalisé"],
+            datatype=["str", "str"],
+            col_count=(2, "fixed"),
+            label="Noms des interlocuteurs"
+        )
         yt_publish_date = gr.Textbox(label="📅 Date de publication")
         yt_context = gr.Textbox(label="📝 Contexte de la vidéo")
 
@@ -441,41 +478,41 @@ with demo:
 
     # Connect the buttons to the appropriate functions
     transcribe_button.click(
         process_transcription,
         inputs=[audio_input, task_input],
-        outputs=[progress_display, raw_output, speaker_output]
+        outputs=[progress_display, raw_output, speaker_output, num_speakers, speaker_names]
     )
 
     format_button.click(
         format_to_markdown,
-        inputs=[raw_output, speaker_output, audio_duration, location, speaker_age, context],
+        inputs=[raw_output, speaker_output, audio_duration, location, speaker_age, context, num_speakers, speaker_names],
        outputs=formatted_output
     )
 
     mic_transcribe_button.click(
         process_transcription,
         inputs=[mic_input, mic_task_input],
-        outputs=[mic_progress_display, mic_raw_output, mic_speaker_output]
+        outputs=[mic_progress_display, mic_raw_output, mic_speaker_output, mic_num_speakers, mic_speaker_names]
     )
 
     mic_format_button.click(
         format_to_markdown,
-        inputs=[mic_raw_output, mic_speaker_output, mic_audio_duration, mic_location, mic_speaker_age, mic_context],
+        inputs=[mic_raw_output, mic_speaker_output, mic_audio_duration, mic_location, mic_speaker_age, mic_context, mic_num_speakers, mic_speaker_names],
         outputs=mic_formatted_output
     )
 
     yt_transcribe_button.click(
         process_yt_transcription,
         inputs=[yt_input, yt_task_input],
-        outputs=[yt_html_output, yt_raw_output, yt_speaker_output]
+        outputs=[yt_html_output, yt_raw_output, yt_speaker_output, yt_num_speakers, yt_speaker_names]
     )
 
     yt_format_button.click(
         format_to_markdown,
-        inputs=[yt_raw_output, yt_speaker_output, yt_audio_duration, yt_channel, yt_publish_date, yt_context],
+        inputs=[yt_raw_output, yt_speaker_output, yt_audio_duration, yt_channel, yt_publish_date, yt_context, yt_num_speakers, yt_speaker_names],
         outputs=yt_formatted_output
     )
 
 
 if __name__ == "__main__":
     demo.queue().launch()
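
Taken together, the change counts speakers in the diarization output, pre-fills a gr.Dataframe with one SPEAKER_XX row per detected speaker, and has format_to_markdown substitute the user-edited names into both the metadata and the transcript. A minimal self-contained sketch of that round trip (illustrative only, not part of the commit; the parser below is a hypothetical stand-in for the app's parse_simplified_diarization, whose regex is defined earlier in app.py):

    # Illustrative sketch, assuming the simplified diarization format is one
    # "SPEAKER_XX: text" entry per line.
    import re

    def parse_simplified_diarization(simplified_text):
        # Hypothetical stand-in for the app's own parser.
        pattern = r"^(SPEAKER_\d+):\s*(.*)$"
        matches = re.findall(pattern, simplified_text, re.MULTILINE)
        return [(speaker, text.strip()) for speaker, text in matches]

    def count_unique_speakers(speaker_transcription):
        # As in the commit: accept either the raw string or parsed (speaker, text) pairs.
        if isinstance(speaker_transcription, str):
            speaker_transcription = parse_simplified_diarization(speaker_transcription)
        return len(set(speaker for speaker, _ in speaker_transcription))

    simplified = "SPEAKER_00: Bonjour.\nSPEAKER_01: Bonjour, ça va ?\nSPEAKER_00: Très bien."
    num_speakers = count_unique_speakers(simplified)  # -> 2

    # Default rows for the "Noms des interlocuteurs" table, as built in process_transcription.
    speaker_names = [{"Speaker ID": f"SPEAKER_{i:02d}", "Nom personnalisé": f"SPEAKER_{i:02d}"}
                     for i in range(num_speakers)]
    speaker_names[1]["Nom personnalisé"] = "Alice"  # simulate a user renaming SPEAKER_01

    # Custom-name lookup, mirroring the loop added to format_to_markdown.
    for speaker, text in parse_simplified_diarization(simplified):
        custom_name = next((row["Nom personnalisé"] for row in speaker_names
                            if row["Speaker ID"] == speaker), speaker)
        print(f"**{custom_name}**: {text}")
    # **SPEAKER_00**: Bonjour.
    # **Alice**: Bonjour, ça va ?
    # **SPEAKER_00**: Très bien.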
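One caveat worth verifying against the running app: depending on the Gradio version, the value a gr.Dataframe passes to a callback may arrive as a pandas.DataFrame (or a list of lists) rather than the list of dicts that format_to_markdown iterates over, in which case the rows would need normalizing first, for example with to_dict("records"). The custom-name lookup in the transcription branch also assumes speaker_names is not None, since next(...) over a None value would raise a TypeError.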