Woziii committed on
Commit
0538f07
·
verified ·
1 Parent(s): c61d6fb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -22
app.py CHANGED
@@ -90,17 +90,26 @@ def parse_simplified_diarization(simplified_text):
90
  matches = re.findall(pattern, simplified_text, re.MULTILINE)
91
  return [(speaker, text.strip()) for speaker, text in matches]
92
 
 
 
 
 
 
93
  def process_transcription(*args):
94
  generator = transcribe_and_diarize(*args)
95
  for progress_message, raw_text, speaker_transcription in generator:
96
  pass # Consommer le générateur jusqu'à la fin
97
  simplified_diarization = simplify_diarization_output(speaker_transcription)
98
- return progress_message, raw_text, simplified_diarization
 
 
99
 
100
  def process_yt_transcription(*args):
101
  html_embed, raw_text, speaker_transcription = yt_transcribe(*args)
102
  simplified_diarization = simplify_diarization_output(speaker_transcription)
103
- return html_embed, raw_text, simplified_diarization
 
 
104
 
105
 
106
  # New functions for progress indicator
@@ -168,15 +177,21 @@ def transcribe_and_diarize(file_path, task, progress=gr.Progress()):
168
  progress(1.0, desc="Terminé!")
169
  return "Transcription terminée!", transcription_result['text'], speaker_transcription
170
 
171
- def format_to_markdown(transcription_text, speaker_transcription, audio_duration=None, location=None, speaker_age=None, context=None):
172
  metadata = {
173
  "Date de traitement": datetime.now().strftime('%d/%m/%Y %H:%M'),
174
  "Durée de l'audio": f"{audio_duration} secondes" if audio_duration else "[à remplir]",
175
  "Lieu": location if location else "[non spécifié]",
 
176
  "Âge de l'intervenant": f"{speaker_age} ans" if speaker_age else "[non spécifié]",
177
  "Contexte": context if context else "[non spécifié]"
178
  }
179
 
 
 
 
 
 
180
  metadata_text = "\n".join([f"- **{key}** : '{value}'" for key, value in metadata.items()])
181
 
182
  try:
@@ -186,7 +201,8 @@ def format_to_markdown(transcription_text, speaker_transcription, audio_duration
186
  if isinstance(speaker_transcription, list) and all(isinstance(item, tuple) and len(item) == 2 for item in speaker_transcription):
187
  formatted_transcription = []
188
  for speaker, text in speaker_transcription:
189
- formatted_transcription.append(f"**{speaker}**: {text}")
 
190
  transcription_text = "\n\n".join(formatted_transcription)
191
  else:
192
  raise ValueError("Invalid speaker transcription format")
@@ -368,6 +384,13 @@ with demo:
368
  with gr.Accordion("Métadonnées (optionnel) 📌", open=False):
369
  audio_duration = gr.Textbox(label="⏱️ Durée de l'audio (mm:ss)")
370
  location = gr.Textbox(label="📍 Lieu de l'enregistrement")
 
 
 
 
 
 
 
371
  speaker_age = gr.Number(label="👤 Âge de l'intervenant principal")
372
  context = gr.Textbox(label="📝 Contexte de l'enregistrement")
373
 
@@ -389,6 +412,13 @@ with demo:
389
  with gr.Accordion("Métadonnées (optionnel) 📌", open=False):
390
  mic_audio_duration = gr.Textbox(label="⏱️ Durée de l'enregistrement (mm:ss)")
391
  mic_location = gr.Textbox(label="📍 Lieu de l'enregistrement")
 
 
 
 
 
 
 
392
  mic_speaker_age = gr.Number(label="👤 Âge de l'intervenant principal")
393
  mic_context = gr.Textbox(label="📝 Contexte de l'enregistrement")
394
 
@@ -411,6 +441,13 @@ with demo:
411
  with gr.Accordion("Métadonnées (optionnel) 📌", open=False):
412
  yt_audio_duration = gr.Textbox(label="⏱️ Durée de la vidéo (mm:ss)")
413
  yt_channel = gr.Textbox(label="📺 Nom de la chaîne YouTube")
 
 
 
 
 
 
 
414
  yt_publish_date = gr.Textbox(label="📅 Date de publication")
415
  yt_context = gr.Textbox(label="📝 Contexte de la vidéo")
416
 
@@ -441,41 +478,41 @@ with demo:
441
 
442
  # Connexions des boutons aux fonctions appropriées
443
  transcribe_button.click(
444
- process_transcription,
445
- inputs=[audio_input, task_input],
446
- outputs=[progress_display, raw_output, speaker_output]
447
  )
448
-
449
  format_button.click(
450
  format_to_markdown,
451
- inputs=[raw_output, speaker_output, audio_duration, location, speaker_age, context],
452
  outputs=formatted_output
453
  )
454
-
455
  mic_transcribe_button.click(
456
- process_transcription,
457
- inputs=[mic_input, mic_task_input],
458
- outputs=[mic_progress_display, mic_raw_output, mic_speaker_output]
459
  )
460
-
461
  mic_format_button.click(
462
  format_to_markdown,
463
- inputs=[mic_raw_output, mic_speaker_output, audio_duration, location, speaker_age, context],
464
  outputs=mic_formatted_output
465
  )
466
-
467
  yt_transcribe_button.click(
468
- process_yt_transcription,
469
- inputs=[yt_input, yt_task_input],
470
- outputs=[yt_html_output, yt_raw_output, yt_speaker_output]
471
  )
472
-
473
  yt_format_button.click(
474
  format_to_markdown,
475
- inputs=[yt_raw_output, yt_speaker_output, audio_duration, location, speaker_age, context],
476
  outputs=yt_formatted_output
477
  )
478
-
479
 
480
  if __name__ == "__main__":
481
  demo.queue().launch()
 
90
  matches = re.findall(pattern, simplified_text, re.MULTILINE)
91
  return [(speaker, text.strip()) for speaker, text in matches]
92
 
93
def count_unique_speakers(speaker_transcription):
    """Return the number of distinct speakers in a diarized transcription.

    Accepts either a list of ``(speaker, text)`` tuples or the simplified
    text format, which is first parsed via ``parse_simplified_diarization``.
    """
    if isinstance(speaker_transcription, str):
        speaker_transcription = parse_simplified_diarization(speaker_transcription)
    unique_speakers = {speaker for speaker, _ in speaker_transcription}
    return len(unique_speakers)
97
+
98
def process_transcription(*args):
    """Run transcription + diarization to completion and build UI outputs.

    Consumes the ``transcribe_and_diarize`` generator, keeping only its
    final yield, then derives the simplified diarization plus speaker
    metadata (count and a default name-mapping table) for the Gradio
    output components.

    Returns:
        tuple: (progress_message, raw_text, simplified_diarization,
        num_speakers, speaker_names)

    Raises:
        RuntimeError: if the generator yields nothing (the original code
        would instead fail with an opaque NameError on unbound loop vars).
    """
    final_result = None  # sentinel: detect an empty generator explicitly
    for final_result in transcribe_and_diarize(*args):
        pass  # consume the generator; only the last yield matters
    if final_result is None:
        raise RuntimeError("transcribe_and_diarize produced no output")
    progress_message, raw_text, speaker_transcription = final_result
    simplified_diarization = simplify_diarization_output(speaker_transcription)
    num_speakers = count_unique_speakers(speaker_transcription)
    # Pre-fill the editable table with default names equal to the speaker IDs.
    speaker_names = [
        {"Speaker ID": f"SPEAKER_{i:02d}", "Nom personnalisé": f"SPEAKER_{i:02d}"}
        for i in range(num_speakers)
    ]
    return progress_message, raw_text, simplified_diarization, num_speakers, speaker_names
106
 
107
def process_yt_transcription(*args):
    """Transcribe a YouTube video and prepare the Gradio output values.

    Delegates to ``yt_transcribe``, then derives the simplified
    diarization, the number of distinct speakers, and a default
    speaker-name table for the editable Dataframe component.
    """
    html_embed, raw_text, speaker_transcription = yt_transcribe(*args)
    simplified = simplify_diarization_output(speaker_transcription)
    speaker_count = count_unique_speakers(speaker_transcription)
    default_rows = []
    for idx in range(speaker_count):
        speaker_id = f"SPEAKER_{idx:02d}"
        default_rows.append({"Speaker ID": speaker_id, "Nom personnalisé": speaker_id})
    return html_embed, raw_text, simplified, speaker_count, default_rows
113
 
114
 
115
  # New functions for progress indicator
 
177
  progress(1.0, desc="Terminé!")
178
  return "Transcription terminée!", transcription_result['text'], speaker_transcription
179
 
180
+ def format_to_markdown(transcription_text, speaker_transcription, audio_duration=None, location=None, speaker_age=None, context=None, num_speakers=None, speaker_names=None):
181
  metadata = {
182
  "Date de traitement": datetime.now().strftime('%d/%m/%Y %H:%M'),
183
  "Durée de l'audio": f"{audio_duration} secondes" if audio_duration else "[à remplir]",
184
  "Lieu": location if location else "[non spécifié]",
185
+ "Nombre d'interlocuteurs": num_speakers if num_speakers else "[non spécifié]",
186
  "Âge de l'intervenant": f"{speaker_age} ans" if speaker_age else "[non spécifié]",
187
  "Contexte": context if context else "[non spécifié]"
188
  }
189
 
190
+ # Ajoutez les noms personnalisés des interlocuteurs aux métadonnées
191
+ if speaker_names is not None:
192
+ for row in speaker_names:
193
+ metadata[row["Speaker ID"]] = row["Nom personnalisé"]
194
+
195
  metadata_text = "\n".join([f"- **{key}** : '{value}'" for key, value in metadata.items()])
196
 
197
  try:
 
201
  if isinstance(speaker_transcription, list) and all(isinstance(item, tuple) and len(item) == 2 for item in speaker_transcription):
202
  formatted_transcription = []
203
  for speaker, text in speaker_transcription:
204
+ custom_name = next((row["Nom personnalisé"] for row in speaker_names if row["Speaker ID"] == speaker), speaker)
205
+ formatted_transcription.append(f"**{custom_name}**: {text}")
206
  transcription_text = "\n\n".join(formatted_transcription)
207
  else:
208
  raise ValueError("Invalid speaker transcription format")
 
384
  with gr.Accordion("Métadonnées (optionnel) 📌", open=False):
385
  audio_duration = gr.Textbox(label="⏱️ Durée de l'audio (mm:ss)")
386
  location = gr.Textbox(label="📍 Lieu de l'enregistrement")
387
+ num_speakers = gr.Number(label="Nombre d'interlocuteurs", interactive=False)
388
+ speaker_names = gr.Dataframe(
389
+ headers=["Speaker ID", "Nom personnalisé"],
390
+ datatype=["str", "str"],
391
+ col_count=(2, "fixed"),
392
+ label="Noms des interlocuteurs"
393
+ )
394
  speaker_age = gr.Number(label="👤 Âge de l'intervenant principal")
395
  context = gr.Textbox(label="📝 Contexte de l'enregistrement")
396
 
 
412
  with gr.Accordion("Métadonnées (optionnel) 📌", open=False):
413
  mic_audio_duration = gr.Textbox(label="⏱️ Durée de l'enregistrement (mm:ss)")
414
  mic_location = gr.Textbox(label="📍 Lieu de l'enregistrement")
415
+ mic_num_speakers = gr.Number(label="Nombre d'interlocuteurs", interactive=False)
416
+ mic_speaker_names = gr.Dataframe(
417
+ headers=["Speaker ID", "Nom personnalisé"],
418
+ datatype=["str", "str"],
419
+ col_count=(2, "fixed"),
420
+ label="Noms des interlocuteurs"
421
+ )
422
  mic_speaker_age = gr.Number(label="👤 Âge de l'intervenant principal")
423
  mic_context = gr.Textbox(label="📝 Contexte de l'enregistrement")
424
 
 
441
  with gr.Accordion("Métadonnées (optionnel) 📌", open=False):
442
  yt_audio_duration = gr.Textbox(label="⏱️ Durée de la vidéo (mm:ss)")
443
  yt_channel = gr.Textbox(label="📺 Nom de la chaîne YouTube")
444
+ yt_num_speakers = gr.Number(label="Nombre d'interlocuteurs", interactive=False)
445
+ yt_speaker_names = gr.Dataframe(
446
+ headers=["Speaker ID", "Nom personnalisé"],
447
+ datatype=["str", "str"],
448
+ col_count=(2, "fixed"),
449
+ label="Noms des interlocuteurs"
450
+ )
451
  yt_publish_date = gr.Textbox(label="📅 Date de publication")
452
  yt_context = gr.Textbox(label="📝 Contexte de la vidéo")
453
 
 
478
 
479
  # Connexions des boutons aux fonctions appropriées
480
  transcribe_button.click(
481
+ process_transcription,
482
+ inputs=[audio_input, task_input],
483
+ outputs=[progress_display, raw_output, speaker_output, num_speakers, speaker_names]
484
  )
485
+
486
  format_button.click(
487
  format_to_markdown,
488
+ inputs=[raw_output, speaker_output, audio_duration, location, speaker_age, context, num_speakers, speaker_names],
489
  outputs=formatted_output
490
  )
491
+
492
  mic_transcribe_button.click(
493
+ process_transcription,
494
+ inputs=[mic_input, mic_task_input],
495
+ outputs=[mic_progress_display, mic_raw_output, mic_speaker_output, mic_num_speakers, mic_speaker_names]
496
  )
497
+
498
  mic_format_button.click(
499
  format_to_markdown,
500
+ inputs=[mic_raw_output, mic_speaker_output, mic_audio_duration, mic_location, mic_speaker_age, mic_context, mic_num_speakers, mic_speaker_names],
501
  outputs=mic_formatted_output
502
  )
503
+
504
  yt_transcribe_button.click(
505
+ process_yt_transcription,
506
+ inputs=[yt_input, yt_task_input],
507
+ outputs=[yt_html_output, yt_raw_output, yt_speaker_output, yt_num_speakers, yt_speaker_names]
508
  )
509
+
510
  yt_format_button.click(
511
  format_to_markdown,
512
+ inputs=[yt_raw_output, yt_speaker_output, yt_audio_duration, yt_channel, yt_publish_date, yt_context, yt_num_speakers, yt_speaker_names],
513
  outputs=yt_formatted_output
514
  )
515
+
516
 
517
  if __name__ == "__main__":
518
  demo.queue().launch()