Woziii committed on
Commit
7ef4d9e
·
verified ·
1 Parent(s): b2a20a3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -25
app.py CHANGED
@@ -245,31 +245,46 @@ def transcribe_and_diarize(file_path, task, progress=gr.Progress()):
245
  progress(1.0, desc="Terminé!")
246
  return "Transcription terminée!", transcription_result['text'], speaker_transcription
247
 
248
- def format_to_markdown(transcription_text, speaker_transcription, audio_duration=None, location=None, speaker_age=None, context=None):
249
- metadata = {
250
- "Date de traitement": datetime.now().strftime('%d/%m/%Y %H:%M'),
251
- "Durée de l'audio": f"{audio_duration} secondes" if audio_duration else "[à remplir]",
252
- "Lieu": location if location else "[non spécifié]",
253
- "Âge de l'intervenant": f"{speaker_age} ans" if speaker_age else "[non spécifié]",
254
- "Contexte": context if context else "[non spécifié]"
255
- }
256
-
257
- metadata_text = "\n".join([f"- **{key}** : '{value}'" for key, value in metadata.items()])
258
-
 
 
 
 
 
 
 
 
 
 
259
  try:
260
- if isinstance(speaker_transcription, str):
261
- speaker_transcription = parse_simplified_diarization(speaker_transcription)
262
-
263
- if isinstance(speaker_transcription, list) and all(isinstance(item, tuple) and len(item) == 2 for item in speaker_transcription):
264
- formatted_transcription = []
265
- for speaker, text in speaker_transcription:
266
- formatted_transcription.append(f"**{speaker}**: {text}")
267
- transcription_text = "\n\n".join(formatted_transcription)
268
- else:
269
- raise ValueError("Invalid speaker transcription format")
 
 
 
 
 
270
  except Exception as e:
271
  print(f"Error formatting speaker transcription: {e}")
272
- transcription_text = "Error formatting speaker transcription. Using raw transcription instead.\n\n" + transcription_text
273
 
274
  formatted_output = f"""
275
  # Transcription Formatée
@@ -282,6 +297,9 @@ def format_to_markdown(transcription_text, speaker_transcription, audio_duration
282
  """
283
  return formatted_output
284
 
 
 
 
285
  def _return_yt_html_embed(yt_url):
286
  video_id = yt_url.split("?v=")[-1]
287
  HTML_str = (
@@ -446,6 +464,7 @@ with demo:
446
  audio_duration = gr.Textbox(label="⏱️ Durée de l'audio (mm:ss)")
447
  location = gr.Textbox(label="📍 Lieu de l'enregistrement")
448
  speaker_age = gr.Number(label="👤 Âge de l'intervenant principal")
 
449
  context = gr.Textbox(label="📝 Contexte de l'enregistrement")
450
 
451
  format_button = gr.Button("✨ Générer la transcription formatée", elem_classes="button-secondary")
@@ -524,9 +543,9 @@ with demo:
524
  )
525
 
526
  format_button.click(
527
- format_to_markdown,
528
- inputs=[raw_output, speaker_output, audio_duration, location, speaker_age, context],
529
- outputs=formatted_output
530
  )
531
 
532
  mic_transcribe_button.click(
 
245
  progress(1.0, desc="Terminé!")
246
  return "Transcription terminée!", transcription_result['text'], speaker_transcription
247
 
248
+ def format_to_markdown(transcription_result, audio_duration=None, location=None, speaker_age=None, context=None, custom_speaker_names=None):
249
+ if isinstance(transcription_result, dict):
250
+ metadata = transcription_result.get("metadata", {})
251
+ transcription = transcription_result.get("transcription", [])
252
+ else:
253
+ metadata = {}
254
+ transcription = transcription_result
255
+
256
+ speaker_count = metadata.get("speaker_count", "non spécifié")
257
+ speakers = metadata.get("speakers", [])
258
+
259
+ metadata_text = "\n".join([
260
+ f"- **Date de traitement** : '{datetime.now().strftime('%d/%m/%Y %H:%M')}'",
261
+ f"- **Durée de l'audio** : '{audio_duration if audio_duration else '[à remplir]'} secondes'",
262
+ f"- **Lieu** : '{location if location else '[non spécifié]'}'",
263
+ f"- **Âge de l'intervenant** : '{f'{speaker_age} ans' if speaker_age else '[non spécifié]'}'",
264
+ f"- **Contexte** : '{context if context else '[non spécifié]'}'",
265
+ f"- **Nombre d'interlocuteurs** : '{speaker_count}'",
266
+ f"- **Interlocuteurs bruts** : '{', '.join(speakers)}'"
267
+ ])
268
+
269
  try:
270
+ formatted_transcription = []
271
+ for segment in transcription:
272
+ speaker = segment['speaker']
273
+ text = segment['text']
274
+ start_time = format_time(segment['start'])
275
+ end_time = format_time(segment['end'])
276
+
277
+ if custom_speaker_names and speaker in custom_speaker_names:
278
+ display_speaker = custom_speaker_names[speaker]
279
+ else:
280
+ display_speaker = speaker
281
+
282
+ formatted_transcription.append(f"**[{start_time} - {end_time}] {display_speaker}**: {text}")
283
+
284
+ transcription_text = "\n\n".join(formatted_transcription)
285
  except Exception as e:
286
  print(f"Error formatting speaker transcription: {e}")
287
+ transcription_text = "Error formatting speaker transcription. Using raw transcription instead.\n\n" + str(transcription)
288
 
289
  formatted_output = f"""
290
  # Transcription Formatée
 
297
  """
298
  return formatted_output
299
 
300
def format_time(seconds):
    """Format a duration in seconds as a zero-padded ``MM:SS`` string.

    Minutes are not wrapped at 60, so one hour renders as ``60:00``.
    Fractional seconds are truncated, not rounded.

    Args:
        seconds: Duration in seconds (int or float). ``None``, NaN,
            infinity and non-numeric values are tolerated.

    Returns:
        The ``MM:SS`` string, or ``"--:--"`` when ``seconds`` cannot be
        interpreted as a finite number. Negative values are clamped to
        ``"00:00"``.
    """
    # The transcript formatter wraps its whole loop in a blanket ``except``,
    # so a single unusable timestamp would otherwise discard every segment.
    # NOTE(review): segment boundaries may presumably be None when the
    # upstream recognizer cannot close the last chunk - confirm.
    try:
        whole_seconds = int(seconds)
    except (TypeError, ValueError, OverflowError):
        return "--:--"

    # A negative offset is meaningless here; floor division would render it
    # as something like "-1:55".
    whole_seconds = max(whole_seconds, 0)

    minutes, remainder = divmod(whole_seconds, 60)
    return f"{minutes:02d}:{remainder:02d}"
302
+
303
  def _return_yt_html_embed(yt_url):
304
  video_id = yt_url.split("?v=")[-1]
305
  HTML_str = (
 
464
  audio_duration = gr.Textbox(label="⏱️ Durée de l'audio (mm:ss)")
465
  location = gr.Textbox(label="📍 Lieu de l'enregistrement")
466
  speaker_age = gr.Number(label="👤 Âge de l'intervenant principal")
467
+ custom_speaker_names = gr.TextArea(label="Noms personnalisés des locuteurs (format: SPEAKER_00: Nom1, SPEAKER_01: Nom2)")
468
  context = gr.Textbox(label="📝 Contexte de l'enregistrement")
469
 
470
  format_button = gr.Button("✨ Générer la transcription formatée", elem_classes="button-secondary")
 
543
  )
544
 
545
  format_button.click(
546
+ format_to_markdown,
547
+ inputs=[raw_output, speaker_output, audio_duration, location, speaker_age, context, custom_speaker_names],
548
+ outputs=formatted_output
549
  )
550
 
551
  mic_transcribe_button.click(