Spaces:

Woziii
/

scribe

Running

App Files Community

Woziii committed on Aug 20, 2024

Commit

7ef4d9e

verified ·

1 Parent(s): b2a20a3

Update app.py

Browse files

Files changed (1) hide show

app.py +44 -25

app.py CHANGED Viewed

@@ -245,31 +245,46 @@ def transcribe_and_diarize(file_path, task, progress=gr.Progress()):
     progress(1.0, desc="Terminé!")
     return "Transcription terminée!", transcription_result['text'], speaker_transcription
-def format_to_markdown(transcription_text, speaker_transcription, audio_duration=None, location=None, speaker_age=None, context=None):
-    metadata = {
-        "Date de traitement": datetime.now().strftime('%d/%m/%Y %H:%M'),
-        "Durée de l'audio": f"{audio_duration} secondes" if audio_duration else "[à remplir]",
-        "Lieu": location if location else "[non spécifié]",
-        "Âge de l'intervenant": f"{speaker_age} ans" if speaker_age else "[non spécifié]",
-        "Contexte": context if context else "[non spécifié]"
-    }
-    metadata_text = "\n".join([f"- **{key}** : '{value}'" for key, value in metadata.items()])
     try:
-        if isinstance(speaker_transcription, str):
-            speaker_transcription = parse_simplified_diarization(speaker_transcription)
-        if isinstance(speaker_transcription, list) and all(isinstance(item, tuple) and len(item) == 2 for item in speaker_transcription):
-            formatted_transcription = []
-            for speaker, text in speaker_transcription:
-                formatted_transcription.append(f"**{speaker}**: {text}")
-            transcription_text = "\n\n".join(formatted_transcription)
-        else:
-            raise ValueError("Invalid speaker transcription format")
     except Exception as e:
         print(f"Error formatting speaker transcription: {e}")
-        transcription_text = "Error formatting speaker transcription. Using raw transcription instead.\n\n" + transcription_text
     formatted_output = f"""
 # Transcription Formatée
@@ -282,6 +297,9 @@ def format_to_markdown(transcription_text, speaker_transcription, audio_duration
 """
     return formatted_output
 def _return_yt_html_embed(yt_url):
     video_id = yt_url.split("?v=")[-1]
     HTML_str = (
@@ -446,6 +464,7 @@ with demo:
                 audio_duration = gr.Textbox(label="⏱️ Durée de l'audio (mm:ss)")
                 location = gr.Textbox(label="📍 Lieu de l'enregistrement")
                 speaker_age = gr.Number(label="👤 Âge de l'intervenant principal")
                 context = gr.Textbox(label="📝 Contexte de l'enregistrement")
             format_button = gr.Button("✨ Générer la transcription formatée", elem_classes="button-secondary")
@@ -524,9 +543,9 @@ with demo:
     )
     format_button.click(
-        format_to_markdown,
-        inputs=[raw_output, speaker_output, audio_duration, location, speaker_age, context],
-        outputs=formatted_output
     )
     mic_transcribe_button.click(

     progress(1.0, desc="Terminé!")
     return "Transcription terminée!", transcription_result['text'], speaker_transcription
+def format_to_markdown(transcription_result, audio_duration=None, location=None, speaker_age=None, context=None, custom_speaker_names=None):
+    if isinstance(transcription_result, dict):
+        metadata = transcription_result.get("metadata", {})
+        transcription = transcription_result.get("transcription", [])
+    else:
+        metadata = {}
+        transcription = transcription_result
+    speaker_count = metadata.get("speaker_count", "non spécifié")
+    speakers = metadata.get("speakers", [])
+    metadata_text = "\n".join([
+        f"- **Date de traitement** : '{datetime.now().strftime('%d/%m/%Y %H:%M')}'",
+        f"- **Durée de l'audio** : '{audio_duration if audio_duration else '[à remplir]'} secondes'",
+        f"- **Lieu** : '{location if location else '[non spécifié]'}'",
+        f"- **Âge de l'intervenant** : '{f'{speaker_age} ans' if speaker_age else '[non spécifié]'}'",
+        f"- **Contexte** : '{context if context else '[non spécifié]'}'",
+        f"- **Nombre d'interlocuteurs** : '{speaker_count}'",
+        f"- **Interlocuteurs bruts** : '{', '.join(speakers)}'"
+    ])
     try:
+        formatted_transcription = []
+        for segment in transcription:
+            speaker = segment['speaker']
+            text = segment['text']
+            start_time = format_time(segment['start'])
+            end_time = format_time(segment['end'])
+            if custom_speaker_names and speaker in custom_speaker_names:
+                display_speaker = custom_speaker_names[speaker]
+            else:
+                display_speaker = speaker
+            formatted_transcription.append(f"**[{start_time} - {end_time}] {display_speaker}**: {text}")
+        transcription_text = "\n\n".join(formatted_transcription)
     except Exception as e:
         print(f"Error formatting speaker transcription: {e}")
+        transcription_text = "Error formatting speaker transcription. Using raw transcription instead.\n\n" + str(transcription)
     formatted_output = f"""
 # Transcription Formatée
 """
     return formatted_output
+def format_time(seconds):
+    return f"{int(seconds // 60):02d}:{int(seconds % 60):02d}"
 def _return_yt_html_embed(yt_url):
     video_id = yt_url.split("?v=")[-1]
     HTML_str = (
                 audio_duration = gr.Textbox(label="⏱️ Durée de l'audio (mm:ss)")
                 location = gr.Textbox(label="📍 Lieu de l'enregistrement")
                 speaker_age = gr.Number(label="👤 Âge de l'intervenant principal")
+                custom_speaker_names = gr.TextArea(label="Noms personnalisés des locuteurs (format: SPEAKER_00: Nom1, SPEAKER_01: Nom2)")
                 context = gr.Textbox(label="📝 Contexte de l'enregistrement")
             format_button = gr.Button("✨ Générer la transcription formatée", elem_classes="button-secondary")
     )
     format_button.click(
+    format_to_markdown,
+    inputs=[raw_output, speaker_output, audio_duration, location, speaker_age, context, custom_speaker_names],
+    outputs=formatted_output
     )
     mic_transcribe_button.click(