Update app.py
Browse files
app.py
CHANGED
@@ -245,31 +245,46 @@ def transcribe_and_diarize(file_path, task, progress=gr.Progress()):
|
|
245 |
progress(1.0, desc="Terminé!")
|
246 |
return "Transcription terminée!", transcription_result['text'], speaker_transcription
|
247 |
|
248 |
-
def format_to_markdown(
|
249 |
-
|
250 |
-
|
251 |
-
|
252 |
-
|
253 |
-
|
254 |
-
|
255 |
-
|
256 |
-
|
257 |
-
|
258 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
259 |
try:
|
260 |
-
|
261 |
-
|
262 |
-
|
263 |
-
|
264 |
-
|
265 |
-
|
266 |
-
|
267 |
-
|
268 |
-
|
269 |
-
|
|
|
|
|
|
|
|
|
|
|
270 |
except Exception as e:
|
271 |
print(f"Error formatting speaker transcription: {e}")
|
272 |
-
transcription_text = "Error formatting speaker transcription. Using raw transcription instead.\n\n" +
|
273 |
|
274 |
formatted_output = f"""
|
275 |
# Transcription Formatée
|
@@ -282,6 +297,9 @@ def format_to_markdown(transcription_text, speaker_transcription, audio_duration
|
|
282 |
"""
|
283 |
return formatted_output
|
284 |
|
|
|
|
|
|
|
285 |
def _return_yt_html_embed(yt_url):
|
286 |
video_id = yt_url.split("?v=")[-1]
|
287 |
HTML_str = (
|
@@ -446,6 +464,7 @@ with demo:
|
|
446 |
audio_duration = gr.Textbox(label="⏱️ Durée de l'audio (mm:ss)")
|
447 |
location = gr.Textbox(label="📍 Lieu de l'enregistrement")
|
448 |
speaker_age = gr.Number(label="👤 Âge de l'intervenant principal")
|
|
|
449 |
context = gr.Textbox(label="📝 Contexte de l'enregistrement")
|
450 |
|
451 |
format_button = gr.Button("✨ Générer la transcription formatée", elem_classes="button-secondary")
|
@@ -524,9 +543,9 @@ with demo:
|
|
524 |
)
|
525 |
|
526 |
format_button.click(
|
527 |
-
|
528 |
-
|
529 |
-
|
530 |
)
|
531 |
|
532 |
mic_transcribe_button.click(
|
|
|
245 |
progress(1.0, desc="Terminé!")
|
246 |
return "Transcription terminée!", transcription_result['text'], speaker_transcription
|
247 |
|
248 |
+
def format_to_markdown(transcription_result, audio_duration=None, location=None, speaker_age=None, context=None, custom_speaker_names=None):
|
249 |
+
if isinstance(transcription_result, dict):
|
250 |
+
metadata = transcription_result.get("metadata", {})
|
251 |
+
transcription = transcription_result.get("transcription", [])
|
252 |
+
else:
|
253 |
+
metadata = {}
|
254 |
+
transcription = transcription_result
|
255 |
+
|
256 |
+
speaker_count = metadata.get("speaker_count", "non spécifié")
|
257 |
+
speakers = metadata.get("speakers", [])
|
258 |
+
|
259 |
+
metadata_text = "\n".join([
|
260 |
+
f"- **Date de traitement** : '{datetime.now().strftime('%d/%m/%Y %H:%M')}'",
|
261 |
+
f"- **Durée de l'audio** : '{audio_duration if audio_duration else '[à remplir]'} secondes'",
|
262 |
+
f"- **Lieu** : '{location if location else '[non spécifié]'}'",
|
263 |
+
f"- **Âge de l'intervenant** : '{f'{speaker_age} ans' if speaker_age else '[non spécifié]'}'",
|
264 |
+
f"- **Contexte** : '{context if context else '[non spécifié]'}'",
|
265 |
+
f"- **Nombre d'interlocuteurs** : '{speaker_count}'",
|
266 |
+
f"- **Interlocuteurs bruts** : '{', '.join(speakers)}'"
|
267 |
+
])
|
268 |
+
|
269 |
try:
|
270 |
+
formatted_transcription = []
|
271 |
+
for segment in transcription:
|
272 |
+
speaker = segment['speaker']
|
273 |
+
text = segment['text']
|
274 |
+
start_time = format_time(segment['start'])
|
275 |
+
end_time = format_time(segment['end'])
|
276 |
+
|
277 |
+
if custom_speaker_names and speaker in custom_speaker_names:
|
278 |
+
display_speaker = custom_speaker_names[speaker]
|
279 |
+
else:
|
280 |
+
display_speaker = speaker
|
281 |
+
|
282 |
+
formatted_transcription.append(f"**[{start_time} - {end_time}] {display_speaker}**: {text}")
|
283 |
+
|
284 |
+
transcription_text = "\n\n".join(formatted_transcription)
|
285 |
except Exception as e:
|
286 |
print(f"Error formatting speaker transcription: {e}")
|
287 |
+
transcription_text = "Error formatting speaker transcription. Using raw transcription instead.\n\n" + str(transcription)
|
288 |
|
289 |
formatted_output = f"""
|
290 |
# Transcription Formatée
|
|
|
297 |
"""
|
298 |
return formatted_output
|
299 |
|
300 |
+
def format_time(seconds):
    """Render an offset in seconds as a zero-padded ``MM:SS`` string.

    Minutes are not wrapped at 60, so offsets of an hour or more are shown
    as e.g. ``75:30``. Fractional seconds are truncated, not rounded.

    Args:
        seconds: Offset from the start of the audio, as an int or float.

    Returns:
        The offset formatted as ``MM:SS``.
    """
    # A negative offset would otherwise floor-divide into a stamp such as
    # "-1:55"; clamp it to the start of the recording instead.
    if seconds < 0:
        seconds = 0
    minutes, remainder = divmod(seconds, 60)
    return f"{int(minutes):02d}:{int(remainder):02d}"
|
302 |
+
|
303 |
def _return_yt_html_embed(yt_url):
|
304 |
video_id = yt_url.split("?v=")[-1]
|
305 |
HTML_str = (
|
|
|
464 |
audio_duration = gr.Textbox(label="⏱️ Durée de l'audio (mm:ss)")
|
465 |
location = gr.Textbox(label="📍 Lieu de l'enregistrement")
|
466 |
speaker_age = gr.Number(label="👤 Âge de l'intervenant principal")
|
467 |
+
custom_speaker_names = gr.TextArea(label="Noms personnalisés des locuteurs (format: SPEAKER_00: Nom1, SPEAKER_01: Nom2)")
|
468 |
context = gr.Textbox(label="📝 Contexte de l'enregistrement")
|
469 |
|
470 |
format_button = gr.Button("✨ Générer la transcription formatée", elem_classes="button-secondary")
|
|
|
543 |
)
|
544 |
|
545 |
# Gradio passes one positional argument per input component, so this list
# must line up with format_to_markdown's six parameters:
# (transcription_result, audio_duration, location, speaker_age, context,
#  custom_speaker_names).
# The former leading `raw_output` entry belonged to the old two-argument
# signature and shifted every value by one position, so it is dropped.
# NOTE(review): speaker_output is assumed to carry the diarized segments
# that format_to_markdown iterates over - confirm against its component type.
# NOTE(review): custom_speaker_names is a TextArea and therefore arrives as a
# plain string, while format_to_markdown indexes it by speaker label like a
# mapping - the text likely needs parsing before use; confirm.
format_button.click(
    format_to_markdown,
    inputs=[
        speaker_output,
        audio_duration,
        location,
        speaker_age,
        context,
        custom_speaker_names,
    ],
    outputs=formatted_output,
)
|
550 |
|
551 |
mic_transcribe_button.click(
|