Update app.py
Browse files
app.py
CHANGED
@@ -101,16 +101,13 @@ def process_transcription(*args):
|
|
101 |
pass # Consommer le générateur jusqu'à la fin
|
102 |
simplified_diarization = simplify_diarization_output(speaker_transcription)
|
103 |
num_speakers = count_unique_speakers(speaker_transcription)
|
104 |
-
# Modifier cette ligne pour créer une liste de listes au lieu d'une liste de dictionnaires
|
105 |
speaker_names = [[f"SPEAKER_{i:02d}", f"SPEAKER_{i:02d}"] for i in range(num_speakers)]
|
106 |
return progress_message, raw_text, simplified_diarization, num_speakers, speaker_names
|
107 |
|
108 |
-
# Faire la même modification dans process_yt_transcription
|
109 |
def process_yt_transcription(*args):
|
110 |
html_embed, raw_text, speaker_transcription = yt_transcribe(*args)
|
111 |
simplified_diarization = simplify_diarization_output(speaker_transcription)
|
112 |
num_speakers = count_unique_speakers(speaker_transcription)
|
113 |
-
# Modifier cette ligne de la même manière
|
114 |
speaker_names = [[f"SPEAKER_{i:02d}", f"SPEAKER_{i:02d}"] for i in range(num_speakers)]
|
115 |
return html_embed, raw_text, simplified_diarization, num_speakers, speaker_names
|
116 |
|
@@ -190,10 +187,14 @@ def format_to_markdown(transcription_text, speaker_transcription, audio_duration
|
|
190 |
"Contexte": context if context else "[non spécifié]"
|
191 |
}
|
192 |
|
193 |
-
#
|
194 |
if speaker_names is not None:
|
195 |
-
|
196 |
-
|
|
|
|
|
|
|
|
|
197 |
|
198 |
metadata_text = "\n".join([f"- **{key}** : '{value}'" for key, value in metadata.items()])
|
199 |
|
@@ -204,7 +205,7 @@ def format_to_markdown(transcription_text, speaker_transcription, audio_duration
|
|
204 |
if isinstance(speaker_transcription, list) and all(isinstance(item, tuple) and len(item) == 2 for item in speaker_transcription):
|
205 |
formatted_transcription = []
|
206 |
for speaker, text in speaker_transcription:
|
207 |
-
custom_name = next((
|
208 |
formatted_transcription.append(f"**{custom_name}**: {text}")
|
209 |
transcription_text = "\n\n".join(formatted_transcription)
|
210 |
else:
|
@@ -223,7 +224,7 @@ def format_to_markdown(transcription_text, speaker_transcription, audio_duration
|
|
223 |
{transcription_text}
|
224 |
"""
|
225 |
return formatted_output
|
226 |
-
|
227 |
def _return_yt_html_embed(yt_url):
|
228 |
video_id = yt_url.split("?v=")[-1]
|
229 |
HTML_str = (
|
|
|
101 |
pass # Consommer le générateur jusqu'à la fin
|
102 |
simplified_diarization = simplify_diarization_output(speaker_transcription)
|
103 |
num_speakers = count_unique_speakers(speaker_transcription)
|
|
|
104 |
speaker_names = [[f"SPEAKER_{i:02d}", f"SPEAKER_{i:02d}"] for i in range(num_speakers)]
|
105 |
return progress_message, raw_text, simplified_diarization, num_speakers, speaker_names
|
106 |
|
|
|
107 |
def process_yt_transcription(*args):
|
108 |
html_embed, raw_text, speaker_transcription = yt_transcribe(*args)
|
109 |
simplified_diarization = simplify_diarization_output(speaker_transcription)
|
110 |
num_speakers = count_unique_speakers(speaker_transcription)
|
|
|
111 |
speaker_names = [[f"SPEAKER_{i:02d}", f"SPEAKER_{i:02d}"] for i in range(num_speakers)]
|
112 |
return html_embed, raw_text, simplified_diarization, num_speakers, speaker_names
|
113 |
|
|
|
187 |
"Contexte": context if context else "[non spécifié]"
|
188 |
}
|
189 |
|
190 |
+
# Gestion plus robuste de speaker_names
|
191 |
if speaker_names is not None:
|
192 |
+
if isinstance(speaker_names, list):
|
193 |
+
for item in speaker_names:
|
194 |
+
if isinstance(item, (list, tuple)) and len(item) == 2:
|
195 |
+
metadata[item[0]] = item[1]
|
196 |
+
elif isinstance(item, dict) and "Speaker ID" in item and "Nom personnalisé" in item:
|
197 |
+
metadata[item["Speaker ID"]] = item["Nom personnalisé"]
|
198 |
|
199 |
metadata_text = "\n".join([f"- **{key}** : '{value}'" for key, value in metadata.items()])
|
200 |
|
|
|
205 |
if isinstance(speaker_transcription, list) and all(isinstance(item, tuple) and len(item) == 2 for item in speaker_transcription):
|
206 |
formatted_transcription = []
|
207 |
for speaker, text in speaker_transcription:
|
208 |
+
custom_name = next((item[1] for item in speaker_names if item[0] == speaker), speaker) if isinstance(speaker_names, list) else speaker
|
209 |
formatted_transcription.append(f"**{custom_name}**: {text}")
|
210 |
transcription_text = "\n\n".join(formatted_transcription)
|
211 |
else:
|
|
|
224 |
{transcription_text}
|
225 |
"""
|
226 |
return formatted_output
|
227 |
+
|
228 |
def _return_yt_html_embed(yt_url):
|
229 |
video_id = yt_url.split("?v=")[-1]
|
230 |
HTML_str = (
|