Update app.py
Browse files
app.py
CHANGED
@@ -180,21 +180,24 @@ def transcribe_and_diarize(file_path, task, progress=gr.Progress()):
|
|
180 |
def format_to_markdown(transcription_text, speaker_transcription, audio_duration=None, location=None, speaker_age=None, context=None, num_speakers=None, speaker_names=None):
|
181 |
metadata = {
|
182 |
"Date de traitement": datetime.now().strftime('%d/%m/%Y %H:%M'),
|
183 |
-
"Durée de l'audio": f"{audio_duration}
|
184 |
"Lieu": location if location else "[non spécifié]",
|
185 |
"Nombre d'interlocuteurs": num_speakers if num_speakers else "[non spécifié]",
|
186 |
"Âge de l'intervenant": f"{speaker_age} ans" if speaker_age else "[non spécifié]",
|
187 |
"Contexte": context if context else "[non spécifié]"
|
188 |
}
|
189 |
|
190 |
-
#
|
|
|
191 |
if speaker_names is not None:
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
|
|
|
|
198 |
|
199 |
metadata_text = "\n".join([f"- **{key}** : '{value}'" for key, value in metadata.items()])
|
200 |
|
@@ -202,14 +205,11 @@ def format_to_markdown(transcription_text, speaker_transcription, audio_duration
|
|
202 |
if isinstance(speaker_transcription, str):
|
203 |
speaker_transcription = parse_simplified_diarization(speaker_transcription)
|
204 |
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
transcription_text = "\n\n".join(formatted_transcription)
|
211 |
-
else:
|
212 |
-
raise ValueError("Invalid speaker transcription format")
|
213 |
except Exception as e:
|
214 |
print(f"Error formatting speaker transcription: {e}")
|
215 |
transcription_text = "Error formatting speaker transcription. Using raw transcription instead.\n\n" + transcription_text
|
|
|
180 |
def format_to_markdown(transcription_text, speaker_transcription, audio_duration=None, location=None, speaker_age=None, context=None, num_speakers=None, speaker_names=None):
|
181 |
metadata = {
|
182 |
"Date de traitement": datetime.now().strftime('%d/%m/%Y %H:%M'),
|
183 |
+
"Durée de l'audio": f"{audio_duration}" if audio_duration else "[à remplir]",
|
184 |
"Lieu": location if location else "[non spécifié]",
|
185 |
"Nombre d'interlocuteurs": num_speakers if num_speakers else "[non spécifié]",
|
186 |
"Âge de l'intervenant": f"{speaker_age} ans" if speaker_age else "[non spécifié]",
|
187 |
"Contexte": context if context else "[non spécifié]"
|
188 |
}
|
189 |
|
190 |
+
# Créer un dictionnaire pour les noms personnalisés
|
191 |
+
custom_names = {}
|
192 |
if speaker_names is not None:
|
193 |
+
for row in speaker_names:
|
194 |
+
if isinstance(row, dict):
|
195 |
+
speaker_id = row.get("Speaker ID")
|
196 |
+
custom_name = row.get("Nom personnalisé")
|
197 |
+
if speaker_id and custom_name:
|
198 |
+
custom_names[speaker_id] = custom_name
|
199 |
+
elif isinstance(row, (list, tuple)) and len(row) == 2:
|
200 |
+
custom_names[row[0]] = row[1]
|
201 |
|
202 |
metadata_text = "\n".join([f"- **{key}** : '{value}'" for key, value in metadata.items()])
|
203 |
|
|
|
205 |
if isinstance(speaker_transcription, str):
|
206 |
speaker_transcription = parse_simplified_diarization(speaker_transcription)
|
207 |
|
208 |
+
formatted_transcription = []
|
209 |
+
for speaker, text in speaker_transcription:
|
210 |
+
custom_name = custom_names.get(speaker, speaker)
|
211 |
+
formatted_transcription.append(f"**{custom_name}**: {text}")
|
212 |
+
transcription_text = "\n\n".join(formatted_transcription)
|
|
|
|
|
|
|
213 |
except Exception as e:
|
214 |
print(f"Error formatting speaker transcription: {e}")
|
215 |
transcription_text = "Error formatting speaker transcription. Using raw transcription instead.\n\n" + transcription_text
|