Spaces:

Delik
/

pyannote-speaker-diarization-3.1

Running on Zero

App Files Files Community

poiqazwsx committed on May 6, 2024

Commit

9ecee17

verified ·

1 Parent(s): a599ac3

Update app.py

Browse files

Added a labelled-audio text file (labels.txt) that can be imported into a DAW such as Audacity.
![image.png](https://cdn-uploads.huggingface.co/production/uploads/64dcc1db4e2c7863c308bd88/hbbYVo2pFiXibfVhtzYoO.png)

Files changed (1) hide show

app.py +52 -9

app.py CHANGED Viewed

@@ -55,6 +55,49 @@ def diarize_audio(temp_file, num_speakers, min_speakers, max_speakers):
     # Return the diarization output
     return str(diarization)
 with gr.Blocks() as demo:
     gr.Markdown("""
@@ -69,18 +112,18 @@ with gr.Blocks() as demo:
     If you find this space helpful, please ❤ it.
     """)
-    audio_input = gr.Audio(type="filepath", label="Upload Audio")
-    num_speakers_input = gr.Number(label="Number of Speakers (The maximum number of speakers to detect)", value=0)
-    min_speakers_input = gr.Number(label="Minimum Number of Speakers (The maximum number of speakers to detect)", value=0)
-    max_speakers_input = gr.Number(label="Maximum Number of Speakers (The maximum number of speakers to detect)", value=0)
     process_button = gr.Button("Process")
     diarization_output = gr.Textbox(label="Diarization Output")
     process_button.click(
-        fn=lambda audio, num_speakers, min_speakers, max_speakers:
-            diarize_audio(save_audio(audio), num_speakers, min_speakers, max_speakers),
         inputs=[audio_input, num_speakers_input, min_speakers_input, max_speakers_input],
-        outputs=diarization_output
-    )
 demo.launch()

     # Return the diarization output
     return str(diarization)
+def timestamp_to_seconds(timestamp):
+    # Convert a colon-separated "H:M:S" timestamp string into total seconds.
+    # Each of the three fields is parsed as a float, so fractional seconds
+    # such as "00:01:02.5" are accepted.
+    # Returns the total as a float. When the string does not split into
+    # exactly three numeric fields, the error is printed and None is returned
+    # instead of raising, so callers must handle a None result.
+    try:
+        # Extracts hour, minute, and second from timestamp and converts to total seconds
+        h, m, s = map(float, timestamp.split(':'))
+        return 3600 * h + 60 * m + s
+    except ValueError as e:
+        # Covers both a non-numeric field and a wrong number of fields
+        # (tuple unpacking also raises ValueError).
+        print(f"Error converting timestamp to seconds: '{timestamp}'. Error: {e}")
+        return None
+def generate_labels_from_diarization(diarization_output):
+    # Parse the diarization text line by line and write one tab-separated
+    # "start<TAB>end<TAB>label" row per line to 'labels.txt', with start and
+    # end expressed in seconds.
+    # Returns the path 'labels.txt' when at least one line was processed,
+    # otherwise None. Also returns None if the file cannot be written.
+    # Presumably each input line looks like
+    # "[ HH:MM:SS.mmm -->  HH:MM:SS.mmm] X SPEAKER_NN" -- TODO confirm against
+    # the string returned by diarize_audio.
+    successful_lines = 0  # Counter for successfully processed lines
+    # NOTE(review): fixed relative path opened in 'w' mode, so every call
+    # overwrites the file written by the previous call.
+    labels_path = 'labels.txt'
+    try:
+        with open(labels_path, 'w') as outfile:
+            lines = diarization_output.strip().split('\n')
+            for line in lines:
+                try:
+                    # [1:-1] drops the first and last character of the line
+                    # (the leading '['; the last character is discarded too,
+                    # which is harmless because only the timestamps are read
+                    # from `parts`).
+                    parts = line.strip()[1:-1].split(' --> ')
+                    start_time = parts[0].strip()
+                    # Keep only the text before the closing ']' of the interval.
+                    end_time = parts[1].split(']')[0].strip()
+                    label = line.split()[-1].strip()  # Extracting the last word as label
+                    start_seconds = timestamp_to_seconds(start_time)
+                    end_seconds = timestamp_to_seconds(end_time)
+                    # NOTE(review): timestamp_to_seconds returns None on a bad
+                    # timestamp; that is written here as the text "None" and
+                    # the line is still counted as successful.
+                    outfile.write(f"{start_seconds}\t{end_seconds}\t{label}\n")
+                    successful_lines += 1
+                except Exception as e:
+                    # A malformed line (e.g. no ' --> ' separator, or an empty
+                    # line) is reported and skipped; processing continues.
+                    print(f"Error processing line: '{line.strip()}'. Error: {e}")
+        print(f"Processed {successful_lines} lines successfully.")
+        return labels_path if successful_lines > 0 else None
+    except Exception as e:
+        # Reached when opening or writing the file itself fails.
+        print(f"Cannot write to file '{labels_path}'. Error: {e}")
+        return None
+def process_audio(audio, num_speakers, min_speakers, max_speakers):
+    # Run diarization on the given audio and, on success, also build the
+    # label file from the diarization text.
+    # Returns a 2-tuple: (diarization text, label file path or None).
+    # save_audio and diarize_audio are defined elsewhere in app.py.
+    diarization_result = diarize_audio(save_audio(audio), num_speakers, min_speakers, max_speakers)
+    # Presumably diarize_audio reports failures as a string beginning with
+    # "Error" -- TODO confirm against its implementation.
+    if diarization_result.startswith("Error"):
+        return diarization_result, None  # Return None for label file link if there's an error
+    else:
+        # label_file may still be None when no line could be processed.
+        label_file = generate_labels_from_diarization(diarization_result)
+        return diarization_result, label_file
 with gr.Blocks() as demo:
     gr.Markdown("""
     If you find this space helpful, please ❤ it.
     """)
+    audio_input = gr.Audio(type="filepath", label="Upload Audio")
+    num_speakers_input = gr.Number(label="Number of Speakers", value=0)
+    min_speakers_input = gr.Number(label="Minimum Number of Speakers", value=0)
+    max_speakers_input = gr.Number(label="Maximum Number of Speakers", value=0)
     process_button = gr.Button("Process")
     diarization_output = gr.Textbox(label="Diarization Output")
+    label_file_link = gr.File(label="Download Audacity Labels")
+    # Pass the four inputs straight to process_audio, which returns the diarization text and the label file path
     process_button.click(
+        fn=process_audio,
         inputs=[audio_input, num_speakers_input, min_speakers_input, max_speakers_input],
+        outputs=[diarization_output, label_file_link]
+)
 demo.launch()