Spaces:

rafaaa2105
/

speaker_diarization

Runtime error

App Files Files Community

speaker_diarization / app.py

rafaaa2105

Update app.py

52d4965 verified over 1 year ago

raw

history blame contribute delete

2.91 kB

	import gradio as gr
	import zipfile
	import os
	import shutil
	from pyannote.audio import Pipeline
	import torch

	# Working directory that uploaded archives are extracted into.
	TEMP_DIR = "temp_audio"
	os.makedirs(TEMP_DIR, exist_ok=True)

	# Load the pretrained pyannote.audio diarization pipeline once at start-up.
	# The Hugging Face access token is read from the HF_TOKEN environment variable.
	pipeline = Pipeline.from_pretrained(
	    "pyannote/speaker-diarization-3.1",
	    use_auth_token=os.getenv("HF_TOKEN"),
	)

	# pyannote.audio 3.x reports a failed download of a gated/private pipeline by
	# returning None instead of raising. Fail here with an actionable message
	# rather than with an unrelated AttributeError/TypeError later on.
	if pipeline is None:
	    raise RuntimeError(
	        "Could not load 'pyannote/speaker-diarization-3.1'. Make sure the "
	        "HF_TOKEN environment variable holds a valid Hugging Face access "
	        "token and that the model's user conditions have been accepted."
	    )

	# Run inference on the GPU when one is available.
	if torch.cuda.is_available():
	    pipeline.to(torch.device("cuda"))

	def _dominant_speaker_group(diarization):
	    """Return the output group (1 or 2) for one diarized audio file.

	    Speech time is summed per speaker label. The label that sorts first
	    (e.g. ``"SPEAKER_00"``) counts as speaker 1; the file goes to group 1
	    when that speaker talks at least as long as every other speaker, and to
	    group 2 otherwise. A file with no detected speech goes to group 1, which
	    mirrors the original ``>=`` tie-break.

	    Args:
	        diarization: Object exposing ``itertracks(yield_label=True)`` that
	            yields ``(turn, track, label)`` tuples where ``turn.duration``
	            is the length of the speech turn.

	    Returns:
	        ``1`` or ``2``.
	    """
	    durations = {}
	    for turn, _, label in diarization.itertracks(yield_label=True):
	        # Labels are whatever the pipeline emits (strings for pyannote), so
	        # accumulate with .get() instead of indexing pre-seeded integer keys.
	        durations[label] = durations.get(label, 0.0) + turn.duration

	    if not durations:
	        return 1

	    first_label, *other_labels = sorted(durations, key=str)
	    first_total = durations[first_label]
	    if all(first_total >= durations[label] for label in other_labels):
	        return 1
	    return 2


	def _zip_directory(directory, archive_path):
	    """Write every file under *directory* into a new ZIP at *archive_path*.

	    Entry names are relative to the parent of *directory*, so they keep the
	    directory's own name as a prefix (e.g. ``speaker1/a.wav``).
	    """
	    parent = os.path.join(directory, '..')
	    with zipfile.ZipFile(archive_path, 'w', zipfile.ZIP_DEFLATED) as archive:
	        for root, _dirs, files in os.walk(directory):
	            for name in files:
	                full_path = os.path.join(root, name)
	                archive.write(full_path, os.path.relpath(full_path, parent))


	def process_audio_zip(file_info):
	    """Sort the WAV files of an uploaded ZIP into two per-speaker ZIP files.

	    The archive is extracted into ``TEMP_DIR``; every ``.wav`` file found at
	    the top level of that directory is run through the module-level
	    diarization ``pipeline`` and moved into a ``speaker1`` or ``speaker2``
	    folder according to which speaker dominates it. Each folder is then
	    zipped and ``TEMP_DIR`` is removed, whether or not processing succeeded.

	    NOTE(review): diarization runs independently per file, so speaker labels
	    are presumably not tied to the same person across files -- confirm that
	    grouping by per-file label order is the intended behaviour.

	    Args:
	        file_info: Path (or file-like object) of the uploaded ZIP archive,
	            as accepted by ``zipfile.ZipFile``.

	    Returns:
	        Tuple ``("speaker1.zip", "speaker2.zip")`` -- paths of the two result
	        archives, written to the current working directory.

	    Raises:
	        zipfile.BadZipFile: If *file_info* is not a valid ZIP archive.
	    """
	    speaker_dirs = {
	        1: os.path.join(TEMP_DIR, "speaker1"),
	        2: os.path.join(TEMP_DIR, "speaker2"),
	    }
	    speaker_zips = {1: "speaker1.zip", 2: "speaker2.zip"}

	    # Start from an empty working directory so leftovers from an earlier,
	    # interrupted request are never mixed into this one.
	    shutil.rmtree(TEMP_DIR, ignore_errors=True)
	    os.makedirs(TEMP_DIR, exist_ok=True)

	    try:
	        # Unzip the uploaded file
	        with zipfile.ZipFile(file_info, 'r') as zip_ref:
	            zip_ref.extractall(TEMP_DIR)

	        for directory in speaker_dirs.values():
	            os.makedirs(directory, exist_ok=True)

	        # Process each audio file at the top level of the working directory
	        for filename in os.listdir(TEMP_DIR):
	            file_path = os.path.join(TEMP_DIR, filename)
	            is_wav = filename.lower().endswith(".wav")
	            if not is_wav or not os.path.isfile(file_path):
	                continue

	            # Run the diarization pipeline and file the audio under the
	            # speaker that dominates it.
	            diarization = pipeline(file_path)
	            group = _dominant_speaker_group(diarization)
	            shutil.move(file_path, os.path.join(speaker_dirs[group], filename))

	        # Zip the results
	        for group, directory in speaker_dirs.items():
	            _zip_directory(directory, speaker_zips[group])
	    finally:
	        # Clean up the temporary directory even when a step above failed.
	        shutil.rmtree(TEMP_DIR, ignore_errors=True)

	    return speaker_zips[1], speaker_zips[2]

	# --- Web UI -----------------------------------------------------------------
	# One file upload goes in; the handler returns two downloadable files.
	_archive_input = gr.File(type="filepath")
	_speaker_outputs = [
	    gr.File(label="Speaker 1 Audio"),
	    gr.File(label="Speaker 2 Audio"),
	]

	# Wire the upload/download components to the processing function.
	iface = gr.Interface(
	    title="Speaker Diarization",
	    description="Upload a ZIP file containing audio files, and this will return two ZIP files containing diarized audio for each speaker.",
	    fn=process_audio_zip,
	    inputs=_archive_input,
	    outputs=_speaker_outputs,
	)

	# Start serving the interface.
	iface.launch()