Spaces:

jcudit
/

voice-tools

Running on Zero

App Files Files Community

voice-tools / src /cli /denoise.py

jcudit HF Staff

feat: implement cross-mode robustness fixes (phases 1-8)

95e1515 about 1 month ago

raw

history blame contribute delete

9.76 kB

	"""
	CLI command for voice denoising.

	Removes silence and background noise from audio files.
	"""

	import json
	import logging
	import sys
	from pathlib import Path
	from typing import Optional

	import click
	from rich.console import Console
	from rich.progress import BarColumn, Progress, SpinnerColumn, TextColumn, TimeRemainingColumn

	from src.lib.audio_io import AudioIOError, write_audio
	from src.services.voice_denoising import VoiceDenoisingService

	# Rich console used for this command's user-facing terminal output.
	console = Console()
	# Module-level logger, named after this module via __name__.
	logger = logging.getLogger(__name__)


	@click.command()
	@click.argument("input_file", type=click.Path(exists=True, path_type=Path))
	@click.option(
	    "--output",
	    "-o",
	    type=click.Path(path_type=Path),
	    help="Output file path (default: input_denoised.m4a)",
	)
	@click.option(
	    "--vad-threshold",
	    type=click.FloatRange(0.0, 1.0),
	    default=0.5,
	    help="Voice activity detection threshold (0.0-1.0, default: 0.5). Higher = more aggressive.",
	)
	@click.option(
	    "--silence-threshold",
	    type=float,
	    default=1.5,
	    help="Maximum silence duration to remove in seconds (default: 1.5).",
	)
	@click.option(
	    "--min-duration",
	    type=float,
	    default=0.5,
	    help="Minimum voice segment duration in seconds (default: 0.5).",
	)
	@click.option(
	    "--crossfade",
	    type=int,
	    default=75,
	    help="Crossfade duration between segments in milliseconds (default: 75).",
	)
	@click.option(
	    "--silence",
	    type=int,
	    default=150,
	    help="Silence duration between segments in milliseconds (default: 150).",
	)
	@click.option(
	    "--output-format",
	    type=click.Choice(["m4a", "wav"], case_sensitive=False),
	    default="m4a",
	    help="Output audio format (default: m4a).",
	)
	@click.option(
	    "--report",
	    type=click.Path(path_type=Path),
	    help="Path to save denoising report JSON (default: same directory as output).",
	)
	def denoise(
	    input_file: Path,
	    output: Optional[Path],
	    vad_threshold: float,
	    silence_threshold: float,
	    min_duration: float,
	    crossfade: int,
	    silence: int,
	    output_format: str,
	    report: Optional[Path],
	) -> None:
	    """
	    Remove silence and background noise from audio file.

	    This command uses voice activity detection (VAD) to identify voice segments,
	    removes background noise, and concatenates the voice segments with smooth
	    transitions.

	    Examples:

	        # Basic denoising with defaults
	        voice-tools denoise noisy_audio.m4a

	        # Custom output path
	        voice-tools denoise noisy_audio.m4a --output clean_audio.m4a

	        # Aggressive noise removal
	        voice-tools denoise noisy_audio.m4a --vad-threshold 0.7 --silence-threshold 1.0

	        # Keep more audio (less aggressive)
	        voice-tools denoise noisy_audio.m4a --vad-threshold 0.3 --silence-threshold 3.0
	    """
	    # NOTE: the docstring above is rendered by click as the --help text, so
	    # maintainer documentation lives in comments instead.
	    #
	    # Flow: resolve output/report paths -> load models -> denoise -> write the
	    # audio file -> write the JSON report -> print statistics.
	    #
	    # Exit codes:
	    #   0   success
	    #   1   input file missing, or an unexpected error outside the steps below
	    #   2   the service returned no audio / a failed report
	    #   3   model initialisation failed, the denoise call raised, or no voice
	    #       segments were detected
	    #   4   writing the output audio failed
	    #   130 interrupted by the user (Ctrl+C)

	    # Imported locally so this command does not depend on edits to the
	    # file-level import block.
	    from rich.markup import escape

	    # Console.print() has no ``file`` parameter; diagnostics are routed to
	    # stderr through a dedicated console instead.
	    err_console = Console(stderr=True)

	    # Share of the 0-100 progress bar owned by each step (report gets the rest).
	    denoise_share = 70
	    write_share = 20

	    console.print("\n[bold cyan]Voice Tools - Voice Denoising[/bold cyan]\n")

	    # click already checks existence while parsing; this guards the window
	    # between parsing and use.
	    if not input_file.exists():
	        err_console.print(
	            f"[red]Error: Input file not found: {escape(str(input_file))}[/red]"
	        )
	        sys.exit(1)

	    # Determine output path, forcing the extension to match the chosen format.
	    if output is None:
	        output = input_file.parent / f"{input_file.stem}_denoised.{output_format}"
	    elif output.suffix.lower().lstrip(".") != output_format.lower():
	        output = output.with_suffix(f".{output_format}")

	    # Determine report path (defaults to the output file's directory).
	    if report is None:
	        report = output.parent / "denoising_report.json"

	    try:
	        # Initialize service behind a transient spinner.
	        with Progress(
	            SpinnerColumn(),
	            TextColumn("[progress.description]{task.description}"),
	            transient=True,
	            console=console,
	        ) as progress:
	            progress.add_task(description="Initializing voice denoising models...", total=None)

	            try:
	                service = VoiceDenoisingService(vad_threshold=vad_threshold)
	            except Exception as e:
	                err_console.print(
	                    f"[red]Error: Failed to initialize models: {escape(str(e))}[/red]"
	                )
	                sys.exit(3)

	        console.print("[green]✓[/green] Models loaded\n")

	        # Log configuration (lazy %-formatting: only rendered if the level is enabled).
	        logger.info("Denoising %s", input_file)
	        logger.info(
	            "VAD threshold: %s, Silence threshold: %ss", vad_threshold, silence_threshold
	        )
	        logger.info(
	            "Min duration: %ss, Crossfade: %sms, Silence: %sms",
	            min_duration,
	            crossfade,
	            silence,
	        )

	        # Process audio
	        with Progress(
	            SpinnerColumn(),
	            TextColumn("[progress.description]{task.description}"),
	            BarColumn(),
	            TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
	            TimeRemainingColumn(),
	            console=console,
	        ) as progress:
	            task = progress.add_task(description="Processing audio...", total=100)

	            def progress_callback(stage: str, current: float, total: float):
	                # Handles both float-based (total == 1.0) and count-based
	                # reporting: either way current / total is the done fraction.
	                fraction = current / total if total > 0 else 0.0
	                fraction = min(max(fraction, 0.0), 1.0)
	                # Map onto the denoise share only, so the later write/report
	                # milestones never push the bar past its total.
	                progress.update(
	                    task,
	                    description=f"[cyan]{stage}[/cyan]",
	                    completed=fraction * denoise_share,
	                )

	            # Step 1: Read and denoise (70%)
	            progress.update(task, description="[cyan]Reading audio and detecting voice...[/cyan]")

	            try:
	                denoised_audio, denoise_report = service.denoise_audio(
	                    str(input_file),
	                    silence_threshold=silence_threshold,
	                    min_segment_duration=min_duration,
	                    crossfade_ms=crossfade,
	                    silence_ms=silence,
	                    progress_callback=progress_callback,
	                )

	                # No audio means the run failed; the report (if it is marked
	                # failed) carries the error type and message.
	                if denoised_audio is None:
	                    error_type = denoise_report.get("error_type", "processing")
	                    error_text = denoise_report.get("error", "no audio was returned")
	                    # Color-code by error type
	                    color_map = {
	                        "audio_io": "red",
	                        "processing": "red",
	                        "validation": "yellow",
	                        "ssl": "magenta",
	                        "model_loading": "magenta",
	                    }
	                    color = color_map.get(error_type, "red")
	                    err_console.print(
	                        f"[{color}]Error ({escape(str(error_type))}): "
	                        f"{escape(str(error_text))}[/{color}]"
	                    )
	                    sys.exit(2)
	            except Exception as e:
	                # SystemExit is not an Exception subclass, so the exit above
	                # passes straight through this handler.
	                err_console.print(f"[red]Error: Unexpected error: {escape(str(e))}[/red]")
	                logger.exception("Unexpected error")
	                sys.exit(3)

	            progress.update(task, completed=denoise_share)

	            # Check if any audio was kept. len() is used rather than truthiness
	            # because the audio is presumably an array type - TODO confirm.
	            if len(denoised_audio) == 0:
	                err_console.print(
	                    "\n[yellow]Warning: No voice segments detected[/yellow]\n"
	                    " Try lowering the VAD threshold (--vad-threshold) or\n"
	                    " increasing the silence threshold (--silence-threshold)\n"
	                )
	                sys.exit(3)

	            # Step 2: Write output (20%)
	            progress.update(task, description="[cyan]Writing output file...[/cyan]")

	            try:
	                # NOTE(review): 16000 is presumably the sample rate in Hz -
	                # confirm it matches what the denoising service produces.
	                write_audio(str(output), denoised_audio, 16000)
	            except Exception as e:
	                err_console.print(
	                    f"[red]Error: Failed to write output: {escape(str(e))}[/red]"
	                )
	                logger.exception("Write error")
	                sys.exit(4)

	            progress.update(task, completed=denoise_share + write_share)

	            # Step 3: Write report (10%)
	            progress.update(task, description="[cyan]Generating report...[/cyan]")

	            # Best effort: a report failure is only a warning because the
	            # audio output has already been written.
	            try:
	                report_data = {
	                    **denoise_report,
	                    "output_file": str(output),
	                    "report_file": str(report),
	                }

	                with open(report, "w", encoding="utf-8") as f:
	                    json.dump(report_data, f, indent=2)
	            except Exception as e:
	                console.print(
	                    f"[yellow]Warning: Failed to write report: {escape(str(e))}[/yellow]"
	                )
	                logger.warning("Report write failed: %s", e)

	            progress.update(task, completed=100)

	        # Display results
	        console.print("\n[green]✓ Denoising complete![/green]\n")
	        console.print(f"[cyan]Output:[/cyan] {escape(str(output))}")
	        console.print(f"[cyan]Report:[/cyan] {escape(str(report))}\n")

	        # Display statistics
	        console.print("[bold]Statistics:[/bold]")
	        console.print(f" Original duration: {denoise_report['original_duration']:.1f}s")
	        console.print(f" Output duration: {denoise_report['output_duration']:.1f}s")
	        console.print(f" Compression ratio: {denoise_report['compression_ratio']:.1%}")
	        console.print(f" Segments kept: {denoise_report['segments_kept']}")
	        console.print(f" Segments removed: {denoise_report['segments_removed']}\n")

	        sys.exit(0)

	    except KeyboardInterrupt:
	        console.print("\n[yellow]Interrupted by user[/yellow]")
	        sys.exit(130)
	    except Exception as e:
	        err_console.print(f"[red]Unexpected error: {escape(str(e))}[/red]")
	        logger.exception("Unexpected error")
	        sys.exit(1)