Spaces:

ISABS
/

VibeVoice

Paused

App Files Files Community

VibeVoice / vllm_plugin /scripts /start_server.py

ISABS

Upload folder using huggingface_hub

0a81958 verified about 2 months ago

raw

history blame contribute delete

4.63 kB

	#!/usr/bin/env python3
	"""
	VibeVoice vLLM ASR Server Launcher

	One-click deployment script that handles:
	1. Installing system dependencies (FFmpeg, etc.)
	2. Installing VibeVoice Python package
	3. Downloading model from HuggingFace
	4. Generating tokenizer files
	5. Starting vLLM server

	Usage:
	python3 start_server.py [--model MODEL_ID] [--port PORT] [--skip-deps] [--skip-tokenizer]
	"""

	import argparse
	import os
	import subprocess
	import sys


	def run_command(cmd: list[str], description: str, shell: bool = False) -> None:
	    """Print a banner for *description*, then run *cmd* and wait for it.

	    Args:
	        cmd: Command and arguments handed to ``subprocess.run``.
	        description: Human-readable label shown between the banner rules.
	        shell: Forwarded to ``subprocess.run`` as its ``shell`` argument.

	    Raises:
	        subprocess.CalledProcessError: If the command exits with a non-zero
	            status (the call uses ``check=True``).
	    """
	    rule = "=" * 60
	    print(f"\n{rule}")
	    print(f" {description}")
	    # Flush before spawning: the child writes straight to the inherited
	    # stdout descriptor, so when stdout is a pipe or file (block-buffered)
	    # an unflushed banner would show up *after* the child's output.
	    print(f"{rule}\n", flush=True)
	    subprocess.run(cmd, shell=shell, check=True)


	def install_system_deps() -> None:
	    """Refresh the apt package index, then install ffmpeg and libsndfile1."""
	    apt_steps = (
	        (["apt-get", "update"], "Updating package list"),
	        (
	            ["apt-get", "install", "-y", "ffmpeg", "libsndfile1"],
	            "Installing FFmpeg and audio libraries",
	        ),
	    )
	    # Steps run in order; a failing step raises and stops the sequence.
	    for argv, label in apt_steps:
	        run_command(argv, label)


	def install_vibevoice() -> None:
	    """Pip-install the project at ``/app`` in editable mode with its ``vllm`` extra."""
	    # Use the running interpreter's pip so the package lands in this environment.
	    pip_install = [sys.executable, "-m", "pip", "install", "-e", "/app[vllm]"]
	    run_command(pip_install, "Installing VibeVoice with vLLM support")


	def download_model(model_id: str) -> str:
	    """Fetch *model_id* via ``huggingface_hub.snapshot_download``.

	    Prints a banner before the download and a summary afterwards.

	    Returns:
	        The path returned by ``snapshot_download``.
	    """
	    rule = "=" * 60
	    print(f"\n{rule}")
	    print(f" Downloading model: {model_id}")
	    print(f"{rule}\n")

	    # Imported lazily, after the banner, as in the original flow.
	    import warnings
	    from huggingface_hub import snapshot_download

	    # Silence every warning raised during the download (e.g. deprecation
	    # notices from huggingface_hub); filters are restored on exit.
	    with warnings.catch_warnings():
	        warnings.simplefilter("ignore")
	        local_dir = snapshot_download(model_id)

	    print(f"\n{rule}")
	    print(" ✅ Model downloaded successfully!")
	    print(f" 📁 Path: {local_dir}")
	    print(f"{rule}\n")
	    return local_dir


	def generate_tokenizer(model_path: str) -> None:
	    """Run ``vllm_plugin.tools.generate_tokenizer_files`` with ``--output model_path``."""
	    tokenizer_cmd = [sys.executable, "-m", "vllm_plugin.tools.generate_tokenizer_files"]
	    tokenizer_cmd += ["--output", model_path]
	    run_command(tokenizer_cmd, "Generating tokenizer files")


	def start_vllm_server(model_path: str, port: int) -> None:
	    """Start ``vllm serve`` for *model_path*, replacing the current process.

	    The command is launched with ``os.execvp``, so on success this function
	    never returns: the Python process becomes the ``vllm`` process.

	    Args:
	        model_path: Model location passed as the positional argument to
	            ``vllm serve``.
	        port: Port passed to ``--port``.

	    Raises:
	        OSError: If ``vllm`` cannot be found on ``PATH`` or cannot be executed.
	    """
	    rule = "=" * 60
	    print(f"\n{rule}")
	    print(f" Starting vLLM server on port {port}")
	    print(f"{rule}\n")

	    vllm_cmd = [
	        "vllm", "serve", model_path,
	        "--served-model-name", "vibevoice",
	        "--trust-remote-code",
	        "--dtype", "bfloat16",
	        "--max-num-seqs", "64",
	        "--max-model-len", "65536",
	        # "--max-num-batched-tokens", "32768",
	        "--gpu-memory-utilization", "0.8",
	        # "--enforce-eager",
	        "--no-enable-prefix-caching",
	        "--enable-chunked-prefill",
	        "--chat-template-content-format", "openai",
	        "--tensor-parallel-size", "1",
	        "--allowed-local-media-path", "/app",
	        "--port", str(port),
	    ]

	    # exec replaces the process image immediately and does NOT flush Python's
	    # stdio buffers. Without these flushes, the banner above (and any earlier
	    # buffered output) is silently lost when stdout is a pipe or file.
	    sys.stdout.flush()
	    sys.stderr.flush()
	    os.execvp("vllm", vllm_cmd)


	def main():
	    """Parse command-line options and run the deployment steps in order.

	    Steps: install system packages (unless ``--skip-deps``), install the
	    VibeVoice package, download the model, generate tokenizer files (unless
	    ``--skip-tokenizer``), then hand over to ``start_vllm_server``.
	    """
	    title = "VibeVoice vLLM ASR Server - One-Click Deployment"
	    usage_examples = """
	Examples:
	# Start with default settings
	python3 start_server.py

	# Use custom port
	python3 start_server.py --port 8080

	# Skip dependency installation (if already installed)
	python3 start_server.py --skip-deps
	"""

	    # RawDescriptionHelpFormatter keeps the epilog's line breaks intact.
	    cli = argparse.ArgumentParser(
	        description=title,
	        formatter_class=argparse.RawDescriptionHelpFormatter,
	        epilog=usage_examples,
	    )
	    cli.add_argument(
	        "--model", "-m",
	        default="microsoft/VibeVoice-ASR",
	        help="HuggingFace model ID (default: microsoft/VibeVoice-ASR)",
	    )
	    cli.add_argument(
	        "--port", "-p",
	        type=int,
	        default=8000,
	        help="Server port (default: 8000)",
	    )
	    cli.add_argument(
	        "--skip-deps",
	        action="store_true",
	        help="Skip installing system dependencies",
	    )
	    cli.add_argument(
	        "--skip-tokenizer",
	        action="store_true",
	        help="Skip generating tokenizer files",
	    )
	    opts = cli.parse_args()

	    rule = "=" * 60
	    print("\n" + rule)
	    print(f" {title}")
	    print(rule)

	    # Step 1: system packages (optional).
	    if not opts.skip_deps:
	        install_system_deps()

	    # Step 2: the VibeVoice package itself is always installed.
	    install_vibevoice()

	    # Step 3: fetch the model and remember where it landed.
	    local_model = download_model(opts.model)

	    # Step 4: tokenizer files (optional).
	    if not opts.skip_tokenizer:
	        generate_tokenizer(local_model)

	    # Step 5: launch the server.
	    start_vllm_server(local_model, opts.port)


	# Run the deployment pipeline only when executed as a script, not on import.
	if __name__ == "__main__":
	    main()