Spaces:

JackIsNotInTheBox
/

Generate_Audio_for_Video

Running on Zero

Generate_Audio_for_Video / HunyuanVideo-Foley /setup.py

Jack Wu

c60109f 5 days ago

5.92 kB

	#!/usr/bin/env python3
	"""
	HunyuanVideo-Foley: Multimodal Diffusion with Representation Alignment
	for High-Fidelity Foley Audio Generation

	Setup script for building and installing the HunyuanVideo-Foley package.
	"""

	import os
	import re
	from typing import List
	from setuptools import setup, find_packages

	def read_file(filename: str) -> str:
	    """Return the full text of *filename*, decoded as UTF-8.

	    The name is joined onto the directory that contains this script, so a
	    relative path is looked up next to the script rather than in the
	    current working directory.

	    Args:
	        filename: Path of the file to read.

	    Returns:
	        The complete contents of the file as a string.
	    """
	    script_dir = os.path.abspath(os.path.dirname(__file__))
	    target_path = os.path.join(script_dir, filename)
	    with open(target_path, mode='r', encoding='utf-8') as handle:
	        text = handle.read()
	    return text

	def get_version() -> str:
	    """Return the package version declared in ``hunyuanvideo_foley/constants.py``.

	    The constants file is searched for an assignment of the form
	    ``__version__ = "<value>"``; single or double quotes and any amount of
	    whitespace (including none) around ``=`` are accepted.

	    Returns:
	        The quoted value of the first matching assignment, or ``"1.0.0"``
	        when the constants file does not exist or has no such assignment.
	    """
	    default_version = "1.0.0"
	    constants_path = os.path.join('hunyuanvideo_foley', 'constants.py')

	    # Only the file read can raise FileNotFoundError, so keep the try narrow.
	    try:
	        content = read_file(constants_path)
	    except FileNotFoundError:
	        return default_version

	    # ``\s*`` instead of ``\s``: the assignment must also be found when it is
	    # written without spaces or with alignment padding around ``=``.
	    version_match = re.search(
	        r"__version__\s*=\s*['\"]([^'\"]*)['\"]", content
	    )
	    if version_match:
	        return version_match.group(1)
	    return default_version

	def parse_requirements(filename: str) -> List[str]:
	    """Parse a pip requirements file into a list for ``install_requires``.

	    Args:
	        filename: Path of the requirements file, handed to ``read_file``.

	    Returns:
	        Requirement strings in file order. Blank lines, comments, pip
	        option lines (anything starting with ``-``) and ``git+`` URLs are
	        left out, except that a ``git+`` URL containing ``transformers`` is
	        replaced by ``transformers>=4.49.0``. An empty list is returned when
	        the file does not exist.
	    """
	    try:
	        content = read_file(filename)
	    except FileNotFoundError:
	        return []

	    requirements = []
	    for raw_line in content.splitlines():
	        # A '#' at the start of the line or after whitespace begins a
	        # comment. A '#egg=' URL fragment has no whitespace before it and
	        # is therefore kept.
	        line = re.sub(r'(^|\s+)#.*$', '', raw_line).strip()
	        if not line:
	            continue

	        # Lines such as '-r other.txt', '-e .', or '--index-url ...' are pip
	        # command-line options, not requirement specifiers, so they must
	        # not reach install_requires.
	        if line.startswith('-'):
	            continue

	        # Handle git+https dependencies - convert to standard package names
	        if line.startswith('git+'):
	            if 'transformers' in line:
	                requirements.append('transformers>=4.49.0')
	            # Every other VCS URL (e.g. audiotools, which is not on PyPI)
	            # is skipped; users need to install those separately.
	            continue

	        requirements.append(line)

	    return requirements

	def get_long_description() -> str:
	    """Build the package long description from README.md.

	    Returns:
	        The README text with every ``<...>`` span deleted, or a fixed
	        one-line project summary when README.md cannot be found.
	    """
	    fallback = "Multimodal Diffusion with Representation Alignment for High-Fidelity Foley Audio Generation"
	    try:
	        raw_readme = read_file("README.md")
	    except FileNotFoundError:
	        return fallback
	    # Drop HTML tags and excessive styling for PyPI compatibility
	    return re.sub(r'<[^>]+>', '', raw_readme)

	# Runtime dependencies, taken from requirements.txt
	install_requires = parse_requirements("requirements.txt")

	# Tooling needed to run the test suite
	test_requirements = [
	    "pytest>=7.0.0",
	    "pytest-cov>=4.0.0",
	]

	# Web demo dependency, pinned to an exact release
	gradio_requirements = [
	    "gradio==3.50.2",
	]

	# Development requirements: formatters, linters, type checker, git hooks,
	# followed by the test tooling
	dev_requirements = [
	    "black>=23.0.0",
	    "isort>=5.12.0",
	    "flake8>=6.0.0",
	    "mypy>=1.3.0",
	    "pre-commit>=3.0.0",
	    *test_requirements,
	]

	# Optional dependency groups for different features
	extras_require = {
	    "dev": dev_requirements,
	    "test": test_requirements,
	    "gradio": gradio_requirements,
	    # ComfyUI specific dependencies can be added here
	    "comfyui": [],
	    "all": dev_requirements + gradio_requirements,
	}

	# Links shown alongside the package metadata
	repository_url = "https://github.com/Tencent-Hunyuan/HunyuanVideo-Foley"
	project_urls = {
	    "Homepage": repository_url,
	    "Repository": repository_url,
	    "Documentation": "https://szczesnys.github.io/hunyuanvideo-foley",
	    "Paper": "https://arxiv.org/abs/2508.16930",
	    "Demo": "https://huggingface.co/spaces/tencent/HunyuanVideo-Foley",
	    "Models": "https://huggingface.co/tencent/HunyuanVideo-Foley",
	}

	# Command line scripts exposed by the package
	entry_points = {
	    "console_scripts": [
	        "hunyuanvideo-foley=hunyuanvideo_foley.cli:main",
	    ],
	}

	# Non-Python files shipped inside the package
	package_data = {
	    "hunyuanvideo_foley": [
	        "configs/*.yaml",
	        "configs/*.yml",
	        "*.yaml",
	        "*.yml",
	    ],
	}

	# Trove classifiers
	classifiers = [
	    "Development Status :: 4 - Beta",
	    "Intended Audience :: Developers",
	    "Intended Audience :: Science/Research",
	    "License :: OSI Approved :: Apache Software License",
	    "Operating System :: OS Independent",
	    "Programming Language :: Python :: 3",
	    "Programming Language :: Python :: 3.8",
	    "Programming Language :: Python :: 3.9",
	    "Programming Language :: Python :: 3.10",
	    "Programming Language :: Python :: 3.11",
	    "Topic :: Scientific/Engineering :: Artificial Intelligence",
	    "Topic :: Multimedia :: Sound/Audio :: Analysis",
	    "Topic :: Multimedia :: Video",
	]

	# Keywords for discoverability
	keywords = [
	    "artificial intelligence",
	    "machine learning",
	    "deep learning",
	    "multimodal",
	    "diffusion models",
	    "audio generation",
	    "foley audio",
	    "video-to-audio",
	    "text-to-audio",
	    "pytorch",
	    "huggingface",
	    "tencent",
	    "hunyuan",
	]

	setup(
	    name="hunyuanvideo-foley",
	    version=get_version(),
	    # Package metadata
	    author="Tencent Hunyuan Team",
	    author_email="hunyuan@tencent.com",
	    description="Multimodal Diffusion with Representation Alignment for High-Fidelity Foley Audio Generation",
	    long_description=get_long_description(),
	    long_description_content_type="text/markdown",
	    # URLs
	    url=repository_url,
	    project_urls=project_urls,
	    # Package discovery: the hunyuanvideo_foley package and its subpackages
	    packages=find_packages(
	        include=["hunyuanvideo_foley", "hunyuanvideo_foley.*"],
	    ),
	    include_package_data=True,
	    # Package requirements
	    python_requires=">=3.8",
	    install_requires=install_requires,
	    extras_require=extras_require,
	    # Entry points and bundled data files
	    entry_points=entry_points,
	    package_data=package_data,
	    # Classification and discoverability
	    classifiers=classifiers,
	    keywords=keywords,
	    # Licensing
	    license="Apache-2.0",
	    # Build configuration
	    zip_safe=False,
	    # Additional metadata
	    platforms=["any"],
	)