Spaces:
Running
on
Zero
Running
on
Zero
File size: 4,573 Bytes
37a9836 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 |
import argparse
import logging
import os
from typing import Optional
from core.bark.generate_audio_semantic_dataset import (
generate_wav_semantic_dataset,
BarkGenerationConfig,
)
from core.utils import upload_file_to_hf, zip_folder
# Configure root logging once at import time: INFO and above, with each
# record prefixed by its timestamp and level name.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)

# Module-scoped logger used by the functions in this file.
logger = logging.getLogger(__name__)
def parse_dataset_args(args_list=None):
    """Parse the command-line arguments for dataset creation.

    Args:
        args_list: Optional list of argument strings to parse. When None,
            argparse falls back to ``sys.argv[1:]``.

    Returns:
        An ``argparse.Namespace`` with the attributes ``text_file``,
        ``batch_size``, ``output_dir``, ``max_tokens``, ``use_small_model``,
        ``save_raw_audio``, ``publish_hf``, ``repo_id``, ``path_in_repo``
        and ``silent``.
    """
    parser = argparse.ArgumentParser(description="Audio Semantic Dataset Creation")
    parser.add_argument(
        "--text-file",
        type=str,
        default="data/test_data.txt",
        help="Path to text file for dataset generation",
    )
    # Help strings use argparse's %(default)s placeholder so the advertised
    # default always matches the real one (the batch-size text previously
    # claimed 1 while the actual default was 2).
    parser.add_argument(
        "--batch-size",
        type=int,
        default=2,
        help="Batch size for processing (default: %(default)s)",
    )
    parser.add_argument(
        "--output-dir",
        type=str,
        default="./dataset",
        help="Output directory for generated files (default: %(default)s)",
    )
    parser.add_argument(
        "--max-tokens",
        type=int,
        default=256,
        help="Maximum tokens per example (default: %(default)s)",
    )
    parser.add_argument(
        "--use-small-model",
        action="store_true",
        help="Use small model for generation",
    )
    parser.add_argument(
        "--save-raw-audio",
        action="store_true",
        help="Store generated audio as .wav instead of .npz",
    )
    parser.add_argument(
        "--publish-hf",
        action="store_true",
        help="Publish dataset to HuggingFace Hub",
    )
    parser.add_argument(
        "--repo-id",
        type=str,
        help="HuggingFace repo ID to publish to",
    )
    parser.add_argument(
        "--path-in-repo",
        type=str,
        help="Path in HF repo",
        default=None,
    )
    parser.add_argument(
        "--silent", action="store_true", help="Suppress progress output"
    )
    return parser.parse_args(args_list)
def create_audio_semantic_dataset(
    text_file: str,
    output_dir: str = "./dataset",
    batch_size: int = 1,
    max_tokens: int = 256,
    use_small_model: bool = False,
    save_raw_audio: bool = False,
    publish_hf: bool = False,
    repo_id: Optional[str] = None,
    path_in_repo: Optional[str] = None,
    silent: bool = False,
) -> None:
    """Create an audio semantic dataset from a text file.

    Can be called directly with parameters or via the command line using
    parse_dataset_args().

    Args:
        text_file: Path to input text file
        output_dir: Directory to save generated dataset (created if missing)
        batch_size: Batch size for processing
        max_tokens: Maximum tokens per example.
            NOTE(review): accepted for interface compatibility but not
            forwarded to the generator by this function.
        use_small_model: Whether to use small model
        save_raw_audio: Save as raw audio (.wav) instead of .npz
        publish_hf: Whether to publish to HuggingFace Hub
        repo_id: HF repo ID to publish to; publishing is skipped without it
        path_in_repo: Path in HF repo
        silent: Suppress progress output

    Raises:
        FileNotFoundError: If text_file is not an existing file.
        RuntimeError: If the output directory could not be zipped for upload.
    """
    # Validate the input first so a wrong path fails fast and does not leave
    # an empty output directory behind.
    if not os.path.isfile(text_file):
        raise FileNotFoundError(f"Text file not found: {text_file}")
    os.makedirs(output_dir, exist_ok=True)

    logger.info("Starting dataset generation from %s", text_file)
    generation_config = BarkGenerationConfig(
        temperature=None,
        generate_coarse_temperature=None,
        generate_fine_temperature=None,
        use_small_model=use_small_model,
    )
    generate_wav_semantic_dataset(
        text_file_path=text_file,
        generation_config=generation_config,
        batch_size=batch_size,
        save_path=output_dir,
        save_data_as_raw_audio=save_raw_audio,
        silent=silent,
    )
    logger.info("Dataset generation completed")

    if not publish_hf:
        return
    if not repo_id:
        # Previously this case was skipped without any message; keep skipping
        # but make it visible to the caller.
        logger.warning(
            "publish_hf is set but no repo_id was provided; skipping upload"
        )
        return

    logger.info("Publishing dataset to huggingface hub")
    zip_path = "./dataset.zip"
    success = zip_folder(output_dir, zip_path)
    if not success:
        raise RuntimeError(f"Unable to zip folder {output_dir}")
    # The third positional argument is presumably the HF repo type -- confirm
    # against core.utils.upload_file_to_hf.
    upload_file_to_hf(zip_path, repo_id, "dataset", path_in_repo=path_in_repo)
if __name__ == "__main__":
    # Script entry point: read options from the command line and forward
    # each one to the dataset builder by keyword.
    cli = parse_dataset_args()
    create_audio_semantic_dataset(
        text_file=cli.text_file,
        output_dir=cli.output_dir,
        batch_size=cli.batch_size,
        max_tokens=cli.max_tokens,
        use_small_model=cli.use_small_model,
        save_raw_audio=cli.save_raw_audio,
        publish_hf=cli.publish_hf,
        repo_id=cli.repo_id,
        path_in_repo=cli.path_in_repo,
        silent=cli.silent,
    )
|