# audiosr/audiosr/__main__.py
# Added from the original versatile_audio_super_resolution repository.

import os
import torch
import logging
import argparse

from audiosr import super_resolution, build_model, save_wave

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

os.environ["TOKENIZERS_PARALLELISM"] = "true"
torch.set_float32_matmul_precision("high")

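# Example invocation (a minimal sketch; file names and paths are illustrative, and it
# assumes the audiosr package is importable so this __main__ entry point runs via -m):
#   python -m audiosr -i input.wav -s ./outputs --model_name basic --ddim_steps 50 -gs 3.5 --seed 42
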
def main(args):
    # Build the AudioSR model; device="auto" lets the library pick a device for the environment.
    audiosr = build_model(model_name=args.model_name, device=args.device)

    # Run diffusion-based super-resolution with the sampling settings supplied on the command line.
    waveform = super_resolution(
        audiosr,
        args.input_path,
        seed=args.seed,
        guidance_scale=args.guidance_scale,
        ddim_steps=args.ddim_steps,
        latent_t_per_second=12.8,
    )

    # AudioSR produces 48 kHz output.
    save_wave(waveform, args.save_path, name="output", samplerate=48000)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Perform super-resolution on audio files using the audiosr package.')
    parser.add_argument('-i', '--input_path', required=True, help='Path to the input waveform file.')
    parser.add_argument('-s', '--save_path', required=True, help='Path to save the output waveform file.')
    parser.add_argument('--model_name', choices=['basic', 'speech'], default='speech', help='Name of the model to use.')
    parser.add_argument('-d', '--device', default="auto", help='The device for computation. If not specified, the script will automatically choose the device based on your environment.')
    parser.add_argument('--ddim_steps', type=int, default=50, help='The number of DDIM sampling steps.')
    parser.add_argument('-gs', '--guidance_scale', type=float, default=3.5, help='Guidance scale (large => better quality and relevance; small => better diversity).')
    parser.add_argument('--seed', type=int, default=42, help='Changing this value (any integer) leads to a different generation result.')
    # NOTE: --input_file_list is parsed but not used by main() in this version.
    parser.add_argument('-il', '--input_file_list', help='A file listing all audio files on which to perform super-resolution.')
    args = parser.parse_args()
    main(args)