"""Command-line helper that runs an audio file through a pickled RVC model."""

import argparse
import os

import librosa
import numpy as np
import soundfile as sf
import torch


def load_rvc_model(model_path):
    """Load the RVC voice conversion model and switch it to eval mode.

    Args:
        model_path: Filesystem path of the serialized model.

    Returns:
        The deserialized model object, after ``model.eval()`` has been called.
        It is mapped to CUDA when ``torch.cuda.is_available()`` and to the CPU
        otherwise.

    Raises:
        FileNotFoundError: If ``model_path`` does not exist.
        TypeError: If the file does not deserialize to an object exposing
            ``eval()`` (for example, a bare dict of weights).
    """
    if not os.path.exists(model_path):
        raise FileNotFoundError(f"Model not found: {model_path}")

    device = "cuda" if torch.cuda.is_available() else "cpu"

    # SECURITY: torch.load unpickles the file, which can execute arbitrary
    # code. Only load model files that come from a trusted source.
    # NOTE(review): recent PyTorch releases default to ``weights_only=True``,
    # which refuses fully pickled modules -- confirm against the torch version
    # in use before relying on this call.
    model = torch.load(model_path, map_location=device)

    # A checkpoint holding only weights would otherwise fail below with an
    # opaque AttributeError; report what is actually wrong.
    if not callable(getattr(model, "eval", None)):
        raise TypeError(
            f"{model_path} did not load as a model object "
            f"(got {type(model).__name__}); a full pickled module is required"
        )

    model.eval()
    return model


def _model_device(model):
    """Return the device of the model's first parameter, or CPU if unknown."""
    parameters = getattr(model, "parameters", None)
    if callable(parameters):
        first = next(iter(parameters()), None)
        if first is not None:
            return first.device
    return torch.device("cpu")


def convert_voice(input_audio, output_audio, model_path, index_path, pitch_shift=0):
    """Convert input audio using RVC and write the result to disk.

    Args:
        input_audio: Path of the audio file to convert; it is loaded with
            librosa at a sample rate of 44100 Hz.
        output_audio: Path the converted audio is written to.
        model_path: Path of the model file passed to ``load_rvc_model``.
        index_path: Accepted for interface compatibility; this function does
            not read it.
        pitch_shift: Accepted for interface compatibility; this function does
            not apply it.

    Raises:
        FileNotFoundError: If ``model_path`` does not exist.
        TypeError: If the model file does not contain a usable model object.
    """
    model = load_rvc_model(model_path)
    audio, sr = librosa.load(input_audio, sr=44100)

    # Add a leading batch dimension and place the input on the same device as
    # the model; a CPU tensor fed to a CUDA model raises a device mismatch.
    batch = torch.tensor(audio, dtype=torch.float32).unsqueeze(0)
    batch = batch.to(_model_device(model))

    with torch.no_grad():
        converted_audio = model(batch)  # Convert using RVC model

    # Tensor.numpy() only works on CPU tensors, so copy the result back first.
    samples = converted_audio.detach().cpu().numpy().squeeze()

    # Save output
    sf.write(output_audio, samples, sr)
    print(f"Converted voice saved to {output_audio}")


def main():
    """Parse command-line arguments and run the conversion."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--input", required=True, help="Input TTS audio file")
    parser.add_argument("--output", required=True, help="Output converted audio file")
    parser.add_argument("--model", required=True, help="Path to RVC model (e.g., zeldabotw.pth)")
    parser.add_argument("--index", required=True, help="Path to RVC index file (e.g., zeldabotw.index)")
    parser.add_argument("--pitch_shift", type=int, default=0, help="Pitch shift value")
    args = parser.parse_args()
    convert_voice(args.input, args.output, args.model, args.index, args.pitch_shift)


if __name__ == "__main__":
    main()