| import torchaudio | |
| import numpy as np | |
| import pyworld as pw | |
| import scipy.io.wavfile as wavfile | |
def extract_pitch(audio, sr):
    """Estimate the fundamental-frequency (F0) contour of a mono signal.

    Runs pyworld's DIO estimator and then refines its output with
    StoneMask.

    Args:
        audio: 1-D array-like of audio samples (any real dtype).
        sr: Sampling rate of ``audio``, passed straight through to pyworld.

    Returns:
        The refined F0 array returned by ``pw.stonemask``.
    """
    # Convert once and reuse for both calls. pyworld needs a C-contiguous
    # float64 buffer, which a plain ``astype`` does not guarantee for
    # strided views.
    signal = np.ascontiguousarray(audio, dtype=np.float64)

    raw_f0, time_axis = pw.dio(signal, sr)
    return pw.stonemask(signal, raw_f0, time_axis, sr)
def run_diffsinger_inference(input_path, output_path="/tmp/output_singing.wav"):
    """Run the (placeholder) singing pipeline on one audio file.

    Loads ``input_path``, extracts pitch from its first channel, applies a
    stand-in transformation (volume scaled by 0.8) and writes the result as
    a 16-bit PCM WAV file.

    Args:
        input_path: Path of the audio file to load with ``torchaudio.load``.
        output_path: Where to write the resulting WAV. Defaults to the
            original fixed location; pass a unique path when calls may
            overlap, otherwise each run overwrites the previous output.

    Returns:
        The path the WAV file was written to (``output_path``).
    """
    # Load audio; only the first channel is processed.
    waveform, sr = torchaudio.load(input_path)
    audio = waveform[0].numpy()

    # Pitch extraction. The contour is not consumed yet; it is computed so
    # the pipeline is exercised end to end until the DiffSinger model lands.
    _f0 = extract_pitch(audio, sr)

    # Simulate pitch & vibrato mod (placeholder until DiffSinger model added):
    # just reduce volume for test.
    new_audio = audio * 0.8

    # Clip before converting to int16: samples outside [-1, 1] would
    # otherwise wrap around and produce loud clicks instead of saturating.
    pcm = (np.clip(new_audio, -1.0, 1.0) * 32767).astype(np.int16)

    # Save as WAV
    wavfile.write(output_path, sr, pcm)
    return output_path