# Text2Sing-DiffSinger / diff_singer_infer.py
# Vaishnavi0404's picture
# Update diff_singer_infer.py
# ee3dec5 verified
import torchaudio
import numpy as np
import pyworld as pw
import scipy.io.wavfile as wavfile
def extract_pitch(audio, sr):
    """Estimate the fundamental-frequency (F0) contour of a signal.

    Runs pyworld's DIO estimator and then refines its output with
    StoneMask.

    Args:
        audio: NumPy array of audio samples. It is converted to float64
            before being handed to pyworld.
        sr: Sampling rate of ``audio``, passed straight through to pyworld.

    Returns:
        The refined F0 array returned by ``pw.stonemask``.
    """
    # Convert once and reuse: both pyworld calls need the same float64
    # signal, so converting per call would copy the whole array twice.
    samples = audio.astype(np.float64)
    coarse_f0, frame_times = pw.dio(samples, sr)
    return pw.stonemask(samples, coarse_f0, frame_times, sr)
def run_diffsinger_inference(input_path, output_path="/tmp/output_singing.wav"):
    """Run the (placeholder) singing conversion on an audio file.

    Loads ``input_path`` with torchaudio, takes the first channel, extracts
    its pitch contour, applies a fixed 0.8 gain as a stand-in for the real
    DiffSinger model, and writes the result as a 16-bit PCM WAV file.

    Args:
        input_path: Path of the audio file to load.
        output_path: Where to write the resulting WAV file. Defaults to the
            historical fixed location ``/tmp/output_singing.wav``; pass a
            distinct path per call to avoid overwriting earlier results.

    Returns:
        The path the WAV file was written to (``output_path``).
    """
    # Load audio; only the first channel is processed.
    waveform, sr = torchaudio.load(input_path)
    audio = waveform[0].numpy()

    # Pitch extraction. The contour is not consumed yet: it is kept as the
    # hook for the pitch/vibrato modelling the placeholder below stands for.
    f0 = extract_pitch(audio, sr)

    # Simulate pitch & vibrato mod (placeholder until DiffSinger model added)
    new_audio = audio * 0.8  # just reduce volume for test

    # Save as WAV. Clip to [-1, 1] first: casting an out-of-range float to
    # int16 wraps around instead of saturating, which is audible as loud
    # crackle whenever the source has samples beyond full scale.
    pcm = (np.clip(new_audio, -1.0, 1.0) * 32767).astype(np.int16)
    wavfile.write(output_path, sr, pcm)
    return output_path