|
import os |
|
import re |
|
import random |
|
from scipy.io.wavfile import write, read |
|
import numpy as np |
|
import yt_dlp |
|
import subprocess |
|
from pydub import AudioSegment |
|
from audio_separator.separator import Separator |
|
from lib.infer import infer_audio |
|
import edge_tts |
|
import tempfile |
|
import anyio |
|
from pathlib import Path |
|
from lib.language_tts import language_dict |
|
import zipfile |
|
import shutil |
|
import urllib.request |
|
import gdown |
|
import streamlit as st |
|
|
|
# Resolve the launch directory to an absolute path, log it, and make it the
# working directory so the relative paths used later in this script
# (e.g. "ytdl/...", "./separated_audio") are anchored to it.
main_dir = Path().resolve()
print(main_dir)
os.chdir(main_dir)

# Relative directory name reserved for model files.
models_dir = "models"
|
|
|
|
|
def download_audio(url):
    """Download the audio of *url* with yt-dlp and load it as 16-bit samples.

    The download is stored under ``ytdl/`` using the media title as file name
    and is converted to WAV by yt-dlp's ``FFmpegExtractAudio`` post-processor.

    Args:
        url: Address of the media to download (anything yt-dlp accepts).

    Returns:
        A ``(sample_rate, audio_array)`` tuple, where ``audio_array`` is the
        WAV data converted to a NumPy array of dtype ``int16``.
    """
    options = {
        'format': 'bestaudio/best',
        'outtmpl': 'ytdl/%(title)s.%(ext)s',
        'postprocessors': [
            {
                'key': 'FFmpegExtractAudio',
                'preferredcodec': 'wav',
                'preferredquality': '192',
            },
        ],
    }

    with yt_dlp.YoutubeDL(options) as downloader:
        info = downloader.extract_info(url, download=True)

        # prepare_filename() reports the name of the originally downloaded
        # container; the post-processor leaves a sibling file ending in .wav.
        downloaded_name = downloader.prepare_filename(info)
        wav_path = downloaded_name.rsplit('.', 1)[0] + '.wav'

        rate, samples = read(wav_path)
        return rate, np.asarray(samples, dtype=np.int16)
|
|
|
|
|
def separate_audio(input_audio, output_dir, model_voc_inst, model_deecho, model_back_voc):
    """Split a track into six stems by chaining three separation models.

    The stages run in order, each feeding the next:

    1. ``model_voc_inst`` on ``input_audio``  -> Instrumental / Vocals
    2. ``model_deecho`` on the vocals         -> Vocals (No Reverb) / Vocals (Reverb)
    3. ``model_back_voc`` on the dry vocals   -> Backing Vocals / Lead Vocals

    For every stage the first file reported by the separator is stored under
    the first name listed above and the second file under the second name.

    Args:
        input_audio: Path of the audio file to separate.
        output_dir: Directory receiving all stems; created when missing.
        model_voc_inst: Model filename for the vocal/instrumental split.
        model_deecho: Model filename for the reverb removal stage.
        model_back_voc: Model filename for the lead/backing vocal split.

    Returns:
        Paths of the six stems, in the order ``(instrumental, vocals,
        vocals_reverb, vocals_no_reverb, lead_vocals, backing_vocals)``.

    Raises:
        RuntimeError: If a stage yields fewer than two output files.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_dir, exist_ok=True)

    separator = Separator(output_dir=output_dir)

    vocals = os.path.join(output_dir, 'Vocals.wav')
    instrumental = os.path.join(output_dir, 'Instrumental.wav')
    vocals_reverb = os.path.join(output_dir, 'Vocals (Reverb).wav')
    vocals_no_reverb = os.path.join(output_dir, 'Vocals (No Reverb).wav')
    lead_vocals = os.path.join(output_dir, 'Lead Vocals.wav')
    backing_vocals = os.path.join(output_dir, 'Backing Vocals.wav')

    def _run_stage(model_filename, source, first_target, second_target):
        # Load one model, separate `source`, and move its two outputs onto
        # the stable stem names.
        separator.load_model(model_filename=model_filename)
        produced = separator.separate(source)
        if len(produced) < 2:
            raise RuntimeError(
                f"Model {model_filename} produced {len(produced)} output file(s); expected 2."
            )
        # os.replace overwrites stems left by an earlier run on every
        # platform, whereas os.rename raises FileExistsError on Windows when
        # the destination already exists.
        os.replace(os.path.join(output_dir, produced[0]), first_target)
        os.replace(os.path.join(output_dir, produced[1]), second_target)

    _run_stage(model_voc_inst, input_audio, instrumental, vocals)
    _run_stage(model_deecho, vocals, vocals_no_reverb, vocals_reverb)
    _run_stage(model_back_voc, vocals_no_reverb, backing_vocals, lead_vocals)

    return instrumental, vocals, vocals_reverb, vocals_no_reverb, lead_vocals, backing_vocals
|
|
|
|
|
async def text_to_speech_edge(text, language_code):
    """Synthesize *text* with edge-tts and return the path of the saved audio.

    Args:
        text: The text to speak.
        language_code: Key looked up in ``language_dict`` to choose the voice;
            unknown keys fall back to the literal name ``"default_voice"``.

    Returns:
        Path of a temporary file (suffix ``.wav``) holding the synthesized
        audio. The file is created with ``delete=False``, so it is not removed
        automatically.
    """
    selected_voice = language_dict.get(language_code, "default_voice")
    speech = edge_tts.Communicate(text, selected_voice)

    # Only a unique file name is needed here; the handle is closed on leaving
    # the `with` block and edge-tts writes to the path afterwards.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as reserved_file:
        output_path = reserved_file.name

    await speech.save(output_path)
    return output_path
|
|
|
|
|
|
|
# Page heading, followed by the three workflow tabs that the sections below
# address by position: tabs[0], tabs[1] and tabs[2].
st.title("Hex RVC")

tabs = st.tabs(
    [
        "Inference",
        "Download RVC Model",
        "Audio Separation",
    ]
)
|
|
|
|
|
# --- Tab 0: collect the inputs for voice conversion -------------------------
with tabs[0]:
    st.header("Inference")

    model_name = st.text_input("Model Name", placeholder="Enter model name")
    sound_path = st.text_input("Audio Path (Optional)", placeholder="Leave blank to upload audio")
    uploaded_audio = st.file_uploader("Upload Audio", type=["wav", "mp3"])

    if uploaded_audio is not None:
        # Keep the upload's real extension: the uploader also accepts MP3, and
        # storing MP3 bytes in a file named *.wav misleads readers that pick a
        # decoder from the file name.
        upload_suffix = Path(uploaded_audio.name).suffix.lower() or ".wav"
        sound_path = f"uploaded_audio{upload_suffix}"
        with open(sound_path, "wb") as f:
            # getvalue() returns the whole buffer regardless of the current
            # stream position, unlike read().
            f.write(uploaded_audio.getvalue())

    f0_change = st.number_input("Pitch Change (semitones)", value=0)
    f0_method = st.selectbox(
        "F0 Method",
        ["crepe", "harvest", "mangio-crepe", "rmvpe", "rmvpe+", "fcpe", "hybrid[rmvpe+fcpe]"],
        index=5,
    )

    if st.button("Run Inference"):
        # NOTE(review): the values collected above (model_name, sound_path,
        # f0_change, f0_method) are not passed to infer_audio here; the button
        # currently only reports status. Wire the call once its signature is
        # confirmed.
        st.write("Running inference...")
|
|
|
|
|
def download_online_model(url, dir_name):
    """Download a zipped model from *url* and unpack it into ``models_dir/dir_name``.

    NOTE(review): this helper was called by the download tab but was not
    defined or imported anywhere in the file, so every click ended in a
    NameError. The implementation assumes the URL points at a zip archive and
    that every file in it belongs directly in the model folder -- confirm
    against the intended model layout.

    Args:
        url: http(s) address of the zip archive. Google Drive links are
            fetched with gdown, everything else with urllib.
        dir_name: Name of the folder to create below ``models_dir``.

    Raises:
        ValueError: If *url* or *dir_name* is empty or malformed, or the
            archive contains no files.
        FileExistsError: If the target model folder already exists.
        RuntimeError: If gdown reports a failed download.
    """
    url = (url or "").strip()
    dir_name = (dir_name or "").strip()
    if not url.lower().startswith(("http://", "https://")):
        raise ValueError("Model URL must start with http:// or https://.")
    # The name is user input: refuse anything that could escape models_dir.
    if not dir_name or dir_name in (".", "..") or os.path.basename(dir_name) != dir_name:
        raise ValueError("Model name must be a plain folder name.")

    extraction_folder = os.path.join(models_dir, dir_name)
    if os.path.exists(extraction_folder):
        raise FileExistsError(
            f"Model directory {dir_name} already exists! Choose a different name."
        )

    # Download into a scratch directory so the archive is always cleaned up.
    with tempfile.TemporaryDirectory() as scratch_dir:
        zip_path = os.path.join(scratch_dir, "model.zip")
        if "drive.google.com" in url:
            if gdown.download(url, output=zip_path, quiet=True, fuzzy=True) is None:
                raise RuntimeError("Google Drive download failed.")
        else:
            urllib.request.urlretrieve(url, zip_path)

        with zipfile.ZipFile(zip_path) as archive:
            members = [m for m in archive.infolist() if not m.is_dir()]
            if not members:
                raise ValueError("The downloaded archive contains no files.")

            os.makedirs(extraction_folder)
            try:
                for member in members:
                    # Flatten to the base name: this drops any folder nesting
                    # and prevents entries like "../x" from writing outside
                    # the model folder.
                    target_name = os.path.basename(member.filename)
                    if not target_name:
                        continue
                    target_path = os.path.join(extraction_folder, target_name)
                    with archive.open(member) as src, open(target_path, "wb") as dst:
                        shutil.copyfileobj(src, dst)
            except Exception:
                # Remove the partial folder so a retry is not rejected as
                # "already exists".
                shutil.rmtree(extraction_folder, ignore_errors=True)
                raise


# --- Tab 1: download a voice model by URL -----------------------------------
with tabs[1]:
    st.header("Download RVC Model")
    url = st.text_input("Model URL")
    dir_name = st.text_input("Model Name")

    if st.button("Download Model"):
        # UI boundary: any failure is shown to the user instead of crashing
        # the page.
        try:
            download_online_model(url, dir_name)
        except Exception as e:
            st.error(str(e))
        else:
            st.success(f"Model {dir_name} downloaded successfully!")
|
|
|
|
|
# --- Tab 2: split an uploaded track into stems -------------------------------
with tabs[2]:
    st.header("Audio Separation")
    input_audio = st.file_uploader("Upload Audio for Separation", type=["wav", "mp3"])

    # Path of the file saved for this run; stays None until something is
    # uploaded so a stale file from an earlier session is never processed.
    separation_input = None
    if input_audio is not None:
        # Keep the upload's real extension (MP3 is accepted too) instead of
        # always naming the file *.wav.
        separation_suffix = Path(input_audio.name).suffix.lower() or ".wav"
        separation_input = f"input_audio{separation_suffix}"
        with open(separation_input, "wb") as f:
            # getvalue() returns the whole buffer regardless of the current
            # stream position, unlike read().
            f.write(input_audio.getvalue())
        st.write("Audio uploaded successfully.")

    if st.button("Separate Audio"):
        if separation_input is None:
            st.warning("Please upload an audio file before separating.")
        else:
            st.write("Separating audio...")
            output_dir = "./separated_audio"
            inst, voc, voc_rev, voc_no_rev, lead_voc, back_voc = separate_audio(
                separation_input,
                output_dir,
                'model_bs_roformer.ckpt',
                'UVR-DeEcho-DeReverb.pth',
                'mel_band_karaoke.ckpt',
            )
            # Play the stems in the order separate_audio returns them.
            for stem_path in (inst, voc, voc_rev, voc_no_rev, lead_voc, back_voc):
                st.audio(stem_path)
|
|