"""Gradio front end that converts an audio clip with a downloaded voice model.

On start-up the script downloads a zipped model plus two auxiliary weight
files, extracts the archive into the working directory, builds a CPU-only
``BaseLoader`` and launches a password-protected Gradio UI.
"""

import posixpath
import random
import zipfile
from urllib.parse import urlsplit
from urllib.request import urlretrieve

import gradio as gr
from infer_rvc_python import BaseLoader

files_to_retrieve = [
    "https://replicate.delivery/pbxt/N97QM3XNFrooJhV6Fb0meBff0aAG1rEDfvuxcdLS6fTx1vmWC/test.zip",
    "https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt?download=true",
    "https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/rmvpe.pt?download=true",
]


def _local_filename(url):
    """Return the last path component of *url*, ignoring query and fragment.

    Splitting the raw URL on "/" would keep a trailing "?download=true" in
    the file name, so the files would not land at the paths handed to
    ``BaseLoader`` below.
    """
    return posixpath.basename(urlsplit(url).path)


for url in files_to_retrieve:
    print(f"Downloading {url}")
    urlretrieve(url, _local_filename(url))

# Unpack the model archive into the working directory; the ``model`` and
# ``index`` file names below are expected to come from it.
with zipfile.ZipFile("test.zip", "r") as zip_ref:
    zip_ref.extractall(".")

converter = BaseLoader(
    only_cpu=True,
    hubert_path="./hubert_base.pt",
    rmvpe_path="./rmvpe.pt",
)

model = "test.pth"
index = "added_IVF839_Flat_nprobe_1_test_v2.index"


def voice_conversion(
    audio,
    pitch_change,
    filter_radius,
    envelope_ratio,
    index_influence,
    consonant_breath_protection,
):
    """Convert one audio clip with the module-level model and index.

    This is the callback wired to the "Convert" button; the parameters arrive
    in the order of the ``inputs`` list given to ``button.click``.

    Args:
        audio: File path of the recorded or uploaded clip, or ``None`` when
            the audio component is empty.
        pitch_change: Value of the "Pitch" slider.
        filter_radius: Value of the "Resp. Median Filtering" slider.
        envelope_ratio: Value of the "Envelope Ratio" slider.
        index_influence: Value of the "Index Influence" slider.
        consonant_breath_protection: Value of the "Consonant Breath
            Protection" slider.

    Returns:
        The first element of the converter's result, which is handed to the
        output ``gr.Audio(type="filepath")`` component (presumably a file
        path -- confirm against the converter's return value).

    Raises:
        gr.Error: If no audio clip was supplied.
    """
    if audio is None:
        # Without this guard str(None) would be passed on as the path "None".
        raise gr.Error("Please record or upload an audio clip first.")

    audio_out = run(
        [str(audio)],
        model,
        "rmvpe+",
        pitch_change,
        index,
        index_influence,
        filter_radius,
        envelope_ratio,
        consonant_breath_protection,
    )
    print(audio_out)
    return audio_out[0]


def convert_now(audio_files, random_tag):
    """Run the converter on *audio_files* using the config stored under *random_tag*."""
    return converter(audio_files, random_tag, overwrite=False, parallel_workers=8)


def run(
    audio_files,
    file_m,
    pitch_alg,
    pitch_lvl,
    file_index,
    index_inf,
    r_m_f,
    e_r,
    c_b_p,
):
    """Register a one-off converter configuration and convert *audio_files*.

    Args:
        audio_files: Non-empty list of input file paths; the first entry
            decides the resample rate.
        file_m: Model file passed as ``file_model``.
        pitch_alg: Pitch algorithm name passed as ``pitch_algo``.
        pitch_lvl: Pitch level passed as ``pitch_lvl``.
        file_index: Index file passed as ``file_index``.
        index_inf: Passed as ``index_influence``.
        r_m_f: Passed as ``respiration_median_filtering``.
        e_r: Passed as ``envelope_ratio``.
        c_b_p: Passed as ``consonant_breath_protection``.

    Returns:
        Whatever ``convert_now`` returns for the given files.
    """
    # Each request gets its own tag so its settings are stored separately.
    random_tag = "USER_" + str(random.randint(10000000, 99999999))

    converter.apply_conf(
        tag=random_tag,
        file_model=file_m,
        pitch_algo=pitch_alg,
        pitch_lvl=pitch_lvl,
        file_index=file_index,
        index_influence=index_inf,
        respiration_median_filtering=r_m_f,
        envelope_ratio=e_r,
        consonant_breath_protection=c_b_p,
        # Only .mp3 inputs get an explicit 44100 resample rate; others pass 0.
        resample_sr=44100 if audio_files[0].endswith(".mp3") else 0,
    )

    return convert_now(audio_files, random_tag)


def ui():
    """Build and return the Gradio Blocks app (not yet launched)."""
    with gr.Blocks() as demo:
        audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath")

        with gr.Row():
            pitch_slider = gr.Slider(
                minimum=-24,
                maximum=24,
                value=0,
                step=1,
                label="Pitch",
                interactive=True,
            )
            index_influence_slider = gr.Slider(
                minimum=0,
                maximum=1,
                value=0.75,
                step=0.01,
                label="Index Influence",
                interactive=True,
            )
            respiration_median_filtering = gr.Slider(
                minimum=0,
                maximum=10,
                value=3,
                step=1,
                label="Resp. Median Filtering",
                interactive=True,
            )
            envelope_ratio = gr.Slider(
                minimum=0,
                maximum=1,
                value=0.25,
                step=0.01,
                label="Envelope Ratio",
                interactive=True,
            )
            consonant_breath_protection = gr.Slider(
                minimum=0,
                maximum=1,
                value=0.5,
                step=0.01,
                label="Consonant Breath Protection",
                interactive=True,
            )

        button = gr.Button("Convert")
        audio_output = gr.Audio(type="filepath")

        # The order of ``inputs`` must match voice_conversion's parameters.
        button.click(
            voice_conversion,
            inputs=[
                audio_input,
                pitch_slider,
                respiration_median_filtering,
                envelope_ratio,
                index_influence_slider,
                consonant_breath_protection,
            ],
            outputs=audio_output,
        )

    return demo


# NOTE(review): the login credentials are hard-coded in source.
ui().launch(auth=("output", "becreative"))