import os

import gradio as gr
from pydub import AudioSegment

from lib.infer import infer_audio

# Project root (one level above this file's directory)
main_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))


# Function for inference
def inference(model_name, audio, f0_change, f0_method, min_pitch, max_pitch,
              crepe_hop_length, index_rate, filter_radius, rms_mix_rate,
              protect, split_infer, min_silence, silence_threshold, seek_step,
              keep_silence, quefrency, timbre, f0_autotune, output_format):
    # Perform inference on the uploaded audio file
    inferred_audio = infer_audio(
        model_name,
        audio,
        f0_change,
        f0_method,
        min_pitch,
        max_pitch,
        crepe_hop_length,
        index_rate,
        filter_radius,
        rms_mix_rate,
        protect,
        split_infer,
        min_silence,
        silence_threshold,
        seek_step,
        keep_silence,
        quefrency,
        timbre,
        f0_autotune,
        output_format,
    )

    # Load the inferred audio from the project root
    os.chdir(main_dir)
    output_audio = AudioSegment.from_file(inferred_audio)

    # Save the output audio and return its path
    output_path = f"output.{output_format}"
    output_audio.export(output_path, format=output_format)
    return output_path


# Gradio UI
with gr.Blocks(theme="Ryouko65777/ryo", js="() => {document.body.classList.toggle('dark');}") as demo:
    gr.Markdown("# Ryo RVC")

    with gr.Tabs():
        with gr.Tab("Inference"):
            audio_input = gr.Audio(label="Input Audio", type="filepath")
            model_name = gr.Textbox(label="Model Name")
            f0_change = gr.Number(label="Pitch Change (F0 Change)", value=0)
            f0_method = gr.Dropdown(
                label="F0 Method",
                choices=[
                    "crepe",
                    "harvest",
                    "mangio-crepe",
                    "rmvpe",
                    "rmvpe+",
                    "fcpe",
                    "fcpe_legacy",
                    "hybrid[mangio-crepe+rmvpe]",
                    "hybrid[mangio-crepe+fcpe]",
                    "hybrid[rmvpe+fcpe]",
                    "hybrid[mangio-crepe+rmvpe+fcpe]",
                ],
                value="fcpe",
            )
            min_pitch = gr.Textbox(label="Min Pitch", value="50")
            max_pitch = gr.Textbox(label="Max Pitch", value="1100")
            crepe_hop_length = gr.Number(label="CREPE Hop Length", value=120)
            index_rate = gr.Slider(label="Index Rate", minimum=0, maximum=1, value=0.75)
            filter_radius = gr.Number(label="Filter Radius", value=3)
            rms_mix_rate = gr.Slider(label="RMS Mix Rate", minimum=0, maximum=1, value=0.25)
            protect = gr.Slider(label="Protect", minimum=0, maximum=1, value=0.33)
            split_infer = gr.Checkbox(label="Enable Split Inference", value=False)
            min_silence = gr.Number(label="Min Silence (ms)", value=500)
            silence_threshold = gr.Number(label="Silence Threshold (dB)", value=-50)
            seek_step = gr.Slider(label="Seek Step (ms)", minimum=1, maximum=10, value=1)
            keep_silence = gr.Number(label="Keep Silence (ms)", value=200)
            quefrency = gr.Number(label="Quefrency", value=0)
            timbre = gr.Number(label="Timbre", value=1)
            f0_autotune = gr.Checkbox(label="Enable F0 Autotune", value=False)
            output_format = gr.Dropdown(label="Output Format", choices=["wav", "flac", "mp3"], value="wav")
            output_audio = gr.Audio(label="Output Audio")
            submit_btn = gr.Button("Run Inference")

            # Define the interaction between the inputs and the inference function
            submit_btn.click(
                fn=inference,
                inputs=[model_name, audio_input, f0_change, f0_method, min_pitch, max_pitch,
                        crepe_hop_length, index_rate, filter_radius, rms_mix_rate, protect,
                        split_infer, min_silence, silence_threshold, seek_step, keep_silence,
                        quefrency, timbre, f0_autotune, output_format],
                outputs=output_audio,
            )

# Launch the demo
demo.launch()