Ryouko65777 committed on
Commit
eb1e6af
·
verified ·
1 Parent(s): 9e1688c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +119 -0
app.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ from lib.infer import infer_audio
4
+ from pydub import AudioSegment
5
+
6
+
7
+
8
+
9
# Supported pitch-extraction (F0) algorithms, including hybrid combinations.
# Declared as a tuple (not a set, as originally) so the UI dropdown lists the
# choices in a stable, intentional order; membership/iteration semantics for
# callers are unchanged.
f0_method = (
    "crepe",
    "harvest",
    "mangio-crepe",
    "rmvpe",
    "rmvpe+",
    "fcpe",
    "fcpe_legacy",
    "hybrid[mangio-crepe+rmvpe]",
    "hybrid[mangio-crepe+fcpe]",
    "hybrid[rmvpe+fcpe]",
    "hybrid[mangio-crepe+rmvpe+fcpe]",
)
22
+
23
+
24
+
25
+
26
+
27
+
28
# Function for inference
def inference(model_name, audio, f0_change, f0_method, min_pitch, max_pitch, crepe_hop_length,
              index_rate, filter_radius, rms_mix_rate, protect, split_infer, min_silence,
              silence_threshold, seek_step, keep_silence, formant_shift, quefrency, timbre,
              f0_autotune, output_format):
    """Run voice-conversion inference on an uploaded audio file.

    Parameters mirror ``lib.infer.infer_audio``; ``audio`` is the value
    delivered by the Gradio ``Audio`` input (a filepath string, or a
    tempfile-like object exposing ``.name``).

    Returns:
        str: path of the converted audio exported in ``output_format``.

    Raises:
        ValueError: if no input audio was provided.
    """
    # Anchor the current working directory so we can restore it after
    # inference.  (The original code did os.chdir("") which raises
    # FileNotFoundError unconditionally.)
    main_dir = os.getcwd()

    # Gradio's Audio component supplies either a filepath string or a
    # tempfile-like object with a .name attribute — accept both.  (The
    # original called audio.save(...), which exists on neither.)
    audio_path = audio if isinstance(audio, str) else getattr(audio, "name", None)
    if not audio_path:
        raise ValueError("No input audio provided.")

    # Ensure the bundled stftpitchshift binary is executable (used for
    # formant shifting).  Best effort — failures are ignored by os.system.
    os.system("chmod +x stftpitchshift")

    # Perform inference
    inferred_audio = infer_audio(
        model_name,
        audio_path,
        f0_change,
        f0_method,
        min_pitch,
        max_pitch,
        crepe_hop_length,
        index_rate,
        filter_radius,
        rms_mix_rate,
        protect,
        split_infer,
        min_silence,
        silence_threshold,
        seek_step,
        keep_silence,
        formant_shift,
        quefrency,
        timbre,
        f0_autotune,
        output_format
    )

    # Restore the working directory before exporting, in case infer_audio
    # changed it — presumably it may; TODO confirm against lib.infer.
    os.chdir(main_dir)
    output_audio = AudioSegment.from_file(inferred_audio)

    # Export in the requested container format and hand the path to Gradio.
    output_path = f"output.{output_format}"
    output_audio.export(output_path, format=output_format)
    return output_path
76
+
77
# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("## Audio Inference")

    with gr.Row():
        model_name = gr.Textbox(label="Model Name")
        f0_change = gr.Number(label="Pitch Change (F0 Change)", value=0)
        # Renamed from `f0_method` so the component does not shadow the
        # module-level collection of method names it is built from.
        # (Original line had a syntax error: `choices=list(f0_method.keys())=`,
        # and .keys() does not exist on the module-level collection.)
        f0_method_input = gr.Dropdown(label="F0 Method", choices=list(f0_method), value="fcpe")
        min_pitch = gr.Textbox(label="Min Pitch", value="50")
        max_pitch = gr.Textbox(label="Max Pitch", value="1100")
        crepe_hop_length = gr.Number(label="CREPE Hop Length", value=120)
        index_rate = gr.Slider(label="Index Rate", minimum=0, maximum=1, value=0.75)
        filter_radius = gr.Number(label="Filter Radius", value=3)
        rms_mix_rate = gr.Slider(label="RMS Mix Rate", minimum=0, maximum=1, value=0.25)
        protect = gr.Slider(label="Protect", minimum=0, maximum=1, value=0.33)

    with gr.Row():
        split_infer = gr.Checkbox(label="Enable Split Inference", value=False)
        min_silence = gr.Number(label="Min Silence (ms)", value=500)
        silence_threshold = gr.Number(label="Silence Threshold (dB)", value=-50)
        seek_step = gr.Slider(label="Seek Step (ms)", minimum=1, maximum=10, value=1)
        keep_silence = gr.Number(label="Keep Silence (ms)", value=200)
        formant_shift = gr.Checkbox(label="Enable Formant Shift", value=False)
        quefrency = gr.Number(label="Quefrency", value=0)
        timbre = gr.Number(label="Timbre", value=1)
        f0_autotune = gr.Checkbox(label="Enable F0 Autotune", value=False)
        output_format = gr.Dropdown(label="Output Format", choices=["wav", "flac", "mp3"], value="wav")

    # type="filepath" hands the callback a path string; the original
    # type="file" is not a valid Audio type in current Gradio.
    audio_input = gr.Audio(label="Input Audio", type="filepath")
    output_audio = gr.Audio(label="Output Audio")

    submit_btn = gr.Button("Run Inference")

    # Define the interaction between input and function
    submit_btn.click(fn=inference,
                     inputs=[model_name, audio_input, f0_change, f0_method_input, min_pitch, max_pitch,
                             crepe_hop_length, index_rate, filter_radius, rms_mix_rate, protect,
                             split_infer, min_silence, silence_threshold, seek_step, keep_silence,
                             formant_shift, quefrency, timbre, f0_autotune, output_format],
                     outputs=output_audio)

# Launch the demo
demo.launch()