Ryouko65777 committed on
Commit
eb1e6af
·
verified ·
1 Parent(s): 9e1688c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +119 -0
app.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ from lib.infer import infer_audio
4
+ from pydub import AudioSegment
5
+
6
+
7
+
8
+
9
# Supported pitch-extraction (F0) algorithms, including hybrid combinations.
# Declared as a tuple (not a set, as originally) so the UI dropdown lists the
# choices in a stable, intentional order; membership/iteration semantics for
# callers are unchanged.
f0_method = (
    "crepe",
    "harvest",
    "mangio-crepe",
    "rmvpe",
    "rmvpe+",
    "fcpe",
    "fcpe_legacy",
    "hybrid[mangio-crepe+rmvpe]",
    "hybrid[mangio-crepe+fcpe]",
    "hybrid[rmvpe+fcpe]",
    "hybrid[mangio-crepe+rmvpe+fcpe]",
)
22
+
23
+
24
+
25
+
26
+
27
+
28
# Function for inference
def inference(model_name, audio, f0_change, f0_method, min_pitch, max_pitch, crepe_hop_length,
              index_rate, filter_radius, rms_mix_rate, protect, split_infer, min_silence,
              silence_threshold, seek_step, keep_silence, formant_shift, quefrency, timbre,
              f0_autotune, output_format):
    """Run voice-conversion inference on an uploaded audio file.

    Parameters mirror ``lib.infer.infer_audio``; ``audio`` is the value
    delivered by the Gradio ``Audio`` input (a filepath string, or a
    tempfile-like object exposing ``.name``).

    Returns:
        str: path of the converted audio exported in ``output_format``.

    Raises:
        ValueError: if no input audio was provided.
    """
    # Anchor the current working directory so we can restore it after
    # inference.  (The original code did os.chdir("") which raises
    # FileNotFoundError unconditionally.)
    main_dir = os.getcwd()

    # Gradio's Audio component supplies either a filepath string or a
    # tempfile-like object with a .name attribute — accept both.  (The
    # original called audio.save(...), which exists on neither.)
    audio_path = audio if isinstance(audio, str) else getattr(audio, "name", None)
    if not audio_path:
        raise ValueError("No input audio provided.")

    # Ensure the bundled stftpitchshift binary is executable (used for
    # formant shifting).  Best effort — failures are ignored by os.system.
    os.system("chmod +x stftpitchshift")

    # Perform inference
    inferred_audio = infer_audio(
        model_name,
        audio_path,
        f0_change,
        f0_method,
        min_pitch,
        max_pitch,
        crepe_hop_length,
        index_rate,
        filter_radius,
        rms_mix_rate,
        protect,
        split_infer,
        min_silence,
        silence_threshold,
        seek_step,
        keep_silence,
        formant_shift,
        quefrency,
        timbre,
        f0_autotune,
        output_format
    )

    # Restore the working directory before exporting, in case infer_audio
    # changed it — presumably it may; TODO confirm against lib.infer.
    os.chdir(main_dir)
    output_audio = AudioSegment.from_file(inferred_audio)

    # Export in the requested container format and hand the path to Gradio.
    output_path = f"output.{output_format}"
    output_audio.export(output_path, format=output_format)
    return output_path
76
+
77
# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("## Audio Inference")

    with gr.Row():
        model_name = gr.Textbox(label="Model Name")
        f0_change = gr.Number(label="Pitch Change (F0 Change)", value=0)
        # Renamed from `f0_method` so the component does not shadow the
        # module-level collection of method names it is built from.
        # (Original line had a syntax error: `choices=list(f0_method.keys())=`,
        # and .keys() does not exist on the module-level collection.)
        f0_method_input = gr.Dropdown(label="F0 Method", choices=list(f0_method), value="fcpe")
        min_pitch = gr.Textbox(label="Min Pitch", value="50")
        max_pitch = gr.Textbox(label="Max Pitch", value="1100")
        crepe_hop_length = gr.Number(label="CREPE Hop Length", value=120)
        index_rate = gr.Slider(label="Index Rate", minimum=0, maximum=1, value=0.75)
        filter_radius = gr.Number(label="Filter Radius", value=3)
        rms_mix_rate = gr.Slider(label="RMS Mix Rate", minimum=0, maximum=1, value=0.25)
        protect = gr.Slider(label="Protect", minimum=0, maximum=1, value=0.33)

    with gr.Row():
        split_infer = gr.Checkbox(label="Enable Split Inference", value=False)
        min_silence = gr.Number(label="Min Silence (ms)", value=500)
        silence_threshold = gr.Number(label="Silence Threshold (dB)", value=-50)
        seek_step = gr.Slider(label="Seek Step (ms)", minimum=1, maximum=10, value=1)
        keep_silence = gr.Number(label="Keep Silence (ms)", value=200)
        formant_shift = gr.Checkbox(label="Enable Formant Shift", value=False)
        quefrency = gr.Number(label="Quefrency", value=0)
        timbre = gr.Number(label="Timbre", value=1)
        f0_autotune = gr.Checkbox(label="Enable F0 Autotune", value=False)
        output_format = gr.Dropdown(label="Output Format", choices=["wav", "flac", "mp3"], value="wav")

    # type="filepath" hands the callback a path string; the original
    # type="file" is not a valid Audio type in current Gradio.
    audio_input = gr.Audio(label="Input Audio", type="filepath")
    output_audio = gr.Audio(label="Output Audio")

    submit_btn = gr.Button("Run Inference")

    # Define the interaction between input and function
    submit_btn.click(fn=inference,
                     inputs=[model_name, audio_input, f0_change, f0_method_input, min_pitch, max_pitch,
                             crepe_hop_length, index_rate, filter_radius, rms_mix_rate, protect,
                             split_infer, min_silence, silence_threshold, seek_step, keep_silence,
                             formant_shift, quefrency, timbre, f0_autotune, output_format],
                     outputs=output_audio)

# Launch the demo
demo.launch()