Update app.py
app.py CHANGED
@@ -71,18 +71,16 @@ def load(speaker):
     global model
     sid = speaker
     model = Svc(models_info[sid]["model"], models_info[sid]["config"], cluster_model_path=models_info[sid]["cluster"], feature_retrieval=models_info[sid]["feature_retrieval"])
-    return "
-
+    return "Model loaded successfully", sid
+sid = speakers[0]
+load(sid)
 
 def vc_fn(input_audio, vc_transform, auto_f0,cluster_ratio, slice_db, noise_scale):
     global sid
     if input_audio is None:
         return "You need to upload an audio", None
     sampling_rate, audio = input_audio
-    # print(audio.shape,sampling_rate)
     duration = audio.shape[0] / sampling_rate
-    # if duration > 90:
-    #     return "Please upload audio shorter than 90s; to convert longer audio, run the conversion locally", None
     audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
     if len(audio.shape) > 1:
         audio = librosa.to_mono(audio.transpose(1, 0))
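The preprocessing that survives in vc_fn is terse, so a standalone sketch may help: gr.Audio hands the callback a (sampling_rate, ndarray) tuple of integer PCM samples, which the app scales to float32 in [-1, 1] and downmixes to mono. The preprocess helper below is hypothetical (it is not a function in the app), and the 16-bit stereo input is made up for illustration:

import numpy as np
import librosa

def preprocess(input_audio):
    # input_audio mirrors what gr.Audio passes: (sampling_rate, samples)
    sampling_rate, audio = input_audio
    duration = audio.shape[0] / sampling_rate  # length in seconds
    # Scale by the dtype's max (32767 for int16) to get floats in [-1, 1].
    audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
    if len(audio.shape) > 1:
        # (samples, channels) -> (channels, samples), then average to mono.
        audio = librosa.to_mono(audio.transpose(1, 0))
    return sampling_rate, audio, duration

# One second of silent int16 stereo audio as a stand-in for an upload:
sr, mono, dur = preprocess((44100, np.zeros((44100, 2), dtype=np.int16)))
print(sr, mono.dtype, mono.shape, dur)  # 44100 float32 (44100,) 1.0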
@@ -98,18 +96,19 @@ def vc_fn(input_audio, vc_transform, auto_f0,cluster_ratio, slice_db, noise_scale):
 app = gr.Blocks()
 with app:
     with gr.Tabs():
-        with gr.TabItem("
-            speaker = gr.Dropdown(label="
-            model_submit = gr.Button("
+        with gr.TabItem("Inference"):
+            speaker = gr.Dropdown(label="Speaker", choices=speakers, value=speakers[0])
+            model_submit = gr.Button("Load Model", variant="primary")
             model_output1 = gr.Textbox(label="Output Message")
-
-
-
-
-
-
-
-
+            load(sid)
+            model_output2 = gr.Textbox(label="Speaker", value=sid)
+            vc_input3 = gr.Audio(label="Upload Audio")
+            vc_transform = gr.Number(label="Pitch Shift (integer, positive or negative, in semitones; +12 raises one octave)", value=0)
+            cluster_ratio = gr.Number(label="Cluster Model Mixing Ratio (0-1): defaults to 0 (clustering disabled). Improves timbre similarity but may reduce articulation clarity; ~0.5 recommended if used", value=0)
+            auto_f0 = gr.Checkbox(label="Auto f0 Prediction: works better with the cluster model for f0 prediction but disables pitch shift (voice conversion only; do not enable for singing voices, as it causes severe off-pitch results)", value=False)
+            slice_db = gr.Number(label="Slicing Threshold", value=-40)
+            noise_scale = gr.Number(label="noise_scale", value=0.4)
+            vc_submit = gr.Button("Convert", variant="primary")
             vc_output1 = gr.Textbox(label="Output Message")
             vc_output2 = gr.Audio(label="Output Audio")
             model_submit.click(load, [speaker], [model_output1, model_output2])
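The final context line is the standard Gradio event wiring: Button.click(fn, inputs, outputs) feeds the dropdown's current value into load() and fans the two returned values out to the two output components, in order. A self-contained sketch of that pattern, with a stand-in load() that only echoes (no Svc involved):

import gradio as gr

speakers = ["speaker_a", "speaker_b"]  # placeholder ids for illustration

def load(speaker):
    # The real app constructs an Svc model here; this stub just echoes.
    return "Model loaded successfully", speaker

with gr.Blocks() as app:
    speaker = gr.Dropdown(label="Speaker", choices=speakers, value=speakers[0])
    model_submit = gr.Button("Load Model", variant="primary")
    model_output1 = gr.Textbox(label="Output Message")
    model_output2 = gr.Textbox(label="Speaker")
    # One input component maps to load(speaker); the two return values
    # land in model_output1 and model_output2 respectively.
    model_submit.click(load, [speaker], [model_output1, model_output2])

if __name__ == "__main__":
    app.launch()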
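One thing worth noting about the first hunk: load() declares only `global model`, so its `sid = speaker` assignment creates a local and never updates the module-level sid that vc_fn reads via `global sid`; after the startup preload, clicking "Load Model" swaps the model but leaves sid at speakers[0]. A sketch of the load-and-preload pattern with both names declared global (Svc and models_info are the app's own objects; FakeSvc and the registry below are made-up stand-ins):

class FakeSvc:
    # Stand-in for so-vits-svc's Svc; a real one would load weights here.
    def __init__(self, model_path, config_path, cluster_model_path=None,
                 feature_retrieval=False):
        self.model_path = model_path

models_info = {  # placeholder registry mirroring the app's structure
    "speaker_a": {"model": "a.pth", "config": "a.json",
                  "cluster": "", "feature_retrieval": False},
}
speakers = list(models_info)
model, sid = None, None

def load(speaker):
    global model, sid  # declare both so a click handler updates sid too
    sid = speaker
    info = models_info[sid]
    model = FakeSvc(info["model"], info["config"],
                    cluster_model_path=info["cluster"],
                    feature_retrieval=info["feature_retrieval"])
    return "Model loaded successfully", sid

# Module-level preload, as the diff does with `sid = speakers[0]; load(sid)`:
print(load(speakers[0]))  # -> ('Model loaded successfully', 'speaker_a')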