Spaces:
Running
Running
clementruhm
committed on
Commit
·
3ebe5da
1
Parent(s):
9c7fd5c
More fixes to handling stereo, add note on wave
Browse files- app.py +3 -0
- vc_service_request.py +10 -2
app.py
CHANGED
@@ -47,6 +47,9 @@ def main():
|
|
47 |
2. Select an audio sample that represents the target voice you want to convert to.
|
48 |
3. Click the "Convert" button and listen to the result!
|
49 |
|
|
|
|
|
|
|
50 |
If you are interested to plug in Voice Conversion
|
51 |
service into your own application, don't hesitate to get in touch with us at
|
52 |
[contact@balacoon.com](mailto:contact@balacoon.com)
|
|
|
47 |
2. Select an audio sample that represents the target voice you want to convert to.
|
48 |
3. Click the "Convert" button and listen to the result!
|
49 |
|
50 |
+
If providing your own audio files, please use WAVE PCM.
|
51 |
+
Service works with 16kHz, 16 bit, mono audio.
|
52 |
+
|
53 |
If you are interested to plug in Voice Conversion
|
54 |
service into your own application, don't hesitate to get in touch with us at
|
55 |
[contact@balacoon.com](mailto:contact@balacoon.com)
|
vc_service_request.py
CHANGED
@@ -35,8 +35,14 @@ def prepare_audio(audio: Tuple[int, np.ndarray]) -> np.ndarray:
|
|
35 |
|
36 |
if wav.ndim == 2:
|
37 |
# average channels
|
38 |
-
wav
|
39 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
# ensure proper sampling rate
|
41 |
if sr != 16000:
|
42 |
wav = (wav / 32768.0).astype(np.float)
|
@@ -94,6 +100,8 @@ def vc_service_request(
|
|
94 |
"""
|
95 |
src = prepare_audio(source_audio)
|
96 |
tgt = prepare_audio(target_audio)
|
|
|
|
|
97 |
if len(src) >= 60 * 16000 or len(tgt) >= 30 * 16000:
|
98 |
# input is way too long, don't return anything
|
99 |
return
|
|
|
35 |
|
36 |
if wav.ndim == 2:
|
37 |
# average channels
|
38 |
+
if wav.shape[0] == 2:
|
39 |
+
wav = np.mean(wav, axis=0, keepdims=False)
|
40 |
+
if wav.shape[1] == 2:
|
41 |
+
wav = np.mean(wav, axis=1, keepdims=False)
|
42 |
+
|
43 |
+
if wav.ndim != 1:
|
44 |
+
return None
|
45 |
+
|
46 |
# ensure proper sampling rate
|
47 |
if sr != 16000:
|
48 |
wav = (wav / 32768.0).astype(np.float)
|
|
|
100 |
"""
|
101 |
src = prepare_audio(source_audio)
|
102 |
tgt = prepare_audio(target_audio)
|
103 |
+
if not src or not tgt:
|
104 |
+
return
|
105 |
if len(src) >= 60 * 16000 or len(tgt) >= 30 * 16000:
|
106 |
# input is way too long, don't return anything
|
107 |
return
|