clementruhm committed on
Commit
3ebe5da
·
1 Parent(s): 9c7fd5c

More fixes to handling stereo, add note on wave

Browse files
Files changed (2) hide show
  1. app.py +3 -0
  2. vc_service_request.py +10 -2
app.py CHANGED
@@ -47,6 +47,9 @@ def main():
47
  2. Select an audio sample that represents the target voice you want to convert to.
48
  3. Click the "Convert" button and listen to the result!
49
 
 
 
 
50
  If you are interested to plug in Voice Conversion
51
  service into your own application, don't hesitate to get in touch with us at
52
  [contact@balacoon.com](mailto:contact@balacoon.com)
 
47
  2. Select an audio sample that represents the target voice you want to convert to.
48
  3. Click the "Convert" button and listen to the result!
49
 
50
+ If providing your own audio files, please use WAVE PCM.
51
+ Service works with 16kHz, 16 bit, mono audio.
52
+
53
  If you are interested to plug in Voice Conversion
54
  service into your own application, don't hesitate to get in touch with us at
55
  [contact@balacoon.com](mailto:contact@balacoon.com)
vc_service_request.py CHANGED
@@ -35,8 +35,14 @@ def prepare_audio(audio: Tuple[int, np.ndarray]) -> np.ndarray:
35
 
36
  if wav.ndim == 2:
37
  # average channels
38
- wav = np.mean(wav, axis=0, keepdims=False)
39
-
 
 
 
 
 
 
40
  # ensure proper sampling rate
41
  if sr != 16000:
42
  wav = (wav / 32768.0).astype(np.float)
@@ -94,6 +100,8 @@ def vc_service_request(
94
  """
95
  src = prepare_audio(source_audio)
96
  tgt = prepare_audio(target_audio)
 
 
97
  if len(src) >= 60 * 16000 or len(tgt) >= 30 * 16000:
98
  # input is way too long, dont return anything
99
  return
 
35
 
36
  if wav.ndim == 2:
37
  # average channels
38
+ if wav.shape[0] == 2:
39
+ wav = np.mean(wav, axis=0, keepdims=False)
40
+ if wav.shape[1] == 2:
41
+ wav = np.mean(wav, axis=1, keepdims=False)
42
+
43
+ if wav.ndim != 1:
44
+ return None
45
+
46
  # ensure proper sampling rate
47
  if sr != 16000:
48
  wav = (wav / 32768.0).astype(np.float)
 
100
  """
101
  src = prepare_audio(source_audio)
102
  tgt = prepare_audio(target_audio)
103
+ if not src or not tgt:
104
+ return
105
  if len(src) >= 60 * 16000 or len(tgt) >= 30 * 16000:
106
  # input is way too long, dont return anything
107
  return