ld76 committed on
Commit
d9429e2
1 Parent(s): 55b5a01

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -7
app.py CHANGED
@@ -28,14 +28,21 @@ def translate(audio):
28
 
29
  def synthesise(text):
30
  inputs = processor(text=text, return_tensors="pt")
31
- speech = model.generate_speech(inputs["input_ids"].to(device), speaker_embeddings.to(device), vocoder=vocoder)
 
 
32
  return speech.cpu()
33
 
 
 
 
34
 
35
  def speech_to_speech_translation(audio):
 
 
36
  translated_text = translate(audio)
37
  synthesised_speech = synthesise(translated_text)
38
- synthesised_speech = (synthesised_speech.numpy() * 32767).astype(np.int16)
39
  return 16000, synthesised_speech
40
 
41
 
@@ -53,20 +60,17 @@ mic_translate = gr.Interface(
53
  fn=speech_to_speech_translation,
54
  inputs=gr.Audio(source="microphone", type="filepath"),
55
  outputs=gr.Audio(label="Generated Speech", type="numpy"),
56
- title=title,
57
- description=description,
58
  )
59
 
60
  file_translate = gr.Interface(
61
  fn=speech_to_speech_translation,
62
  inputs=gr.Audio(source="upload", type="filepath"),
63
  outputs=gr.Audio(label="Generated Speech", type="numpy"),
64
- examples=[["./example.wav"]],
65
- title=title,
66
- description=description,
67
  )
68
 
 
69
  with demo:
70
  gr.TabbedInterface([mic_translate, file_translate], ["Microphone", "Audio File"])
71
 
 
72
  demo.launch()
 
28
 
29
  def synthesise(text):
30
  inputs = processor(text=text, return_tensors="pt")
31
+ speech = model.generate_speech(
32
+ inputs["input_ids"].to(device), speaker_embeddings.to(device), vocoder=vocoder
33
+ )
34
  return speech.cpu()
35
 
36
+ target_dtype = np.int16
37
+ max_range = np.iinfo(target_dtype).max
38
+
39
 
40
  def speech_to_speech_translation(audio):
41
+ target_dtype = np.int16
42
+ max_range = np.iinfo(target_dtype).max
43
  translated_text = translate(audio)
44
  synthesised_speech = synthesise(translated_text)
45
+ synthesised_speech = (synthesised_speech.numpy() * max_range).astype(np.int16)
46
  return 16000, synthesised_speech
47
 
48
 
 
60
  fn=speech_to_speech_translation,
61
  inputs=gr.Audio(source="microphone", type="filepath"),
62
  outputs=gr.Audio(label="Generated Speech", type="numpy"),
 
 
63
  )
64
 
65
  file_translate = gr.Interface(
66
  fn=speech_to_speech_translation,
67
  inputs=gr.Audio(source="upload", type="filepath"),
68
  outputs=gr.Audio(label="Generated Speech", type="numpy"),
 
 
 
69
  )
70
 
71
+
72
  with demo:
73
  gr.TabbedInterface([mic_translate, file_translate], ["Microphone", "Audio File"])
74
 
75
+
76
  demo.launch()