Ahsen Khaliq committed on
Commit
633eaa6
β€’
1 Parent(s): 2c6c0c3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -11
app.py CHANGED
@@ -5,13 +5,12 @@ import scipy.io.wavfile
5
  from espnet2.bin.tts_inference import Text2Speech
6
  from espnet2.utils.types import str_or_none
7
 
8
- lang = 'English'
9
- tag = 'kan-bayashi/ljspeech_vits'
10
- vocoder_tag = "none"
11
 
12
- text2speech = Text2Speech.from_pretrained(
13
- model_tag=str_or_none(tag),
14
- vocoder_tag=str_or_none(vocoder_tag),
15
  device="cpu",
16
  # Only for Tacotron 2 & Transformer
17
  threshold=0.5,
@@ -29,11 +28,61 @@ text2speech = Text2Speech.from_pretrained(
29
  )
30
 
31
 
32
- def inference(text):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  with torch.no_grad():
34
- start = time.time()
35
- wav = text2speech(text)["wav"]
36
- scipy.io.wavfile.write("out.wav",text2speech.fs , wav.view(-1).cpu().numpy())
 
 
 
 
 
 
37
  return "out.wav"
38
  title = "ESPnet2-TTS"
39
  description = "Gradio demo for ESPnet2-TTS: Extending the Edge of TTS Research. To use it, simply add your audio, or click one of the examples to load them. Read more at the links below."
@@ -43,7 +92,7 @@ examples=[['This paper describes ESPnet2-TTS, an end-to-end text-to-speech (E2E-
43
 
44
  gr.Interface(
45
  inference,
46
- gr.inputs.Textbox(label="input text",lines=10),
47
  gr.outputs.Audio(type="file", label="Output"),
48
  title=title,
49
  description=description,
 
5
  from espnet2.bin.tts_inference import Text2Speech
6
  from espnet2.utils.types import str_or_none
7
 
8
+ tagen = 'kan-bayashi/ljspeech_vits'
9
+ vocoder_tagen = "none"
 
10
 
11
+ text2speechen = Text2Speech.from_pretrained(
12
+ model_tag=str_or_none(tagen),
13
+ vocoder_tag=str_or_none(vocoder_tagen),
14
  device="cpu",
15
  # Only for Tacotron 2 & Transformer
16
  threshold=0.5,
 
28
  )
29
 
30
 
31
+ tagjp = 'kan-bayashi/jsut_full_band_vits_prosody'
32
+ vocoder_tagjp = 'none'
33
+
34
+ text2speechjp = Text2Speech.from_pretrained(
35
+ model_tag=str_or_none(tagjp),
36
+ vocoder_tag=str_or_none(vocoder_tagjp),
37
+ device="cpu",
38
+ # Only for Tacotron 2 & Transformer
39
+ threshold=0.5,
40
+ # Only for Tacotron 2
41
+ minlenratio=0.0,
42
+ maxlenratio=10.0,
43
+ use_att_constraint=False,
44
+ backward_window=1,
45
+ forward_window=3,
46
+ # Only for FastSpeech & FastSpeech2 & VITS
47
+ speed_control_alpha=1.0,
48
+ # Only for VITS
49
+ noise_scale=0.333,
50
+ noise_scale_dur=0.333,
51
+ )
52
+
53
+ tagch = 'kan-bayashi/csmsc_full_band_vits'
54
+ vocoder_tagch = "none"
55
+
56
+ text2speechch = Text2Speech.from_pretrained(
57
+ model_tag=str_or_none(tagch),
58
+ vocoder_tag=str_or_none(vocoder_tagch),
59
+ device="cpu",
60
+ # Only for Tacotron 2 & Transformer
61
+ threshold=0.5,
62
+ # Only for Tacotron 2
63
+ minlenratio=0.0,
64
+ maxlenratio=10.0,
65
+ use_att_constraint=False,
66
+ backward_window=1,
67
+ forward_window=3,
68
+ # Only for FastSpeech & FastSpeech2 & VITS
69
+ speed_control_alpha=1.0,
70
+ # Only for VITS
71
+ noise_scale=0.333,
72
+ noise_scale_dur=0.333,
73
+ )
74
+
75
+ def inference(text,lang):
76
  with torch.no_grad():
77
+ if lang == "english":
78
+ wav = text2speechen(text)["wav"]
79
+ scipy.io.wavfile.write("out.wav",text2speechen.fs , wav.view(-1).cpu().numpy())
80
+ if lang == "chinese":
81
+ wav = text2speechch(text)["wav"]
82
+ scipy.io.wavfile.write("out.wav",text2speechch.fs , wav.view(-1).cpu().numpy())
83
+ if lang == "japanese":
84
+ wav = text2speechjp(text)["wav"]
85
+ scipy.io.wavfile.write("out.wav",text2speechjp.fs , wav.view(-1).cpu().numpy())
86
  return "out.wav"
87
  title = "ESPnet2-TTS"
88
  description = "Gradio demo for ESPnet2-TTS: Extending the Edge of TTS Research. To use it, simply add your audio, or click one of the examples to load them. Read more at the links below."
 
92
 
93
  gr.Interface(
94
  inference,
95
+ [gr.inputs.Textbox(label="input text",lines=10),gr.inputs.Radio(choices=["english", "chinese", "japanese"], type="value", default="english", label="language")],
96
  gr.outputs.Audio(type="file", label="Output"),
97
  title=title,
98
  description=description,