awacke1 committed on
Commit 1f705ff
1 Parent(s): 89c118f

Update app.py

Files changed (1)
  1. app.py +65 -73
app.py CHANGED
@@ -23,6 +23,43 @@ def get_db_firestore():
 db = get_db_firestore()
 asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
 
+
+MODEL_NAMES = [
+    # "en/ek1/tacotron2",
+    "en/ljspeech/tacotron2-DDC",
+    # "en/ljspeech/tacotron2-DDC_ph",
+    # "en/ljspeech/glow-tts",
+    # "en/ljspeech/tacotron2-DCA",
+    # "en/ljspeech/speedy-speech-wn",
+    # "en/ljspeech/vits",
+    # "en/vctk/sc-glow-tts",
+    # "en/vctk/vits",
+    # "en/sam/tacotron-DDC",
+    # "es/mai/tacotron2-DDC",
+    "fr/mai/tacotron2-DDC",
+    "zh-CN/baker/tacotron2-DDC-GST",
+    "nl/mai/tacotron2-DDC",
+    "de/thorsten/tacotron2-DCA",
+    # "ja/kokoro/tacotron2-DDC",
+]
+MODELS = {}
+manager = ModelManager()
+for MODEL_NAME in MODEL_NAMES:
+    print(f"downloading {MODEL_NAME}")
+    model_path, config_path, model_item = manager.download_model(f"tts_models/{MODEL_NAME}")
+    vocoder_name: Optional[str] = model_item["default_vocoder"]
+    vocoder_path = None
+    vocoder_config_path = None
+    if vocoder_name is not None:
+        vocoder_path, vocoder_config_path, _ = manager.download_model(vocoder_name)
+
+    synthesizer = Synthesizer(
+        model_path, config_path, None, vocoder_path, vocoder_config_path,
+    )
+    MODELS[MODEL_NAME] = synthesizer
+
+
+
 def transcribe(audio):
     text = asr(audio)["text"]
     return text
@@ -63,8 +100,19 @@ def selectall(text):
         doclist += r
     return doclist
 
-demo = gr.Blocks()
+def tts(text: str, model_name: str):
+    print(text, model_name)
+    synthesizer = MODELS.get(model_name, None)
+    if synthesizer is None:
+        raise NameError("model not found")
+    wavs = synthesizer.tts(text)
+    # output = (synthesizer.output_sample_rate, np.array(wavs))
+    # return output
+    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
+        synthesizer.save_wav(wavs, fp)
+    return fp.name
 
+demo = gr.Blocks()
 with demo:
     #audio_file = gr.Audio(type="filepath")
     audio_file = gr.inputs.Audio(source="microphone", type="filepath")
@@ -72,91 +120,35 @@ with demo:
     label = gr.Label()
     saved = gr.Textbox()
     savedAll = gr.Textbox()
+    TTSchoice = gr.inputs.Radio(label="Pick a TTS Model", choices=MODEL_NAMES)
 
     b1 = gr.Button("Recognize Speech")
     b2 = gr.Button("Classify Sentiment")
     b3 = gr.Button("Save Speech to Text")
     b4 = gr.Button("Retrieve All")
+    b5 = gr.Button("Read It Back Aloud")
 
     b1.click(speech_to_text, inputs=audio_file, outputs=text)
     b2.click(text_to_sentiment, inputs=text, outputs=label)
     b3.click(upsert, inputs=text, outputs=saved)
     b4.click(selectall, inputs=text, outputs=savedAll)
+    b5.click(tts, inputs=[text, TTSchoice], outputs=gr.Audio(label="Output"))
 
 demo.launch(share=True)
 
 
 
 
-
-
-MODEL_NAMES = [
-    # "en/ek1/tacotron2",
-    "en/ljspeech/tacotron2-DDC",
-    # "en/ljspeech/tacotron2-DDC_ph",
-    # "en/ljspeech/glow-tts",
-    # "en/ljspeech/tacotron2-DCA",
-    # "en/ljspeech/speedy-speech-wn",
-    # "en/ljspeech/vits",
-    # "en/vctk/sc-glow-tts",
-    # "en/vctk/vits",
-    # "en/sam/tacotron-DDC",
-    # "es/mai/tacotron2-DDC",
-    "fr/mai/tacotron2-DDC",
-    "zh-CN/baker/tacotron2-DDC-GST",
-    "nl/mai/tacotron2-DDC",
-    "de/thorsten/tacotron2-DCA",
-    # "ja/kokoro/tacotron2-DDC",
-]
-MODELS = {}
-
-manager = ModelManager()
-
-for MODEL_NAME in MODEL_NAMES:
-    print(f"downloading {MODEL_NAME}")
-    model_path, config_path, model_item = manager.download_model(f"tts_models/{MODEL_NAME}")
-    vocoder_name: Optional[str] = model_item["default_vocoder"]
-    vocoder_path = None
-    vocoder_config_path = None
-    if vocoder_name is not None:
-        vocoder_path, vocoder_config_path, _ = manager.download_model(vocoder_name)
-
-    synthesizer = Synthesizer(
-        model_path, config_path, None, vocoder_path, vocoder_config_path,
-    )
-    MODELS[MODEL_NAME] = synthesizer
-
-
-def tts(text: str, model_name: str):
-    print(text, model_name)
-    synthesizer = MODELS.get(model_name, None)
-    if synthesizer is None:
-        raise NameError("model not found")
-    wavs = synthesizer.tts(text)
-    # output = (synthesizer.output_sample_rate, np.array(wavs))
-    # return output
-    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
-        synthesizer.save_wav(wavs, fp)
-    return fp.name
-
-
-
-iface = gr.Interface(
-    fn=tts,
-    inputs=[
-        gr.inputs.Textbox(
-            label="Input",
-            default="Hello, how are you?",
-        ),
-        gr.inputs.Radio(
-            label="Pick a TTS Model",
-            choices=MODEL_NAMES,
-        ),
-    ],
-    outputs=gr.outputs.Audio(label="Output"),
-    title="🐸💬 - Coqui TTS",
-    theme="huggingface",
-    description="🐸💬 - a deep learning toolkit for Text-to-Speech, battle-tested in research and production",
-    article="more info at https://github.com/coqui-ai/TTS",
-)
-iface.launch()
+#iface = gr.Interface(
+#    fn=tts,
+#    inputs=[
+#        gr.inputs.Textbox(label="Input", default="Hello, how are you?"),
+#        gr.inputs.Radio(label="Pick a TTS Model", choices=MODEL_NAMES),
+#    ],
+#    outputs=gr.outputs.Audio(label="Output"),
+#    title="🐸💬 - Coqui TTS",
+#    theme="huggingface",
+#    description="🐸💬 - a deep learning toolkit for Text-to-Speech, battle-tested in research and production",
+#    article="more info at https://github.com/coqui-ai/TTS",
+#)
+#iface.launch()
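For a quick check of the change, the new tts helper can be exercised on its own once the model-loading block added at the top of app.py has run. The snippet below is a minimal sketch and not part of the commit: the sample text and model name are illustrative, and it assumes the Coqui TTS package is installed so the loop above has populated MODELS.

# Minimal usage sketch (assumes the MODEL_NAMES / MODELS setup and the tts()
# helper from this commit have already executed in the same session).
wav_path = tts("Hello, how are you?", "en/ljspeech/tacotron2-DDC")
print("synthesized audio written to", wav_path)  # path to a temporary .wav file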