awacke1 committed
Commit
dee841c
Parent: 84d8598

Update app.py

Files changed (1):
  1. app.py +45 -20
app.py CHANGED
@@ -1,29 +1,27 @@
 import gradio as gr
-import tensorflow as tf
-import transformers
-
-
 from transformers import pipeline
+import io, base64
+from PIL import Image
+import numpy as np
+import tensorflow as tf
+import mediapy
+import os
+import sys
+from huggingface_hub import snapshot_download
 
-#import streamlit as st
+import streamlit as st
 import firebase_admin
 from firebase_admin import credentials
 from firebase_admin import firestore
 import datetime
+from transformers import pipeline
+import gradio as gr
 
 import tempfile
 from typing import Optional
 import numpy as np
 from TTS.utils.manage import ModelManager
 from TTS.utils.synthesizer import Synthesizer
-
-import io, base64
-import mediapy
-import os
-import sys
-
-from PIL import Image
-from huggingface_hub import snapshot_download
 
 
 # firestore singleton is a cached multiuser instance to persist shared crowdsource memory
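Note: the next hunk's context line shows `db = get_db_firestore()`, but the function body sits outside this diff. A minimal sketch of such a cached Firestore singleton built with firebase_admin follows; the Streamlit cache decorator and the key filename are assumptions, suggested only by the `import streamlit as st` added above:

import firebase_admin
import streamlit as st
from firebase_admin import credentials, firestore

@st.experimental_singleton  # assumption: one client cached across all user sessions
def get_db_firestore():
    cred = credentials.Certificate("admin.json")  # hypothetical service-account key path
    firebase_admin.initialize_app(cred)           # runs once thanks to the cache
    return firestore.client()

db = get_db_firestore()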
@@ -39,7 +37,31 @@ db = get_db_firestore()
 
 # create ASR ML pipeline
 asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
-#asr = pipeline("automatic-speech-recognition", "snakers4/silero-models")
+
+MODEL_NAMES = [
+    "en/ljspeech/tacotron2-DDC",
+    "en/ljspeech/glow-tts",
+    "en/ljspeech/speedy-speech-wn",
+    "en/ljspeech/vits",
+    "en/sam/tacotron-DDC",
+    "fr/mai/tacotron2-DDC",
+    "de/thorsten/tacotron2-DCA",
+]
+MODELS = {}
+manager = ModelManager()
+for MODEL_NAME in MODEL_NAMES:
+    print(f"downloading {MODEL_NAME}")
+    model_path, config_path, model_item = manager.download_model(f"tts_models/{MODEL_NAME}")
+    vocoder_name: Optional[str] = model_item["default_vocoder"]
+    vocoder_path = None
+    vocoder_config_path = None
+    if vocoder_name is not None:
+        vocoder_path, vocoder_config_path, _ = manager.download_model(vocoder_name)
+
+    synthesizer = Synthesizer(
+        model_path, config_path, None, vocoder_path, vocoder_config_path,
+    )
+    MODELS[MODEL_NAME] = synthesizer
 
 # create Text Classification pipeline
 classifier = pipeline("text-classification")
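The loop above downloads every model at startup and keys a ready `Synthesizer` by name, so a button click never pays download latency. A quick smoke test of one loaded entry (the model key comes from `MODEL_NAMES`; the output path is arbitrary):

synth = MODELS["en/ljspeech/glow-tts"]                   # any key from MODEL_NAMES
wav = synth.tts("Testing the text to speech pipeline.")  # raw audio samples
synth.save_wav(wav, "sample.wav")                        # write a playable WAV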
@@ -145,24 +167,27 @@ def generate_interpolation(gallery):
 demo = gr.Blocks()
 
 with demo:
-    #with gr.Row():
-    # Left column (inputs)
-    # with gr.Column():
+
     audio_file = gr.inputs.Audio(source="microphone", type="filepath")
     text = gr.Textbox()
     label = gr.Label()
     saved = gr.Textbox()
-    savedAll = gr.Textbox()
-    # with gr.Column():
+    savedAll = gr.Textbox()
+    TTSchoice = gr.inputs.Radio( label="Pick a TTS Model", choices=MODEL_NAMES, )
+    audio = gr.Audio(label="Output", interactive=False)
+
     b1 = gr.Button("Recognize Speech")
     b2 = gr.Button("Classify Sentiment")
    b3 = gr.Button("Save Speech to Text")
     b4 = gr.Button("Retrieve All")
-
+    b5 = gr.Button("Read It Back Aloud")
+
     b1.click(speech_to_text, inputs=audio_file, outputs=text)
     b2.click(text_to_sentiment, inputs=text, outputs=label)
     b3.click(upsert, inputs=text, outputs=saved)
     b4.click(selectall, inputs=text, outputs=savedAll)
+    b5.click(tts, inputs=[text,TTSchoice], outputs=audio)
+
 
     with gr.Row():
        # Left column (inputs)
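`b5.click` wires a `tts` function that this diff never defines; it presumably lands elsewhere in app.py. Given the `MODELS` dict built above and the file's existing `tempfile` import, a plausible minimal version following the standard Coqui TTS demo pattern (not necessarily the author's exact code):

def tts(text: str, model_name: str):
    # look up the pre-loaded synthesizer picked in the TTSchoice radio
    synthesizer = MODELS.get(model_name)
    if synthesizer is None:
        raise NameError("model not found")
    wavs = synthesizer.tts(text)
    # write a temp .wav; gr.Audio accepts a filepath as output
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        synthesizer.save_wav(wavs, fp)
        return fp.name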
 
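Similarly, `upsert` and `selectall` (wired to b3/b4) live outside these hunks. A hedged sketch of what they plausibly do with the shared Firestore client; the collection name, document key, and return formatting are all assumptions:

def upsert(text):
    # hypothetical collection/document naming; stores the transcript with a timestamp
    date_time = str(datetime.datetime.today())
    doc_ref = db.collection("Text2SpeechSentimentSave").document(date_time)
    doc_ref.set({"text": text, "created": date_time})
    return f"Saved: {text}"

def selectall(text):
    # stream every saved transcript back as one newline-joined string
    docs = db.collection("Text2SpeechSentimentSave").stream()
    return "\n".join(f"{doc.id} => {doc.to_dict()}" for doc in docs)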