awacke1 commited on
Commit
e954652
1 Parent(s): eb611ba

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +162 -0
app.py ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import firebase_admin
3
+ from firebase_admin import credentials
4
+ from firebase_admin import firestore
5
+ import datetime
6
+ from transformers import pipeline
7
+ import gradio as gr
8
+
9
+ import tempfile
10
+ from typing import Optional
11
+ import numpy as np
12
+ from TTS.utils.manage import ModelManager
13
+ from TTS.utils.synthesizer import Synthesizer
14
+
15
@st.experimental_singleton
def get_db_firestore():
    """Create and return the shared Firestore client.

    Loads service-account credentials from ``test.json`` and binds the
    client to the ``clinical-nlp-b9117`` project.  The Streamlit singleton
    decorator caches the client so reruns of the script reuse one instance
    instead of re-initializing the Firebase app.
    """
    certificate = credentials.Certificate('test.json')
    firebase_admin.initialize_app(certificate, {'projectId': u'clinical-nlp-b9117'})
    return firestore.client()
21
+
22
+
23
# Module-level singletons shared by every handler below:
# the Firestore client and a wav2vec2-based speech-recognition pipeline.
db = get_db_firestore()
asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
25
+
26
def transcribe(audio):
    """Run the shared ASR pipeline on *audio* and return the recognized text."""
    return asr(audio)["text"]
29
+
30
# Default text-classification pipeline (sentiment labels) shared below.
classifier = pipeline("text-classification")
31
+
32
def speech_to_text(speech):
    """Transcribe *speech* audio to text.

    This was a byte-for-byte duplicate of :func:`transcribe`; it now
    delegates to it so the ASR call exists in exactly one place.  The
    name is kept because the Gradio button wiring references it.
    """
    return transcribe(speech)
35
+
36
def text_to_sentiment(text):
    """Classify *text* and return the top predicted sentiment label."""
    predictions = classifier(text)
    top = predictions[0]
    return top["label"]
39
+
40
def upsert(text):
    """Persist *text* to the Text2SpeechSentimentSave collection.

    The document is keyed by the current timestamp, then immediately
    re-read via :func:`select` so the caller can display exactly what
    was stored.
    """
    date_time = str(datetime.datetime.today())
    record = {
        u'firefield': 'Recognize Speech',
        u'first': 'https://huggingface.co/spaces/awacke1/Text2SpeechSentimentSave',
        u'last': text,
        u'born': date_time,
    }
    db.collection('Text2SpeechSentimentSave').document(date_time).set(record)
    # Browse the saved data at:
    # https://console.firebase.google.com/u/0/project/clinical-nlp-b9117/firestore/data/~2FStreamlitSpaces
    return select('Text2SpeechSentimentSave', date_time)
47
+
48
def select(collection, document):
    """Fetch one Firestore document and return ("The contents are: ", dict).

    Fix: the original also built an unused ``docid`` tuple; it is removed.
    NOTE(review): the returned tuple looks like a leftover from a
    print-style call (comma instead of concatenation).  Callers display
    it via str(), so the exact tuple shape is preserved here — confirm
    before changing it to a formatted string.
    """
    doc = db.collection(collection).document(document).get()
    return ("The contents are: ", doc.to_dict())
54
+
55
def selectall(text):
    """Return every document in Text2SpeechSentimentSave as one string.

    Each entry is rendered as ``<doc id> => <doc dict>`` with no separator,
    matching the original concatenation.  The *text* parameter is unused;
    it is kept because the Gradio button always passes the textbox value.

    Fixes: removed commented-out dead code and replaced the quadratic
    string ``+=`` loop with a single ``join`` (identical output).
    """
    docs = db.collection('Text2SpeechSentimentSave').stream()
    return ''.join(f'{doc.id} => {doc.to_dict()}' for doc in docs)
65
+
66
# --- Gradio UI #1: speech -> transcript -> sentiment, with Firestore save ---
demo = gr.Blocks()

with demo:
    # Microphone input saved to a temp file path for the ASR pipeline.
    audio_file = gr.inputs.Audio(source="microphone", type="filepath")
    text = gr.Textbox()      # recognized transcript
    label = gr.Label()       # sentiment label for the transcript
    saved = gr.Textbox()     # echo of the single saved Firestore document
    savedAll = gr.Textbox()  # dump of every saved document

    b1 = gr.Button("Recognize Speech")
    b2 = gr.Button("Classify Sentiment")
    b3 = gr.Button("Save Speech to Text")
    b4 = gr.Button("Retrieve All")

    # Wire each button to its handler; outputs land in the widgets above.
    b1.click(speech_to_text, inputs=audio_file, outputs=text)
    b2.click(text_to_sentiment, inputs=text, outputs=label)
    b3.click(upsert, inputs=text, outputs=saved)
    b4.click(selectall, inputs=text, outputs=savedAll)

demo.launch(share=True)
87
+
88
+
89
+
90
+
91
+
92
+
93
# --- Coqui TTS setup: download each model (plus its default vocoder) once
# at startup and keep a ready Synthesizer per model name. ---
MODEL_NAMES = [
    # "en/ek1/tacotron2",
    "en/ljspeech/tacotron2-DDC",
    # "en/ljspeech/tacotron2-DDC_ph",
    # "en/ljspeech/glow-tts",
    # "en/ljspeech/tacotron2-DCA",
    # "en/ljspeech/speedy-speech-wn",
    # "en/ljspeech/vits",
    # "en/vctk/sc-glow-tts",
    # "en/vctk/vits",
    # "en/sam/tacotron-DDC",
    # "es/mai/tacotron2-DDC",
    "fr/mai/tacotron2-DDC",
    "zh-CN/baker/tacotron2-DDC-GST",
    "nl/mai/tacotron2-DDC",
    "de/thorsten/tacotron2-DCA",
    # "ja/kokoro/tacotron2-DDC",
]
MODELS = {}  # model name -> ready-to-use Synthesizer

manager = ModelManager()

for MODEL_NAME in MODEL_NAMES:
    print(f"downloading {MODEL_NAME}")
    model_path, config_path, model_item = manager.download_model(f"tts_models/{MODEL_NAME}")
    # A model may declare a companion vocoder; download it when present.
    vocoder_name: Optional[str] = model_item["default_vocoder"]
    vocoder_path = None
    vocoder_config_path = None
    if vocoder_name is not None:
        vocoder_path, vocoder_config_path, _ = manager.download_model(vocoder_name)

    synthesizer = Synthesizer(
        model_path, config_path, None, vocoder_path, vocoder_config_path,
    )
    MODELS[MODEL_NAME] = synthesizer
128
+
129
+
130
def tts(text: str, model_name: str):
    """Synthesize *text* with the chosen Coqui model.

    Returns the path of a temporary .wav file containing the audio.
    Raises NameError when *model_name* was not preloaded into MODELS.
    """
    print(text, model_name)
    if model_name not in MODELS:
        raise NameError("model not found")
    synthesizer = MODELS[model_name]
    wavs = synthesizer.tts(text)
    # delete=False: the caller (Gradio) reads the file after we return.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        synthesizer.save_wav(wavs, fp)
    return fp.name
141
+
142
+
143
+
144
# --- Gradio UI #2: standalone Coqui TTS demo (text + model -> audio) ---
iface = gr.Interface(
    fn=tts,
    inputs=[
        gr.inputs.Textbox(
            label="Input",
            default="Hello, how are you?",
        ),
        gr.inputs.Radio(
            label="Pick a TTS Model",
            choices=MODEL_NAMES,
        ),
    ],
    outputs=gr.outputs.Audio(label="Output"),
    title="🐸💬 - Coqui TTS",
    theme="huggingface",
    description="🐸💬 - a deep learning toolkit for Text-to-Speech, battle-tested in research and production",
    article="more info at https://github.com/coqui-ai/TTS",
)
# NOTE(review): this is a second launch() after demo.launch(share=True)
# above; in a plain script the first launch may block so this line might
# never run — confirm whether both apps are meant to serve at once.
iface.launch()