awacke1 committed on
Commit
aa86ec4
•
1 Parent(s): fb160d7

Create app.py

Files changed (1)
  1. app.py +119 -0
app.py ADDED
@@ -0,0 +1,119 @@
+ import streamlit as st
+ import firebase_admin
+ from firebase_admin import credentials
+ from firebase_admin import firestore
+ import datetime
+ from transformers import pipeline
+ import gradio as gr
+
+ import tempfile
+ from typing import Optional
+ import numpy as np
+ from TTS.utils.manage import ModelManager
+ from TTS.utils.synthesizer import Synthesizer
+
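+ # Cache one Firestore client across Streamlit reruns; 'test.json' is the
+ # service-account key file expected alongside the app.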
+ @st.experimental_singleton
+ def get_db_firestore():
+     cred = credentials.Certificate('test.json')
+     firebase_admin.initialize_app(cred, {'projectId': u'clinical-nlp-b9117',})
+     db = firestore.client()
+     return db
+
+ db = get_db_firestore()
+ asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
+
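+ # Download each Coqui TTS model (plus its default vocoder, when one is
+ # defined) once at startup, and keep a ready Synthesizer per model name.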
+ MODEL_NAMES = [
+     "en/ljspeech/tacotron2-DDC",
+     "en/ljspeech/glow-tts",
+     "en/ljspeech/speedy-speech-wn",
+     "en/ljspeech/vits",
+     "en/sam/tacotron-DDC",
+     "fr/mai/tacotron2-DDC",
+     "de/thorsten/tacotron2-DCA",
+ ]
+ MODELS = {}
+ manager = ModelManager()
+ for MODEL_NAME in MODEL_NAMES:
+     print(f"downloading {MODEL_NAME}")
+     model_path, config_path, model_item = manager.download_model(f"tts_models/{MODEL_NAME}")
+     vocoder_name: Optional[str] = model_item["default_vocoder"]
+     vocoder_path = None
+     vocoder_config_path = None
+     if vocoder_name is not None:
+         vocoder_path, vocoder_config_path, _ = manager.download_model(vocoder_name)
+
+     synthesizer = Synthesizer(
+         model_path, config_path, None, vocoder_path, vocoder_config_path,
+     )
+     MODELS[MODEL_NAME] = synthesizer
+
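+ # Pipeline helpers: wav2vec2 speech-to-text and the default sentiment classifier.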
+ def transcribe(audio):
+     text = asr(audio)["text"]
+     return text
+
+ classifier = pipeline("text-classification")
+
+ def speech_to_text(speech):
+     text = asr(speech)["text"]
+     return text
+
+ def text_to_sentiment(text):
+     sentiment = classifier(text)[0]["label"]
+     return sentiment
+
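+ # Firestore persistence: save each transcript under a timestamped document,
+ # read one document back, and list everything saved so far.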
+ def upsert(text):
+     date_time = str(datetime.datetime.today())
+     doc_ref = db.collection('Text2SpeechSentimentSave').document(date_time)
+     doc_ref.set({u'firefield': 'Recognize Speech', u'first': 'https://huggingface.co/spaces/awacke1/TTS-STT-Blocks/', u'last': text, u'born': date_time,})
+     # Read back from the same collection the document was written to.
+     saved = select('Text2SpeechSentimentSave', date_time)
+     # check it here: https://console.firebase.google.com/u/0/project/clinical-nlp-b9117/firestore/data/~2FStreamlitSpaces
+     return saved
+
+ def select(collection, document):
+     doc_ref = db.collection(collection).document(document)
+     doc = doc_ref.get()
+     return f"The id is: {doc.id}. The contents are: {doc.to_dict()}"
+
+ def selectall(text):
+     docs = db.collection('Text2SpeechSentimentSave').stream()
+     doclist = ''
+     for doc in docs:
+         doclist += f'{doc.id} => {doc.to_dict()}\n'
+     return doclist
+
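+ # Synthesize text with the chosen model and return a temporary .wav path
+ # that the Gradio Audio component can play.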
+ def tts(text: str, model_name: str):
+     print(text, model_name)
+     synthesizer = MODELS.get(model_name, None)
+     if synthesizer is None:
+         raise NameError("model not found")
+     wavs = synthesizer.tts(text)
+     with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
+         synthesizer.save_wav(wavs, fp)
+         return fp.name
+
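+ # Gradio Blocks UI: one shared transcript Textbox feeds sentiment, the
+ # Firestore save/retrieve helpers, and TTS playback via five buttons.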
+ demo = gr.Blocks()
+ with demo:
+     audio_file = gr.Audio(source="microphone", type="filepath")
+     text = gr.Textbox()
+     label = gr.Label()
+     saved = gr.Textbox()
+     savedAll = gr.Textbox()
+     TTSchoice = gr.Radio(label="Pick a TTS Model", choices=MODEL_NAMES)
+     audio = gr.Audio(label="Output", interactive=False)
+
+     b1 = gr.Button("Recognize Speech")
+     b2 = gr.Button("Classify Sentiment")
+     b3 = gr.Button("Save Speech to Text")
+     b4 = gr.Button("Retrieve All")
+     b5 = gr.Button("Read It Back Aloud")
+
+     b1.click(speech_to_text, inputs=audio_file, outputs=text)
+     b2.click(text_to_sentiment, inputs=text, outputs=label)
+     b3.click(upsert, inputs=text, outputs=saved)
+     b4.click(selectall, inputs=text, outputs=savedAll)
+     b5.click(tts, inputs=[text, TTSchoice], outputs=audio)
+
+ demo.launch(share=True)
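
The Firestore round-trip above can also be exercised without the UI; a minimal sketch, assuming the same 'test.json' service-account key and project are available to the process:

    # hypothetical smoke test for the persistence helpers defined in app.py
    saved = upsert("hello world")   # writes a timestamped doc and reads it back
    print(saved)
    print(selectall(""))            # argument is unused; mirrors the Gradio wiring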