File size: 4,079 Bytes
aa86ec4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
877f42e
 
aa86ec4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39c9462
 
aa86ec4
39c9462
 
aa86ec4
39c9462
 
aa86ec4
39c9462
 
 
aa86ec4
 
 
 
 
 
 
 
ab68def
5fdaa5f
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import streamlit as st
import firebase_admin
from firebase_admin import credentials
from firebase_admin import firestore
import datetime
from transformers import pipeline
import gradio as gr

import tempfile
from typing import Optional
import numpy as np
from TTS.utils.manage import ModelManager
from TTS.utils.synthesizer import Synthesizer

@st.experimental_singleton
def get_db_firestore():
    cred = credentials.Certificate('test.json')
    firebase_admin.initialize_app(cred, {'projectId': u'clinical-nlp-b9117',})
    db = firestore.client()
    return db

db = get_db_firestore()
#asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
asr = pipeline("automatic-speech-recognition", "jonatasgrosman/wav2vec2-large-xlsr-53-english")

MODEL_NAMES = [
    "en/ljspeech/tacotron2-DDC",
    "en/ljspeech/glow-tts",
    "en/ljspeech/speedy-speech-wn",
    "en/ljspeech/vits",
    "en/sam/tacotron-DDC",
    "fr/mai/tacotron2-DDC",
    "de/thorsten/tacotron2-DCA",
]
MODELS = {}
manager = ModelManager()
for MODEL_NAME in MODEL_NAMES:
    print(f"downloading {MODEL_NAME}")
    model_path, config_path, model_item = manager.download_model(f"tts_models/{MODEL_NAME}")
    vocoder_name: Optional[str] = model_item["default_vocoder"]
    vocoder_path = None
    vocoder_config_path = None
    if vocoder_name is not None:
        vocoder_path, vocoder_config_path, _ = manager.download_model(vocoder_name)

    synthesizer = Synthesizer(
        model_path, config_path, None, vocoder_path, vocoder_config_path,
    )
    MODELS[MODEL_NAME] = synthesizer

def transcribe(audio):
    text = asr(audio)["text"]
    return text

classifier = pipeline("text-classification")

def speech_to_text(speech):
    text = asr(speech)["text"]
    return text

def text_to_sentiment(text):
    sentiment = classifier(text)[0]["label"]
    return sentiment 

def upsert(text):
    date_time =str(datetime.datetime.today())
    doc_ref = db.collection('Text2SpeechSentimentSave').document(date_time)
    doc_ref.set({u'firefield': 'Recognize Speech', u'first': 'https://huggingface.co/spaces/awacke1/TTS-STT-Blocks/', u'last': text, u'born': date_time,})
    saved = select('TTS-STT', date_time)
    # check it here:  https://console.firebase.google.com/u/0/project/clinical-nlp-b9117/firestore/data/~2FStreamlitSpaces
    return saved
      
def select(collection, document):
    doc_ref = db.collection(collection).document(document)
    doc = doc_ref.get()
    docid = ("The id is: ", doc.id)
    contents = ("The contents are: ", doc.to_dict())
    return contents
          
def selectall(text):
    docs = db.collection('Text2SpeechSentimentSave').stream()
    doclist=''
    for doc in docs:
        r=(f'{doc.id} => {doc.to_dict()}')
        doclist += r
    return doclist 
    
def tts(text: str, model_name: str):
    print(text, model_name)
    synthesizer = MODELS.get(model_name, None)
    if synthesizer is None:
        raise NameError("model not found")
    wavs = synthesizer.tts(text)
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        synthesizer.save_wav(wavs, fp)
        return fp.name

demo = gr.Blocks()
with demo:
    audio_file = gr.inputs.Audio(source="microphone", type="filepath")
    text = gr.Textbox()
    b1 = gr.Button("Recognize Speech")

    label = gr.Label()
    b2 = gr.Button("Classify Sentiment")

    saved = gr.Textbox()
    b3 = gr.Button("Save Speech to Text")

    savedAll = gr.Textbox()
    b4 = gr.Button("Retrieve All")

    TTSchoice = gr.inputs.Radio( label="Pick a TTS Model", choices=MODEL_NAMES,   )
    audio = gr.Audio(label="Output", interactive=False)  
    b5 = gr.Button("Read It Back Aloud")

    b1.click(speech_to_text, inputs=audio_file, outputs=text)
    b2.click(text_to_sentiment, inputs=text, outputs=label)
    b3.click(upsert, inputs=text, outputs=saved)
    b4.click(selectall, inputs=text, outputs=savedAll)
    b5.click(tts,  inputs=[text,TTSchoice], outputs=audio)
    
    
#demo.launch(share=True) - remove share=true to avoid runtime error in spaces
demo.launch()