Spaces:
Runtime error
Runtime error
File size: 4,079 Bytes
aa86ec4 877f42e aa86ec4 39c9462 aa86ec4 39c9462 aa86ec4 39c9462 aa86ec4 39c9462 aa86ec4 ab68def 5fdaa5f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 |
import streamlit as st
import firebase_admin
from firebase_admin import credentials
from firebase_admin import firestore
import datetime
from transformers import pipeline
import gradio as gr
import tempfile
from typing import Optional
import numpy as np
from TTS.utils.manage import ModelManager
from TTS.utils.synthesizer import Synthesizer
@st.experimental_singleton
def get_db_firestore():
    """Return a Firestore client for the ``clinical-nlp-b9117`` project.

    The service-account credentials are loaded from ``test.json`` in the
    working directory. The default Firebase app is initialised only when it
    does not exist yet, so calling this function more than once in the same
    process no longer fails with "The default Firebase app already exists".

    Returns:
        The client object produced by ``firestore.client()``.
    """
    try:
        # get_app() raises ValueError when no default app has been created.
        firebase_admin.get_app()
    except ValueError:
        cred = credentials.Certificate('test.json')
        firebase_admin.initialize_app(cred, {'projectId': u'clinical-nlp-b9117',})
    return firestore.client()
# Module-level resources shared by the handlers below: the Firestore client,
# the speech-recognition pipeline and one TTS synthesizer per model name.
db = get_db_firestore()

# Alternative ASR checkpoint, kept for reference:
#asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
asr = pipeline(
    "automatic-speech-recognition",
    "jonatasgrosman/wav2vec2-large-xlsr-53-english",
)

# TTS models offered in the UI; each name is looked up under "tts_models/".
MODEL_NAMES = [
    "en/ljspeech/tacotron2-DDC",
    "en/ljspeech/glow-tts",
    "en/ljspeech/speedy-speech-wn",
    "en/ljspeech/vits",
    "en/sam/tacotron-DDC",
    "fr/mai/tacotron2-DDC",
    "de/thorsten/tacotron2-DCA",
]

# Maps each entry of MODEL_NAMES to its ready-to-use Synthesizer.
MODELS = {}

manager = ModelManager()

for MODEL_NAME in MODEL_NAMES:
    print(f"downloading {MODEL_NAME}")
    model_path, config_path, model_item = manager.download_model(
        f"tts_models/{MODEL_NAME}"
    )

    # A model may declare no default vocoder, in which case both vocoder
    # paths stay None and nothing extra is downloaded.
    vocoder_name: Optional[str] = model_item["default_vocoder"]
    if vocoder_name is None:
        vocoder_path, vocoder_config_path = None, None
    else:
        vocoder_path, vocoder_config_path, _ = manager.download_model(vocoder_name)

    synthesizer = Synthesizer(
        model_path,
        config_path,
        None,
        vocoder_path,
        vocoder_config_path,
    )
    MODELS[MODEL_NAME] = synthesizer
def transcribe(audio):
    """Run the module-level ``asr`` pipeline on ``audio`` and return its text.

    Args:
        audio: Input forwarded unchanged to the ``asr`` pipeline.

    Returns:
        The value stored under the ``"text"`` key of the pipeline result.
    """
    result = asr(audio)
    return result["text"]
# Text classifier used by text_to_sentiment. No model name is passed here, so
# the checkpoint is whatever the transformers library selects for this task.
classifier = pipeline("text-classification")
def speech_to_text(speech):
    """Transcribe ``speech`` with the module-level ``asr`` pipeline.

    Args:
        speech: Input forwarded unchanged to the ``asr`` pipeline (the UI
            passes the recorded audio's file path).

    Returns:
        The value stored under the ``"text"`` key of the pipeline result.
    """
    recognition = asr(speech)
    return recognition["text"]
def text_to_sentiment(text):
    """Classify ``text`` and return the label of the first prediction.

    Args:
        text: Text forwarded unchanged to the module-level ``classifier``.

    Returns:
        The ``"label"`` entry of the first element of the classifier output.
    """
    predictions = classifier(text)
    first_prediction = predictions[0]
    return first_prediction["label"]
def upsert(text):
    """Store ``text`` in Firestore and return the record as read back.

    A document keyed by the current local timestamp is written to the
    ``Text2SpeechSentimentSave`` collection, then fetched again through
    ``select`` so the caller sees what was actually persisted.

    Args:
        text: The recognised text to save (stored under the ``last`` field).

    Returns:
        The tuple returned by ``select`` for the document just written.
    """
    collection = 'Text2SpeechSentimentSave'
    date_time = str(datetime.datetime.today())
    doc_ref = db.collection(collection).document(date_time)
    doc_ref.set({u'firefield': 'Recognize Speech', u'first': 'https://huggingface.co/spaces/awacke1/TTS-STT-Blocks/', u'last': text, u'born': date_time,})
    # Read back from the collection that was just written to; the previous
    # lookup used 'TTS-STT', which never contains this document.
    saved = select(collection, date_time)
    # check it here: https://console.firebase.google.com/u/0/project/clinical-nlp-b9117/firestore/data/~2FStreamlitSpaces
    return saved
def select(collection, document):
    """Fetch one Firestore document and return its contents with a label.

    Args:
        collection: Name of the collection to read from.
        document: ID of the document inside that collection.

    Returns:
        A 2-tuple ``("The contents are: ", data)`` where ``data`` is the
        result of ``to_dict()`` on the fetched snapshot.
    """
    # The unused "The id is: " tuple from the earlier version was dead code
    # and has been dropped; only the contents tuple was ever returned.
    doc = db.collection(collection).document(document).get()
    return ("The contents are: ", doc.to_dict())
def selectall(text):
    """Return every document in ``Text2SpeechSentimentSave`` as one string.

    Each document is rendered as ``"<id> => <dict>"`` and the pieces are
    concatenated with no separator between them.

    Args:
        text: Unused; present so the function can be wired to a UI input.

    Returns:
        The concatenated string, or ``''`` when the collection is empty.
    """
    snapshots = db.collection('Text2SpeechSentimentSave').stream()
    return ''.join(f'{snap.id} => {snap.to_dict()}' for snap in snapshots)
def tts(text: str, model_name: str):
    """Synthesize ``text`` with the chosen model and return a WAV file path.

    Args:
        text: The text to speak.
        model_name: Key into the module-level ``MODELS`` mapping.

    Returns:
        The name of a temporary ``.wav`` file holding the synthesized audio.
        The file is created with ``delete=False``, so it is not removed
        automatically.

    Raises:
        NameError: If ``model_name`` has no synthesizer in ``MODELS``.
    """
    print(text, model_name)

    engine = MODELS.get(model_name)
    if engine is None:
        raise NameError("model not found")

    waveform = engine.tts(text)

    out_file = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    with out_file:
        engine.save_wav(waveform, out_file)
    return out_file.name
# Gradio UI: each output component is followed by the button that fills it.
demo = gr.Blocks()

with demo:
    # Components are created with gr.Audio / gr.Radio directly, matching the
    # other components in this block, instead of the deprecated gr.inputs.*
    # wrappers.
    audio_file = gr.Audio(source="microphone", type="filepath")

    text = gr.Textbox()
    b1 = gr.Button("Recognize Speech")

    label = gr.Label()
    b2 = gr.Button("Classify Sentiment")

    saved = gr.Textbox()
    b3 = gr.Button("Save Speech to Text")

    savedAll = gr.Textbox()
    b4 = gr.Button("Retrieve All")

    TTSchoice = gr.Radio(label="Pick a TTS Model", choices=MODEL_NAMES)
    audio = gr.Audio(label="Output", interactive=False)
    b5 = gr.Button("Read It Back Aloud")

    # Event wiring must happen inside the Blocks context.
    b1.click(speech_to_text, inputs=audio_file, outputs=text)
    b2.click(text_to_sentiment, inputs=text, outputs=label)
    b3.click(upsert, inputs=text, outputs=saved)
    b4.click(selectall, inputs=text, outputs=savedAll)
    b5.click(tts, inputs=[text, TTSchoice], outputs=audio)

#demo.launch(share=True) - remove share=true to avoid runtime error in spaces
demo.launch()