import streamlit as st
import firebase_admin
import datetime
import gradio as gr
import numpy as np
import tempfile
from firebase_admin import credentials
from firebase_admin import firestore
from transformers import pipeline
from typing import Optional
from TTS.utils.manage import ModelManager
from TTS.utils.synthesizer import Synthesizer
from gradio import inputs
from gradio.inputs import Textbox
from gradio import outputs

# Firestore collection that upsert() writes to and reads back from, and that
# selectall() streams. Kept in one place so the write and read paths cannot
# drift apart.
SAVE_COLLECTION = 'Text2SpeechSentimentSave'


# Persistence via Cloud Store
@st.experimental_singleton
def get_db_firestore():
    """Initialise the Firebase app and return a Firestore client.

    Credentials are loaded from the local file 'test.json' and the app is
    bound to the 'clinical-nlp-b9117' project.
    NOTE(review): the Streamlit singleton decorator is presumably here so
    that initialize_app() runs only once -- confirm it is effective when the
    script is launched outside Streamlit.
    """
    cred = credentials.Certificate('test.json')
    firebase_admin.initialize_app(cred, {'projectId': u'clinical-nlp-b9117',})
    return firestore.client()


db = get_db_firestore()

# Speech-recognition pipeline shared by transcribe() and speech_to_text().
asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")

# TTS models (keys of MODELS and the choices offered in the UI radio button)
MODEL_NAMES = [
    "en/ljspeech/tacotron2-DDC",
    "en/ljspeech/glow-tts",
    "en/ljspeech/speedy-speech-wn",
    "en/ljspeech/vits",
    #"en/sam/tacotron-DDC",
    #"fr/mai/tacotron2-DDC",
    #"de/thorsten/tacotron2-DCA",
]

# Maps each model name to a ready-to-use Synthesizer; filled at import time.
MODELS = {}
manager = ModelManager()
for MODEL_NAME in MODEL_NAMES:
    print(f"downloading {MODEL_NAME}")
    model_path, config_path, model_item = manager.download_model(f"tts_models/{MODEL_NAME}")
    vocoder_name: Optional[str] = model_item["default_vocoder"]
    vocoder_path = None
    vocoder_config_path = None
    # Only fetch a vocoder when the model metadata names a default one.
    if vocoder_name is not None:
        vocoder_path, vocoder_config_path, _ = manager.download_model(vocoder_name)
    synthesizer = Synthesizer(
        model_path, config_path, None, vocoder_path, vocoder_config_path,
    )
    MODELS[MODEL_NAME] = synthesizer

# Not referenced anywhere else in the visible part of this file.
GEN_NAMES = [
    "huggingface/EleutherAI/gpt-neo-2.7B",
    "huggingface/EleutherAI/gpt-j-6B",
    "huggingface/gpt2-large"
]


# ASR
def transcribe(audio):
    """Return the text recognised in *audio* by the ASR pipeline.

    Returns an empty string when *audio* is None (for example when the
    button is clicked before anything was recorded) instead of passing None
    to the pipeline.
    """
    if audio is None:
        return ""
    return asr(audio)["text"]


# Sentiment Classifier
classifier = pipeline("text-classification")

# Story generation pipeline (GPT-2 genre story generator)
story_gen = pipeline("text-generation", "pranavpsv/gpt2-genre-story-generator")


# STT
def speech_to_text(speech):
    """Return the text recognised in *speech*; same behaviour as transcribe()."""
    return transcribe(speech)


# TTSentiment
def text_to_sentiment(text):
    """Return the label of the first result the classifier gives for *text*."""
    return classifier(text)[0]["label"]


# Save
def upsert(text):
    """Store *text* in Firestore and return the document read back.

    The document id and the 'born' field are both the current local
    timestamp rendered as a string. The read-back uses the same collection
    the document was written to, so the returned tuple carries the fields
    that were just stored.
    """
    date_time = str(datetime.datetime.today())
    doc_ref = db.collection(SAVE_COLLECTION).document(date_time)
    doc_ref.set({u'firefield': 'Recognize Speech', u'first': 'https://huggingface.co/spaces/awacke1/TTS-STT-Blocks/', u'last': text, u'born': date_time,})
    # Read from the collection we just wrote to (previously a different
    # collection was queried, which always produced empty contents).
    saved = select(SAVE_COLLECTION, date_time)
    # check it here: https://console.firebase.google.com/u/0/project/clinical-nlp-b9117/firestore/data/~2FStreamlitSpaces
    return saved


# OpenLast
def select(collection, document):
    """Fetch one document and return ("The contents are: ", <fields dict>).

    *collection* and *document* are the Firestore collection name and
    document id to look up.
    """
    doc = db.collection(collection).document(document).get()
    return ("The contents are: ", doc.to_dict())


# OpenAll
def selectall(text):
    """Return every saved document as one string of 'id => fields' entries.

    *text* is unused; it exists because the click handler wired to this
    function supplies one input value. Entries are concatenated without a
    separator.
    """
    docs = db.collection(SAVE_COLLECTION).stream()
    return ''.join(f'{doc.id} => {doc.to_dict()}' for doc in docs)


# TTS
def tts(text: str, model_name: str):
    """Synthesise *text* with the named model and return the .wav file path.

    Raises:
        NameError: if *model_name* is not a key of MODELS.
    """
    print(text, model_name)
    synthesizer = MODELS.get(model_name, None)
    if synthesizer is None:
        raise NameError("model not found")
    wavs = synthesizer.tts(text)
    # delete=False: the file has to outlive this function so the caller can
    # use the returned path.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        synthesizer.save_wav(wavs, fp)
    return fp.name


# Blocks Rock It
demo = gr.Blocks()

with demo:
    # UI
    audio_file = gr.inputs.Audio(source="microphone", type="filepath")
    text = gr.Textbox()
    label = gr.Label()
    saved = gr.Textbox()
    savedAll = gr.Textbox()
    TTSchoice = gr.inputs.Radio(
        label="Pick a TTS Model",
        choices=MODEL_NAMES,
    )
    audio = gr.Audio(label="Output", interactive=False)

    # Buttons
    b1 = gr.Button("Recognize Speech")
    b2 = gr.Button("Classify Sentiment")
    b3 = gr.Button("Save Speech to Text")
    b4 = gr.Button("Retrieve All")
    b5 = gr.Button("Read It Back Aloud")

    # Event Model Chains
    b1.click(speech_to_text, inputs=audio_file, outputs=text)
    b2.click(text_to_sentiment, inputs=text, outputs=label)
    b3.click(upsert, inputs=text, outputs=saved)
    b4.click(selectall, inputs=text, outputs=savedAll)
    b5.click(tts, inputs=[text,TTSchoice], outputs=audio)

# Lets Do It
demo.launch(share=True)

# NOTE(review): title and examples are assigned after launch() and are not
# referenced by the interface built above.
title = "Story Generators"
examples = [
    ["At which point do we invent Love?"],
    ["Love is a capacity more than consciousness is universal."],
    ["See the grace of god in eachother."],
    ["Love is a capacity more than consciousness is universal."],
    ["Love is generativity when there is more energy than what they need for equilibrium."],
    ["Collections of people have agency and mass having agency at the mesoscopic level"],
    ["Having a deep human connection is an interface problem to solve."],
    ["Having a collective creates agency since we build trust in eachother."]
]