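"""TTS-STT demo: transcribe microphone audio with wav2vec2, classify the
transcript's sentiment, persist it to Firestore, and read text back aloud
with Coqui TTS, all wired together in a Gradio Blocks UI."""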
import streamlit as st
import firebase_admin
from firebase_admin import credentials
from firebase_admin import firestore
import datetime
from transformers import pipeline
import gradio as gr
import tempfile
from typing import Optional
import numpy as np
from TTS.utils.manage import ModelManager
from TTS.utils.synthesizer import Synthesizer
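# Cache the Firestore client as a singleton so it survives Streamlit reruns
# instead of being re-created on every script execution.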
@st.experimental_singleton
def get_db_firestore():
    cred = credentials.Certificate('test.json')
    firebase_admin.initialize_app(cred, {'projectId': 'clinical-nlp-b9117'})
    db = firestore.client()
    return db
db = get_db_firestore()
#asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")
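# wav2vec2-large-xlsr-53-english is a multilingual XLSR-53 checkpoint
# fine-tuned on English, swapped in for the base-960h model commented out above.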
asr = pipeline("automatic-speech-recognition", "jonatasgrosman/wav2vec2-large-xlsr-53-english")
MODEL_NAMES = [
"en/ljspeech/tacotron2-DDC",
"en/ljspeech/glow-tts",
"en/ljspeech/speedy-speech-wn",
"en/ljspeech/vits",
"en/sam/tacotron-DDC",
"fr/mai/tacotron2-DDC",
"de/thorsten/tacotron2-DCA",
]
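# Download every model up front and keep one ready-to-use Synthesizer per name.
# Most Coqui TTS acoustic models list a companion vocoder in their metadata;
# when one is present it is downloaded and paired with the model in the loop below.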
MODELS = {}
manager = ModelManager()
for MODEL_NAME in MODEL_NAMES:
    print(f"downloading {MODEL_NAME}")
    model_path, config_path, model_item = manager.download_model(f"tts_models/{MODEL_NAME}")
    vocoder_name: Optional[str] = model_item["default_vocoder"]
    vocoder_path = None
    vocoder_config_path = None
    if vocoder_name is not None:
        vocoder_path, vocoder_config_path, _ = manager.download_model(vocoder_name)
    synthesizer = Synthesizer(
        model_path, config_path, None, vocoder_path, vocoder_config_path,
    )
    MODELS[MODEL_NAME] = synthesizer
def transcribe(audio):
    # Kept alongside speech_to_text below; both simply run ASR on the audio.
    text = asr(audio)["text"]
    return text
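# No checkpoint is given, so the pipeline falls back to its default
# text-classification model (a DistilBERT fine-tuned on SST-2 at the time of
# writing), which labels text POSITIVE or NEGATIVE.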
classifier = pipeline("text-classification")
def speech_to_text(speech):
    text = asr(speech)["text"]
    return text
def text_to_sentiment(text):
    sentiment = classifier(text)[0]["label"]
    return sentiment
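# Write the transcript to Firestore under a timestamp-keyed document, then
# read the same document back as confirmation of the save.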
def upsert(text):
    date_time = str(datetime.datetime.today())
    doc_ref = db.collection('Text2SpeechSentimentSave').document(date_time)
    doc_ref.set({'firefield': 'Recognize Speech', 'first': 'https://huggingface.co/spaces/awacke1/TTS-STT-Blocks/', 'last': text, 'born': date_time})
    # Read back from the same collection the document was written to.
    saved = select('Text2SpeechSentimentSave', date_time)
    # check it here: https://console.firebase.google.com/u/0/project/clinical-nlp-b9117/firestore/data/~2FStreamlitSpaces
    return saved
def select(collection, document):
    doc_ref = db.collection(collection).document(document)
    doc = doc_ref.get()
    return f"The id is: {doc.id}. The contents are: {doc.to_dict()}"
def selectall(text):  # `text` is unused; it only satisfies the button wiring below
    docs = db.collection('Text2SpeechSentimentSave').stream()
    doclist = ''
    for doc in docs:
        doclist += f'{doc.id} => {doc.to_dict()}\n'
    return doclist
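# Look up the pre-built Synthesizer for the chosen model, render the text to
# audio, and hand Gradio a temp .wav path it can serve back to the browser.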
def tts(text: str, model_name: str):
    print(text, model_name)
    synthesizer = MODELS.get(model_name, None)
    if synthesizer is None:
        raise NameError("model not found")
    wavs = synthesizer.tts(text)
    # delete=False keeps the file on disk so Gradio can serve it after the handler returns.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        synthesizer.save_wav(wavs, fp)
    return fp.name
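# Gradio Blocks UI: each button feeds the previous step's output into the
# next one: microphone, then transcript, then sentiment, then Firestore, then TTS.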
demo = gr.Blocks()
with demo:
    audio_file = gr.Audio(source="microphone", type="filepath")
    text = gr.Textbox()
    b1 = gr.Button("Recognize Speech")
    label = gr.Label()
    b2 = gr.Button("Classify Sentiment")
    saved = gr.Textbox()
    b3 = gr.Button("Save Speech to Text")
    savedAll = gr.Textbox()
    b4 = gr.Button("Retrieve All")
    TTSchoice = gr.Radio(label="Pick a TTS Model", choices=MODEL_NAMES)
    audio = gr.Audio(label="Output", interactive=False)
    b5 = gr.Button("Read It Back Aloud")
    b1.click(speech_to_text, inputs=audio_file, outputs=text)
    b2.click(text_to_sentiment, inputs=text, outputs=label)
    b3.click(upsert, inputs=text, outputs=saved)
    b4.click(selectall, inputs=text, outputs=savedAll)
    b5.click(tts, inputs=[text, TTSchoice], outputs=audio)
# share=True raises a runtime error on Hugging Face Spaces, so launch without it.
demo.launch()