Spaces:

awacke1
/

SpeechStoryReadAloud

Build error

App Files Files Community

SpeechStoryReadAloud / app.py

awacke1

Update app.py

13b6982 about 3 years ago

raw

history blame contribute delete

5.03 kB

	import streamlit as st
	import firebase_admin
	import datetime
	import gradio as gr
	import numpy as np
	import tempfile

	from firebase_admin import credentials
	from firebase_admin import firestore
	from transformers import pipeline
	from typing import Optional
	from TTS.utils.manage import ModelManager
	from TTS.utils.synthesizer import Synthesizer
	from gradio import inputs
	from gradio.inputs import Textbox
	from gradio import outputs

	# Persistence via Cloud Firestore
	@st.experimental_singleton
	def get_db_firestore():
	    """Return a Firestore client for the 'clinical-nlp-b9117' project.

	    The default Firebase app is created from the credentials file
	    'test.json' only when it does not exist yet.  initialize_app() raises
	    ValueError if the default app has already been initialised, so an
	    existing app is reused instead of being initialised a second time.
	    """
	    try:
	        firebase_admin.get_app()
	    except ValueError:
	        # No default app yet: build it from the local credentials file.
	        cred = credentials.Certificate('test.json')
	        firebase_admin.initialize_app(cred, {'projectId': u'clinical-nlp-b9117',})
	    return firestore.client()
	# Firestore handle shared by the save / retrieve callbacks below.
	db = get_db_firestore()

	# Speech-recognition pipeline shared by the transcription callbacks below.
	asr = pipeline(
	    task="automatic-speech-recognition",
	    model="facebook/wav2vec2-base-960h",
	)

	# TTS models: every entry is downloaded at import time and wrapped in a
	# Synthesizer that is stored in MODELS under the same name.
	MODEL_NAMES = [
	    "en/ljspeech/tacotron2-DDC",
	    "en/ljspeech/glow-tts",
	    "en/ljspeech/speedy-speech-wn",
	    "en/ljspeech/vits",
	    #"en/sam/tacotron-DDC",
	    #"fr/mai/tacotron2-DDC",
	    #"de/thorsten/tacotron2-DCA",
	]
	MODELS = {}
	manager = ModelManager()
	for MODEL_NAME in MODEL_NAMES:
	    print(f"downloading {MODEL_NAME}")
	    model_path, config_path, model_item = manager.download_model(f"tts_models/{MODEL_NAME}")

	    # A model may name a default vocoder; fetch it only when one is listed.
	    vocoder_name: Optional[str] = model_item["default_vocoder"]
	    if vocoder_name is None:
	        vocoder_path = None
	        vocoder_config_path = None
	    else:
	        vocoder_path, vocoder_config_path, _ = manager.download_model(vocoder_name)

	    synthesizer = Synthesizer(
	        model_path,
	        config_path,
	        None,
	        vocoder_path,
	        vocoder_config_path,
	    )
	    MODELS[MODEL_NAME] = synthesizer

	# Identifiers of text-generation models (not referenced by the code visible here).
	GEN_NAMES = [
	    "huggingface/EleutherAI/gpt-neo-2.7B",
	    "huggingface/EleutherAI/gpt-j-6B",
	    "huggingface/gpt2-large",
	]


	# ASR
	def transcribe(audio):
	    """Run the module-level `asr` pipeline on `audio` and return its "text" field."""
	    return asr(audio)["text"]

	# Sentiment classifier (no model is named, so the pipeline picks its default).
	classifier = pipeline(task="text-classification")

	# Story-generation pipeline backed by the gpt2-genre-story-generator checkpoint.
	story_gen = pipeline(
	    task="text-generation",
	    model="pranavpsv/gpt2-genre-story-generator",
	)


	# STT
	def speech_to_text(speech):
	    """Transcribe `speech` with the module-level `asr` pipeline and return the text."""
	    recognised = asr(speech)
	    return recognised["text"]

	# Text -> sentiment
	def text_to_sentiment(text):
	    """Classify `text` and return the "label" of the first result from `classifier`."""
	    first_result = classifier(text)[0]
	    return first_result["label"]

	# Save
	def upsert(text):
	    """Store `text` in Firestore and return the record as read back.

	    The document is written to the 'Text2SpeechSentimentSave' collection
	    under an id made from the current local timestamp, then fetched again
	    through select().

	    Returns:
	        The tuple produced by select() for the document just written.
	    """
	    collection = 'Text2SpeechSentimentSave'
	    date_time = str(datetime.datetime.today())
	    doc_ref = db.collection(collection).document(date_time)
	    doc_ref.set({u'firefield': 'Recognize Speech', u'first': 'https://huggingface.co/spaces/awacke1/TTS-STT-Blocks/', u'last': text, u'born': date_time,})
	    # Read back from the collection that was just written.  The previous
	    # code queried 'TTS-STT', a different collection, so the saved record
	    # was never the one returned.
	    saved = select(collection, date_time)
	    # check it here: https://console.firebase.google.com/u/0/project/clinical-nlp-b9117/firestore/data/~2FStreamlitSpaces
	    return saved

	# OpenLast
	def select(collection, document):
	    """Fetch one Firestore document and return its contents.

	    Args:
	        collection: name of the collection to look in.
	        document: id of the document inside that collection.

	    Returns:
	        The tuple ("The contents are: ", <result of to_dict() on the fetched document>).
	    """
	    snapshot = db.collection(collection).document(document).get()
	    return ("The contents are: ", snapshot.to_dict())

	# OpenAll
	def selectall(text):
	    """Return every document in 'Text2SpeechSentimentSave' as one string.

	    Each document is rendered as "<id> => <fields dict>" and the pieces are
	    concatenated with no separator.  `text` is accepted because the UI
	    callback passes an input value, but it is not used.
	    """
	    docs = db.collection('Text2SpeechSentimentSave').stream()
	    # join() builds the result in one pass instead of repeated string +=.
	    return "".join(f'{doc.id} => {doc.to_dict()}' for doc in docs)

	# TTS
	def tts(text: str, model_name: str):
	    """Synthesize `text` with the model registered as `model_name`.

	    Returns the path of a temporary ".wav" file holding the audio.
	    Raises NameError when MODELS has no synthesizer for `model_name`.
	    """
	    print(text, model_name)
	    engine = MODELS.get(model_name)
	    if engine is None:
	        raise NameError("model not found")
	    waveform = engine.tts(text)
	    # delete=False keeps the file on disk after it is closed; only its
	    # path is handed back to the caller.
	    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as wav_file:
	        engine.save_wav(waveform, wav_file)
	    return wav_file.name


	# Blocks UI: record speech, transcribe it, then classify / save / list / read aloud.
	demo = gr.Blocks()
	with demo:
	    # Components.  gr.Audio / gr.Radio replace the deprecated gr.inputs.*
	    # shims so every component here is created the same way.
	    audio_file = gr.Audio(source="microphone", type="filepath")
	    text = gr.Textbox()
	    label = gr.Label()
	    saved = gr.Textbox()
	    savedAll = gr.Textbox()
	    TTSchoice = gr.Radio(label="Pick a TTS Model", choices=MODEL_NAMES)
	    audio = gr.Audio(label="Output", interactive=False)

	    # Buttons
	    b1 = gr.Button("Recognize Speech")
	    b2 = gr.Button("Classify Sentiment")
	    b3 = gr.Button("Save Speech to Text")
	    b4 = gr.Button("Retrieve All")
	    b5 = gr.Button("Read It Back Aloud")

	    # Event wiring: each button feeds one callback and fills one output.
	    b1.click(speech_to_text, inputs=audio_file, outputs=text)
	    b2.click(text_to_sentiment, inputs=text, outputs=label)
	    b3.click(upsert, inputs=text, outputs=saved)
	    b4.click(selectall, inputs=text, outputs=savedAll)
	    b5.click(tts, inputs=[text, TTSchoice], outputs=audio)

	# Start the app.
	demo.launch(share=True)

	# Title and example prompts for a story generator.  Each example is a
	# one-element list; the second and fourth entries are the same sentence.
	title = "Story Generators"
	examples = [
	    ["At which point do we invent Love?"],
	    ["Love is a capacity more than consciousness is universal."],
	    ["See the grace of god in eachother."],
	    ["Love is a capacity more than consciousness is universal."],
	    ["Love is generativity when there is more energy than what they need for equilibrium."],
	    ["Collections of people have agency and mass having agency at the mesoscopic level"],
	    ["Having a deep human connection is an interface problem to solve."],
	    ["Having a collective creates agency since we build trust in eachother."],
	]