import os
import csv
import tempfile
from datetime import datetime
from typing import Optional

import gradio as gr
from transformers import pipeline
from huggingface_hub import Repository, hf_hub_download
from TTS.utils.manage import ModelManager
from TTS.utils.synthesizer import Synthesizer

# PersistDataset -----
# Created new dataset as awacke1/MindfulStory.csv
DATASET_REPO_URL = "https://huggingface.co/datasets/awacke1/MindfulStory.csv"
DATASET_REPO_ID = "awacke1/MindfulStory.csv"
DATA_FILENAME = "MindfulStory.csv"
DATA_DIRNAME = "data"
DATA_FILE = os.path.join(DATA_DIRNAME, DATA_FILENAME)
HF_TOKEN = os.environ.get("HF_TOKEN")

# Download the dataset file from the hub (force_filename is the older
# huggingface_hub argument this script was written against).
try:
    hf_hub_download(
        repo_id=DATASET_REPO_ID,
        filename=DATA_FILENAME,
        cache_dir=DATA_DIRNAME,
        force_filename=DATA_FILENAME,
    )
except Exception:
    print("file not found")


def AIMemory(name: str, message: str):
    """Append a (name, message, time) row to the CSV and push it to the hub."""
    if name and message:
        with open(DATA_FILE, "a") as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=["name", "message", "time"])
            writer.writerow(
                {"name": name, "message": message, "time": str(datetime.now())}
            )
        commit_url = repo.push_to_hub()
    return {"name": name, "message": message, "time": str(datetime.now())}


# Context prompt (loaded here but not used elsewhere in this app).
with open("Mindfulness.txt", "r") as file:
    context = file.read()

# Set up cloned dataset repo locally so AIMemory can commit new rows back to it.
repo = Repository(
    local_dir=DATA_DIRNAME,
    clone_from=DATASET_REPO_URL,
    use_auth_token=HF_TOKEN,
)

# Set up ASR.
asr = pipeline("automatic-speech-recognition", "facebook/wav2vec2-base-960h")

# Set up TTS.
MODEL_NAMES = [
    "en/ljspeech/tacotron2-DDC",
    "en/ljspeech/glow-tts",
    "en/ljspeech/speedy-speech-wn",
    "en/ljspeech/vits",
    "en/sam/tacotron-DDC",
    "fr/mai/tacotron2-DDC",
    "de/thorsten/tacotron2-DCA",
]

# Use the ModelManager to download each model and its default vocoder.
MODELS = {}
manager = ModelManager()
for MODEL_NAME in MODEL_NAMES:
    print(f"downloading {MODEL_NAME}")
    model_path, config_path, model_item = manager.download_model(f"tts_models/{MODEL_NAME}")
    vocoder_name: Optional[str] = model_item["default_vocoder"]
    vocoder_path = None
    vocoder_config_path = None
    if vocoder_name is not None:
        vocoder_path, vocoder_config_path, _ = manager.download_model(vocoder_name)

    synthesizer = Synthesizer(
        model_path,
        config_path,
        None,
        vocoder_path,
        vocoder_config_path,
    )
    MODELS[MODEL_NAME] = synthesizer


# Transcribe (same as speech_to_text below; kept for compatibility).
def transcribe(audio):
    text = asr(audio)["text"]
    return text


# Text classifier (default sentiment model).
classifier = pipeline("text-classification")


def speech_to_text(speech):
    text = asr(speech)["text"]
    # rMem = AIMemory("STT", text)
    return text


def text_to_sentiment(text):
    sentiment = classifier(text)[0]["label"]
    # rMem = AIMemory(text, sentiment)
    return sentiment


# Firestore persistence helpers. Their buttons are commented out in the UI
# below, so they are never called in this app.
def upsert(text):
    date_time = str(datetime.today())
    doc_ref = db.collection("Text2SpeechSentimentSave").document(date_time)
    doc_ref.set(
        {
            "firefield": "Recognize Speech",
            "first": "https://huggingface.co/spaces/awacke1/TTS-STT-Blocks/",
            "last": text,
            "born": date_time,
        }
    )
    saved = select("TTS-STT", date_time)
    return saved


def select(collection, document):
    doc_ref = db.collection(collection).document(document)
    doc = doc_ref.get()
    docid = ("The id is: ", doc.id)
    contents = ("The contents are: ", doc.to_dict())
    return contents


def selectall(text):
    docs = db.collection("Text2SpeechSentimentSave").stream()
    doclist = ""
    for doc in docs:
        r = f"{doc.id} => {doc.to_dict()}"
        doclist += r
    return doclist
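# Note: the Firestore helpers above reference a client `db` that this script
# never creates. A minimal sketch of the assumed setup, using the
# google-cloud-firestore package (the project name is a placeholder, not part
# of the original app):
#
#   from google.cloud import firestore
#   db = firestore.Client(project="your-gcp-project")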
def tts(text: str, model_name: str):
    print(text, model_name)
    synthesizer = MODELS.get(model_name, None)
    if synthesizer is None:
        raise NameError("model not found")
    wavs = synthesizer.tts(text)
    # Write the audio to a temp file and hand the path back to Gradio.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        synthesizer.save_wav(wavs, fp)
    # rMem = AIMemory("TTS", text + model_name)
    return fp.name


# Gradio 3.x Blocks API (the deprecated gr.inputs.* components are replaced
# with their gr.* equivalents).
demo = gr.Blocks()
with demo:
    audio_file = gr.Audio(source="microphone", type="filepath")
    text = gr.Textbox(label="Speech to Text")
    # label = gr.Label()
    # saved = gr.Textbox(label="Saved")
    # savedAll = gr.Textbox(label="SavedAll")
    TTSchoice = gr.Radio(
        label="Pick a Text to Speech Model",
        choices=MODEL_NAMES,
    )
    audio = gr.Audio(label="Output", interactive=False)

    b1 = gr.Button("Recognize Speech")
    # b2 = gr.Button("Classify Sentiment")
    # b3 = gr.Button("Save Speech to Text")
    # b4 = gr.Button("Retrieve All")
    b5 = gr.Button("Read It Back Aloud")

    b1.click(speech_to_text, inputs=audio_file, outputs=text)
    # b2.click(text_to_sentiment, inputs=text, outputs=label)
    # b3.click(upsert, inputs=text, outputs=saved)
    # b4.click(selectall, inputs=text, outputs=savedAll)
    b5.click(tts, inputs=[text, TTSchoice], outputs=audio)

demo.launch(share=True)
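# Quick smoke test outside the UI (hypothetical usage; run it in place of
# demo.launch above if you only want to verify one synthesizer end to end):
#
#   wav_path = tts("Hello world.", "en/ljspeech/tacotron2-DDC")
#   print(wav_path)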