Spaces:
Runtime error
Runtime error
File size: 5,811 Bytes
2682f2f 0059280 9d10166 2682f2f 4946ad9 2682f2f 1557704 511d264 3ea2f71 511d264 9917453 c68ba3a 2682f2f 511d264 2682f2f cd49d70 2682f2f 511d264 2682f2f d296e9c e14e08f 44db170 e14e08f d296e9c 511d264 a3b9251 511d264 a3b9251 511d264 3ea2f71 d296e9c b1b5c4b 28bc405 b1b5c4b f5ef1bf 44db170 511d264 21bdb69 b1b5c4b 2682f2f 4455967 2682f2f 1a9ba6f 2682f2f cd49d70 2682f2f 4455967 2682f2f 4455967 2682f2f b1b5c4b 2682f2f 71843eb 2682f2f 18e8c99 2682f2f a4fd732 71843eb 4455967 2682f2f d296e9c 71843eb 511d264 4455967 d296e9c 3ea2f71 18e8c99 3ea2f71 2682f2f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
import os
# Install the speech dependencies before they are imported further down.
# These commands run every time the module is executed.
# NOTE(review): the exit status of os.system is ignored, so a failed install
# would only surface later (e.g. as an ImportError if the package is not
# already present).
os.system("pip install git+https://github.com/openai/whisper.git")
os.system("pip install neon-tts-plugin-coqui==0.6.0")
import gradio as gr
import whisper
import requests
import tempfile
from neon_tts_plugin_coqui import CoquiTTS
from datasets import load_dataset
import random
# Joke corpus; rows whose text contains a flagged keyword are removed below.
dataset = load_dataset("ysharma/short_jokes", split="train")


def _is_clean(example):
    """Return True when the joke contains none of the flagged keywords.

    The match is a case-insensitive substring test on the "Joke" field.
    """
    joke_text = example["Joke"].lower()
    flagged = ("warning", "fuck", "dead", "nsfw", "69", "sex")
    return not any(word in joke_text for word in flagged)


filtered_dataset = dataset.filter(_is_clean)
# Model 2: Sentence Transformer, reached through the hosted inference endpoint.
API_URL = (
    "https://api-inference.huggingface.co/models/"
    "sentence-transformers/msmarco-distilbert-base-tas-b"
)
# The access token must be present in the environment; a missing variable
# raises KeyError at import time.
HF_TOKEN = os.environ["HF_TOKEN"]
headers = {"Authorization": "Bearer " + HF_TOKEN}
def query(payload):
    """POST `payload` as JSON to API_URL and return the decoded JSON reply."""
    return requests.post(API_URL, headers=headers, json=payload).json()
# Languages common to both multilingual models: English, Chinese, Spanish, French, etc.
# Model 1: Whisper (speech-to-text). Loaded once at module level so that
# whisper_stt() reuses the same instance on every call.
model = whisper.load_model("base")
# Model 3: Coqui (text-to-speech)
# LANGUAGES is built from the keys of CoquiTTS.langs; it is not referenced
# anywhere else in the visible code.
LANGUAGES = list(CoquiTTS.langs.keys())
# Shared synthesiser instance used by tts().
coquiTTS = CoquiTTS()
# Languages for Coqui are: ['en', 'es', 'fr', 'de', 'pl', 'uk', 'ro', 'hu', 'el', 'bg', 'nl', 'fi', 'sl', 'lv', 'ga']
# Driver function
def driver_fun(audio, text):
    """Turn a spoken or typed theme into a matching joke and its audio.

    Args:
        audio: Path of the recorded clip, or None when nothing was recorded.
        text: Content of the text box. The placeholder value 'dummy' means
            "no text was typed", in which case the recording (if any) is used.

    Returns:
        A 3-tuple ``(theme, joke, speech_path)``. When the similarity query
        fails, the first two items are the user-facing error message and the
        third is None.
    """
    error_message = 'Error in model inference - Run Again Please'

    print("*********** Inside Driver ************")
    if (text == 'dummy') and (audio is not None):
        print(f"Audio is {audio}")
        # Only the English text is needed; the detected language is discarded.
        translation, _ = whisper_stt(audio)
    else:
        translation = text

    # Rank a random window of jokes rather than the whole corpus. The bounds
    # are derived from the filtered dataset's actual length, so the window can
    # never run past the end (the filter makes it shorter than the raw corpus).
    window_size = 4000
    total_jokes = len(filtered_dataset)
    if total_jokes == 0:
        print("Error is : filtered dataset is empty")
        return error_message, error_message, None
    span = min(window_size, total_jokes)
    lower_limit = random.randrange(0, total_jokes - span + 1)
    upper_limit = lower_limit + span
    print(f"lower_limit : upper_limit = {lower_limit} : {upper_limit}")

    # Slice the rows first so only the window is materialised, not the column.
    dataset_subset = filtered_dataset[lower_limit:upper_limit]['Joke']

    data = query({"inputs": {"source_sentence": translation, "sentences": dataset_subset}})
    # A successful reply is a non-empty list of scores; anything else (for
    # example a dict carrying an 'error' key) is reported as a failure.
    if not isinstance(data, list) or not data:
        print(f"Error is : {data}")
        return error_message, error_message, None
    print(f"type(data) : {type(data)}")

    # Index of the first highest score selects the joke.
    indx_score = max(range(len(data)), key=data.__getitem__)
    joke = dataset_subset[indx_score]
    print(f"Joke is : {joke}")

    speech = tts(joke, 'en')
    return translation, joke, speech
# Whisper - speech-to-text
def whisper_stt(audio):
    """Translate a recorded clip into English text with Whisper.

    Args:
        audio: Path of the audio file to process.

    Returns:
        A tuple ``(english_text, detected_language)``.
    """
    print("Inside Whisper TTS")

    # Load the clip and pad/trim it to the 30-second window.
    waveform = whisper.pad_or_trim(whisper.load_audio(audio))

    # Log-Mel spectrogram, placed on the same device as the model.
    mel = whisper.log_mel_spectrogram(waveform).to(model.device)

    # Language detection: keep the most probable entry.
    _, lang_probs = model.detect_language(mel)
    lang = max(lang_probs, key=lang_probs.get)
    print(f"Detected language: {lang}")

    # Decode with the 'translate' task so the output is English.
    decode_opts = whisper.DecodingOptions(fp16=False, language='en', task='translate')
    decoded = whisper.decode(model, mel, decode_opts)
    english_text = decoded.text

    print(f"translation is : {english_text}")
    return english_text, lang
# Coqui - Text-to-Speech
def tts(text, language):
    """Synthesise `text` into a temporary .wav file and return its path.

    Args:
        text: Sentence to speak.
        language: Language value passed to Coqui in the speaker settings.

    Returns:
        File-system path of the generated .wav file.
    """
    print(f"Inside tts - language is : {language}")
    print(f"Text is : {text}")
    speaker_settings = {"language": language}
    # delete=False keeps the file on disk after the handle is closed, so the
    # caller can still open it by name.
    wav_file = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    with wav_file:
        coquiTTS.get_tts(text, wav_file, speaker=speaker_settings)
    return wav_file.name
# Introductory instructions rendered under the page title.
_INTRO_TEXT = """<center>Just record <i><b>"Hey Whisper can you tell me a joke on X please?"</b></i>, X = anything you would wish.</center><br><center>Or, press record and just utter a theme. If you see the message 'Error in model inference - Run Again Please', just press the button again every time!</center>
"""

# Footer describing the model pipeline and known caveats.
_FOOTER_TEXT = """Model pipeline consisting of - <br>- [**Whisper**](https://github.com/openai/whisper) for Speech-to-text, <br>- [**CoquiTTS**](https://huggingface.co/coqui) for Text-To-Speech.<br>- [Sentence Transformers](https://huggingface.co/models?library=sentence-transformers&sort=downloads)<br>- Front end is built using [**Gradio Block API**](https://gradio.app/docs/#blocks).<br><br>If you want to reuse the App, simply click on the small cross button in the top right corner of your voice record panel, and then press record again! <br><br> Few Caveats:<br>1. Please note that sometimes the joke might be NSFW. Although, I have tried putting in filters to not have that experience, but they seem non-exhaustive.<br>2. Sometimes the joke might not match your theme, please bear with the limited capabilities of free open-source ML prototypes.<br>3. Much like real life, sometimes the joke might just not land, haha!<br>4. Repeating this: If you see the message 'Error in model inference - Run Again Please', just press the button again every time!
"""

# Gradio front end: two input/output columns, one action button, and a footer.
# NOTE(review): the nesting below was reconstructed from statement order
# because the original indentation was lost - confirm the intended layout.
demo = gr.Blocks()
with demo:
    gr.Markdown("<h1><center>AI Assistant - Voice to Joke</center></h1>")
    gr.Markdown(_INTRO_TEXT)
    with gr.Row():
        with gr.Column():
            in_audio = gr.Audio(source="microphone", type="filepath", label='Record your voice command here in English -')
            b1 = gr.Button("AI Response")
            out_transcript = gr.Textbox(label='Transcript of your Audio using OpenAI Whisper')
        with gr.Column():
            # 'dummy' is the placeholder driver_fun compares against to decide
            # whether to fall back to the recording.
            in_text = gr.Textbox(label='Or enter any text here..', value='dummy')
            out_audio = gr.Audio(label='Audio response from CoquiTTS')
            out_generated_joke = gr.Textbox(label='Joke returned! ')
    # Output order matches driver_fun's return tuple: (theme, joke, speech path).
    b1.click(driver_fun, inputs=[in_audio, in_text], outputs=[out_transcript, out_generated_joke, out_audio])
    with gr.Row():
        gr.Markdown(_FOOTER_TEXT)

demo.launch(enable_queue=True, debug=True)