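# --- Hugging Face Spaces page header captured with the file (not program code) ---
# ysharma's picture
# ysharma HF staff
# k
# 0071143
# raw
# history blame
# No virus
# 2.28 kB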
import os
#import numpy as np
import gradio as gr
import whisper
import requests
import tempfile
from neon_tts_plugin_coqui import CoquiTTS
# --- Speech-to-text: Whisper "base" model, loaded once at import time ---
model = whisper.load_model("base")

# --- Language model: BLOOM, reached through the Hugging Face inference endpoint ---
API_URL = "https://api-inference.huggingface.co/models/bigscience/bloom"
# Read from the environment; a missing HF_TOKEN raises KeyError at startup.
HF_TOKEN = os.environ["HF_TOKEN"]
headers = {"Authorization": "Bearer " + HF_TOKEN}

# --- Text-to-speech: language codes known to the Coqui plugin, then the engine ---
LANGUAGES = list(CoquiTTS.langs)
coquiTTS = CoquiTTS()
# Processing input Audio
def fun(audio):
    """Transcribe a recording, query the language model, and voice its reply.

    Args:
        audio: The recording handed over by the Gradio audio input (configured
            with ``type="filepath"``), or ``None`` when there is no recording.

    Returns:
        A ``(transcript, reply, speech)`` tuple: the Whisper transcript, the
        language-model reply, and the path of the synthesized ``.wav`` file.
        When ``audio`` is ``None`` the tuple is ``("", "", None)``.
    """
    # Nothing to process without a recording; avoid failing inside Whisper.
    if audio is None:
        return "", "", None

    text1 = model.transcribe(audio)["text"]
    text2 = lang_model_response(text1)
    # Speak the model's reply. The original passed the undefined names
    # `text` and `language`, which raised NameError on every call.
    speech = tts(text2, "en")
    return text1, text2, speech
def lang_model_response(prompt):
    """Send *prompt* to the BLOOM inference endpoint and return part of its reply.

    Args:
        prompt: Text to send to the model. An empty or whitespace-only prompt
            is replaced by a generic request for help.

    Returns:
        The text between the first and second period of the generated text
        (the request sets ``return_full_text``), or the whole generated text
        when that segment is missing or blank.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
        requests.Timeout: If the endpoint does not answer in time.
    """
    print(f"*****Inside lang_model_response - Prompt is :{prompt}")
    if not prompt or not prompt.strip():
        prompt = """Can you help me please?"""

    json_ = {
        "inputs": prompt,
        "parameters": {
            "top_p": 0.90,  # 0.90 default
            "max_new_tokens": 64,
            "temperature": 1.1,  # 1.1 default
            "return_full_text": True,
            "do_sample": True,
        },
        "options": {
            "use_cache": True,
            "wait_for_model": True,
        },
    }

    # Generous timeout: "wait_for_model" lets the server hold the request
    # while the model loads, but the call must not hang forever.
    response = requests.post(API_URL, headers=headers, json=json_, timeout=300)
    print(f"Response is : {response}")
    # Fail with a clear HTTP error instead of a KeyError on an error payload.
    response.raise_for_status()

    output = response.json()
    print(f"output is : {output}")
    output_tmp = output[0]['generated_text']
    print(f"output_tmp is: {output_tmp}")

    # Keep the segment after the first period; fall back to the full text
    # when there is no such segment (no period, or nothing after it).
    segments = output_tmp.split(".")
    if len(segments) > 1 and segments[1].strip():
        solution = segments[1]
    else:
        solution = output_tmp
    print(f"Final response after splits is: {solution}")
    return solution
#Text-to-Speech
def tts(text, language):
    """Synthesize *text* into a temporary ``.wav`` file and return its path.

    Args:
        text: The text to speak.
        language: Language value passed to the Coqui engine as the speaker's
            ``"language"`` entry.

    Returns:
        The name of the temporary file. It is created with ``delete=False``,
        so it remains on disk after this function returns.
    """
    speaker_config = {"language": language}
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as wav_file:
        coquiTTS.get_tts(text, wav_file, speaker=speaker_config)
    return wav_file.name
# Standalone text-to-speech demo, kept for reference only. It stays disabled:
# `inputs` and `outputs` are commented out, so building this interface raised
# NameError, and its launch() call sat ahead of the main app below.
#inputs = [gr.Textbox(label="Input", value=CoquiTTS.langs["en"]["sentence"], max_lines=3),
#          gr.Radio(label="Language", choices=LANGUAGES, value="en")]
#outputs = gr.Audio(label="Output")
#demo = gr.Interface(fn=tts, inputs=inputs, outputs=outputs)
#demo.launch()

# Main app: microphone recording in; transcript, model reply and speech out.
gr.Interface(
    title='Testing Whisper',
    fn=fun,
    inputs=[
        gr.Audio(source="microphone", type="filepath"),  # streaming = True,
        # "state"
    ],
    outputs=[
        "textbox", "textbox", "audio",
    ],
    live=True,
).launch()