Spaces:
Build error
Build error
import os | |
#import numpy as np | |
import gradio as gr | |
import whisper | |
import requests | |
import tempfile | |
from neon_tts_plugin_coqui import CoquiTTS | |
# Speech-to-text: load the Whisper "base" model once, at import time.
model = whisper.load_model("base")

# Language model: BLOOM, reached over HTTP at the URL below.
API_URL = "https://api-inference.huggingface.co/models/bigscience/bloom"
# The access token is read from the HF_TOKEN environment variable; a missing
# variable raises KeyError right here.
HF_TOKEN = os.environ["HF_TOKEN"]
headers = dict(Authorization=f"Bearer {HF_TOKEN}")

# Text-to-speech: the language codes exposed by CoquiTTS, plus one shared
# engine instance reused by every request.
LANGUAGES = [*CoquiTTS.langs.keys()]
coquiTTS = CoquiTTS()
# Processing input audio
def fun(audio, language="en"):
    """Run one voice round-trip: transcribe, query the LLM, speak the reply.

    Args:
        audio: Path of the recorded audio file, or an empty value (``None``
            or ``""``) when there is no recording to process.
        language: Language code handed to the text-to-speech step.
            Defaults to ``"en"``.

    Returns:
        A ``(transcript, reply, speech_path)`` tuple. For an empty ``audio``
        value the result is ``("", "", None)`` and nothing is processed.
    """
    # Without a recording there is nothing to transcribe; return blank
    # outputs instead of passing an empty value to the transcriber.
    if not audio:
        return "", "", None

    text1 = model.transcribe(audio)["text"]
    text2 = lang_model_response(text1)
    # Speak the model's reply. The previous call passed the undefined names
    # `text` and `language`, which raised NameError on every request.
    speech = tts(text2, language)
    return text1, text2, speech
def lang_model_response(prompt, timeout=120.0):
    """Send *prompt* to the text-generation endpoint and return one sentence.

    The request goes to ``API_URL`` with the module-level ``headers``. The
    reply's ``generated_text`` is split on ``"."`` and the second segment is
    returned; when that segment is missing or blank, the whole generated
    text is returned instead.

    Args:
        prompt: Text to send. An empty value is replaced by a default
            question.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The selected part of the generated text, as a string.

    Raises:
        RuntimeError: If the response body is not JSON or does not have the
            expected ``[{"generated_text": ...}]`` shape.
        requests.exceptions.RequestException: If the HTTP request itself
            fails (including a timeout).
    """
    print(f"*****Inside lang_model_response - Prompt is :{prompt}")
    if not prompt:
        prompt = """Can you help me please?"""

    json_ = {
        "inputs": prompt,
        "parameters": {
            "top_p": 0.90,  # 0.90 default
            "max_new_tokens": 64,
            "temperature": 1.1,  # 1.1 default
            # Full text is requested, so the reply presumably starts with
            # the prompt itself -- confirm against the API documentation.
            "return_full_text": True,
            "do_sample": True,
        },
        "options": {
            "use_cache": True,
            "wait_for_model": True,
        },
    }

    # A timeout keeps a stalled connection from hanging the request forever.
    response = requests.post(
        API_URL, headers=headers, json=json_, timeout=timeout
    )
    print(f"Response is : {response}")

    try:
        output = response.json()
    except ValueError as err:
        raise RuntimeError(
            "Text-generation API returned a non-JSON response"
        ) from err
    print(f"output is : {output}")

    # Anything other than a non-empty list of dicts (for example an error
    # object) is reported with its payload rather than an opaque KeyError.
    try:
        output_tmp = output[0]["generated_text"]
    except (KeyError, IndexError, TypeError) as err:
        raise RuntimeError(
            f"Unexpected response from the text-generation API: {output!r}"
        ) from err
    print(f"output_tmp is: {output_tmp}")

    # Prefer the second "."-delimited segment; fall back to the full text
    # when there is no such segment or it is only whitespace.
    segments = output_tmp.split(".")
    if len(segments) > 1 and segments[1].strip():
        solution = segments[1]
    else:
        solution = output_tmp
    print(f"Final response after splits is: {solution}")
    return solution
# Text-to-speech
def tts(text, language):
    """Synthesize *text* into a ``.wav`` file and return that file's path.

    The file is a named temporary file created with ``delete=False``, so it
    stays on disk after this function returns.

    Args:
        text: Text to speak.
        language: Language code, passed to the engine as
            ``speaker={"language": language}``.

    Returns:
        Path of the temporary ``.wav`` file.
    """
    wav_file = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    with wav_file:
        speaker = {"language": language}
        coquiTTS.get_tts(text, wav_file, speaker=speaker)
    return wav_file.name
# NOTE: A standalone text-to-speech demo used to be built and launched here.
# It referenced `inputs` and `outputs`, which exist only as commented-out
# code, so loading this module raised NameError. Even with those names
# defined, its `launch()` call would normally block and keep the voice
# interface defined further down from ever starting. The demo is therefore
# disabled and kept below for reference only.
#
# inputs = [gr.Textbox(label="Input", value=CoquiTTS.langs["en"]["sentence"], max_lines=3),
#           gr.Radio(label="Language", choices=LANGUAGES, value="en")]
# outputs = gr.Audio(label="Output")
# demo = gr.Interface(fn=tts, inputs=inputs, outputs=outputs)
# demo.launch()
# Voice front end: a microphone recording (delivered as a file path) goes in;
# two text boxes and one audio player come out, fed by `fun`.
voice_ui = gr.Interface(
    fn=fun,
    title="Testing Whisper",
    inputs=[gr.Audio(source="microphone", type="filepath")],
    outputs=["textbox", "textbox", "audio"],
    live=True,
)
voice_ui.launch()