Spaces:
Build error
Build error
File size: 3,522 Bytes
5b5d4af 11f7102 5b5d4af cfc38a8 fe33c17 5b5d4af fe33c17 006d225 fe33c17 6e17fca 6c3ad82 6e17fca 6c3ad82 fe33c17 7f1ab16 6e17fca fe33c17 7a9df81 ee1afde 7a9df81 ee1afde 7a9df81 ea180c8 71471a7 7a9df81 71471a7 8f045d7 71471a7 8f045d7 71471a7 7a9df81 71471a7 ea180c8 ee1afde 6e17fca 5e7882a 6c3ad82 ee1afde 6c3ad82 03606ff 6c3ad82 03606ff 34c2304 6c3ad82 ee1afde 6c3ad82 ee1afde b068279 ee1afde 6c3ad82 fe33c17 ea180c8 fe33c17 ee1afde fe33c17 d325a90 5b5d4af ee1afde 5b5d4af 9ca6873 5b5d4af fe33c17 5b5d4af |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 |
import os
import gradio as gr
import whisper
import requests
import tempfile
from neon_tts_plugin_coqui import CoquiTTS
# Whisper: Speech-to-text
# Load the multilingual "base" Whisper checkpoint once at import time
# (downloads the weights on first run).
model = whisper.load_model("base")
# LLM : Bloom as inference
# Hosted Hugging Face inference endpoint for BigScience Bloom.
API_URL = "https://api-inference.huggingface.co/models/bigscience/bloom"
# HF_TOKEN must be set in the environment; a missing token fails fast here
# with a KeyError at startup rather than on the first API call.
HF_TOKEN = os.environ["HF_TOKEN"]
headers = {"Authorization": f"Bearer {HF_TOKEN}"}
# Languages covered by Bloom: en, fr, esp, arb, hn, portu, Indonesian, Vietnamese, Chinese, tamil, telugu, bengali
# Text-to-Speech
# Languages supported by the Coqui TTS plugin, e.g.:
# ['en', 'es', 'fr', 'de', 'pl', 'uk', 'ro', 'hu', 'el', 'bg', 'nl', 'fi', 'sl', 'lv', 'ga']
LANGUAGES = list(CoquiTTS.langs.keys())
print(f"Languages for Coqui are: {LANGUAGES}")
coquiTTS = CoquiTTS()
# Driver function
def driver_fun(audio):
    """Run the full STT -> LLM -> TTS pipeline on one recorded audio clip.

    Returns a (transcript, llm_reply, wav_path) tuple for the three
    Gradio output components.
    """
    transcript, detected_lang = whisper_stt(audio)
    reply = lang_model_response(transcript, detected_lang)
    speech_path = tts(reply, detected_lang)
    return transcript, reply, speech_path
# Whisper - speech-to-text
def whisper_stt(audio):
    """Transcribe *audio* (a file path) with Whisper.

    Detects the spoken language first, then decodes in that language.
    Returns a (transcript, language_code) tuple.
    """
    print("Inside Whisper TTS")
    # Load the recording and pad/trim it to the 30-second window Whisper expects.
    waveform = whisper.pad_or_trim(whisper.load_audio(audio))
    # Log-Mel spectrogram, placed on the same device as the model.
    mel = whisper.log_mel_spectrogram(waveform).to(model.device)
    # Pick the most probable spoken language.
    _, lang_probs = model.detect_language(mel)
    detected = max(lang_probs, key=lang_probs.get)
    print(f"Detected language: {detected}")
    # Decode with fp16 disabled (CPU-friendly) in the detected language.
    decode_options = whisper.DecodingOptions(fp16=False, language=detected)
    decoded = whisper.decode(model, mel, decode_options)
    print(f"transcript is : {decoded.text}")
    return decoded.text, detected
# LLM - Bloom Response
def lang_model_response(prompt, language):
    """Send *prompt* to the Bloom inference API and return the generated answer.

    The transcript is wrapped in a small few-shot Question/Answer template so
    Bloom continues the pattern; the text after the final "Answer: " marker is
    returned. *language* is accepted for interface symmetry but Bloom infers
    the language from the prompt text itself.

    Raises:
        requests.HTTPError: if the API responds with an error status.
        RuntimeError: if the response payload is not the expected shape.
    """
    print(f"*****Inside lang_model_response - Prompt is :{prompt}")
    p = """Question: How are you doing today?
Answer: I am doing good, thanks.
Question: """
    # Empty transcript (e.g. silence): substitute a harmless default question.
    if len(prompt) == 0:
        prompt = """Question: Can you help me please?
Answer: Sure, I am here for you.
Question: """
    prompt = p + prompt + "\n" + "Answer: "
    json_ = {
        "inputs": prompt,
        "parameters": {
            "top_p": 0.90,  # 0.90 default
            "max_new_tokens": 64,
            "temperature": 1.1,  # 1.1 default
            "return_full_text": False,
            "do_sample": True,
        },
        "options": {
            "use_cache": True,
            "wait_for_model": True,
        },
    }
    # Fix: the original call had no timeout (a stalled API hung the UI forever)
    # and indexed the JSON blindly, crashing with KeyError/TypeError on the
    # {"error": ...} payloads the inference API returns on failure.
    response = requests.post(API_URL, headers=headers, json=json_, timeout=60)
    response.raise_for_status()
    output = response.json()
    if not (isinstance(output, list) and output and "generated_text" in output[0]):
        raise RuntimeError(f"Unexpected Bloom API payload: {output}")
    output_tmp = output[0]["generated_text"]
    print(f"Bloom API Response is : {output_tmp}")
    # Keep only the first line of the generated answer. Fix: the original
    # unconditionally took split(...)[2], which raised IndexError whenever the
    # continuation contained fewer than two "Answer: " markers.
    parts = output_tmp.split("Answer: ")
    solution = (parts[2] if len(parts) > 2 else parts[-1]).split("\n")[0]
    print(f"Final Bloom Response after splits is: {solution}")
    return solution
# Coqui - Text-to-Speech
def tts(text, language):
    """Synthesize *text* into speech and return the path of a temp WAV file.

    Languages outside Coqui's supported set are rendered in English.
    """
    print(f"Inside tts - language is : {language}")
    supported = {'en', 'es', 'fr', 'de', 'pl', 'uk', 'ro', 'hu', 'bg', 'nl', 'fi', 'sl', 'lv', 'ga'}
    speak_lang = language if language in supported else 'en'
    # delete=False keeps the file alive past the context manager so Gradio
    # can stream it back to the browser.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as wav_file:
        coquiTTS.get_tts(text, wav_file, speaker={"language": speak_lang})
    return wav_file.name
# Gradio UI: microphone in; transcript, Bloom reply, and synthesized audio out.
demo = gr.Interface(
    fn=driver_fun,
    title='Testing Whisper',
    inputs=[gr.Audio(source="microphone", type="filepath")],
    outputs=["textbox", "textbox", "audio"],
    live=True,
)
demo.launch()
|