Spaces:
Build error
Build error
File size: 2,280 Bytes
5b5d4af fe33c17 11f7102 5b5d4af cfc38a8 fe33c17 5b5d4af fe33c17 006d225 fe33c17 6c3ad82 fe33c17 5b5d4af fe33c17 6c3ad82 a1f8c85 0071143 fe33c17 6c3ad82 03606ff 6c3ad82 03606ff 6c3ad82 fe33c17 6c3ad82 fe33c17 5b5d4af 9ca6873 5b5d4af fe33c17 5b5d4af |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 |
import os
#import numpy as np
import gradio as gr
import whisper
import requests
import tempfile
from neon_tts_plugin_coqui import CoquiTTS
# Whisper: speech-to-text. The "base" checkpoint is loaded once at import
# time and the resulting module-level `model` is shared by every call.
model = whisper.load_model("base")
# The LLM: BLOOM, reached over HTTP through the Hugging Face Inference API.
API_URL = "https://api-inference.huggingface.co/models/bigscience/bloom"
# The API token is read from the environment; if HF_TOKEN is not set this
# line raises KeyError at import time.
HF_TOKEN = os.environ["HF_TOKEN"]
# Bearer-token header sent with every request to API_URL.
headers = {"Authorization": f"Bearer {HF_TOKEN}"}
# Text-to-speech: the keys of CoquiTTS.langs (e.g. "en") and the synthesizer
# instance used by tts().
LANGUAGES = list(CoquiTTS.langs.keys())
coquiTTS = CoquiTTS()
# Processing input Audio
def fun(audio, language="en"):
    """Run the voice pipeline: transcribe, ask the LLM, synthesize the reply.

    Args:
        audio: The recording to transcribe, passed straight to
            ``model.transcribe``. The Gradio input in this file is
            configured with ``type="filepath"``, so this is expected to be
            a path to an audio file.
        language: Language code forwarded to ``tts``. Defaults to ``"en"``
            so existing single-argument callers keep working.

    Returns:
        A ``(transcript, reply, speech_path)`` tuple: the Whisper
        transcript, the language-model reply, and the path of the WAV file
        containing the spoken reply. When ``audio`` is ``None`` the tuple
        is ``("", "", None)``.
    """
    # A live interface may invoke the callback with no recording (e.g. when
    # the input is cleared); there is nothing to transcribe in that case.
    if audio is None:
        return "", "", None

    text1 = model.transcribe(audio)["text"]
    text2 = lang_model_response(text1)
    # Speak the model's reply. The previous code passed the undefined names
    # `text` and `language` here, which raised NameError on every call.
    speech = tts(text2, language)
    return text1, text2, speech
def lang_model_response(prompt):
    """Send *prompt* to the BLOOM inference endpoint and return one sentence.

    The request asks for the full text back (``return_full_text=True``), so
    the generated string starts with the prompt itself. The reply is taken
    as the second ``"."``-delimited segment of that string.

    Args:
        prompt: Text to send to the model. An empty, blank, or ``None``
            prompt is replaced with a generic request for help.

    Returns:
        The second period-delimited segment of the generated text. If that
        segment is missing or blank, the whole generated text is returned
        instead so that the caller never receives an empty reply.

    Raises:
        requests.HTTPError: If the inference API answers with an error
            status (for example an invalid token or an overloaded model).
    """
    print(f"*****Inside meme_generate - Prompt is :{prompt}")
    # Treat None and whitespace-only prompts like an empty one.
    if not prompt or not prompt.strip():
        prompt = """Can you help me please?"""

    json_ = {
        "inputs": prompt,
        "parameters": {
            "top_p": 0.90,  # 0.90 default
            "max_new_tokens": 64,
            "temperature": 1.1,  # 1.1 default
            "return_full_text": True,
            "do_sample": True,
        },
        "options": {
            "use_cache": True,
            "wait_for_model": True,
        },
    }
    response = requests.post(API_URL, headers=headers, json=json_)
    print(f"Response is : {response}")
    # Fail with the HTTP error itself rather than an opaque KeyError when
    # the API returns an error payload instead of a list of generations.
    response.raise_for_status()

    output = response.json()
    print(f"output is : {output}")
    output_tmp = output[0]['generated_text']
    print(f"output_tmp is: {output_tmp}")

    # Text without any period yields a single segment; indexing [1]
    # unconditionally used to raise IndexError in that case.
    segments = output_tmp.split(".")
    if len(segments) > 1 and segments[1].strip():
        solution = segments[1]
    else:
        solution = output_tmp
    print(f"Final response after splits is: {solution}")
    return solution
#Text-to-Speech
def tts(text, language):
    """Synthesize *text* into a temporary WAV file and return its path.

    Args:
        text: The text to speak.
        language: Language code placed in the speaker settings handed to
            the Coqui synthesizer.

    Returns:
        The filesystem path of the ``.wav`` file. The file is created with
        ``delete=False``, so it remains on disk after this function returns.
    """
    speaker_settings = {"language": language}
    wav_file = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    # Leaving the `with` block closes the handle; the file itself is kept.
    with wav_file:
        coquiTTS.get_tts(text, wav_file, speaker=speaker_settings)
    return wav_file.name
# Build and launch the voice-assistant UI: microphone in; transcript, model
# reply and spoken reply out.
#
# A TTS-only interface used to be constructed here from `inputs`/`outputs`
# lists that exist only as commented-out code. Referencing them raised
# NameError at start-up, and its launch() call would have prevented this
# interface from ever being served, so that stale demo has been removed.
demo = gr.Interface(
    title='Testing Whisper',
    fn=fun,
    inputs=[
        # type="filepath" hands `fun` a path to the recorded audio file.
        gr.Audio(source="microphone", type="filepath"),
    ],
    outputs=[
        "textbox", "textbox", "audio",
    ],
    live=True,
)
demo.launch()
|