Spaces:
Sleeping
Sleeping
import gradio as gr | |
import whisper | |
import numpy as np | |
import openai | |
def greet(name):
    """Return the greeting ``"Hello <name>!!"`` for *name*.

    Args:
        name: Text to greet. It is concatenated directly, so a non-string
            value raises ``TypeError``.

    Returns:
        The greeting string.
    """
    return "Hello " + name + "!!"
# Read the custom stylesheet that is later passed to gr.Blocks(css=...).
# The encoding is pinned so the result does not depend on the host locale.
with open('app.css', 'r', encoding='utf-8') as f:
    css_file = f.read()

# Page heading rendered through gr.Markdown at the top of the UI.
markdown = """
# Polish ASR BIGOS workspace
"""
def whisper_model_change(radio_whisper_model):
    """Load and return the Whisper model named by *radio_whisper_model*.

    Args:
        radio_whisper_model: Model name forwarded unchanged to
            ``whisper.load_model``.

    Returns:
        Whatever ``whisper.load_model`` returns for that name.
    """
    return whisper.load_model(radio_whisper_model)
def prompt_gpt(input_text):
    """Send *input_text* to ``gpt-3.5-turbo`` and return the assistant's reply.

    Args:
        input_text: The user message (in the UI this is the ASR transcript).

    Returns:
        The content of the first completion choice, or ``""`` when
        *input_text* is empty or ``None``.
    """
    # Nothing to ask: skip the network round trip instead of sending a
    # conversation that contains only the system prompt.
    if not input_text:
        return ""

    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": input_text},
    ]
    # NOTE(review): ``openai.ChatCompletion`` is the legacy (pre-1.0) client
    # interface - confirm the pinned ``openai`` version still provides it.
    chat_completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo", messages=messages
    )
    return chat_completion.choices[0].message.content
def process_pipeline(audio, language="en", whisper_model=None, whisper_model_type="base"):
    """Run the full voicebot chain: speech -> text -> ChatGPT -> speech.

    Args:
        audio: Recording to transcribe, passed straight to ``transcribe``.
        language: Language code for transcription. Defaults to ``"en"``,
            the same initial value the UI state uses.
        whisper_model: Already loaded Whisper model, or ``None`` to let
            ``transcribe`` load one.
        whisper_model_type: Whisper model name used when no model is
            supplied. Defaults to ``"base"``, matching the UI state.

    Returns:
        Whatever ``synthesize_speech`` returns for the ChatGPT reply.
    """
    # ``transcribe`` requires all four arguments; calling it with only
    # ``audio`` raised TypeError.
    asr_out = transcribe(audio, language, whisper_model, whisper_model_type)
    gpt_out = prompt_gpt(asr_out)
    return synthesize_speech(gpt_out)
def transcribe(audio, language, whisper_model, whisper_model_type):
    """Transcribe a recorded audio file with Whisper and return the text.

    Args:
        audio: Path of the audio file (the UI supplies the output of
            ``gr.Audio(type="filepath")``). ``None`` or ``""`` means nothing
            was recorded.
        language: Language code handed to ``whisper.DecodingOptions``.
        whisper_model: Loaded Whisper model, or ``None`` to load one here.
        whisper_model_type: Model name used for loading and for logging.

    Returns:
        The decoded text, or ``""`` when there is no audio.
    """
    # No recording yet: return early rather than failing in
    # ``whisper.load_audio`` or loading a model for nothing.
    if not audio:
        return ""

    if whisper_model is None:
        # The model loaded here is local to this call; it is not written
        # back to the caller's state.
        whisper_model = init_whisper_model(whisper_model_type)

    print(f"Transcribing {audio} for language {language} and model {whisper_model_type}")

    # pad_or_trim fits the samples to Whisper's fixed input window, so
    # longer recordings are cut off.
    samples = whisper.pad_or_trim(whisper.load_audio(audio))
    # The spectrogram must be on the same device as the model, otherwise
    # decoding fails when the model was loaded onto a GPU.
    mel = whisper.log_mel_spectrogram(samples).to(whisper_model.device)

    options = whisper.DecodingOptions(language=language, without_timestamps=True, fp16=False)
    result = whisper.decode(whisper_model, mel, options)
    return result.text
def init_whisper_model(whisper_model_type):
    """Load the Whisper model called *whisper_model_type*, logging the request.

    Args:
        whisper_model_type: Model name forwarded to ``whisper.load_model``.

    Returns:
        The object returned by ``whisper.load_model``.
    """
    print("Initializing whisper model")
    print(whisper_model_type)
    return whisper.load_model(whisper_model_type)
def synthesize_speech(text, lang="en"):
    """Synthesize *text* to speech with gTTS and return the MP3 file path.

    Args:
        text: Text to speak.
        lang: Language code passed to gTTS. Defaults to ``"en"``.

    Returns:
        ``"Temp.mp3"``, the file the audio was saved to. The same file name
        is reused on every call.
    """
    # The original body read the undefined names ``out_result`` and ``lang``
    # (NameError); the parameters are used instead.
    # NOTE(review): ``gTTS`` is not imported in the visible part of this
    # file - ``from gtts import gTTS`` is needed before this can run.
    audioobj = gTTS(text=text, lang=lang, slow=False)
    audioobj.save("Temp.mp3")
    return "Temp.mp3"
# Build the Gradio UI. NOTE(review): the original indentation was lost, so
# the widget nesting below is reconstructed - confirm that the Box is meant
# to wrap only the API-key row.
block = gr.Blocks(css=css_file)

with block:
    # State variables handed to and returned from the event handlers.
    language = gr.State("en")
    whisper_model_type = gr.State("base")
    whisper_model = gr.State()

    # State handling functions
    def change_language(choice):
        """Map the language radio selection to the code stored in state.

        Any selection other than "Polish" or "English" (including ``None``)
        falls back to English, as the radio's info text promises. Previously
        such a value left the result unbound and raised UnboundLocalError.
        """
        language_codes = {"Polish": "pl", "English": "en"}
        if choice not in language_codes:
            choice = "English"
        language_code = language_codes[choice]
        print(f"Switching to {choice}")
        print("language")
        print(language_code)
        return language_code

    def change_whisper_model(choice):
        """Load the selected Whisper model; return ``[model name, model]``."""
        print("Switching Whisper model")
        print(choice)
        loaded_model = init_whisper_model(choice)
        return [choice, loaded_model]

    gr.Markdown(markdown)

    with gr.Tabs():
        with gr.TabItem('Voicebot playground'):
            with gr.Box():
                gr.HTML("<p class=\"apikey\">API Key:</p>")
                # API key textbox (password-style)
                # NOTE(review): this value is not passed to any handler in
                # this file, so it currently has no effect on prompt_gpt.
                api_key = gr.Textbox(label="", elem_id="pw")

            radio_lang = gr.Radio(["Polish", "English"], label="Language", info="If none selected, English is used")
            #radio_asr_type = gr.Radio(["Local", "Cloud"], label="Select ASR type", info="Cloud models are faster and more accurate, but costs money")
            #radio_cloud_asr = gr.Radio(["Whisper", "Google", "Azure"], label="Select Cloud ASR provider", info="You need to provide API keys for specific service")
            radio_whisper_model = gr.Radio(["tiny", "base", "small", "medium", "large"], label="Whisper ASR model (local)", info="Larger models are better, but slower. Default - base")

            mic_recording = gr.Audio(source="microphone", type="filepath", label='Record your voice')

            out_asr = gr.Textbox(placeholder="ASR output",
                                 lines=5,
                                 max_lines=10,
                                 show_label=False)
            out_gpt = gr.Textbox(placeholder="ChatGPT output",
                                 lines=10,
                                 max_lines=25,
                                 show_label=False)

            button_transcribe = gr.Button("Transcribe")
            button_prompt_gpt = gr.Button("Prompt ChatGPT")

            # Event wiring: the buttons run the pipeline stages, the radios
            # update the session state.
            button_transcribe.click(transcribe, inputs=[mic_recording, language, whisper_model, whisper_model_type], outputs=out_asr)
            button_prompt_gpt.click(prompt_gpt, inputs=out_asr, outputs=out_gpt)
            radio_lang.change(fn=change_language, inputs=radio_lang, outputs=language)
            radio_whisper_model.change(fn=change_whisper_model, inputs=radio_whisper_model, outputs=[whisper_model_type, whisper_model])

block.launch()