mj-new
Baseline local whisper model and language selection
0587641
raw
history blame
4.76 kB
import gradio as gr
import whisper
import numpy as np
import openai
def greet(name):
    """Return a friendly greeting for *name*."""
    greeting = "Hello " + name
    return greeting + "!!"
# Load the stylesheet injected into the Gradio UI.  An explicit encoding
# avoids platform-dependent defaults when reading the CSS file.
with open('app.css', 'r', encoding='utf-8') as f:
    css_file = f.read()

# Header text rendered at the top of the app.
markdown = """
# Polish ASR BIGOS workspace
"""
def whisper_model_change(radio_whisper_model):
    """Load and return the local Whisper model named by the radio selection."""
    loaded_model = whisper.load_model(radio_whisper_model)
    return loaded_model
def prompt_gpt(input_text):
    """Send *input_text* to gpt-3.5-turbo and return the assistant's reply.

    An empty/falsy *input_text* sends only the system prompt.
    """
    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
    ]
    if input_text:
        conversation.append({"role": "user", "content": input_text})
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=conversation,
    )
    return response.choices[0].message.content
def process_pipeline(audio, language="en", whisper_model=None, whisper_model_type="base"):
    """Run the full voicebot pipeline: ASR -> ChatGPT -> TTS.

    Bug fix: the original called ``transcribe(audio)`` although
    ``transcribe`` takes four positional arguments, so every call raised
    TypeError.  The missing arguments are now exposed as backward-compatible
    defaulted parameters and forwarded.

    Parameters:
        audio: path to the recorded audio file.
        language: language code passed to Whisper decoding (default "en").
        whisper_model: pre-loaded Whisper model, or None to lazy-load.
        whisper_model_type: model size used when lazy-loading (default "base").

    Returns:
        Path to the synthesized speech file produced by ``synthesize_speech``.
    """
    asr_out = transcribe(audio, language, whisper_model, whisper_model_type)
    gpt_out = prompt_gpt(asr_out)
    tts_out = synthesize_speech(gpt_out)
    return tts_out
def transcribe(audio, language, whisper_model, whisper_model_type):
    """Transcribe an audio file with a local Whisper model.

    Lazily loads a model of *whisper_model_type* when no pre-loaded
    *whisper_model* is supplied, then decodes without timestamps.
    Returns the recognized text.
    """
    if not whisper_model:
        whisper_model = init_whisper_model(whisper_model_type)
    print(f"Transcribing {audio} for language {language} and model {whisper_model_type}")
    waveform = whisper.pad_or_trim(whisper.load_audio(audio))
    mel = whisper.log_mel_spectrogram(waveform)
    decode_options = whisper.DecodingOptions(
        language=language, without_timestamps=True, fp16=False
    )
    decoded = whisper.decode(whisper_model, mel, decode_options)
    return decoded.text
def init_whisper_model(whisper_model_type):
    """Load and return a local Whisper model of the given size name."""
    print("Initializing whisper model")
    print(whisper_model_type)
    return whisper.load_model(whisper_model_type)
def synthesize_speech(text, lang="en"):
    """Synthesize *text* to speech with gTTS and return the mp3 file path.

    Bug fix: the original referenced undefined names ``out_result`` and
    ``lang`` (guaranteed NameError); it now speaks the *text* argument and
    exposes the language as a backward-compatible defaulted parameter.

    NOTE(review): ``gTTS`` is not imported anywhere in this file — confirm
    the dependency (``from gtts import gTTS``); until it is imported this
    function still raises NameError at call time.
    """
    audioobj = gTTS(text=text, lang=lang, slow=False)
    # gTTS writes the synthesized audio to disk; the caller gets the path.
    audioobj.save("Temp.mp3")
    return "Temp.mp3"
# Build and launch the Gradio UI.
block = gr.Blocks(css=css_file)

with block:
    # State variables shared across event handlers.
    language = gr.State("en")              # language code fed to Whisper decoding
    whisper_model_type = gr.State("base")  # Whisper model size name
    whisper_model = gr.State()             # lazily-loaded Whisper model object

    # State handling functions
    def change_language(choice):
        """Map the language radio label to a Whisper language code.

        Bug fix: the original assigned ``language`` only inside the
        "Polish"/"English" branches, so any other value raised
        UnboundLocalError on return; unknown choices now fall back to
        English (the documented default of the radio control).
        """
        if choice == "Polish":
            language = "pl"
            print("Switching to Polish")
        else:
            language = "en"
            print("Switching to English")
        print("language")
        print(language)
        return language

    def change_whisper_model(choice):
        """Load the selected Whisper model and update both state slots."""
        whisper_model_type = choice
        print("Switching Whisper model")
        print(whisper_model_type)
        whisper_model = init_whisper_model(whisper_model_type)
        return [whisper_model_type, whisper_model]

    gr.Markdown(markdown)
    with gr.Tabs():
        with gr.TabItem('Voicebot playground'):
            with gr.Box():
                gr.HTML("<p class=\"apikey\">API Key:</p>")
                # API key textbox (password-style)
                api_key = gr.Textbox(label="", elem_id="pw")
            radio_lang = gr.Radio(["Polish", "English"], label="Language", info="If none selected, English is used")
            #radio_asr_type = gr.Radio(["Local", "Cloud"], label="Select ASR type", info="Cloud models are faster and more accurate, but costs money")
            #radio_cloud_asr = gr.Radio(["Whisper", "Google", "Azure"], label="Select Cloud ASR provider", info="You need to provide API keys for specific service")
            radio_whisper_model = gr.Radio(["tiny", "base", "small", "medium", "large"], label="Whisper ASR model (local)", info="Larger models are better, but slower. Default - base")
            mic_recording = gr.Audio(source="microphone", type="filepath", label='Record your voice')
            out_asr = gr.Textbox(placeholder="ASR output",
                                 lines=5,
                                 max_lines=10,
                                 show_label=False)
            out_gpt = gr.Textbox(placeholder="ChatGPT output",
                                 lines=10,
                                 max_lines=25,
                                 show_label=False)
            button_transcribe = gr.Button("Transcribe")
            button_prompt_gpt = gr.Button("Prompt ChatGPT")

            # Wire UI events to handlers; State objects pass current values in
            # via inputs and receive updated values via outputs.
            button_transcribe.click(transcribe, inputs=[mic_recording, language, whisper_model, whisper_model_type], outputs=out_asr)
            button_prompt_gpt.click(prompt_gpt, inputs=out_asr, outputs=out_gpt)
            radio_lang.change(fn=change_language, inputs=radio_lang, outputs=language)
            radio_whisper_model.change(fn=change_whisper_model, inputs=radio_whisper_model, outputs=[whisper_model_type, whisper_model])

block.launch()