# friday/app.py
import torch
import spaces
import numpy as np
import gradio as gr
from gtts import gTTS
from transformers import pipeline
from huggingface_hub import InferenceClient
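
# Model choices: Whisper-small transcribes microphone audio locally, while
# Mistral-7B-Instruct generates replies remotely via the HF Inference API.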
ASR_MODEL_NAME = "openai/whisper-small"
LLM_MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.2"
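
# Mistral's instruct format wraps each user turn in <s>[INST] ... [/INST] and
# closes each assistant reply with </s>; the history below keeps that framing.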
system_prompt = """<s>[INST] You are Friday, a helpful and conversational AI assistant. You respond with one to two sentences. [/INST] Hello there! I'm Friday, how can I help you?</s>"""

instruct_history = system_prompt
formatted_history = ""
client = InferenceClient(LLM_MODEL_NAME)
device = 0 if torch.cuda.is_available() else "cpu"
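
# Local ASR pipeline; device=0 targets the first GPU when one is available.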
pipe = pipeline(
    task="automatic-speech-recognition",
    model=ASR_MODEL_NAME,
    device=device,
)
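

# Send the accumulated instruct-format history to the LLM and return only the
# newly generated text (return_full_text=False drops the echoed prompt).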
def generate(user_prompt, temperature=0.1, max_new_tokens=128, top_p=0.95, repetition_penalty=1.0):
    global instruct_history

    # Clamp temperature to a small positive value; 0 is rejected when sampling.
    temperature = float(temperature)
    if temperature < 1e-2:
        temperature = 1e-2
    top_p = float(top_p)

    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        seed=42,
    )

    # Open a new user turn; the assistant's reply is appended (and the turn
    # closed with </s>) by the caller, transcribe().
    instruct_history += f"""<s>[INST] {user_prompt} [/INST] """

    output = client.text_generation(
        instruct_history, **generate_kwargs, stream=False, details=False,
        return_full_text=False)

    return output
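

# Full voice round trip: transcribe the recording with Whisper, query the LLM,
# then speak the reply with gTTS. @spaces.GPU requests a ZeroGPU slot (up to
# 60 seconds here) on Hugging Face Spaces for the duration of this call.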
@spaces.GPU(duration=60)
def transcribe(audio):
    global instruct_history, formatted_history

    sr, y = audio

    # Normalize the waveform to [-1, 1]; guard against all-silent input so we
    # don't divide by zero.
    y = y.astype(np.float32)
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak

    inputs = pipe({"sampling_rate": sr, "raw": y})["text"]
    formatted_history += f"""Human: {inputs}\n"""

    llm_response = generate(inputs)

    # Close the open [INST] turn with the model's reply and record both sides
    # of the exchange in the human-readable transcript.
    instruct_history += f""" {llm_response}</s>"""
    formatted_history += f"""Friday: {llm_response}\n"""

    # Synthesize the reply and hand Gradio the saved file path for playback.
    audio_response = gTTS(llm_response)
    audio_response.save("response.mp3")

    print(instruct_history)

    return "response.mp3", formatted_history
with gr.Blocks() as demo:
    gr.HTML("<center><h1>Friday: AI Virtual Assistant</h1></center>")

    with gr.Row():
        audio_input = gr.Audio(label="Human", sources="microphone")
        output_audio = gr.Audio(label="Friday", type="filepath",
                                interactive=False,
                                autoplay=True,
                                elem_classes="audio")

    transcribe_btn = gr.Button("Transcribe")
    transcription_box = gr.Textbox(label="Transcription")

    transcribe_btn.click(fn=transcribe, inputs=[audio_input],
                         outputs=[output_audio, transcription_box])
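
# queue() enables request queuing so simultaneous users are served in order.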
if __name__ == "__main__":
    demo.queue()
    demo.launch()