# friday/app.py
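"""Friday: a voice-chat AI assistant Space.

Flow: Whisper transcribes microphone input, Mistral-7B-Instruct generates a
short conversational reply through the Hugging Face Inference API, and gTTS
renders the reply as speech for Gradio to autoplay.
"""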
import torch
import spaces
import numpy as np
import gradio as gr
from gtts import gTTS
from transformers import pipeline
from huggingface_hub import InferenceClient
ASR_MODEL_NAME = "openai/whisper-small"
LLM_MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.2"
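# The ASR model runs locally via a transformers pipeline; the LLM is queried
# remotely through InferenceClient, so it uses no local GPU memory.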
system_prompt = """"<s>[INST] You are Friday, a helpful and conversational AI assistant and You respond with one to two sentences. [/INST] Hello there! I'm friday how can I help you?</s>"""
instruct_history = system_prompt + """"""
formatted_history = """"""
client = InferenceClient(LLM_MODEL_NAME)
device = 0 if torch.cuda.is_available() else "cpu"
pipe = pipeline(
    task="automatic-speech-recognition",
    model=ASR_MODEL_NAME,
    device=device,
)
def generate(instruct_history, temperature=0.1, max_new_tokens=128, top_p=0.95, repetition_penalty=1.0):
    """Generate Friday's next reply from the accumulated instruct-format history."""
    temperature = float(temperature)
    # Sampling breaks down at a temperature of zero, so clamp to a small positive value.
    if temperature < 1e-2:
        temperature = 1e-2
    top_p = float(top_p)

    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        seed=42,
    )

    # With stream=False and details=False, text_generation returns the reply as a plain string.
    output = client.text_generation(
        instruct_history, **generate_kwargs, stream=False, details=False, return_full_text=False)

    return output
@spaces.GPU(duration=60)
def transcribe(audio, instruct_history=instruct_history, formatted_history=formatted_history):
    # Note: the default arguments bind the module-level strings once, so each
    # call starts from the initial history rather than the previous turn's.
    sr, y = audio  # Gradio microphone input arrives as (sample_rate, numpy_array)
    y = y.astype(np.float32)
    y /= np.max(np.abs(y))  # normalize amplitude to [-1, 1] for Whisper

    transcribed_user_audio = pipe({"sampling_rate": sr, "raw": y})["text"]

    formatted_history += f"Human: {transcribed_user_audio}\n\n"
    instruct_history += f"<s>[INST] {transcribed_user_audio} [/INST] "

    llm_response = generate(instruct_history)

    instruct_history += f" {llm_response}</s>"
    formatted_history += f"Friday: {llm_response}\n\n"

    # Convert the reply to speech and save it for the Gradio audio component.
    audio_response = gTTS(llm_response)
    audio_response.save("response.mp3")

    print(instruct_history)

    return "response.mp3", formatted_history
with gr.Blocks() as demo:
    gr.HTML("<center><h1>Friday: AI Virtual Assistant</h1></center>")

    with gr.Row():
        audio_input = gr.Audio(label="Human", sources="microphone")
        output_audio = gr.Audio(label="Friday", type="filepath",
                                interactive=False,
                                autoplay=True,
                                elem_classes="audio")

    transcribe_btn = gr.Button("Transcribe")
    transcription_box = gr.Textbox(label="Transcription")

    transcribe_btn.click(fn=transcribe, inputs=[audio_input],
                         outputs=[output_audio, transcription_box])
if __name__ == "__main__":
demo.queue()
demo.launch()
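# To run outside a Space (a sketch; the exact dependency list is an assumption
# about this Space's requirements.txt):
#   pip install torch transformers gradio gtts huggingface_hub spaces
#   python app.py
# An HF token (e.g. via the HF_TOKEN environment variable) may be required for
# the Inference API call to the Mistral model.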