# friday/app.py
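"""Friday: a voice-chat AI assistant demo.

Microphone audio is transcribed locally with Whisper, the transcript is sent
to Mistral-7B-Instruct through the Hugging Face Inference API, and the reply
is spoken back with gTTS.
"""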
import torch
import spaces
import numpy as np
import gradio as gr
from gtts import gTTS
from transformers import pipeline
from huggingface_hub import InferenceClient
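
# Hosted model identifiers: Whisper (small) for speech recognition,
# Mistral-7B-Instruct for the conversational LLM.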
ASR_MODEL_NAME = "openai/whisper-small"
LLM_MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.2"
system_prompt = """"<s>[INST] You are Friday, a helpful and conversational AI assistant and You respond with one to two sentences. [/INST] Hello there! I'm friday how can I help you?</s>"""
chat_history = system_prompt + """"""
global formatted_history
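
# Client for the Hugging Face Inference API, pointed at the LLM.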
client = InferenceClient(LLM_MODEL_NAME)
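
# Local Whisper ASR pipeline; device=0 selects the first GPU when CUDA is
# available, otherwise fall back to CPU.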
device = 0 if torch.cuda.is_available() else "cpu"
pipe = pipeline(
task="automatic-speech-recognition",
model=ASR_MODEL_NAME,
device=device,
)

def generate(user_prompt, temperature=0.1, max_new_tokens=128, top_p=0.95, repetition_penalty=1.0):
    """Append the user turn to the chat history and query the LLM."""
    global chat_history

    # Clamp temperature away from zero so sampling stays well-defined.
    temperature = float(temperature)
    if temperature < 1e-2:
        temperature = 1e-2
    top_p = float(top_p)

    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        seed=42,
    )

    # Wrap the new user turn in the Mistral instruct format before generating.
    chat_history += f"<s>[INST] {user_prompt} [/INST] "
    output = client.text_generation(
        chat_history, **generate_kwargs, stream=False, details=False, return_full_text=False)
    print(output)
    return output

@spaces.GPU(duration=60)
def transcribe(audio):
    """Transcribe mic audio, generate an LLM reply, and speak it back."""
    global chat_history, formatted_history

    # Gradio supplies (sample_rate, samples); normalize to float32 in [-1, 1].
    sr, y = audio
    y = y.astype(np.float32)
    peak = np.max(np.abs(y))
    if peak > 0:  # avoid dividing by zero on silent input
        y /= peak

    transcription = pipe({"sampling_rate": sr, "raw": y})["text"]
    formatted_history += f"Human: {transcription}\n"

    llm_response = generate(transcription)
    chat_history += f" {llm_response}</s>"
    formatted_history += f"Friday: {llm_response}\n"

    # Synthesize the reply with Google text-to-speech and save it for playback.
    audio_response = gTTS(llm_response)
    audio_response.save("response.mp3")

    print(formatted_history)
    print(chat_history)
    return "response.mp3", formatted_history

with gr.Blocks() as demo:
    gr.HTML("<center><h1>Friday: AI Virtual Assistant</h1></center>")
    with gr.Row():
        audio_input = gr.Audio(label="Human", sources=["microphone"])
        output_audio = gr.Audio(label="Friday", type="filepath",
                                interactive=False,
                                autoplay=True,
                                elem_classes="audio")
    transcribe_btn = gr.Button("Transcribe")
    transcription_box = gr.Textbox(label="Transcription", interactive=False)
    # The click handler plays the spoken reply and shows the running transcript.
    transcribe_btn.click(fn=transcribe, inputs=audio_input,
                         outputs=[output_audio, transcription_box])
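
# Queue requests so concurrent users are served in order, then launch the app.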
demo.queue()
demo.launch()