# asistente_voz / app.py
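# Voice assistant demo: record Spanish speech, transcribe it with Whisper,
# send the transcription to ChatGPT, and speak the reply back with TTS.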
import os

import gradio as gr
import openai
import torch
from dotenv import load_dotenv
from transformers import pipeline

from tts import synthesize

# fairseq is only needed by the commented-out facebook/tts_transformer
# experiment below, so its imports stay disabled:
# from fairseq.checkpoint_utils import load_model_ensemble_and_task_from_hf_hub
# from fairseq.models.text_to_speech.hub_interface import TTSHubInterface
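
# Whisper "base" checkpoint for automatic speech recognition.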
model_id = "openai/whisper-base"
pipe = pipeline("automatic-speech-recognition", model=model_id)


def transcribe_speech(filepath):
    """Transcribe the audio file at `filepath` to Spanish text."""
    output = pipe(
        filepath,
        max_new_tokens=256,
        generate_kwargs={
            "task": "transcribe",
            "language": "spanish",
        },  # update with the language you've fine-tuned on
        chunk_length_s=30,  # split long recordings into 30 s windows
        batch_size=8,
    )
    return output["text"]

# Load environment variables from the local .env file.
load_dotenv()
openai.api_key = os.environ["OPENAI_API_KEY"]
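# The .env file is expected to define that key, e.g.:
#   OPENAI_API_KEY=sk-...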


def clear_chat():
    """Clear the module-level history (unused; the UI calls reset_state instead)."""
    global chat_history
    chat_history = []

def query_chatgpt(message, chat_history):
    """Append the user message, query gpt-3.5-turbo, and append the reply."""
    chat_history.append({"role": "user", "content": message})
    print("Asking:", message)
    print("History:", chat_history)
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=chat_history,
        temperature=0.5,
        max_tokens=256,
    )
    response = completion.choices[0].message.content
    chat_history.append({"role": "assistant", "content": response})
    return response, chat_history
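
# chat_history is a plain list of OpenAI chat messages, e.g.:
#   [{"role": "user", "content": "Hola"},
#    {"role": "assistant", "content": "¡Hola! ¿En qué puedo ayudarte?"}]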
# models, cfg, task = load_model_ensemble_and_task_from_hf_hub(
# "facebook/tts_transformer-es-css10",
# arg_overrides={"vocoder": "hifigan", "fp16": False}
# )
# model = models[0]
# TTSHubInterface.update_cfg_with_data_cfg(cfg, task.data_cfg)
# generator = task.build_generator([model], cfg)
# text = "Había una vez."
# sample = TTSHubInterface.get_model_input(task, text)
# wav, rate = TTSHubInterface.get_prediction(task, model, generator, sample)
# ipd.Audio(wav, rate=rate)
# def syn_facebookmms(text):
#     sample = TTSHubInterface.get_model_input(task, text)
#     wav, rate = TTSHubInterface.get_prediction(task, model, generator, sample)
#     return wav, rate
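
# The local tts module (imported at the top) is assumed to expose
#   synthesize(text, speed, lang) -> (sampling_rate, waveform)
# where "spa" is the ISO 639-3 code for Spanish, matching the language codes
# used by the facebook MMS TTS checkpoints.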
def answer_question(filepath, chat_history):
    """Full pipeline: recorded speech -> transcription -> ChatGPT -> synthesized audio."""
    transcription = transcribe_speech(filepath)
    response, chat_history = query_chatgpt(transcription, chat_history)
    print("History:", chat_history)
    # audio = synthesise(response)
    # audio, rate = syn_facebookmms(response)
    rate, audio = synthesize(response, 1, "spa")
    print(audio)
    return rate, audio

def reset_state(chat_history):
    """Return an empty list to clear the gr.State chat history."""
    return []

with gr.Blocks() as demo:
    chat_history = gr.State([])  # per-session conversation history
    entrada = gr.Audio(source="microphone", type="filepath")
    boton = gr.Button("Responder")
    button = gr.Button("Reset State")
    salida = gr.Audio()
    # gr.Audio accepts the (rate, audio) tuple returned by answer_question.
    boton.click(answer_question, [entrada, chat_history], salida)
    # The click handler mutates chat_history in place; reset_state replaces it
    # with a fresh empty list.
    button.click(reset_state, chat_history, chat_history)

demo.launch(debug=True)
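# launch(debug=True) blocks the main thread and prints tracebacks to the
# console, which is handy while iterating on the Space.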