from transformers import pipeline
import torch
import os
import openai
from dotenv import load_dotenv
from fairseq.checkpoint_utils import load_model_ensemble_and_task_from_hf_hub
from fairseq.models.text_to_speech.hub_interface import TTSHubInterface
import gradio as gr

# Text-to-speech helper from the local `tts` module (not shown in this file).
from tts import synthesize

# Speech-to-text: Whisper base via the transformers ASR pipeline.
model_id = "openai/whisper-base"
pipe = pipeline("automatic-speech-recognition", model=model_id)


def transcribe_speech(filepath):
    output = pipe(
        filepath,
        max_new_tokens=256,
        generate_kwargs={
            "task": "transcribe",
            "language": "spanish",  # update with the language you've fine-tuned on
        },
        chunk_length_s=30,
        batch_size=8,
    )
    return output["text"]


# Load environment variables from the local .env file.
load_dotenv()
openai.api_key = os.environ["OPENAI_API_KEY"]


def clear_chat():
    # Clears a module-level history; not wired into the Gradio UI below,
    # which uses reset_state on a gr.State instead.
    global chat_history
    chat_history = []


def query_chatgpt(message, chat_history):
    chat_history.append({"role": "user", "content": message})
    print("Asking: " + message)
    print("history", chat_history)
    # Pre-1.0 openai-python API (openai.ChatCompletion); requires openai<1.0.
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=chat_history,
        temperature=0.5,
        max_tokens=256,
    ).choices[0].message.content
    chat_history.append({"role": "assistant", "content": response})
    return response, chat_history


# Alternative TTS path via fairseq's Spanish transformer TTS, kept for reference:
# models, cfg, task = load_model_ensemble_and_task_from_hf_hub(
#     "facebook/tts_transformer-es-css10",
#     arg_overrides={"vocoder": "hifigan", "fp16": False},
# )
# model = models[0]
# TTSHubInterface.update_cfg_with_data_cfg(cfg, task.data_cfg)
# generator = task.build_generator([model], cfg)
#
# text = "Había una vez."  # "Once upon a time."
# sample = TTSHubInterface.get_model_input(task, text)
# wav, rate = TTSHubInterface.get_prediction(task, model, generator, sample)
# ipd.Audio(wav, rate=rate)
#
# def syn_facebookmms(text):
#     sample = TTSHubInterface.get_model_input(task, text)
#     wav, rate = TTSHubInterface.get_prediction(task, model, generator, sample)
#     return wav, rate


def answer_question(filepath, chat_history):
    # Full loop: recorded audio -> transcription -> ChatGPT reply -> speech.
    transcription = transcribe_speech(filepath)
    response, chat_history = query_chatgpt(transcription, chat_history)
    print("history", chat_history)
    # audio = synthesise(response)
    # audio, rate = syn_facebookmms(response)
    rate, audio = synthesize(response, 1, "spa")  # "spa" is the ISO 639-3 code for Spanish
    print(audio)
    return rate, audio


def reset_state(chat_history):
    # Returning a fresh list is what Gradio writes back into the gr.State;
    # reassigning the parameter alone would not clear it.
    chat_history = []
    return chat_history


with gr.Blocks() as demo:
    chat_history = gr.State([])
    # Gradio 3.x Audio API (`source=` became `sources=` in Gradio 4).
    entrada = gr.Audio(source="microphone", type="filepath")
    boton = gr.Button("Answer")
    button = gr.Button("Reset State")
    salida = gr.Audio()  # accepts a (sample_rate, data) tuple as output
    boton.click(answer_question, [entrada, chat_history], salida)
    button.click(reset_state, chat_history, chat_history)

demo.launch(debug=True)
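
# ---------------------------------------------------------------------------
# The local `tts` module imported above is not included here. Purely as a
# hypothetical sketch (kept commented out, like the fairseq reference code),
# this is one way synthesize(text, speed, lang) could be implemented with
# Meta's MMS-TTS checkpoints via transformers. The checkpoint naming scheme
# "facebook/mms-tts-<lang>" is real, but treating `speed` as the VITS
# speaking rate is an assumption about the actual tts.py, not its contents.
#
# from transformers import VitsModel, AutoTokenizer
# import torch
#
# def synthesize(text, speed, lang):
#     # One MMS-TTS checkpoint per ISO 639-3 code; "spa" selects Spanish.
#     model = VitsModel.from_pretrained(f"facebook/mms-tts-{lang}")
#     tokenizer = AutoTokenizer.from_pretrained(f"facebook/mms-tts-{lang}")
#     model.speaking_rate = speed  # assumed mapping of `speed` onto VITS speaking rate
#     inputs = tokenizer(text, return_tensors="pt")
#     with torch.no_grad():
#         waveform = model(**inputs).waveform[0].numpy()
#     # Return (sample_rate, samples) in the order answer_question unpacks.
#     return model.config.sampling_rate, waveform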