# NOTE(review): the lines below were a Hugging Face Spaces status banner
# ("Spaces: Sleeping") captured when this file was scraped; they are not part
# of the program and are kept only as a comment so the file stays valid Python.
import os

import openai
import torch
from dotenv import load_dotenv
from fairseq.checkpoint_utils import load_model_ensemble_and_task_from_hf_hub
from fairseq.models.text_to_speech.hub_interface import TTSHubInterface
from transformers import pipeline
# Whisper "base" checkpoint used for automatic speech recognition.
model_id = "openai/whisper-base"
pipe = pipeline(task="automatic-speech-recognition", model=model_id)
def transcribe_speech(filepath):
    """Run Whisper ASR on the audio file at *filepath* and return its text.

    The pipeline is asked to transcribe (not translate) Spanish speech;
    long recordings are split into 30-second chunks, eight per batch.
    """
    generation_options = {
        "task": "transcribe",
        "language": "spanish",  # update with the language you've fine-tuned on
    }
    result = pipe(
        filepath,
        max_new_tokens=256,
        generate_kwargs=generation_options,
        chunk_length_s=30,
        batch_size=8,
    )
    return result["text"]
# Load environment variables from a local .env file (useful when running
# outside the hosted Space), then hand the key to the OpenAI client.
# Raises KeyError immediately if OPENAI_API_KEY is missing.
load_dotenv()
openai.api_key = os.environ["OPENAI_API_KEY"]
def clear_chat():
    """Rebind the module-level ``chat_history`` to a fresh empty list."""
    global chat_history
    chat_history = list()
def query_chatgpt(message, chat_history):
    """Send *message* to gpt-3.5-turbo and return ``(reply, chat_history)``.

    Mutates *chat_history* in place: appends the user turn before the call
    and the assistant turn after it, so the full conversation is replayed on
    every request.
    """
    chat_history.append({'role': 'user', 'content': '{}'.format(message)})
    print("Preguntando " + message)
    print("historial", chat_history)
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=chat_history,
        temperature=0.5,
        max_tokens=256,
    )
    response = completion.choices[0].message.content
    chat_history.append({'role': 'assistant', 'content': '{}'.format(response)})
    return response, chat_history
# NOTE(review): fairseq TTS experiment kept for reference; the local
# ``tts.synthesize`` helper below is what the app actually uses.
# models, cfg, task = load_model_ensemble_and_task_from_hf_hub(
#     "facebook/tts_transformer-es-css10",
#     arg_overrides={"vocoder": "hifigan", "fp16": False}
# )
# model = models[0]
# TTSHubInterface.update_cfg_with_data_cfg(cfg, task.data_cfg)
# generator = task.build_generator([model], cfg)
# text = "Había una vez."
# sample = TTSHubInterface.get_model_input(task, text)
# wav, rate = TTSHubInterface.get_prediction(task, model, generator, sample)
# ipd.Audio(wav, rate=rate)
from tts import synthesize
# def syn_facebookmms(text):
#     sample = TTSHubInterface.get_model_input(task, text)
#     wav, rate = TTSHubInterface.get_prediction(task, model, generator, sample)
#     return wav, rate
def answer_question(filepath, chat_history):
    """One full assistant turn: recorded audio -> text -> ChatGPT -> speech.

    Returns ``(rate, audio)`` in the order a Gradio ``Audio`` output expects.
    """
    transcription = transcribe_speech(filepath)
    response, chat_history = query_chatgpt(transcription, chat_history)
    print("historial", chat_history)
    # "spa" selects the Spanish voice of the local synthesizer.
    rate, audio = synthesize(response, 1, "spa")
    print(audio)
    return rate, audio
def reset_state(chat_history):
    """Gradio callback: discard the given history and return a new empty one."""
    return []
import gradio as gr

# Minimal voice-chat UI: record a question with the microphone, press
# "Responder" to hear the synthesized answer; "Reset State" clears history.
with gr.Blocks() as demo:
    chat_history = gr.State([])
    mic_input = gr.Audio(source="microphone", type="filepath")
    answer_btn = gr.Button("Responder")
    reset_btn = gr.Button("Reset State")
    voice_output = gr.Audio()
    answer_btn.click(answer_question, [mic_input, chat_history], voice_output)
    reset_btn.click(reset_state, chat_history, chat_history)

demo.launch(debug=True)