Spaces:
Sleeping
Sleeping
| from transformers import pipeline | |
| import torch | |
| import os | |
| import openai | |
| from dotenv import load_dotenv | |
| from fairseq.checkpoint_utils import load_model_ensemble_and_task_from_hf_hub | |
| from fairseq.models.text_to_speech.hub_interface import TTSHubInterface | |
# Whisper checkpoint used for speech recognition; the pipeline is built once
# at import time and shared by every call to transcribe_speech().
model_id = "openai/whisper-base"
pipe = pipeline(task="automatic-speech-recognition", model=model_id)
def transcribe_speech(filepath):
    """Transcribe an audio input with the module-level ASR pipeline ``pipe``.

    The pipeline is called with ``chunk_length_s=30``, ``batch_size=8`` and
    ``max_new_tokens=256``, and generation is configured for the
    ``"transcribe"`` task in Spanish.

    Args:
        filepath: Audio input handed unchanged to ``pipe``.

    Returns:
        The ``"text"`` entry of the pipeline output.
    """
    # Update "language" with the language the model was fine-tuned on.
    decoding_options = {
        "task": "transcribe",
        "language": "spanish",
    }
    result = pipe(
        filepath,
        max_new_tokens=256,
        generate_kwargs=decoding_options,
        chunk_length_s=30,
        batch_size=8,
    )
    return result["text"]
# Load environment variables from a local .env file (used when running
# locally). OPENAI_API_KEY must be defined; a missing key raises KeyError
# here, at import time.
load_dotenv()
openai.api_key = os.environ["OPENAI_API_KEY"]
def clear_chat():
    """Rebind the module-level ``chat_history`` name to a new empty list.

    NOTE(review): the UI section of this file binds the same global name to
    a ``gr.State``; calling this helper afterwards would replace that
    binding with a plain list. Confirm whether this helper is still needed.
    """
    global chat_history
    chat_history = list()
def query_chatgpt(message, chat_history):
    """Ask gpt-3.5-turbo for a reply to ``message`` given the conversation so far.

    ``chat_history`` is extended in place: first with the user turn, then,
    once the API call has returned, with the assistant turn.

    Args:
        message: Text of the user's turn (concatenated into a log line, so
            it must be a ``str``).
        chat_history: List of ``{'role': ..., 'content': ...}`` dicts sent
            to the API as the ``messages`` argument.

    Returns:
        A ``(response, chat_history)`` tuple: the reply text and the same
        list object that was passed in.
    """
    user_turn = {'role': 'user', 'content': f"{message}"}
    chat_history.append(user_turn)
    print("Preguntando " + message)
    print("historial", chat_history)

    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=chat_history,
        temperature=0.5,
        max_tokens=256,
    )
    response = completion.choices[0].message.content

    assistant_turn = {'role': 'assistant', 'content': f"{response}"}
    chat_history.append(assistant_turn)
    return response, chat_history
| # models, cfg, task = load_model_ensemble_and_task_from_hf_hub( | |
| # "facebook/tts_transformer-es-css10", | |
| # arg_overrides={"vocoder": "hifigan", "fp16": False} | |
| # ) | |
| # model = models[0] | |
| # TTSHubInterface.update_cfg_with_data_cfg(cfg, task.data_cfg) | |
| # generator = task.build_generator([model], cfg) | |
| # text = "Había una vez." | |
| # sample = TTSHubInterface.get_model_input(task, text) | |
| # wav, rate = TTSHubInterface.get_prediction(task, model, generator, sample) | |
| # ipd.Audio(wav, rate=rate) | |
| from tts import synthesize | |
| # def syn_facebookmms(text): | |
| # sample = TTSHubInterface.get_model_input(task, text) | |
| # wav,rate = TTSHubInterface.get_prediction(task, model, generator, sample) | |
| # return wav,rate | |
def answer_question(filepath, chat_history):
    """Answer a spoken question with synthesized speech.

    The recording is transcribed with ``transcribe_speech``, the text is
    sent to ``query_chatgpt`` (which extends ``chat_history`` in place), and
    the reply is turned into audio with ``synthesize``.

    Args:
        filepath: Path of the recorded audio, or ``None`` when the user
            triggered the callback without recording anything.
        chat_history: Conversation so far, as a list of role/content dicts.

    Returns:
        The ``(rate, audio)`` pair produced by ``synthesize``, or ``None``
        when there is no recording to answer.
    """
    if filepath is None:
        # No recording was provided: skip transcription and the API call
        # instead of failing inside the ASR pipeline, and leave the history
        # untouched. Returning None yields an empty audio output.
        return None

    transcription = transcribe_speech(filepath)
    response, chat_history = query_chatgpt(transcription, chat_history)
    print("historial", chat_history)

    # synthesize(text, 1, "spa"): the meaning of the second argument is not
    # visible from this file; "spa" presumably selects Spanish (TODO confirm).
    rate, audio = synthesize(response, 1, "spa")
    print(audio)
    return rate, audio
def reset_state(chat_history):
    """Return a brand-new empty history.

    The incoming ``chat_history`` is neither read nor mutated; the parameter
    exists because the reset button passes the current state as its input.

    Returns:
        A new empty list.
    """
    return []
| import gradio as gr | |
# Gradio UI: microphone input, a button that answers the recorded question
# with audio, and a button that resets the conversation history state.
# Components are created in display order; do not reorder them.
with gr.Blocks() as demo:
    chat_history = gr.State([])
    entrada = gr.Audio(source="microphone", type="filepath")
    boton = gr.Button("Responder")
    button = gr.Button("Reset State")
    salida = gr.Audio()

    boton.click(
        fn=answer_question,
        inputs=[entrada, chat_history],
        outputs=salida,
    )
    button.click(
        fn=reset_state,
        inputs=chat_history,
        outputs=chat_history,
    )

demo.launch(debug=True)