# T5unami-small / app.py
# NickyNicky's picture
# Update app.py
# 76829db
import time
import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModelForSeq2SeqLM
import gradio as gr
import speech_recognition as sr
from math import log2, pow
import os
#from scipy.fftpack import fft
import gc
# Identifier of the PEFT adapter; its config names the base model to load.
peft_model_id = 'hackathon-somos-nlp-2023/T5unami-small-v1'
config = PeftConfig.from_pretrained(peft_model_id)

# The tokenizer is loaded from the adapter id, not from the base model name.
tokenizer2 = AutoTokenizer.from_pretrained(peft_model_id)

# Load the base seq2seq model and wrap it with the adapter weights.
# The 8-bit options (load_in_8bit / load_in_8bit_fp32_cpu_offload) were left
# disabled by the original author and are still not passed here.
model2 = PeftModel.from_pretrained(
    AutoModelForSeq2SeqLM.from_pretrained(
        config.base_model_name_or_path,
        return_dict=True,
        device_map='auto',
    ),
    peft_model_id,
)

# Example audio offered in the UI: one row holding one absolute file path
# (resolved against the current working directory).
Problema_tarjetaCredito = os.path.abspath("Problema_tarjetaCredito.ogg")
list_audios = [[Problema_tarjetaCredito]]
def gen_conversation(text: str, max_new_tokens: int = 100) -> str:
    """Generate the agent's reply to a single user instruction.

    The instruction is wrapped as ``"<SN>instruction: " + text + "\\n "``,
    encoded with ``tokenizer2`` and passed to ``model2.generate`` using beam
    search (``num_beams=3``). The decoded output is reduced to its last line
    and every ``output:`` occurrence is removed.

    Args:
        text: The user's instruction.
        max_new_tokens: Upper bound on the number of newly generated tokens.

    Returns:
        The last line of the decoded generation with ``output:`` stripped.
    """
    prompt = "<SN>instruction: " + text + "\n "

    # The model is loaded with device_map='auto', so it is not guaranteed to
    # live on the CPU; the encoded inputs must be on the same device as the
    # model before calling generate().
    batch = tokenizer2(prompt, return_tensors='pt').to(model2.device)

    output_tokens = model2.generate(
        **batch,
        max_new_tokens=max_new_tokens,
        eos_token_id=tokenizer2.eos_token_id,
        pad_token_id=tokenizer2.pad_token_id,
        bos_token_id=tokenizer2.bos_token_id,
        early_stopping=True,
        no_repeat_ngram_size=2,
        repetition_penalty=1.2,
        # NOTE(review): no do_sample flag is passed, so temperature presumably
        # has no effect under plain beam search -- confirm before tuning it.
        temperature=.9,
        num_beams=3,
    )

    # Run a garbage-collection pass after every generation.
    gc.collect()

    decoded = tokenizer2.decode(output_tokens[0], skip_special_tokens=True)
    return decoded.split("\n")[-1].replace("output:", "")
# Accumulated HTML transcript. It is module-level state, so it is shared by
# every call to speech_to_text within this process.
conversacion = ""


def speech_to_text(audio_file, texto_adicional) -> str:
    """Append one user/agent exchange to the transcript and return it.

    When ``audio_file`` is given, it is transcribed with Google speech
    recognition (``language="es-ES"``) and the transcript is used as the
    instruction; otherwise ``texto_adicional`` is used directly. The
    instruction is answered by ``gen_conversation`` and both the instruction
    and the answer are appended to the global ``conversacion`` as an HTML
    fragment.

    Args:
        audio_file: Path of the audio file to transcribe, or ``None`` to use
            the text input instead.
        texto_adicional: Instruction text, used only when ``audio_file`` is
            ``None``.

    Returns:
        The whole accumulated transcript as an HTML string.

    Any exception raised while reading or recognising the audio, or while
    generating the answer, propagates to the caller; in that case the
    transcript is left unchanged.
    """
    import html  # local import: needed only to escape text placed in HTML

    global conversacion

    if audio_file is not None:
        # Audio input: transcribe the whole file first.
        recognizer = sr.Recognizer()
        with sr.AudioFile(audio_file) as source:
            audio = recognizer.record(source)
        entrada = recognizer.recognize_google(audio, language="es-ES")
        etiqueta = "[|Audio a texto|]"
        apertura = "<div style='color: #66b3ff;'>"
    else:
        entrada = texto_adicional
        etiqueta = "[|Solo texto|]"
        # The text branch has always emitted a space after the opening tag.
        apertura = "<div style='color: #66b3ff;'> "

    respuesta = gen_conversation(entrada, max_new_tokens=500)

    # The user text / speech transcript and the model output are untrusted:
    # escape them so they cannot inject markup into the rendered HTML.
    turno = (
        f"{etiqueta}:{html.escape(entrada)}\n"
        + "<br>[AGENTE]:" + html.escape(respuesta)
    )
    conversacion += apertura + turno + "</div><br>"
    return conversacion
# UI components: a voice input delivered to the callback as a file path, a
# free-text input, and an HTML pane that shows the accumulated transcript.
# NOTE(review): the gr.inputs / gr.outputs namespaces and several keyword
# arguments below (layout, capture_session, server_name, server_port) look
# like legacy Gradio API -- confirm against the Gradio version in use.
voice_input = gr.inputs.Audio(label="Voz", type="filepath")
text_input = gr.inputs.Textbox(label="Texto adicional")
transcript_output = gr.outputs.HTML(label=["chatbot", "state"])

iface = gr.Interface(
    fn=speech_to_text,
    inputs=[voice_input, text_input],
    outputs=transcript_output,
    title="Chat bot para empresas.",
    description="Este modelo convierte la entrada de voz a texto e inferencia, texto a inferencia",
    examples=list_audios,
    theme="default",
    layout="vertical",
    allow_flagging=False,
    flagging_dir=None,
    server_name=None,
    server_port=None,
    live=False,
    capture_session=False,
)

# Start the web app as soon as the module is executed.
iface.launch()