NickyNicky's picture
Update app.py
628ab0d
raw history blame
No virus
3.51 kB
import time
import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoModelForSeq2SeqLM
import gradio as gr
import speech_recognition as sr
from math import log2, pow
import os
#from scipy.fftpack import fft
import gc
# ---------------------------------------------------------------------------
# Model, tokenizer and example-audio setup (runs once at import time).
# ---------------------------------------------------------------------------

# Hub identifier of the PEFT adapter; its config names the base model to load.
peft_model_id = "hackathon-somos-nlp-2023/SalpiBloomZ_15949_input_512-1b7"
config = PeftConfig.from_pretrained(peft_model_id)

# Base causal LM, loaded with 8-bit weights and automatic device placement.
# (A `load_in_8bit_fp32_cpu_offload=True` option was previously tried here and
# is currently disabled.)
model2 = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    load_in_8bit=True,
    device_map="auto",
)

# The tokenizer is fetched from the adapter repository, not the base model.
tokenizer2 = AutoTokenizer.from_pretrained(peft_model_id)

# Wrap the base model with the adapter weights; `model2` now names the wrapped
# model used for generation.
model2 = PeftModel.from_pretrained(model2, peft_model_id)

# Example audio clip, resolved against the current working directory, in the
# nested-list shape passed to the interface's `examples` argument.
Problema_tarjetaCredito = os.path.abspath("Problema_tarjetaCredito.ogg")
list_audios = [[Problema_tarjetaCredito]]
def gen_conversation(text, max_new_tokens=100):
    """Generate the model's reply to a single instruction.

    The prompt is the literal prefix ``<SN>instruction: `` followed by
    ``text``, a newline and a space. The prompt is tokenized with
    ``tokenizer2``, moved to the CUDA device and passed to
    ``model2.generate`` using beam search (3 beams) under CUDA autocast.

    Args:
        text: The user's instruction as a plain string.
        max_new_tokens: Upper bound on the number of newly generated tokens.

    Returns:
        The last line of the decoded generation with every occurrence of
        ``output:`` removed.
    """
    prompt = "<SN>instruction: " + text + "\n "
    encoded = tokenizer2(prompt, return_tensors="pt")
    batch = {name: tensor.to("cuda") for name, tensor in encoded.items()}

    # `torch.autocast(device_type="cuda")` is the non-deprecated spelling of
    # `torch.cuda.amp.autocast()`.
    with torch.autocast(device_type="cuda"):
        output_tokens = model2.generate(
            **batch,
            max_new_tokens=max_new_tokens,
            eos_token_id=tokenizer2.eos_token_id,
            pad_token_id=tokenizer2.pad_token_id,
            bos_token_id=tokenizer2.bos_token_id,
            early_stopping=True,
            no_repeat_ngram_size=2,
            repetition_penalty=1.2,
            # NOTE(review): no `do_sample` is passed, so `temperature` is
            # presumably unused by beam search -- confirm whether sampling
            # was intended.
            temperature=0.69,
            num_beams=3,
        )

    # Release Python-side garbage left over from generation. The previous
    # `torch.cuda.memory_summary(...)` call was dropped: it only built a
    # report string that was immediately discarded.
    gc.collect()

    decoded = tokenizer2.decode(output_tokens[0], skip_special_tokens=True)
    # Only the final line of the generation is kept as the reply.
    return decoded.split("\n")[-1].replace("output:", "")
# Accumulated HTML transcript of every turn handled so far.
# NOTE(review): this is module-level state, so it looks like every caller of
# `speech_to_text` shares one conversation -- confirm that this is intended.
conversacion = ""


def speech_to_text(audio_file, texto_adicional):
    """Handle one user turn and return the whole conversation as HTML.

    If ``audio_file`` is given, it is transcribed with
    ``Recognizer.recognize_google`` (language ``es-ES``) and the transcript is
    sent to ``gen_conversation``; ``texto_adicional`` is ignored in that case.
    Otherwise ``texto_adicional`` itself is sent to ``gen_conversation``.

    The user's text and the generated reply are HTML-escaped before being
    inserted into the transcript, so markup typed by a user (or produced by
    the model) is displayed literally instead of being interpreted.

    Args:
        audio_file: Path to an audio file, or ``None`` to use the text input.
        texto_adicional: Text typed by the user; used only without audio.

    Returns:
        The global ``conversacion`` string after this turn was appended.

    Raises:
        Exceptions raised by the recognizer or by ``gen_conversation`` are not
        caught here; in that case ``conversacion`` is left unchanged.
    """
    global conversacion
    from html import escape  # local import: only this function needs it

    if audio_file is not None:
        # Audio input: read the whole file and transcribe it.
        recognizer = sr.Recognizer()
        with sr.AudioFile(audio_file) as source:
            audio = recognizer.record(source)
        entrada = recognizer.recognize_google(audio, language="es-ES")
        etiqueta = "[|Audio a texto|]:"
        apertura = "<div style='color: #66b3ff;'>"
    else:
        # Text-only input.
        entrada = texto_adicional
        etiqueta = "[|Solo texto|]:"
        apertura = "<div style='color: #66b3ff;'> "

    # Escape the user's text first, as before it is evaluated ahead of the
    # (slow) generation call; the model itself receives the raw text.
    encabezado = f"{etiqueta}{escape(entrada)}\n"
    respuesta = gen_conversation(entrada, max_new_tokens=500)

    turno = encabezado + "<br>[AGENTE]:" + escape(respuesta)
    conversacion += apertura + turno + "</div><br>"
    return conversacion
# ---------------------------------------------------------------------------
# Gradio user interface: one audio input, one text input, one HTML output.
# ---------------------------------------------------------------------------
# NOTE(review): `gr.inputs` / `gr.outputs` and the keyword arguments `layout`,
# `capture_session`, `server_name` and `server_port` look like legacy Gradio
# API -- verify them against the Gradio version this app is pinned to.

# Voice input is handed to the callback as a file path.
entrada_voz = gr.inputs.Audio(label="Voz", type="filepath")
entrada_texto = gr.inputs.Textbox(label="Texto adicional")

# NOTE(review): `label` receives a list here; confirm the component accepts
# that rather than a single string.
salida_html = gr.outputs.HTML(label=["chatbot", "state"])

iface = gr.Interface(
    fn=speech_to_text,
    inputs=[entrada_voz, entrada_texto],
    outputs=salida_html,
    title="Chat bot para empresas.",
    description="Este modelo convierte la entrada de voz o texto y hace inferencia",
    # Each example row supplies only the audio input.
    examples=list_audios,
    theme="default",
    layout="vertical",
    allow_flagging=False,
    flagging_dir=None,
    server_name=None,
    server_port=None,
    live=False,
    capture_session=False,
)

# Start serving the app.
iface.launch()