import gradio as gr
import torch
import torch.nn.functional as F
from transformers import (
    AutoModelForCausalLM,
    AutoModelForSequenceClassification,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline,
)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Chat models offered in the UI (display name -> Hugging Face model id).
llama_models = {
    "Chat-IPT 3.2": "meta-llama/Llama-3.2-1B-Instruct",
}


def load_model(model_name):
    """Load a causal LM and wrap it in a text-generation pipeline."""
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
    generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=device)
    return generator


# Cache of already-loaded generators, keyed by display name.
model_cache = {}

# Safety classifier (HarmAug-Guard) used to score prompt/response pairs.
guard_tokenizer = AutoTokenizer.from_pretrained("hbseong/HarmAug-Guard")
guard_model = AutoModelForSequenceClassification.from_pretrained("hbseong/HarmAug-Guard")
guard_model = guard_model.to(device)
guard_model.eval()


def predict(model, prompt, response=None):
    """Return the probability that the prompt (and optional response) is unsafe."""
    device = model.device
    if response is None:
        inputs = guard_tokenizer(prompt, return_tensors="pt")
    else:
        inputs = guard_tokenizer(prompt, response, return_tensors="pt", padding=True, truncation=True)
    inputs = inputs.to(device)
    with torch.no_grad():
        outputs = model(**inputs)
    # Index 1 of the classifier logits is the "unsafe" class.
    unsafe_prob = F.softmax(outputs.logits, dim=-1)[:, 1]
    return unsafe_prob.item()


def generate_chat(user_input, history, model_choice):
    """Generate a reply, score it with the guard model, and update the chat history."""
    if model_choice not in model_cache:
        model_cache[model_choice] = load_model(llama_models[model_choice])
    generator = model_cache[model_choice]

    system_prompt = {"role": "system", "content": "Você é um ótimo assistente"}
    if history is None:
        history = [system_prompt]
    history.append({"role": "user", "content": user_input})

    # Keep only the most recent turns (note: this can drop the system message).
    if len(history) > 5:
        history = history[-5:]

    response = generator(
        user_input,
        max_length=1024,
        pad_token_id=generator.tokenizer.eos_token_id,
        do_sample=True,
        temperature=0.55,
        top_p=0.7,
        return_full_text=False,  # return only the continuation, not the echoed prompt
    )[0]["generated_text"]

    # Replace the reply with a refusal if the guard model flags the exchange as unsafe.
    unsafe_score = predict(guard_model, user_input, response)
    safety_threshold = 0.85
    if unsafe_score > safety_threshold:
        response = (
            "Desculpe, mas não posso fornecer ajuda com essa solicitação "
            "devido a questões de segurança ou ética."
        )

    history.append({"role": "assistant", "content": response})
    return history


with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("Teste")

    model_choice = gr.Dropdown(list(llama_models.keys()), label="Selecione o Modelo.")
    chatbot = gr.Chatbot(label=" ", type="messages")
    txt_input = gr.Textbox(show_label=False, placeholder="Escreva a sua mensagem aqui...")

    def respond(user_input, chat_history, model_choice):
        # Fall back to the first model if none was selected in the dropdown.
        if model_choice is None:
            model_choice = list(llama_models.keys())[0]
        updated_history = generate_chat(user_input, chat_history, model_choice)
        return "", updated_history

    txt_input.submit(respond, [txt_input, chatbot, model_choice], [txt_input, chatbot])
    submit_btn = gr.Button("Enviar")
    submit_btn.click(respond, [txt_input, chatbot, model_choice], [txt_input, chatbot])

demo.launch(debug=False, show_error=True, share=True)