"""Gradio chat demo: a small Llama Instruct model whose replies are screened by the HarmAug-Guard safety classifier."""

import gradio as gr
import torch
import torch.nn.functional as F
from transformers import (
    AutoModelForCausalLM,
    AutoModelForSequenceClassification,
    AutoTokenizer,
    pipeline,
)

# Use the GPU when available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
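
# Display name -> Hugging Face model ID for the chat models offered in the UI.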
llama_models = {
    "Chat-IPT 3.2": "meta-llama/Llama-3.2-1B-Instruct",
}


def load_model(model_name):
    """Load a causal LM and wrap it in a text-generation pipeline."""
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
    generator = pipeline('text-generation', model=model, tokenizer=tokenizer, device=device)
    return generator


# Cache loaded pipelines so each model is only instantiated once per session.
model_cache = {}
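

# Safety guard: predict() scores a prompt (or a prompt/response pair) with the
# HarmAug-Guard classifier and returns the probability that it is unsafe.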
def predict(model, prompt, response=None):
    """Return the probability that a prompt (or prompt/response pair) is unsafe."""
    device = model.device
    if response is None:
        inputs = guard_tokenizer(prompt, return_tensors="pt")
    else:
        inputs = guard_tokenizer(prompt, response, return_tensors="pt", padding=True, truncation=True)

    inputs = inputs.to(device)

    with torch.no_grad():
        outputs = model(**inputs)
    # Index 1 of the classifier logits corresponds to the "unsafe" label.
    unsafe_prob = F.softmax(outputs.logits, dim=-1)[:, 1]

    return unsafe_prob.item()


# Load the HarmAug-Guard safety classifier once at start-up.
guard_tokenizer = AutoTokenizer.from_pretrained("hbseong/HarmAug-Guard")
guard_model = AutoModelForSequenceClassification.from_pretrained("hbseong/HarmAug-Guard")
guard_model = guard_model.to(device)
guard_model.eval()
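

# Chat generation: build the prompt from the recent history, generate a reply
# with the selected Llama model, then refuse if HarmAug-Guard flags the exchange.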
def generate_chat(user_input, history, model_choice):
    """Generate a reply for the chat UI, refusing when the guard flags it as unsafe."""
    # Load (and cache) the selected chat model on first use.
    if model_choice not in model_cache:
        model_cache[model_choice] = load_model(llama_models[model_choice])
    generator = model_cache[model_choice]

    if history is None:
        history = []

    history.append({"role": "user", "content": user_input})

    # Keep only the most recent turns so the prompt stays short.
    if len(history) > 5:
        history = history[-5:]

    # The system prompt is prepended for generation only; the UI history keeps
    # just the user/assistant turns.
    messages = [{"role": "system", "content": "You are a great assistant."}] + history

    # The text-generation pipeline accepts a list of chat messages and returns
    # the conversation with the new assistant turn appended at the end.
    output = generator(
        messages,
        max_new_tokens=1024,
        pad_token_id=generator.tokenizer.eos_token_id,
        do_sample=True,
        temperature=0.55,
        top_p=0.7,
    )
    response = output[0]["generated_text"][-1]["content"]

    # Score the prompt/response pair and refuse to answer if it looks unsafe.
    unsafe_score = predict(guard_model, user_input, response)
    safety_threshold = 0.85

    if unsafe_score > safety_threshold:
        response = ("Sorry, but I can't help with that request due to "
                    "safety or ethical concerns.")

    history.append({"role": "assistant", "content": response})

    return history
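

# Gradio UI: a model selector, a chat window, and a textbox/button pair that
# both call respond().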
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("<h1><center>Test</center></h1>")

    model_choice = gr.Dropdown(list(llama_models.keys()), label="Select the model.")

    chatbot = gr.Chatbot(label=" ", type="messages")
    txt_input = gr.Textbox(show_label=False, placeholder="Write your message here...")

    def respond(user_input, chat_history, model_choice):
        # Fall back to the first model when none has been selected yet.
        if model_choice is None:
            model_choice = list(llama_models.keys())[0]
        updated_history = generate_chat(user_input, chat_history, model_choice)
        # Clear the textbox and push the updated history to the chatbot.
        return "", updated_history

    # Send a message either by pressing Enter or by clicking the button.
    txt_input.submit(respond, [txt_input, chatbot, model_choice], [txt_input, chatbot])

    submit_btn = gr.Button("Send")
    submit_btn.click(respond, [txt_input, chatbot, model_choice], [txt_input, chatbot])

demo.launch(debug=False, show_error=True, share=True)
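
# Note: share=True also exposes the app through a temporary public Gradio link.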