import os
import traceback
import logging
from typing import List, Dict, Any, Tuple

import gradio as gr
from huggingface_hub import InferenceClient

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("cascade_chatbot")

HF_TOKEN = os.environ.get("HF_TOKEN")
DEFAULT_LLAMA_MODEL = os.environ.get("LLAMA_MODEL", "meta-llama/Llama-3.1-8B-Instruct")
DEFAULT_AUX1 = os.environ.get("AUX1_MODEL", "google/flan-t5-large")
DEFAULT_AUX2 = os.environ.get("AUX2_MODEL", "facebook/bart-large-cnn")

if not HF_TOKEN:
    logger.warning("HF_TOKEN not found in the environment. Set it in the Space Secrets or in your local environment.")
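
# NOTE: meta-llama repositories on the Hub are gated, so HF_TOKEN must belong to an
# account that has accepted the Llama license; the two auxiliary models are public.
# All three model IDs can be overridden through the LLAMA_MODEL / AUX1_MODEL /
# AUX2_MODEL environment variables read above.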

try:
    client_main = InferenceClient(token=HF_TOKEN, model=DEFAULT_LLAMA_MODEL)
    client_aux1 = InferenceClient(token=HF_TOKEN, model=DEFAULT_AUX1)
    client_aux2 = InferenceClient(token=HF_TOKEN, model=DEFAULT_AUX2)
except Exception:
    logger.exception("Failed to initialize InferenceClient(s). Check HF_TOKEN and the model names.")
    client_main = None
    client_aux1 = None
    client_aux2 = None


def _messages_to_prompt(messages: List[Dict[str, str]]) -> str:
    """Flatten a chat message list into a single plain-text prompt."""
    lines = []
    for m in messages:
        role = m.get("role", "user")
        content = m.get("content", "")
        lines.append(f"{role.upper()}: {content}")
    lines.append("ASSISTANT:")
    return "\n".join(lines)
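
# Illustrative example (not executed): for
#   [{"role": "system", "content": "Be brief."}, {"role": "user", "content": "Hi"}]
# the helper returns "SYSTEM: Be brief.\nUSER: Hi\nASSISTANT:".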


def _extract_text_from_response(obj: Any) -> str:
    """Best-effort extraction of the generated text from the many response shapes the HF clients can return."""
    if obj is None:
        return ""

    # Plain string-ish attributes (e.g. dataclass-like generation outputs).
    for attr in ("content", "text", "generated_text", "generation_text"):
        if hasattr(obj, attr):
            try:
                v = getattr(obj, attr)
                if isinstance(v, str):
                    return v
                return str(v)
            except Exception:
                pass

    # OpenAI/chat-completion style: choices[0].message.content or choices[0].text.
    try:
        choices = None
        if hasattr(obj, "choices"):
            choices = obj.choices
        elif isinstance(obj, dict) and "choices" in obj:
            choices = obj["choices"]
        if choices:
            first = choices[0]
            if isinstance(first, dict):
                if "message" in first and isinstance(first["message"], dict) and "content" in first["message"]:
                    return first["message"]["content"]
                if "text" in first:
                    return first["text"]
                if "content" in first:
                    return first["content"]
            if hasattr(first, "message"):
                msg = first.message
                if isinstance(msg, dict) and "content" in msg:
                    return msg["content"]
                # Fix: chat_completion returns message objects (not dicts) whose text lives in .content.
                if hasattr(msg, "content") and isinstance(msg.content, str):
                    return msg.content
            if hasattr(first, "text"):
                return first.text
    except Exception:
        pass

    # LangChain-like "generations" containers.
    try:
        if hasattr(obj, "generations") and len(obj.generations) > 0:
            g = obj.generations[0]
            if isinstance(g, dict) and "text" in g:
                return g["text"]
            if hasattr(g, "text"):
                return g.text
    except Exception:
        pass

    # Plain dict payloads.
    try:
        if isinstance(obj, dict):
            for k in ("text", "content", "generated_text"):
                if k in obj and isinstance(obj[k], str):
                    return obj[k]
    except Exception:
        pass

    # Last resort: stringify whatever came back.
    try:
        return str(obj)
    except Exception:
        return ""
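
# For reference (based on the huggingface_hub API): chat_completion() returns an object
# whose text is at response.choices[0].message.content, while text_generation() with the
# default arguments returns the generated string directly, so both shapes are covered above.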


def call_model_with_messages(client: InferenceClient, messages: List[Dict[str, str]],
                             max_new_tokens: int = 512, temperature: float = 0.7, top_p: float = 0.95) -> Any:
    """
    Tries multiple call signatures (chat_completion, client.chat, text_generation, etc.),
    which vary across huggingface_hub versions.
    Logs full exceptions for diagnosis.
    """

    def try_call(method, /, *pos_args, **kw_args):
        try:
            # Avoid dumping the full message list into the logs.
            safe_kw = {k: ("[MESSAGES]" if k == "messages" else v) for k, v in kw_args.items()}
            logger.info("Trying %s pos=%s kwargs=%s", getattr(method, "__name__", str(method)), pos_args, safe_kw)
            return method(*pos_args, **kw_args)
        except Exception:
            logger.exception("Call to %s failed", getattr(method, "__name__", str(method)))
            return None

    model_name = getattr(client, "model", None) or DEFAULT_LLAMA_MODEL

    # 1) Preferred path: chat_completion.
    try:
        cc = getattr(client, "chat_completion", None)
        if cc:
            # Recent huggingface_hub releases name the token limit "max_tokens".
            res = try_call(cc, model=model_name, messages=messages, max_tokens=max_new_tokens, temperature=temperature, top_p=top_p)
            if res is not None:
                return res
            # Older releases accepted "max_new_tokens".
            res = try_call(cc, messages=messages, model=model_name, max_new_tokens=max_new_tokens, temperature=temperature)
            if res is not None:
                return res
            if hasattr(cc, "create"):
                res = try_call(cc.create, model=model_name, messages=messages, max_new_tokens=max_new_tokens, temperature=temperature)
                if res is not None:
                    return res
            res = try_call(cc, messages)
            if res is not None:
                return res
    except Exception:
        logger.exception("Error in the chat_completion block")

    # 2) OpenAI-style "chat" namespace, if the client exposes one.
    try:
        chat_ns = getattr(client, "chat", None)
        if chat_ns:
            if hasattr(chat_ns, "create"):
                res = try_call(chat_ns.create, model=model_name, messages=messages, max_new_tokens=max_new_tokens, temperature=temperature)
                if res is not None:
                    return res
            if hasattr(chat_ns, "chat_completion") and hasattr(chat_ns.chat_completion, "create"):
                res = try_call(chat_ns.chat_completion.create, model=model_name, messages=messages, max_new_tokens=max_new_tokens, temperature=temperature)
                if res is not None:
                    return res
            res = try_call(chat_ns, model_name, messages)
            if res is not None:
                return res
    except Exception:
        logger.exception("Error in the chat namespace block")

    # 3) Fallback: flatten the messages and use plain text generation.
    prompt = _messages_to_prompt(messages)
    try:
        if hasattr(client, "text_generation"):
            res = try_call(client.text_generation, prompt=prompt, max_new_tokens=max_new_tokens, temperature=temperature, top_p=top_p)
            if res is not None:
                return res
        if hasattr(client, "generate") and callable(client.generate):
            res = try_call(client.generate, prompt=prompt, max_new_tokens=max_new_tokens)
            if res is not None:
                return res
    except Exception:
        logger.exception("Error in the text_generation/generate block")

    # 4) Last resort: probe any other plausible-looking method on the client.
    candidate_methods = [m for m in dir(client) if any(k in m for k in ("create", "generate", "complete", "run"))]
    for name in candidate_methods:
        try:
            method = getattr(client, name)
            if callable(method):
                res = try_call(method, messages=messages)
                if res is not None:
                    return res
                res = try_call(method, prompt)
                if res is not None:
                    return res
                res = try_call(method, messages)
                if res is not None:
                    return res
        except Exception:
            logger.exception("Error probing candidate %s", name)

    debug = {"available_attrs": dir(client), "messages_sample": messages[:3]}
    logger.error("All attempts failed. Debug: %s", debug)
    raise RuntimeError(f"Could not call the HF client with any of the tested signatures. Debug: {debug}")
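
# A minimal sketch of the direct call on a current huggingface_hub release (assuming
# chat_completion is available), which is what the probing above normally resolves to:
#
#     out = client_main.chat_completion(
#         messages=[{"role": "user", "content": "Hello"}],
#         max_tokens=64,
#         temperature=0.7,
#     )
#     print(out.choices[0].message.content)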


def pipeline_cascade(user_message: str, system_message: str,
                     max_tokens: int, temperature: float, top_p: float) -> Tuple[str, List[str]]:
    """
    Runs the cascade: Llama (client_main) -> FLAN (client_aux1) -> BART (client_aux2).
    Returns the final text plus a log of the steps taken.
    """
    logs = []

    messages = [{"role": "system", "content": system_message or ""}, {"role": "user", "content": user_message}]
    try:
        logs.append("1) Calling Llama (input)")
        response_main_obj = call_model_with_messages(client_main, messages, max_new_tokens=max_tokens, temperature=temperature, top_p=top_p)
        response_main = _extract_text_from_response(response_main_obj)
        logs.append(f"-> Llama answered (excerpt): {response_main[:300]}")

        logs.append("2) Calling FLAN-T5 (rephrase)")
        prompt_aux1 = f"Rewrite this text clearly and concisely:\n{response_main}"
        try:
            if client_aux1 and hasattr(client_aux1, "text_generation"):
                res_a1 = client_aux1.text_generation(prompt=prompt_aux1, max_new_tokens=max(128, max_tokens // 4))
            elif client_aux1 and hasattr(client_aux1, "completions") and hasattr(client_aux1.completions, "create"):
                res_a1 = client_aux1.completions.create(prompt=prompt_aux1, max_new_tokens=max(128, max_tokens // 4))
            else:
                res_a1 = None
            response_aux1 = _extract_text_from_response(res_a1) if res_a1 is not None else response_main
            logs.append(f"-> FLAN-T5 answered (excerpt): {response_aux1[:300]}")
        except Exception:
            logs.append("FLAN-T5 failed; falling back to the Llama answer")
            response_aux1 = response_main

        logs.append("3) Calling BART (3-sentence summary)")
        prompt_aux2 = f"Summarize this text in 3 sentences:\n{response_aux1}"
        try:
            if client_aux2 and hasattr(client_aux2, "text_generation"):
                res_a2 = client_aux2.text_generation(prompt=prompt_aux2, max_new_tokens=150)
            elif client_aux2 and hasattr(client_aux2, "completions") and hasattr(client_aux2.completions, "create"):
                res_a2 = client_aux2.completions.create(prompt=prompt_aux2, max_new_tokens=150)
            else:
                res_a2 = None
            response_aux2 = _extract_text_from_response(res_a2) if res_a2 is not None else response_aux1
            logs.append(f"-> BART answered (excerpt): {response_aux2[:300]}")
        except Exception:
            logs.append("BART failed; falling back to the previous step's answer")
            response_aux2 = response_aux1

    except Exception as e:
        tb = traceback.format_exc(limit=5)
        logger.exception("Error in the main pipeline: %s", e)
        response_aux2 = f"Error while generating the answer: {e}\n\nTraceback (short):\n{tb}"
        logs.append("Pipeline error: " + str(e))

    return response_aux2, logs
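
# Note: flan-t5-large and bart-large-cnn are seq2seq models, so on the serverless
# Inference API a task-specific call can be a more reliable fallback than text_generation.
# A hedged sketch for the BART step (summarization() is part of InferenceClient; the exact
# behaviour depends on what the deployed endpoint supports):
#
#     summary = client_aux2.summarization(response_aux1[:2000])
#     response_aux2 = getattr(summary, "summary_text", str(summary))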


with gr.Blocks(title="Cascade Chatbot - Llama + FLAN + BART") as demo:
    gr.Markdown("## FMU Academic Project - Cascade Chatbot\n"
                "Flow: **Llama (input)** → **FLAN-T5 (rephrasing)** → **BART (summary)**\n\n"
                "Course: ARTIFICIAL INTELLIGENCE AND MACHINE LEARNING")

    with gr.Row():
        with gr.Column(scale=2):
            system_message = gr.Textbox(value="You are a rational and cheerful chatbot.",
                                        label="System Message", lines=2)
            # type="messages" so the component accepts the {"role", "content"} dicts built below.
            chatbot = gr.Chatbot(label="Chat", type="messages")
            user_input = gr.Textbox(label="Type your message", placeholder="Type here...")
            max_tokens = gr.Slider(50, 2048, value=512, step=50, label="Max Tokens")
            temperature = gr.Slider(0.0, 1.0, value=0.7, step=0.05, label="Temperature")
            top_p = gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")

            history = gr.State([])

            def submit_handler(msg, history, system_message, max_tokens, temperature, top_p):
                out_text, logs = pipeline_cascade(msg, system_message, int(max_tokens), float(temperature), float(top_p))
                history.append({"role": "user", "content": msg})
                history.append({"role": "assistant", "content": out_text})
                logger.info("Pipeline logs:\n%s", "\n".join(logs))
                return history, history

            user_input.submit(submit_handler,
                              inputs=[user_input, history, system_message, max_tokens, temperature, top_p],
                              outputs=[chatbot, history])

            btn_send = gr.Button("Send")
            btn_send.click(submit_handler,
                           inputs=[user_input, history, system_message, max_tokens, temperature, top_p],
                           outputs=[chatbot, history])
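
            # The input box is not cleared after sending; to clear it, one could (for
            # example) add user_input to the outputs and return an empty string for it
            # from submit_handler.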

        with gr.Column(scale=1):
            gr.Markdown("### About the Project\n"
                        "This panel describes the **configuration**, lets you **test generation**, and lists the **team**:")

            model_info_md = f"""
**Models used:**

- Llama (input): `{DEFAULT_LLAMA_MODEL}`
- Aux 1 (rephrasing): `{DEFAULT_AUX1}`
- Aux 2 (summary): `{DEFAULT_AUX2}`

**How they are configured:**

- Each model is instantiated via `InferenceClient(token=HF_TOKEN, model=<model_name>)`.
- Preferred calls:
  - For chat: `client.chat_completion(messages=..., model=...)` (when available)
  - Fallback: `client.text_generation(prompt=...)`
- Inference settings controlled by the user: `max_tokens`, `temperature`, `top_p`.
- Diagnostic logs are written (useful when there are signature/permission errors).
"""
            gr.Markdown(model_info_md)

            test_output = gr.Textbox(label="Self-Test Result", lines=12, interactive=False)

            def run_self_test(system_message, max_tokens, temperature, top_p):
                msgs = [
                    "Briefly explain what linear regression is.",
                    "Summarize in one sentence the advantages of cross-validation.",
                    "How can I authenticate users in a web application?"
                ]
                accumulated = []
                for m in msgs:
                    out, logs = pipeline_cascade(m, system_message, int(max_tokens), float(temperature), float(top_p))
                    accumulated.append("INPUT: " + m)
                    accumulated.append("OUTPUT: " + out)
                    accumulated.append("LOGS: " + " | ".join(logs))
                    accumulated.append("-" * 40)
                return "\n".join(accumulated)

            btn_test = gr.Button("Run self-test")
            btn_test.click(run_self_test, inputs=[system_message, max_tokens, temperature, top_p], outputs=[test_output])

            gr.Markdown(
                "### Course: ARTIFICIAL INTELLIGENCE AND MACHINE LEARNING\n"
                "- N2 assignment\n"
                "- Evening class, Bachelor's in Computer Science, 2025.\n"
                "- Team members:\n"
                "  - Lucas Antonini - 1722631\n"
                "  - Carlos Eduardo da Silva - 1961011\n"
                "  - Felipe Rios Amaral - 1847080\n"
                "  - Kawrê Britto de Oliveira - 2260931\n"
                "  - Miguel Putini Alfano - 2879347")
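

# On a Hugging Face Space a bare demo.launch() is enough; for local debugging one can
# pass options such as demo.launch(share=True) or demo.launch(server_name="0.0.0.0").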
if __name__ == "__main__":
    demo.launch()