penseai / app.py
joseassuno1's picture
Create app.py
62f0df0 verified
"""
PenseAI - Chat Simples com Roteamento e Memoria
Login + Chat + Roteamento + Fluxo Complexo + Memoria Funcional
"""
import gradio as gr
import os
import json
import uuid
from datetime import datetime
from typing import Dict, Any, Optional, List, Tuple
# Importacoes da Groq e tiktoken
try:
from groq import Groq
import tiktoken
DEPENDENCIES_OK = True
except ImportError as e:
print(f"Erro ao importar dependencias: {e}")
DEPENDENCIES_OK = False
# API configuration: the Groq key comes from the environment (None if unset).
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# User table: display name -> password read from the environment variables
# password1..password5, in the order the names are listed below. A missing
# variable yields an empty string.
USERS = {
    name: os.getenv(f"password{index}", "")
    for index, name in enumerate(
        ("Alexsandra", "Ana Julia", "Elisa", "Jose Vitor", "Sabrina"),
        start=1,
    )
}

# Model configuration: role/category name -> Groq model identifier.
MODEL_MAPPING = dict(
    chat_geral="llama-3.3-70b-versatile",
    pesquisa_simples="compound-beta-mini",
    pesquisa_complexa="compound-beta",
    decompositor_tarefas="deepseek-r1-distill-llama-70b",
    ultimo_recurso="moonshotai/kimi-k2-instruct",
    roteador="llama-3.3-70b-versatile",
    sumarizador="llama-3.1-8b-instant",
)

# Model used when a request to the selected model fails.
FALLBACK_MODEL = "llama-3.1-8b-instant"
class MemoryManager:
    """Conversation memory manager with tiktoken-based token counting.

    Keeps the most recent ``max_history`` exchanges verbatim and asks the
    summarizer model to condense anything older into a short summary.
    """

    def __init__(self, groq_client):
        """Store the LLM client and try to load the ``cl100k_base`` encoding.

        Args:
            groq_client: Object whose ``call_llm(model=..., system_prompt=...,
                user_prompt=..., temperature=...)`` returns ``(text, success)``.
        """
        self.groq_client = groq_client
        self.max_history = 20  # most recent question/answer pairs kept verbatim
        self.max_tokens = 3000  # token budget for summary + recent context
        try:
            self.encoding = tiktoken.get_encoding("cl100k_base")
        except Exception:
            # Best effort: without an encoding, count_tokens() falls back to
            # a word-count estimate.
            self.encoding = None

    def count_tokens(self, text: str) -> int:
        """Return the token count of *text*, or an estimate if encoding fails."""
        encoder = self.encoding
        if encoder is not None:
            try:
                return len(encoder.encode(text))
            except Exception:
                # Deliberate best effort: use the estimate below instead.
                pass
        # Fallback: rough estimate of 1.3 tokens per whitespace-separated word.
        return int(len(text.split()) * 1.3)

    def format_history_for_context(self, history: List[List[str]]) -> str:
        """Render *history* as numbered user/assistant lines.

        Returns an empty string when *history* is empty.
        """
        if not history:
            return ""
        lines = ["HISTORICO DA CONVERSA:"]
        for index, (user_msg, assistant_msg) in enumerate(history, 1):
            lines.append(f"[{index}] Usuario: {user_msg}")
            lines.append(f"[{index}] Assistente: {assistant_msg}")
        return "\n".join(lines)

    def summarize_old_messages(self, old_history: List[List[str]]) -> str:
        """Summarize *old_history* with the summarizer model.

        Returns:
            The summary prefixed with a header, or an empty string when there
            is nothing to summarize or the model call does not succeed.
        """
        if not old_history:
            return ""
        # Single join instead of repeated ``+=`` on a growing string.
        history_text = "".join(
            f"Usuario: {user_msg}\nAssistente: {assistant_msg}\n\n"
            for user_msg, assistant_msg in old_history
        )
        summary_prompt = f"""Resume o seguinte historico de conversa de forma concisa, mantendo informacoes importantes sobre o usuario e contexto relevante:
{history_text}
Crie um resumo que preserve:
- Informacoes pessoais do usuario (nome, preferencias, etc.)
- Topicos principais discutidos
- Contexto importante para conversas futuras
Resumo:"""
        try:
            response, success = self.groq_client.call_llm(
                model=MODEL_MAPPING["sumarizador"],
                system_prompt="Voce e um especialista em resumir conversas mantendo informacoes importantes.",
                user_prompt=summary_prompt,
                temperature=0.3,
            )
        except Exception:
            # Summarization is optional; the chat continues without it.
            return ""
        if not success:
            return ""
        return f"RESUMO DE CONVERSAS ANTERIORES:\n{response}\n\n"

    def manage_memory(self, history: List[List[str]]) -> Tuple[List[List[str]], str]:
        """Split *history* into what is kept verbatim and build the context.

        Returns:
            ``(kept_history, context)``. When *history* has at most
            ``max_history`` entries it is returned unchanged together with
            its formatted text. Otherwise only the last ``max_history``
            entries are kept and the older ones are summarized; the summary
            is dropped from the context if summary + recent text exceeds
            ``max_tokens``.
        """
        if len(history) <= self.max_history:
            return history, self.format_history_for_context(history)

        # Older exchanges are summarized, the newest ones stay verbatim.
        old_messages = history[:-self.max_history]
        recent_messages = history[-self.max_history:]
        summary = self.summarize_old_messages(old_messages)
        recent_context = self.format_history_for_context(recent_messages)
        full_context = summary + recent_context

        # Over budget: fall back to the recent messages only.
        if self.count_tokens(full_context) > self.max_tokens:
            return recent_messages, recent_context
        return recent_messages, full_context
class GroqClient:
    """Thin wrapper around the Groq chat-completions client."""

    # Completion length cap shared by the primary and the fallback request.
    _MAX_TOKENS = 4000

    def __init__(self, api_key: str):
        """Create the underlying Groq client.

        Raises:
            ValueError: If *api_key* is empty or None.
        """
        if not api_key:
            raise ValueError("GROQ_API_KEY nao configurada")
        self.client = Groq(api_key=api_key)

    def clean_compound_response(self, response: str) -> str:
        """Strip thinking and tool-usage sections from a compound-model reply.

        Returns an empty string for empty or None input; otherwise the text
        with the matched sections removed and outer whitespace stripped.
        """
        if not response:
            return ""
        import re

        # Order matters only in that every pattern is applied once, in turn:
        # thinking sections first, then tool-usage notes.
        patterns = (
            r'<thinking>.*?</thinking>',
            r'Thinking:.*?(?=\n\n|\n[A-Z]|$)',
            r'Tool used:.*?(?=\n\n|\n[A-Z]|$)',
            r'Using tool:.*?(?=\n\n|\n[A-Z]|$)',
        )
        for pattern in patterns:
            response = re.sub(pattern, '', response, flags=re.DOTALL)
        return response.strip()

    def _complete(self, model: str, messages, temperature: float) -> str:
        """Send one chat-completion request and return the first choice's text."""
        response = self.client.chat.completions.create(
            model=model,
            messages=messages,
            temperature=temperature,
            max_tokens=self._MAX_TOKENS,
        )
        # Normalize a missing message body to an empty string so callers
        # always receive text.
        return response.choices[0].message.content or ""

    def call_llm(self, model: str, system_prompt: str, user_prompt: str,
                 temperature: float = 0.7) -> Tuple[str, bool]:
        """Call *model*, retrying once with ``FALLBACK_MODEL`` on failure.

        Returns:
            ``(text, True)`` on success; a fallback answer carries a trailing
            note naming the fallback model. ``(error message, False)`` when
            the primary call fails and the fallback is unavailable or fails
            as well; the message reports the primary error.
        """
        # Built outside the try block so the fallback request can always
        # reuse it.
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ]
        try:
            content = self._complete(model, messages, temperature)
            # Compound models mix metadata into the answer; remove it.
            if "compound" in model:
                content = self.clean_compound_response(content)
            return content, True
        except Exception as primary_error:
            if model != FALLBACK_MODEL:
                try:
                    content = self._complete(FALLBACK_MODEL, messages, temperature)
                except Exception:
                    # Fallback failed too; report the primary error below.
                    pass
                else:
                    return f"{content}\n\n*Via fallback: {FALLBACK_MODEL}*", True
            return f"Erro na API: {str(primary_error)}", False
class Router:
    """Routes a user query to a model category and plans complex queries."""

    def __init__(self, groq_client: "GroqClient"):
        """Keep the client used for the routing and planning LLM calls."""
        self.groq_client = groq_client

    @staticmethod
    def _extract_json(text) -> Dict[str, Any]:
        """Return the first JSON object found in an LLM reply.

        Models often wrap the requested JSON in a ``<think>`` preamble,
        markdown fences or explanatory prose; a plain ``json.loads`` on the
        whole reply fails in all of those cases.

        Raises:
            ValueError: If *text* is not a non-empty string or contains no
                decodable JSON object.
        """
        if not isinstance(text, str) or not text:
            raise ValueError("empty LLM response")
        # Ignore everything up to the end of a reasoning preamble, which may
        # itself contain braces or draft JSON.
        candidate = text.rpartition("</think>")[2]
        decoder = json.JSONDecoder()
        start = candidate.find("{")
        while start != -1:
            try:
                parsed, _ = decoder.raw_decode(candidate, start)
            except json.JSONDecodeError:
                parsed = None
            if isinstance(parsed, dict):
                return parsed
            start = candidate.find("{", start + 1)
        raise ValueError("no JSON object in LLM response")

    def route_query(self, query: str) -> Tuple[str, bool]:
        """Classify *query* into a routing category.

        Returns:
            ``(category, True)`` when the router reply could be parsed; the
            category is forced to ``"complexo"`` when the reported confidence
            is below 0.80. ``("chat_geral", False)`` when the LLM call fails
            or its reply cannot be interpreted.
        """
        router_prompt = """Voce e um roteador inteligente de IA. Analise a pergunta do usuario e classifique em uma das categorias:
CATEGORIAS DISPONIVEIS:
- chat_geral: Conversas normais, perguntas gerais, explicacoes simples
- pesquisa_simples: Perguntas que precisam de informacoes atuais simples
- pesquisa_complexa: Pesquisas que precisam de multiplas fontes ou analise profunda
- complexo: Perguntas que precisam de raciocinio complexo ou decomposicao em subtarefas
ANALISE DE INTENCOES:
- Analise a INTENCAO REAL por tras da pergunta
- Considere o CONTEXTO COMPLETO da solicitacao
- Interprete alem das palavras literais
Responda APENAS com JSON:
{"ia": "categoria", "confianca": 0.95}
Se confianca < 0.80, use "complexo"."""
        response, success = self.groq_client.call_llm(
            model=MODEL_MAPPING["roteador"],
            system_prompt=router_prompt,
            user_prompt=f"Pergunta: {query}",
            temperature=0.3,
        )
        if not success:
            return "chat_geral", False
        try:
            result = self._extract_json(response)
            confidence = float(result.get("confianca", 0.5))
        except (TypeError, ValueError):
            return "chat_geral", False
        category = result.get("ia", "chat_geral")
        if not isinstance(category, str):
            # A non-string category cannot be used as a model-mapping key.
            return "chat_geral", False
        # Low confidence: hand the query to the complex flow.
        if confidence < 0.80:
            category = "complexo"
        return category, True

    def create_complex_plan(self, query: str) -> Tuple[Dict[str, Any], bool]:
        """Ask the decomposer model for a subtask plan for *query*.

        Returns:
            ``(plan, True)`` with the parsed JSON object, or ``({}, False)``
            when the LLM call fails or no JSON object can be extracted.
        """
        complex_prompt = """Voce e um planejador de IA especializado em decompor tarefas complexas.
Analise a tarefa e decomponha em subtarefas simples. Responda APENAS com JSON:
{
"objetivo_final": "descricao do objetivo",
"subgoals": [
{
"subgoal": "meta intermediaria",
"subtasks": [
{
"subtask": "tarefa especifica",
"ai": "chat_geral|pesquisa_simples|pesquisa_complexa",
"prompt": "prompt especifico para a IA"
}
]
}
],
"complexidade_extrema": false
}
Se extremamente complexo, use "complexidade_extrema": true"""
        response, success = self.groq_client.call_llm(
            model=MODEL_MAPPING["decompositor_tarefas"],
            system_prompt=complex_prompt,
            user_prompt=f"Tarefa complexa: {query}",
            temperature=0.4,
        )
        if not success:
            return {}, False
        try:
            return self._extract_json(response), True
        except ValueError:
            return {}, False
class PenseAICore:
    """Main system: wires the Groq client, the router and the memory manager."""

    def __init__(self):
        """Build the collaborators.

        Raises:
            ValueError: If ``GROQ_API_KEY`` is not configured.
        """
        if not GROQ_API_KEY:
            raise ValueError("GROQ_API_KEY nao configurada")
        self.groq_client = GroqClient(GROQ_API_KEY)
        self.router = Router(self.groq_client)
        self.memory_manager = MemoryManager(self.groq_client)

    def process_chat(self, message: str, history: List[List[str]]) -> List[List[str]]:
        """Answer *message* and append the exchange to *history*.

        The query is routed first; a successfully routed, non-"complexo"
        category is answered with a single call, anything else (including a
        routing failure) goes through ``handle_complex_flow``.

        Args:
            message: The user's message.
            history: Previous ``[user, assistant]`` pairs; ``None`` is
                treated as an empty conversation. A given list is extended
                in place.

        Returns:
            The history including the new exchange. Any exception is caught
            and reported as the assistant's reply instead of propagating.
        """
        if history is None:
            # Otherwise the error path below would itself fail on .append().
            history = []
        try:
            # Memory is managed BEFORE answering; only the context is used.
            _, context = self.memory_manager.manage_memory(history)
            category, route_success = self.router.route_query(message)
            if route_success and category != "complexo":
                # Simple flow: one call to the model mapped to the category.
                model = MODEL_MAPPING.get(category, MODEL_MAPPING["chat_geral"])
                if context:
                    system_prompt = f"""Voce e um assistente inteligente. Responda de forma util e precisa.
{context}
INSTRUCOES:
- Use o historico acima para manter contexto da conversa
- Lembre-se de informacoes anteriores sobre o usuario
- Responda de forma clara e objetiva
- Seja informativo e util
- Mantenha consistencia com conversas anteriores"""
                else:
                    system_prompt = """Voce e um assistente inteligente. Responda de forma util e precisa.
INSTRUCOES:
- Analise a INTENCAO REAL por tras da pergunta
- Responda de forma clara e objetiva
- Seja informativo e util"""
                response, success = self.groq_client.call_llm(
                    model=model,
                    system_prompt=system_prompt,
                    user_prompt=message,
                    temperature=0.7,
                )
                if not success:
                    response = "Desculpe, ocorreu um erro ao processar sua solicitacao."
            else:
                response = self.handle_complex_flow(message, context)
            # Appended to the ORIGINAL history, not the trimmed one.
            history.append([message, response])
            return history
        except Exception as e:
            error_msg = f"Erro no sistema: {str(e)}"
            history.append([message, error_msg])
            return history

    def _answer_with_last_resort(self, query: str, context: str,
                                 persona: str, failure_message: str) -> str:
        """Answer *query* directly with the last-resort model."""
        if context:
            system_prompt = f"{persona}\n{context}\nUse o contexto acima se relevante para sua resposta."
        else:
            system_prompt = persona
        response, success = self.groq_client.call_llm(
            model=MODEL_MAPPING["ultimo_recurso"],
            system_prompt=system_prompt,
            user_prompt=query,
            temperature=0.8,
        )
        return response if success else failure_message

    @staticmethod
    def _valid_subgoals(plan: Dict[str, Any]) -> List[Tuple[Any, List[Dict[str, Any]]]]:
        """Return ``(title, subtasks)`` for each usable subgoal of *plan*.

        The plan is LLM-generated, so entries of the wrong type are skipped
        rather than trusted; subgoals without any dict subtask are dropped.
        """
        subgoals = plan.get("subgoals")
        if not isinstance(subgoals, list):
            return []
        usable = []
        for subgoal in subgoals:
            if not isinstance(subgoal, dict):
                continue
            subtasks = subgoal.get("subtasks")
            if not isinstance(subtasks, list):
                continue
            tasks = [task for task in subtasks if isinstance(task, dict)]
            if tasks:
                usable.append((subgoal.get("subgoal", "Meta"), tasks))
        return usable

    def _run_subtask(self, subtask: Dict[str, Any], context: str) -> str:
        """Execute one plan subtask and return its formatted result line."""
        ai_type = subtask.get("ai", "chat_geral")
        if not isinstance(ai_type, str):
            # Unusable as a mapping key; use the general chat model.
            ai_type = "chat_geral"
        model = MODEL_MAPPING.get(ai_type, MODEL_MAPPING["chat_geral"])
        prompt = subtask.get("prompt", subtask.get("subtask", ""))
        enhanced_prompt = f"{context}\n\nTarefa: {prompt}" if context else prompt
        result, success = self.groq_client.call_llm(
            model=model,
            system_prompt="Voce e um assistente especializado. Responda de forma precisa e util.",
            user_prompt=enhanced_prompt,
            temperature=0.7,
        )
        label = subtask.get('subtask', 'Tarefa')
        if success:
            return f"- {label}: {result}"
        return f"- {label}: [Erro na execucao]"

    def handle_complex_flow(self, query: str, context: str = "") -> str:
        """Answer a complex query by planning, executing and synthesizing.

        Falls back to the last-resort model when no plan can be created or
        when the plan flags extreme complexity. Otherwise every subtask is
        run and the collected results are merged by a final synthesis call;
        if that call fails the raw results are returned.
        """
        plan, plan_success = self.router.create_complex_plan(query)
        if not plan_success or not isinstance(plan, dict):
            return self._answer_with_last_resort(
                query,
                context,
                "Voce e um assistente avancado. Responda de forma completa e inteligente.",
                "Nao foi possivel processar a solicitacao complexa.",
            )
        if plan.get("complexidade_extrema"):
            return self._answer_with_last_resort(
                query,
                context,
                "Voce e um assistente avancado para casos extremamente complexos.",
                "Nao foi possivel processar a solicitacao extremamente complexa.",
            )

        # Execute the plan, one formatted section per subgoal.
        results = []
        for title, subtasks in self._valid_subgoals(plan):
            lines = [self._run_subtask(subtask, context) for subtask in subtasks]
            results.append(f"**{title}:**\n" + "\n".join(lines))
        if not results:
            return "Nao foi possivel executar o plano complexo."

        # Final synthesis of all subgoal results.
        joined_results = "\n".join(results)
        synthesis_prompt = f"""Sintetize os seguintes resultados em uma resposta coesa para o usuario:
PERGUNTA ORIGINAL: {query}
RESULTADOS:
{joined_results}
Crie uma resposta final clara, bem estruturada e util."""
        if context:
            synthesis_prompt = f"{context}\n\n{synthesis_prompt}"
        final_response, success = self.groq_client.call_llm(
            model=MODEL_MAPPING["chat_geral"],
            system_prompt="Voce e um sintetizador de informacoes. Crie respostas claras e bem estruturadas.",
            user_prompt=synthesis_prompt,
            temperature=0.6,
        )
        return final_response if success else "\n\n".join(results)
# Authentication
def authenticate(username: str, password: str,
                 users: Optional[Dict[str, str]] = None) -> Tuple[bool, str]:
    """Check a username/password pair against the user table.

    Args:
        username: Name selected at login.
        password: Password typed by the user.
        users: Optional name -> password table; defaults to the module-level
            ``USERS``.

    Returns:
        ``(True, welcome message)`` on success, otherwise
        ``(False, invalid-credentials message)``.
    """
    import hmac

    table = USERS if users is None else users
    stored = table.get(username) if isinstance(username, str) else None
    # An empty stored password means none was configured for this account;
    # it must never match (otherwise an empty input would log the user in).
    if not isinstance(stored, str) or not stored or not isinstance(password, str):
        return False, "Credenciais invalidas. Tente novamente."
    # Constant-time comparison; bytes are used so non-ASCII text is accepted.
    if hmac.compare_digest(stored.encode("utf-8"), password.encode("utf-8")):
        return True, f"Login realizado com sucesso! Bem-vindo(a), {username}!"
    return False, "Credenciais invalidas. Tente novamente."
# Gradio interface
def create_interface():
    """Build the Gradio app: a login form that gates a chat panel.

    Returns:
        A ``gr.Interface`` that only shows a configuration error when the
        dependencies or ``GROQ_API_KEY`` are missing; otherwise the
        ``gr.Blocks`` application with login, chat, logout and clear wiring.
    """
    if not DEPENDENCIES_OK:
        return gr.Interface(
            fn=lambda: "Erro: Dependencias nao instaladas (groq, tiktoken)",
            inputs=[],
            outputs="text",
            title="Erro de Configuracao"
        )
    if not GROQ_API_KEY:
        return gr.Interface(
            fn=lambda: "ERRO: GROQ_API_KEY nao configurada!",
            inputs=[],
            outputs="text",
            title="Erro de Configuracao"
        )

    # Core system shared by all event handlers below.
    system = PenseAICore()

    # Custom CSS for the page, login and chat containers.
    custom_css = """
.main-container {
max-width: 1000px;
margin: 0 auto;
padding: 20px;
}
.login-container {
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
border-radius: 15px;
padding: 30px;
margin: 20px 0;
box-shadow: 0 10px 30px rgba(0,0,0,0.2);
}
.chat-container {
background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);
border-radius: 15px;
padding: 20px;
margin: 20px 0;
box-shadow: 0 8px 25px rgba(0,0,0,0.15);
}
"""

    with gr.Blocks(
        title="PenseAI - Chat Inteligente",
        theme=gr.themes.Soft(),
        css=custom_css
    ) as demo:
        # Main header
        gr.HTML("""
<div style="text-align: center; padding: 20px; background: linear-gradient(90deg, #4facfe 0%, #00f2fe 100%); border-radius: 15px; margin-bottom: 20px;">
<h1 style="color: white; margin: 0; font-size: 2.5em; text-shadow: 2px 2px 4px rgba(0,0,0,0.3);">
PenseAI - Chat Inteligente
</h1>
<p style="color: rgba(255,255,255,0.9); margin: 10px 0 0 0; font-size: 1.2em;">
Roteamento Automatico + Fluxo Complexo + Memoria Contextual
</p>
</div>
""")

        # Session state
        authenticated = gr.State(False)
        current_user = gr.State("")

        # Login section
        with gr.Group(elem_classes=["login-container"]) as login_section:
            gr.HTML("""
<div style="text-align: center; margin-bottom: 20px;">
<h2 style="color: white; margin: 0;">Acesso ao Sistema</h2>
<p style="color: rgba(255,255,255,0.8); margin: 5px 0 0 0;">Faca login para acessar o chat</p>
</div>
""")
            with gr.Row():
                # Empty side columns center the form.
                with gr.Column(scale=1):
                    pass
                with gr.Column(scale=3):
                    username_input = gr.Dropdown(
                        choices=list(USERS.keys()),
                        label="Usuario",
                        value=None
                    )
                    password_input = gr.Textbox(
                        label="Senha",
                        type="password",
                        max_lines=1
                    )
                    login_button = gr.Button("Entrar", variant="primary", size="lg")
                    login_status = gr.HTML("")
                with gr.Column(scale=1):
                    pass

        # Main (chat) section, hidden until login succeeds
        with gr.Group(visible=False, elem_classes=["chat-container"]) as main_section:
            # Chat header
            with gr.Row():
                with gr.Column(scale=3):
                    gr.HTML("""
<div style="color: white;">
<h2 style="margin: 0;">Chat com Roteamento e Memoria</h2>
<p style="margin: 5px 0 0 0; opacity: 0.9;">Sistema inteligente que lembra do contexto</p>
</div>
""")
                with gr.Column(scale=1):
                    logout_button = gr.Button("Sair", variant="secondary", size="sm")

            # Chat area
            chatbot = gr.Chatbot(
                height=500,
                show_label=False,
                container=True,
                bubble_full_width=False
            )

            # Input controls
            with gr.Row():
                text_input = gr.Textbox(
                    label="",
                    placeholder="Digite sua mensagem... (o sistema lembra do contexto)",
                    scale=5,
                    max_lines=3,
                    container=False
                )
                send_button = gr.Button("Enviar", variant="primary", scale=1, size="lg")

            # Extra controls
            with gr.Row():
                clear_button = gr.Button("Limpar Chat", variant="secondary")

            # Static description of the routing system
            gr.HTML("""
<div style="color: white; text-align: center; background: rgba(255,255,255,0.1); border-radius: 10px; padding: 15px; margin-top: 15px;">
<h3 style="margin: 0 0 10px 0;">Sistema de Roteamento Inteligente:</h3>
<div style="display: flex; justify-content: space-around; flex-wrap: wrap;">
<div style="margin: 5px; padding: 10px; background: rgba(255,255,255,0.1); border-radius: 8px; min-width: 180px;">
<strong>Chat Geral</strong><br>
<small>Conversas e perguntas simples</small>
</div>
<div style="margin: 5px; padding: 10px; background: rgba(255,255,255,0.1); border-radius: 8px; min-width: 180px;">
<strong>Pesquisa Simples</strong><br>
<small>Informacoes atuais basicas</small>
</div>
<div style="margin: 5px; padding: 10px; background: rgba(255,255,255,0.1); border-radius: 8px; min-width: 180px;">
<strong>Pesquisa Complexa</strong><br>
<small>Analises profundas</small>
</div>
<div style="margin: 5px; padding: 10px; background: rgba(255,255,255,0.1); border-radius: 8px; min-width: 180px;">
<strong>Fluxo Complexo</strong><br>
<small>Decomposicao de tarefas</small>
</div>
</div>
<p style="margin: 15px 0 0 0; opacity: 0.8;">
<strong>Memoria:</strong> Lembra das ultimas 20 conversas e resume automaticamente conversas antigas
</p>
</div>
""")

        # Event handlers
        def handle_login(username, password):
            """Authenticate and switch sections; always clears the password box."""
            success, message = authenticate(username, password)
            if success:
                status_html = f"""
<div style="color: #4CAF50; text-align: center; padding: 15px; background: rgba(76, 175, 80, 0.1); border-radius: 10px; margin-top: 15px;">
<strong>{message}</strong>
</div>
"""
                return (
                    gr.update(visible=False),
                    gr.update(visible=True),
                    status_html,
                    True,
                    username,
                    ""
                )
            status_html = f"""
<div style="color: #f44336; text-align: center; padding: 15px; background: rgba(244, 67, 54, 0.1); border-radius: 10px; margin-top: 15px;">
<strong>{message}</strong>
</div>
"""
            return (
                gr.update(visible=True),
                gr.update(visible=False),
                status_html,
                False,
                "",
                ""
            )

        def handle_logout():
            """Show the login form again and reset status, state and chat."""
            return (
                gr.update(visible=True),
                gr.update(visible=False),
                "",
                False,
                "",
                []
            )

        def handle_chat(text, history, auth_state):
            """Process one message; ignored when logged out or blank."""
            # ``text`` may arrive as None; treat it like an empty message.
            cleaned = (text or "").strip()
            if not auth_state or not cleaned:
                return history, ""
            new_history = system.process_chat(cleaned, history)
            return new_history, ""

        # Wire events
        login_button.click(
            handle_login,
            inputs=[username_input, password_input],
            # password_input is an output so the typed password does not
            # linger in the (hidden) login form.
            outputs=[login_section, main_section, login_status, authenticated,
                     current_user, password_input]
        )
        logout_button.click(
            handle_logout,
            outputs=[login_section, main_section, login_status, authenticated, current_user, chatbot]
        )
        send_button.click(
            handle_chat,
            inputs=[text_input, chatbot, authenticated],
            outputs=[chatbot, text_input]
        )
        text_input.submit(
            handle_chat,
            inputs=[text_input, chatbot, authenticated],
            outputs=[chatbot, text_input]
        )
        clear_button.click(
            lambda: [],
            outputs=[chatbot]
        )
    return demo
# Script entry point
if __name__ == "__main__":
    print("===== PenseAI Chat Inteligente =====")
    print("Data:", datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
    # ``raise SystemExit`` is used instead of the ``exit()`` helper, which is
    # injected by the ``site`` module and is not guaranteed to exist.
    if not GROQ_API_KEY:
        print("ERRO: GROQ_API_KEY nao configurada!")
        raise SystemExit(1)
    # At least one user must have a password configured.
    if not any(USERS.values()):
        print("ERRO: Nenhuma senha configurada!")
        raise SystemExit(1)
    try:
        demo = create_interface()
        demo.launch(
            server_name="0.0.0.0",
            server_port=7860,
            share=False,
            show_error=True
        )
    except Exception as e:
        print(f"Erro ao iniciar: {e}")
        raise SystemExit(1)