"""
PenseAI - Chat Simples com Roteamento e Memoria
Login + Chat + Roteamento + Fluxo Complexo + Memoria Funcional
"""

import hmac
import json
import os
import re
import uuid
from datetime import datetime
from typing import Dict, Any, Optional, List, Tuple

import gradio as gr

# Groq SDK and tiktoken imports. A missing package does not abort the module:
# it only flips DEPENDENCIES_OK, which create_interface() checks in order to
# show an error page instead of the chat UI.
try:
    from groq import Groq
    import tiktoken
    DEPENDENCIES_OK = True
except ImportError as e:
    print(f"Erro ao importar dependencias: {e}")
    DEPENDENCIES_OK = False

# API configuration (None when the environment variable is unset)
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# User table: display name -> password. Each password is read from its own
# environment variable; an unset variable yields "" (empty string).
USERS = {
    "Alexsandra": os.getenv("password1", ""),
    "Ana Julia": os.getenv("password2", ""),
    "Elisa": os.getenv("password3", ""),
    "Jose Vitor": os.getenv("password4", ""),
    "Sabrina": os.getenv("password5", "")
}

# Model configuration: logical role used by the code -> model identifier
# passed to the Groq chat-completions API.
MODEL_MAPPING = {
    "chat_geral": "llama-3.3-70b-versatile",
    "pesquisa_simples": "compound-beta-mini", 
    "pesquisa_complexa": "compound-beta",
    "decompositor_tarefas": "deepseek-r1-distill-llama-70b",
    "ultimo_recurso": "moonshotai/kimi-k2-instruct",
    "roteador": "llama-3.3-70b-versatile",
    "sumarizador": "llama-3.1-8b-instant"
}

# Model that GroqClient.call_llm retries with when the requested model fails.
FALLBACK_MODEL = "llama-3.1-8b-instant"

class MemoryManager:
    """Build the conversation context that is handed to the models.

    The most recent ``max_history`` exchanges are kept verbatim, older
    exchanges are condensed by the summarizer model, and the resulting
    context string is trimmed to ``max_tokens`` (oldest exchanges first)
    as long as more than one exchange remains.
    """

    def __init__(self, groq_client):
        """Store the LLM client and load the tokenizer.

        Args:
            groq_client: Object exposing ``call_llm(model=..., system_prompt=...,
                user_prompt=..., temperature=...)`` returning ``(text, success)``.
        """
        self.groq_client = groq_client
        self.max_history = 20  # most recent exchanges kept verbatim
        self.max_tokens = 3000  # token budget for the context string
        try:
            self.encoding = tiktoken.get_encoding("cl100k_base")
        except Exception:
            # Tokenizer unavailable: count_tokens() uses its word estimate.
            self.encoding = None

    def count_tokens(self, text: str) -> int:
        """Return the token count of *text*.

        Uses the tiktoken encoding when it was loaded; otherwise (or when
        encoding fails) estimates 1.3 tokens per whitespace-separated word.
        """
        if self.encoding is not None:
            try:
                return len(self.encoding.encode(text))
            except Exception:
                # Best effort: fall through to the estimate below.
                pass
        return int(len(text.split()) * 1.3)

    def format_history_for_context(self, history: List[List[str]]) -> str:
        """Render ``[user, assistant]`` pairs as a numbered transcript.

        Returns an empty string for an empty history.
        """
        if not history:
            return ""

        context_parts = ["HISTORICO DA CONVERSA:"]
        for i, (user_msg, assistant_msg) in enumerate(history, 1):
            context_parts.append(f"[{i}] Usuario: {user_msg}")
            context_parts.append(f"[{i}] Assistente: {assistant_msg}")

        return "\n".join(context_parts)

    def summarize_old_messages(self, old_history: List[List[str]]) -> str:
        """Summarize *old_history* with the summarizer model.

        Returns the summary prefixed with a header and followed by a blank
        line, or an empty string when there is nothing to summarize or the
        model call fails (summarization is best effort).
        """
        if not old_history:
            return ""

        # Single join instead of repeated string concatenation.
        history_text = "".join(
            f"Usuario: {user_msg}\nAssistente: {assistant_msg}\n\n"
            for user_msg, assistant_msg in old_history
        )

        summary_prompt = f"""Resume o seguinte historico de conversa de forma concisa, mantendo informacoes importantes sobre o usuario e contexto relevante:

{history_text}

Crie um resumo que preserve:
- Informacoes pessoais do usuario (nome, preferencias, etc.)
- Topicos principais discutidos
- Contexto importante para conversas futuras

Resumo:"""

        try:
            response, success = self.groq_client.call_llm(
                model=MODEL_MAPPING["sumarizador"],
                system_prompt="Voce e um especialista em resumir conversas mantendo informacoes importantes.",
                user_prompt=summary_prompt,
                temperature=0.3
            )
        except Exception:
            # A failed summary must never break the chat itself.
            return ""

        if success:
            return f"RESUMO DE CONVERSAS ANTERIORES:\n{response}\n\n"
        return ""

    def manage_memory(self, history: List[List[str]]) -> Tuple[List[List[str]], str]:
        """Select the exchanges to keep and build the context string.

        Args:
            history: Full chat history as ``[user, assistant]`` pairs
                (``None`` is treated as empty).

        Returns:
            ``(kept_history, context)``. ``kept_history`` is *history* itself
            when nothing had to be dropped, otherwise a new list holding the
            retained most recent exchanges. ``context`` is the optional
            summary of older exchanges followed by the transcript of the
            kept ones.
        """
        if not history:
            return (history if history is not None else []), ""

        if len(history) <= self.max_history:
            recent_messages = history
            summary = ""
        else:
            # Older exchanges are summarized, recent ones stay verbatim.
            recent_messages = history[-self.max_history:]
            summary = self.summarize_old_messages(history[:-self.max_history])

        recent_context = self.format_history_for_context(recent_messages)
        context = summary + recent_context

        # Over budget with the summary attached: drop the summary first.
        if summary and self.count_tokens(context) > self.max_tokens:
            context = recent_context

        # Still over budget: drop the oldest exchanges one by one, always
        # keeping at least the latest exchange.
        while len(recent_messages) > 1 and self.count_tokens(context) > self.max_tokens:
            recent_messages = recent_messages[1:]
            context = self.format_history_for_context(recent_messages)

        return recent_messages, context

class GroqClient:
    """Small wrapper around the Groq chat-completions API with model fallback."""

    # Metadata sections removed from compound-model output, applied in order:
    # thinking sections first, then tool-usage notes.
    _COMPOUND_NOISE = (
        re.compile(r'<thinking>.*?</thinking>', re.DOTALL),
        re.compile(r'Thinking:.*?(?=\n\n|\n[A-Z]|$)', re.DOTALL),
        re.compile(r'Tool used:.*?(?=\n\n|\n[A-Z]|$)', re.DOTALL),
        re.compile(r'Using tool:.*?(?=\n\n|\n[A-Z]|$)', re.DOTALL),
    )

    def __init__(self, api_key: str):
        """Create the underlying Groq client.

        Raises:
            ValueError: If *api_key* is empty or ``None``.
        """
        if not api_key:
            raise ValueError("GROQ_API_KEY nao configurada")
        self.client = Groq(api_key=api_key)

    def clean_compound_response(self, response: str) -> str:
        """Strip thinking/tool metadata from a compound-model reply.

        Returns an empty string for an empty or ``None`` response.
        """
        if not response:
            return ""

        for pattern in self._COMPOUND_NOISE:
            response = pattern.sub('', response)

        return response.strip()

    def _complete(self, model: str, messages: list, temperature: float) -> str:
        """Run one chat completion and return its text ("" when the API returns none)."""
        response = self.client.chat.completions.create(
            model=model,
            messages=messages,
            temperature=temperature,
            max_tokens=4000
        )
        # The message content may be None; normalize so callers always get str.
        return response.choices[0].message.content or ""

    def call_llm(self, model: str, system_prompt: str, user_prompt: str, 
                 temperature: float = 0.7) -> Tuple[str, bool]:
        """Call *model*, retrying once with ``FALLBACK_MODEL`` on failure.

        Args:
            model: Model identifier for the first attempt.
            system_prompt: Content of the system message.
            user_prompt: Content of the user message.
            temperature: Sampling temperature for both attempts.

        Returns:
            ``(text, True)`` on success; replies produced by the fallback
            model carry a trailing ``*Via fallback: ...*`` note.
            ``(error_message, False)`` when every attempt failed.
        """
        # Built outside the try block so the fallback path can always use it.
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ]

        try:
            content = self._complete(model, messages, temperature)

            # Compound models mix tool/thinking metadata into the answer.
            if "compound" in model:
                content = self.clean_compound_response(content)

            return content, True

        except Exception as e:
            if model != FALLBACK_MODEL:
                try:
                    content = self._complete(FALLBACK_MODEL, messages, temperature)
                except Exception as fallback_error:
                    # Report the second failure instead of hiding it; the
                    # original error is what gets returned to the caller.
                    print(f"Fallback {FALLBACK_MODEL} tambem falhou: {fallback_error}")
                else:
                    return f"{content}\n\n*Via fallback: {FALLBACK_MODEL}*", True

            return f"Erro na API: {str(e)}", False

class Router:
    """Classify user queries and decompose complex ones with LLM calls."""

    # Categories the routing prompt advertises; anything else the model
    # returns is treated as "chat_geral".
    _VALID_CATEGORIES = ("chat_geral", "pesquisa_simples", "pesquisa_complexa", "complexo")

    def __init__(self, groq_client: "GroqClient"):
        """Keep the client used for the routing and planning calls."""
        self.groq_client = groq_client

    @staticmethod
    def _extract_json(text: str) -> Dict[str, Any]:
        """Return the first JSON object embedded in *text*.

        Model replies are frequently not bare JSON: reasoning models prepend
        ``<think>...</think>`` blocks, others wrap the object in Markdown
        code fences, and text may follow the object. Each ``{`` is tried as
        the start of a JSON document until one decodes to a dict.

        Raises:
            ValueError: If *text* is not a string or holds no JSON object.
        """
        if not isinstance(text, str) or not text:
            raise ValueError("empty model response")

        cleaned = re.sub(r'<think>.*?</think>', '', text, flags=re.DOTALL)
        decoder = json.JSONDecoder()
        start = cleaned.find("{")
        while start != -1:
            try:
                parsed, _ = decoder.raw_decode(cleaned[start:])
            except json.JSONDecodeError:
                parsed = None
            if isinstance(parsed, dict):
                return parsed
            start = cleaned.find("{", start + 1)

        raise ValueError("no JSON object found in model response")

    @staticmethod
    def _parse_route(response: str) -> Tuple[str, bool]:
        """Turn the router model's reply into ``(category, success)``."""
        try:
            result = Router._extract_json(response)
            # Models sometimes quote the number; float() accepts both forms.
            confidence = float(result.get("confianca", 0.5))
        except (ValueError, TypeError):
            return "chat_geral", False

        category = result.get("ia", "chat_geral")
        if category not in Router._VALID_CATEGORIES:
            category = "chat_geral"

        # Low confidence is escalated to the complex flow.
        if confidence < 0.80:
            category = "complexo"

        return category, True

    @staticmethod
    def _parse_plan(response: str) -> Tuple[Dict[str, Any], bool]:
        """Turn the planner model's reply into ``(plan, success)``."""
        try:
            return Router._extract_json(response), True
        except ValueError:
            return {}, False

    def route_query(self, query: str) -> Tuple[str, bool]:
        """Classify *query* into one of the routing categories.

        Returns:
            ``(category, True)`` when the router reply could be parsed, where
            category is one of ``chat_geral``, ``pesquisa_simples``,
            ``pesquisa_complexa`` or ``complexo`` (also used when the reported
            confidence is below 0.80). ``("chat_geral", False)`` when the
            call failed or the reply could not be parsed.
        """

        router_prompt = """Voce e um roteador inteligente de IA. Analise a pergunta do usuario e classifique em uma das categorias:

CATEGORIAS DISPONIVEIS:
- chat_geral: Conversas normais, perguntas gerais, explicacoes simples
- pesquisa_simples: Perguntas que precisam de informacoes atuais simples
- pesquisa_complexa: Pesquisas que precisam de multiplas fontes ou analise profunda
- complexo: Perguntas que precisam de raciocinio complexo ou decomposicao em subtarefas

ANALISE DE INTENCOES:
- Analise a INTENCAO REAL por tras da pergunta
- Considere o CONTEXTO COMPLETO da solicitacao
- Interprete alem das palavras literais

Responda APENAS com JSON:
{"ia": "categoria", "confianca": 0.95}

Se confianca < 0.80, use "complexo"."""

        response, success = self.groq_client.call_llm(
            model=MODEL_MAPPING["roteador"],
            system_prompt=router_prompt,
            user_prompt=f"Pergunta: {query}",
            temperature=0.3
        )

        if not success:
            return "chat_geral", False

        return self._parse_route(response)

    def create_complex_plan(self, query: str) -> Tuple[Dict[str, Any], bool]:
        """Ask the planner model to decompose *query* into subgoals/subtasks.

        Returns:
            ``(plan, True)`` with the decoded JSON object, or ``({}, False)``
            when the call failed or no JSON object could be extracted.
        """

        complex_prompt = """Voce e um planejador de IA especializado em decompor tarefas complexas.

Analise a tarefa e decomponha em subtarefas simples. Responda APENAS com JSON:

{
  "objetivo_final": "descricao do objetivo",
  "subgoals": [
    {
      "subgoal": "meta intermediaria",
      "subtasks": [
        {
          "subtask": "tarefa especifica",
          "ai": "chat_geral|pesquisa_simples|pesquisa_complexa",
          "prompt": "prompt especifico para a IA"
        }
      ]
    }
  ],
  "complexidade_extrema": false
}

Se extremamente complexo, use "complexidade_extrema": true"""

        response, success = self.groq_client.call_llm(
            model=MODEL_MAPPING["decompositor_tarefas"],
            system_prompt=complex_prompt,
            user_prompt=f"Tarefa complexa: {query}",
            temperature=0.4
        )

        if not success:
            return {}, False

        return self._parse_plan(response)

class PenseAICore:
    """Chat orchestrator: memory management, routing and answer generation."""

    def __init__(self):
        """Wire the Groq client, router and memory manager together.

        Raises:
            ValueError: If ``GROQ_API_KEY`` is not configured.
        """
        if not GROQ_API_KEY:
            raise ValueError("GROQ_API_KEY nao configurada")

        self.groq_client = GroqClient(GROQ_API_KEY)
        self.router = Router(self.groq_client)
        self.memory_manager = MemoryManager(self.groq_client)

    def process_chat(self, message: str, history: List[List[str]]) -> List[List[str]]:
        """Answer *message* and append the exchange to *history*.

        Args:
            message: The user's message.
            history: Chat history as ``[user, assistant]`` pairs; it is
                extended in place. ``None`` is treated as an empty history.

        Returns:
            The history including the new ``[message, response]`` pair. Any
            exception raised while answering is reported as the response
            text instead of propagating.
        """
        if history is None:
            history = []

        try:
            # Context is built from the history BEFORE the new message.
            _, context = self.memory_manager.manage_memory(history)

            category, route_success = self.router.route_query(message)

            if route_success and category != "complexo":
                response = self._handle_simple_flow(message, category, context)
            else:
                # Routing failures and "complexo" both take the complex flow.
                response = self.handle_complex_flow(message, context)
        except Exception as e:
            response = f"Erro no sistema: {str(e)}"

        # The full history is extended, not the trimmed copy used for context.
        history.append([message, response])
        return history

    def _handle_simple_flow(self, message: str, category: str, context: str) -> str:
        """Answer *message* with the single model mapped to *category*."""
        model = MODEL_MAPPING.get(category, MODEL_MAPPING["chat_geral"])

        if context:
            system_prompt = f"""Voce e um assistente inteligente. Responda de forma util e precisa.

{context}

INSTRUCOES:
- Use o historico acima para manter contexto da conversa
- Lembre-se de informacoes anteriores sobre o usuario
- Responda de forma clara e objetiva
- Seja informativo e util
- Mantenha consistencia com conversas anteriores"""
        else:
            system_prompt = """Voce e um assistente inteligente. Responda de forma util e precisa.

INSTRUCOES:
- Analise a INTENCAO REAL por tras da pergunta
- Responda de forma clara e objetiva
- Seja informativo e util"""

        response, success = self.groq_client.call_llm(
            model=model,
            system_prompt=system_prompt,
            user_prompt=message,
            temperature=0.7
        )

        if not success:
            return "Desculpe, ocorreu um erro ao processar sua solicitacao."
        return response

    def _call_last_resort(self, query: str, context: str, persona: str,
                          failure_message: str) -> str:
        """Answer *query* directly with the last-resort model.

        *persona* is the system prompt; the conversation context is appended
        to it when available. *failure_message* is returned if the call fails.
        """
        if context:
            system_prompt = f"""{persona}

{context}

Use o contexto acima se relevante para sua resposta."""
        else:
            system_prompt = persona

        response, success = self.groq_client.call_llm(
            model=MODEL_MAPPING["ultimo_recurso"],
            system_prompt=system_prompt,
            user_prompt=query,
            temperature=0.8
        )
        return response if success else failure_message

    def _execute_plan(self, plan: Dict[str, Any], context: str) -> List[str]:
        """Run every subtask of *plan* and return one text section per subgoal.

        The plan is model output, so entries with an unexpected shape
        (non-list collections, non-dict items) are skipped instead of raising.
        """
        sections = []

        subgoals = plan.get("subgoals", [])
        if not isinstance(subgoals, list):
            subgoals = []

        for subgoal in subgoals:
            if not isinstance(subgoal, dict):
                continue
            subtasks = subgoal.get("subtasks", [])
            if not isinstance(subtasks, list):
                continue

            subgoal_results = []
            for subtask in subtasks:
                if not isinstance(subtask, dict):
                    continue

                ai_type = subtask.get("ai", "chat_geral")
                if not isinstance(ai_type, str):
                    ai_type = "chat_geral"
                model = MODEL_MAPPING.get(ai_type, MODEL_MAPPING["chat_geral"])
                label = subtask.get("subtask", "Tarefa")
                # The subtask description doubles as prompt when none is given.
                prompt = subtask.get("prompt", subtask.get("subtask", ""))

                enhanced_prompt = f"{context}\n\nTarefa: {prompt}" if context else prompt

                result, success = self.groq_client.call_llm(
                    model=model,
                    system_prompt="Voce e um assistente especializado. Responda de forma precisa e util.",
                    user_prompt=enhanced_prompt,
                    temperature=0.7
                )

                if success:
                    subgoal_results.append(f"- {label}: {result}")
                else:
                    subgoal_results.append(f"- {label}: [Erro na execucao]")

            if subgoal_results:
                sections.append(f"**{subgoal.get('subgoal', 'Meta')}:**\n" + "\n".join(subgoal_results))

        return sections

    def handle_complex_flow(self, query: str, context: str = "") -> str:
        """Answer *query* through plan decomposition, execution and synthesis.

        Falls back to the last-resort model when no plan could be created or
        the plan flags the task as extremely complex. When the synthesis
        call fails, the raw subtask results are returned instead.
        """
        plan, plan_success = self.router.create_complex_plan(query)

        if not plan_success or not isinstance(plan, dict):
            return self._call_last_resort(
                query,
                context,
                "Voce e um assistente avancado. Responda de forma completa e inteligente.",
                "Nao foi possivel processar a solicitacao complexa."
            )

        if plan.get("complexidade_extrema"):
            return self._call_last_resort(
                query,
                context,
                "Voce e um assistente avancado para casos extremamente complexos.",
                "Nao foi possivel processar a solicitacao extremamente complexa."
            )

        results = self._execute_plan(plan, context)
        if not results:
            return "Nao foi possivel executar o plano complexo."

        joined_results = "\n".join(results)
        synthesis_prompt = f"""Sintetize os seguintes resultados em uma resposta coesa para o usuario:

PERGUNTA ORIGINAL: {query}

RESULTADOS:
{joined_results}

Crie uma resposta final clara, bem estruturada e util."""

        if context:
            synthesis_prompt = f"{context}\n\n{synthesis_prompt}"

        final_response, success = self.groq_client.call_llm(
            model=MODEL_MAPPING["chat_geral"],
            system_prompt="Voce e um sintetizador de informacoes. Crie respostas claras e bem estruturadas.",
            user_prompt=synthesis_prompt,
            temperature=0.6
        )

        return final_response if success else "\n\n".join(results)

# Funcao de autenticacao
def authenticate(username: str, password: str,
                 users: Optional[Dict[str, str]] = None) -> Tuple[bool, str]:
    """Check *username*/*password* against the user table.

    Args:
        username: Name selected in the login form (may be ``None``).
        password: Password typed by the user (may be ``None``).
        users: Mapping of user name to password; defaults to ``USERS``.

    Returns:
        ``(True, welcome_message)`` on success, otherwise
        ``(False, error_message)``.
    """
    invalid = (False, "Credenciais invalidas. Tente novamente.")

    registry = USERS if users is None else users
    stored = registry.get(username) if isinstance(username, str) else None

    # An account whose password is not configured (empty string) must never
    # be usable; otherwise an empty password would log in.
    if not stored or not isinstance(password, str):
        return invalid

    # Constant-time comparison; bytes are used because compare_digest only
    # accepts ASCII when given str.
    if hmac.compare_digest(stored.encode("utf-8"), password.encode("utf-8")):
        return True, f"Login realizado com sucesso! Bem-vindo(a), {username}!"
    return invalid

# Interface Gradio LIMPA E FOCADA
def create_interface():
    """Build the Gradio application.

    Returns a one-field error page (``gr.Interface``) when the optional
    dependencies or the API key are missing; otherwise a ``gr.Blocks`` app
    with a login section and a chat section. The chat section is only made
    visible after a successful login, and chat requests are ignored while
    the per-session ``authenticated`` state is false.
    """
    
    if not DEPENDENCIES_OK:
        return gr.Interface(
            fn=lambda: "Erro: Dependencias nao instaladas (groq, tiktoken)",
            inputs=[],
            outputs="text",
            title="Erro de Configuracao"
        )
    
    if not GROQ_API_KEY:
        return gr.Interface(
            fn=lambda: "ERRO: GROQ_API_KEY nao configurada!",
            inputs=[],
            outputs="text",
            title="Erro de Configuracao"
        )
    
    # Core system, shared by every browser session of this app
    system = PenseAICore()
    
    # Custom CSS for the page layout
    custom_css = """
    .main-container {
        max-width: 1000px;
        margin: 0 auto;
        padding: 20px;
    }
    .login-container {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        border-radius: 15px;
        padding: 30px;
        margin: 20px 0;
        box-shadow: 0 10px 30px rgba(0,0,0,0.2);
    }
    .chat-container {
        background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);
        border-radius: 15px;
        padding: 20px;
        margin: 20px 0;
        box-shadow: 0 8px 25px rgba(0,0,0,0.15);
    }
    """
    
    with gr.Blocks(
        title="PenseAI - Chat Inteligente", 
        theme=gr.themes.Soft(),
        css=custom_css
    ) as demo:
        
        # Page header
        gr.HTML("""
        <div style="text-align: center; padding: 20px; background: linear-gradient(90deg, #4facfe 0%, #00f2fe 100%); border-radius: 15px; margin-bottom: 20px;">
            <h1 style="color: white; margin: 0; font-size: 2.5em; text-shadow: 2px 2px 4px rgba(0,0,0,0.3);">
                PenseAI - Chat Inteligente
            </h1>
            <p style="color: rgba(255,255,255,0.9); margin: 10px 0 0 0; font-size: 1.2em;">
                Roteamento Automatico + Fluxo Complexo + Memoria Contextual
            </p>
        </div>
        """)
        
        # Per-session state
        authenticated = gr.State(False)
        current_user = gr.State("")
        
        # Login section
        with gr.Group(elem_classes=["login-container"]) as login_section:
            gr.HTML("""
            <div style="text-align: center; margin-bottom: 20px;">
                <h2 style="color: white; margin: 0;">Acesso ao Sistema</h2>
                <p style="color: rgba(255,255,255,0.8); margin: 5px 0 0 0;">Faca login para acessar o chat</p>
            </div>
            """)
            
            with gr.Row():
                # Empty side columns center the form
                with gr.Column(scale=1):
                    pass
                with gr.Column(scale=3):
                    username_input = gr.Dropdown(
                        choices=list(USERS.keys()),
                        label="Usuario",
                        value=None
                    )
                    password_input = gr.Textbox(
                        label="Senha",
                        type="password",
                        max_lines=1
                    )
                    login_button = gr.Button("Entrar", variant="primary", size="lg")
                    login_status = gr.HTML("")
                with gr.Column(scale=1):
                    pass
        
        # Main (chat) section, hidden until login succeeds
        with gr.Group(visible=False, elem_classes=["chat-container"]) as main_section:
            
            # Chat header
            with gr.Row():
                with gr.Column(scale=3):
                    gr.HTML("""
                    <div style="color: white;">
                        <h2 style="margin: 0;">Chat com Roteamento e Memoria</h2>
                        <p style="margin: 5px 0 0 0; opacity: 0.9;">Sistema inteligente que lembra do contexto</p>
                    </div>
                    """)
                with gr.Column(scale=1):
                    logout_button = gr.Button("Sair", variant="secondary", size="sm")
            
            # Chat area
            chatbot = gr.Chatbot(
                height=500,
                show_label=False,
                container=True,
                bubble_full_width=False
            )
            
            # Message input
            with gr.Row():
                text_input = gr.Textbox(
                    label="",
                    placeholder="Digite sua mensagem... (o sistema lembra do contexto)",
                    scale=5,
                    max_lines=3,
                    container=False
                )
                send_button = gr.Button("Enviar", variant="primary", scale=1, size="lg")
            
            # Extra controls
            with gr.Row():
                clear_button = gr.Button("Limpar Chat", variant="secondary")
            
            # Static description of the routing categories
            gr.HTML("""
            <div style="color: white; text-align: center; background: rgba(255,255,255,0.1); border-radius: 10px; padding: 15px; margin-top: 15px;">
                <h3 style="margin: 0 0 10px 0;">Sistema de Roteamento Inteligente:</h3>
                <div style="display: flex; justify-content: space-around; flex-wrap: wrap;">
                    <div style="margin: 5px; padding: 10px; background: rgba(255,255,255,0.1); border-radius: 8px; min-width: 180px;">
                        <strong>Chat Geral</strong><br>
                        <small>Conversas e perguntas simples</small>
                    </div>
                    <div style="margin: 5px; padding: 10px; background: rgba(255,255,255,0.1); border-radius: 8px; min-width: 180px;">
                        <strong>Pesquisa Simples</strong><br>
                        <small>Informacoes atuais basicas</small>
                    </div>
                    <div style="margin: 5px; padding: 10px; background: rgba(255,255,255,0.1); border-radius: 8px; min-width: 180px;">
                        <strong>Pesquisa Complexa</strong><br>
                        <small>Analises profundas</small>
                    </div>
                    <div style="margin: 5px; padding: 10px; background: rgba(255,255,255,0.1); border-radius: 8px; min-width: 180px;">
                        <strong>Fluxo Complexo</strong><br>
                        <small>Decomposicao de tarefas</small>
                    </div>
                </div>
                <p style="margin: 15px 0 0 0; opacity: 0.8;">
                    <strong>Memoria:</strong> Lembra das ultimas 20 conversas e resume automaticamente conversas antigas
                </p>
            </div>
            """)
        
        # Event handlers
        def handle_login(username, password):
            """Validate credentials and switch sections on success.

            The last returned value always clears the password field, so the
            typed password does not linger in the (hidden) login form.
            """
            success, message = authenticate(username, password)
            if success:
                status_html = f"""
                <div style="color: #4CAF50; text-align: center; padding: 15px; background: rgba(76, 175, 80, 0.1); border-radius: 10px; margin-top: 15px;">
                    <strong>{message}</strong>
                </div>
                """
                return (
                    gr.update(visible=False),
                    gr.update(visible=True),
                    status_html,
                    True,
                    username,
                    ""
                )
            else:
                status_html = f"""
                <div style="color: #f44336; text-align: center; padding: 15px; background: rgba(244, 67, 54, 0.1); border-radius: 10px; margin-top: 15px;">
                    <strong>{message}</strong>
                </div>
                """
                return (
                    gr.update(visible=True),
                    gr.update(visible=False),
                    status_html,
                    False,
                    "",
                    ""
                )
        
        def handle_logout():
            """Show the login form again and reset session state, chat and password."""
            return (
                gr.update(visible=True),
                gr.update(visible=False),
                "",
                False,
                "",
                [],
                ""
            )
        
        def handle_chat(text, history, auth_state):
            """Process one chat message; ignored when logged out or empty."""
            if not auth_state or not text or not text.strip():
                return history, ""
            
            new_history = system.process_chat(
                text.strip(),
                history if history is not None else []
            )
            return new_history, ""
        
        # Event wiring
        login_button.click(
            handle_login,
            inputs=[username_input, password_input],
            outputs=[login_section, main_section, login_status, authenticated, current_user, password_input]
        )
        
        logout_button.click(
            handle_logout,
            outputs=[login_section, main_section, login_status, authenticated, current_user, chatbot, password_input]
        )
        
        send_button.click(
            handle_chat,
            inputs=[text_input, chatbot, authenticated],
            outputs=[chatbot, text_input]
        )
        
        text_input.submit(
            handle_chat,
            inputs=[text_input, chatbot, authenticated],
            outputs=[chatbot, text_input]
        )
        
        clear_button.click(
            lambda: [],
            outputs=[chatbot]
        )
    
    return demo

# Execucao
if __name__ == "__main__":
    # Script entry point: validate configuration, then start the web server.
    print("===== PenseAI Chat Inteligente =====")
    print("Data:", datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
    
    # SystemExit is raised directly: the exit() helper is injected by the
    # site module for interactive use and is not guaranteed to exist.
    if not GROQ_API_KEY:
        print("ERRO: GROQ_API_KEY nao configurada!")
        raise SystemExit(1)
    
    # At least one user must have a password configured.
    if not any(USERS.values()):
        print("ERRO: Nenhuma senha configurada!")
        raise SystemExit(1)
    
    try:
        demo = create_interface()
        demo.launch(
            server_name="0.0.0.0",
            server_port=7860,
            share=False,
            show_error=True
        )
    except Exception as e:
        print(f"Erro ao iniciar: {e}")
        raise SystemExit(1)