Spaces:

Madras1
/

AetherMap

Sleeping

App Files Community

Madras1 committed on Dec 6, 2025

Commit

68111fb

verified ·

1 Parent(s): 0441477

Update app.py

Browse files

Files changed (1) hide show

app.py +45 -24

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 # ==============================================================================
-#  API do AetherMap — VERSÃO 7.0 (THE CONFIGURABLE COMMAND KILLER)
-#  Backend com RAG Híbrido, Citações Nativas e Stopwords via Arquivo Externo.
 # ==============================================================================
 import numpy as np
@@ -11,6 +11,7 @@ import uuid
 import os
 import json
 import logging
 import nltk
 from nltk.corpus import stopwords
@@ -28,6 +29,10 @@ from sklearn.metrics.pairwise import cosine_similarity
 from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
 from scipy.stats import entropy
 # A Conexão com o Oráculo
 from groq import Groq
@@ -47,6 +52,14 @@ UMAP_N_NEIGHBORS = 30
 # Cache de Sessão (Na memória RAM)
 cache: Dict[str, Any] = {}
 # Inicialização do Cliente Groq
 GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
 try:
@@ -67,7 +80,6 @@ except Exception as e:
 def carregar_stopwords():
     """
     Carrega stop words do NLTK e combina com um arquivo externo 'stopwords.txt'.
-    Isso permite editar a lista de palavras ignoradas sem tocar no código.
     """
     logging.info("Iniciando carregamento de Stop Words...")
@@ -82,7 +94,7 @@ def carregar_stopwords():
     final_stops = set(stopwords.words('portuguese')) | set(stopwords.words('english'))
     logging.info(f"Stopwords base (NLTK) carregadas: {len(final_stops)}")
-    # 2. Base Customizada (Lendo do arquivo stopwords.txt se existir)
     arquivo_custom = "stopwords.txt"
     if os.path.exists(arquivo_custom):
@@ -91,9 +103,7 @@ def carregar_stopwords():
             count_custom = 0
             with open(arquivo_custom, "r", encoding="utf-8") as f:
                 for linha in f:
-                    # Remove comentários (#) e espaços em branco
                     palavra = linha.split('#')[0].strip().lower()
-                    # Só adiciona se não for vazia e tiver mais de 1 letra
                     if palavra and len(palavra) > 1:
                         final_stops.add(palavra)
                         count_custom += 1
@@ -101,9 +111,8 @@ def carregar_stopwords():
         except Exception as e:
             logging.error(f"Erro ao ler '{arquivo_custom}': {e}")
     else:
-        logging.warning(f"Arquivo '{arquivo_custom}' não encontrado no diretório. Usando apenas NLTK.")
-    # Converte para lista para compatibilidade com Scikit-Learn
     lista_final = list(final_stops)
     logging.info(f"Total final de Stop Words ativas: {len(lista_final)}")
     return lista_final
@@ -169,7 +178,6 @@ def calcular_metricas(textos: List[str]) -> Dict[str, Any]:
     logging.info("Calculando métricas globais...")
     if not textos: return {}
-    # Usando a lista global que combinou NLTK + Arquivo TXT
     vectorizer_count = CountVectorizer(stop_words=STOP_WORDS_MULTILINGUAL, max_features=1000)
     vectorizer_tfidf = TfidfVectorizer(stop_words=STOP_WORDS_MULTILINGUAL, max_features=1000)
@@ -223,7 +231,6 @@ def analisar_clusters(df: pd.DataFrame) -> Dict[str, Any]:
         textos_cluster = df[df["cluster"] == cid]["full_text"].tolist()
         if len(textos_cluster) < 2: continue
         try:
-            # Usando a lista global aqui também
             vectorizer = TfidfVectorizer(stop_words=STOP_WORDS_MULTILINGUAL, max_features=1000)
             tfidf_matrix = vectorizer.fit_transform(textos_cluster)
             vocab = vectorizer.get_feature_names_out()
@@ -237,13 +244,18 @@ def analisar_clusters(df: pd.DataFrame) -> Dict[str, Any]:
 # ==============================================================================
-# API FASTAPI
 # ==============================================================================
-app = FastAPI(title="AetherMap API 7.0", version="7.0.0", description="Backend Semantic Search with Reranking & Configurable Stopwords")
 @app.get("/")
 async def root():
-    return {"status": "online", "message": "Aether Map API 7.0 Operacional. Use /docs para interagir."}
 @app.post("/process/")
 async def process_api(n_samples: int = Form(10000), file: UploadFile = File(...)):
@@ -287,10 +299,7 @@ async def process_api(n_samples: int = Form(10000), file: UploadFile = File(...)
 @app.post("/search/")
 async def search_api(query: str = Form(...), job_id: str = Form(...)):
     """
-    ENDPOINT DE BUSCA (RAG Híbrido)
-    1. Retrieval (Bi-Encoder) -> Top 50
-    2. Reranking (Cross-Encoder) -> Top 5
-    3. Generation (Kimi K2) -> Resposta citada [ID: X]
     """
     logging.info(f"Busca: '{query}' [Job: {job_id}]")
     if job_id not in cache:
@@ -304,18 +313,16 @@ async def search_api(query: str = Form(...), job_id: str = Form(...)):
         df = cached_data["df"]
         corpus_embeddings = cached_data["embeddings"]
-        # FASE 1: Varredura Ampla (Cosseno)
         query_embedding = model.encode([query], convert_to_numpy=True)
         similarities = cosine_similarity(query_embedding, corpus_embeddings)[0]
-        # Pega Top 50 candidatos
         top_k_retrieval = 50
         top_indices = np.argsort(similarities)[-top_k_retrieval:][::-1]
         candidate_docs = []
         candidate_indices = []
-        # Filtro de ruído (Cosseno > 0.15)
         for idx in top_indices:
             if similarities[idx] > 0.15:
                 doc_text = df.iloc[int(idx)]["full_text"]
@@ -325,7 +332,7 @@ async def search_api(query: str = Form(...), job_id: str = Form(...)):
         if not candidate_docs:
              return {"summary": "Não foram encontrados documentos relevantes.", "results": []}
-        # FASE 2: Reranking (Cross-Encoder)
         logging.info(f"Reranking {len(candidate_docs)} documentos...")
         rerank_scores = reranker.predict(candidate_docs)
@@ -335,14 +342,12 @@ async def search_api(query: str = Form(...), job_id: str = Form(...)):
             reverse=True
         )
-        # Seleciona Top 5 Campeões
         final_top_k = 5
         final_results = []
         context_parts = []
         for rank, (idx, score) in enumerate(rerank_results[:final_top_k]):
             doc_text = df.iloc[idx]["full_text"]
-            # Montagem do Contexto para Citação
             context_parts.append(f"[ID: {rank+1}] DOCUMENTO:\n{doc_text}\n---------------------")
             final_results.append({
@@ -352,7 +357,7 @@ async def search_api(query: str = Form(...), job_id: str = Form(...)):
                 "citation_id": rank + 1
             })
-        # FASE 3: Geração (Kimi K2)
         summary = ""
         if groq_client:
             context_str = "\n".join(context_parts)
@@ -370,12 +375,21 @@ async def search_api(query: str = Form(...), job_id: str = Form(...)):
             )
             try:
                 chat_completion = groq_client.chat.completions.create(
                     messages=[{"role": "user", "content": rag_prompt}],
                     model="moonshotai/kimi-k2-instruct-0905",
                     temperature=0.1,
                     max_tokens=1024
                 )
                 summary = chat_completion.choices[0].message.content.strip()
             except Exception as e:
                 logging.warning(f"Erro na geração do LLM: {e}")
@@ -425,6 +439,9 @@ async def describe_clusters_api(job_id: str = Form(...)):
             "Responda APENAS o JSON válido.\n\n" + "\n\n".join(prompt_sections)
         )
         chat_completion = groq_client.chat.completions.create(
             messages=[
                 {"role": "system", "content": "JSON Output Only."},
@@ -432,6 +449,10 @@ async def describe_clusters_api(job_id: str = Form(...)):
             ], model="meta-llama/llama-4-maverick-17b-128e-instruct", temperature=0.2,
         )
         response_content = chat_completion.choices[0].message.content
         insights = json.loads(response_content.strip().replace("```json", "").replace("```", ""))
         return {"insights": insights}

 # ==============================================================================
+#  API do AetherMap — VERSÃO 7.1 (OBSERVABILITY EDITION)
+#  Backend com RAG Híbrido, Citações Nativas e Monitoramento Prometheus
 # ==============================================================================
 import numpy as np
 import os
 import json
 import logging
+import time  # Adicionado para medir tempo
 import nltk
 from nltk.corpus import stopwords
 from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
 from scipy.stats import entropy
+# Monitoramento (O Toque da Berta)
+from prometheus_fastapi_instrumentator import Instrumentator
+from prometheus_client import Histogram
 # A Conexão com o Oráculo
 from groq import Groq
 # Cache de Sessão (Na memória RAM)
 cache: Dict[str, Any] = {}
+# Definição de Métricas Customizadas do Prometheus
+# Isso permite separar a latência da sua lógica vs a latência da API externa
+GROQ_LATENCY = Histogram(
+    "groq_api_latency_seconds",
+    "Tempo de resposta da API externa Groq (LLM Generation)",
+    buckets=[0.1, 0.5, 1.0, 2.0, 5.0, 10.0, 20.0]
+)
 # Inicialização do Cliente Groq
 GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
 try:
 def carregar_stopwords():
     """
     Carrega stop words do NLTK e combina com um arquivo externo 'stopwords.txt'.
     """
     logging.info("Iniciando carregamento de Stop Words...")
     final_stops = set(stopwords.words('portuguese')) | set(stopwords.words('english'))
     logging.info(f"Stopwords base (NLTK) carregadas: {len(final_stops)}")
+    # 2. Base Customizada
     arquivo_custom = "stopwords.txt"
     if os.path.exists(arquivo_custom):
             count_custom = 0
             with open(arquivo_custom, "r", encoding="utf-8") as f:
                 for linha in f:
                     palavra = linha.split('#')[0].strip().lower()
                     if palavra and len(palavra) > 1:
                         final_stops.add(palavra)
                         count_custom += 1
         except Exception as e:
             logging.error(f"Erro ao ler '{arquivo_custom}': {e}")
     else:
+        logging.warning(f"Arquivo '{arquivo_custom}' não encontrado. Usando apenas NLTK.")
     lista_final = list(final_stops)
     logging.info(f"Total final de Stop Words ativas: {len(lista_final)}")
     return lista_final
     logging.info("Calculando métricas globais...")
     if not textos: return {}
     vectorizer_count = CountVectorizer(stop_words=STOP_WORDS_MULTILINGUAL, max_features=1000)
     vectorizer_tfidf = TfidfVectorizer(stop_words=STOP_WORDS_MULTILINGUAL, max_features=1000)
         textos_cluster = df[df["cluster"] == cid]["full_text"].tolist()
         if len(textos_cluster) < 2: continue
         try:
             vectorizer = TfidfVectorizer(stop_words=STOP_WORDS_MULTILINGUAL, max_features=1000)
             tfidf_matrix = vectorizer.fit_transform(textos_cluster)
             vocab = vectorizer.get_feature_names_out()
 # ==============================================================================
+# API FASTAPI & INSTRUMENTAÇÃO
 # ==============================================================================
+app = FastAPI(title="AetherMap API 7.1", version="7.1.0", description="Backend Semantic Search + Prometheus Metrics")
+# --- A MÁGICA ACONTECE AQUI ---
+# Isso expõe automaticamente o endpoint /metrics para o Prometheus/Grafana
+Instrumentator().instrument(app).expose(app)
+# ------------------------------
 @app.get("/")
 async def root():
+    return {"status": "online", "message": "Aether Map API 7.1 (Observability Ready)."}
 @app.post("/process/")
 async def process_api(n_samples: int = Form(10000), file: UploadFile = File(...)):
 @app.post("/search/")
 async def search_api(query: str = Form(...), job_id: str = Form(...)):
     """
+    ENDPOINT DE BUSCA (RAG Híbrido) com Monitoramento de Latência
     """
     logging.info(f"Busca: '{query}' [Job: {job_id}]")
     if job_id not in cache:
         df = cached_data["df"]
         corpus_embeddings = cached_data["embeddings"]
+        # FASE 1: Varredura Ampla
         query_embedding = model.encode([query], convert_to_numpy=True)
         similarities = cosine_similarity(query_embedding, corpus_embeddings)[0]
         top_k_retrieval = 50
         top_indices = np.argsort(similarities)[-top_k_retrieval:][::-1]
         candidate_docs = []
         candidate_indices = []
         for idx in top_indices:
             if similarities[idx] > 0.15:
                 doc_text = df.iloc[int(idx)]["full_text"]
         if not candidate_docs:
              return {"summary": "Não foram encontrados documentos relevantes.", "results": []}
+        # FASE 2: Reranking
         logging.info(f"Reranking {len(candidate_docs)} documentos...")
         rerank_scores = reranker.predict(candidate_docs)
             reverse=True
         )
         final_top_k = 5
         final_results = []
         context_parts = []
         for rank, (idx, score) in enumerate(rerank_results[:final_top_k]):
             doc_text = df.iloc[idx]["full_text"]
             context_parts.append(f"[ID: {rank+1}] DOCUMENTO:\n{doc_text}\n---------------------")
             final_results.append({
                 "citation_id": rank + 1
             })
+        # FASE 3: Geração (Groq) com TELEMETRIA
         summary = ""
         if groq_client:
             context_str = "\n".join(context_parts)
             )
             try:
+                # --- INÍCIO DA MEDIÇÃO DA API EXTERNA ---
+                start_time_groq = time.time()
                 chat_completion = groq_client.chat.completions.create(
                     messages=[{"role": "user", "content": rag_prompt}],
                     model="moonshotai/kimi-k2-instruct-0905",
                     temperature=0.1,
                     max_tokens=1024
                 )
+                # Registra o tempo gasto apenas na chamada da API
+                duration = time.time() - start_time_groq
+                GROQ_LATENCY.observe(duration)
+                # --- FIM DA MEDIÇÃO ---
                 summary = chat_completion.choices[0].message.content.strip()
             except Exception as e:
                 logging.warning(f"Erro na geração do LLM: {e}")
             "Responda APENAS o JSON válido.\n\n" + "\n\n".join(prompt_sections)
         )
+        # --- INÍCIO DA MEDIÇÃO DA API EXTERNA ---
+        start_time_groq = time.time()
         chat_completion = groq_client.chat.completions.create(
             messages=[
                 {"role": "system", "content": "JSON Output Only."},
             ], model="meta-llama/llama-4-maverick-17b-128e-instruct", temperature=0.2,
         )
+        duration = time.time() - start_time_groq
+        GROQ_LATENCY.observe(duration)
+        # --- FIM DA MEDIÇÃO ---
         response_content = chat_completion.choices[0].message.content
         insights = json.loads(response_content.strip().replace("```json", "").replace("```", ""))
         return {"insights": insights}