"""
Scalable knowledge base.
========================
Loads the KB from a JSON file and optionally enriches it with retrieval
from ChromaDB (RAG). Keeps the term -> degree [0, 1] interface used by L3/L4.
"""

from __future__ import annotations

import json
import os
import re
from pathlib import Path
from typing import Any, Dict, Optional

# Fallback seed KB: Portuguese terms mapped to membership degrees in [0, 1].
SEED_KNOWLEDGE_BASE: Dict[str, float] = {
    "quente": 0.85, "frio": 0.85, "morno": 0.70, "aquecido": 0.80, "gelado": 0.80,
    "temperatura": 0.90, "graus": 0.88, "escaldante": 0.75, "tépido": 0.65,
    "verdadeiro": 0.95, "falso": 0.95, "contradição": 0.80, "proposição": 0.85,
    "silogismo": 0.75, "conhecimento": 0.90, "inteligência": 0.85, "consciência": 0.70,
    "razão": 0.88, "verdade": 0.92, "água": 0.95, "líquido": 0.90, "h2o": 0.90,
}


def load_kb_from_file(path: str | Path) -> Dict[str, float]:
    """
    Loads a term -> degree [0, 1] dictionary from a JSON file.
    Expected format: {"termo1": 0.9, "termo2": 0.8, ...}
    """
    path = Path(path)
    if not path.exists():
        return {}
    try:
        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)
        if not isinstance(data, dict):
            return {}
        # Keep only numeric degrees and clamp them into [0, 1].
        return {
            k: max(0.0, min(1.0, float(v)))
            for k, v in data.items()
            if isinstance(v, (int, float))
        }
    except Exception:
        return {}
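
# Usage sketch (illustrative only; "data/kb.json" is a hypothetical path):
#
#     kb = load_kb_from_file("data/kb.json")
#     # a file containing {"quente": 0.85, "temperatura": 0.9} yields exactly
#     # that dict; a missing, malformed, or non-dict file yields {}.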


def merge_kb(base: Dict[str, float], extra: Dict[str, float]) -> Dict[str, float]:
    """Merges extra into base; on conflict, extra wins."""
    out = dict(base)
    out.update(extra)
    return out
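
# Example of the conflict rule (extra overrides base):
#
#     merge_kb({"quente": 0.85, "frio": 0.85}, {"quente": 0.90})
#     # -> {"quente": 0.90, "frio": 0.85}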


def enrich_kb_from_chroma(
    query: str,
    chroma_path: str,
    embedding_model: str = "sentence-transformers/all-MiniLM-L6-v2",
    k: int = 5,
    score_weight: float = 0.8,
) -> Dict[str, float]:
    """
    Searches ChromaDB with the query and returns a term -> weight dictionary
    extracted from the retrieved passages (relevant words weighted by score_weight).
    """
    try:
        from langchain_community.vectorstores import Chroma
        from langchain_community.embeddings import HuggingFaceEmbeddings
    except ImportError:
        return {}

    chroma_path = Path(chroma_path)
    if not chroma_path.exists() or not chroma_path.is_dir():
        return {}

    try:
        embeddings = HuggingFaceEmbeddings(model_name=embedding_model)
        vectorstore = Chroma(persist_directory=str(chroma_path), embedding_function=embeddings)
        docs = vectorstore.similarity_search(query, k=k)
    except Exception:
        return {}

    # Accumulate score_weight for every word longer than 2 characters found in
    # the retrieved passages, then normalize so the top-scoring term maps to 1.0.
    term_scores: Dict[str, float] = {}
    for d in docs:
        text = d.page_content if hasattr(d, "page_content") else str(d)
        words = re.findall(r"[a-záàãâéêíóôõúüç]+", text.lower())
        for w in words:
            if len(w) > 2:
                term_scores[w] = term_scores.get(w, 0.0) + score_weight

    if term_scores:
        m = max(term_scores.values())
        term_scores = {t: min(1.0, s / m) for t, s in term_scores.items()}
    return term_scores
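
# Usage sketch (illustrative only; "chroma_db/" is a hypothetical persist
# directory that must already hold an indexed collection):
#
#     extra = enrich_kb_from_chroma("a água está quente?", "chroma_db/", k=5)
#     # extra maps words from the retrieved passages to weights in (0, 1],
#     # with the most frequent word at 1.0; {} means Chroma/embeddings were
#     # unavailable, the path was missing, or nothing was retrieved.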


def get_knowledge_base(
    config: Optional[Dict[str, Any]] = None,
    config_path: Optional[str] = None,
    query_for_rag: Optional[str] = None,
) -> Dict[str, float]:
    """
    Returns the KB to be used by the pipeline.
    - If config has knowledge_base.path, loads from that file.
    - If config has knowledge_base.chroma_path (or agent.vector_db_path) and
      query_for_rag, enriches the KB with retrieval.
    - Fallback: SEED_KNOWLEDGE_BASE.
    """
    PROJECT_ROOT = Path(__file__).resolve().parent
    try:
        from config_loader import load_config, PROJECT_ROOT as _root
        PROJECT_ROOT = _root
        if config is None:
            config = load_config(config_path)
    except Exception:
        pass
    if config is None:
        config = {}

    kb_path = config.get("knowledge_base", {}).get("path", "")
    chroma_path = config.get("knowledge_base", {}).get("chroma_path") or config.get("agent", {}).get("vector_db_path", "")

    # Relative KB paths are resolved against the project root.
    if kb_path and os.path.isabs(kb_path):
        base = load_kb_from_file(kb_path)
    elif kb_path:
        base = load_kb_from_file(PROJECT_ROOT / kb_path)
    else:
        base = dict(SEED_KNOWLEDGE_BASE)

    # An empty or unreadable KB file still falls back to the seed KB.
    if not base:
        base = dict(SEED_KNOWLEDGE_BASE)

    if chroma_path and query_for_rag:
        extra = enrich_kb_from_chroma(
            query_for_rag,
            chroma_path,
            config.get("agent", {}).get("embedding_model", "sentence-transformers/all-MiniLM-L6-v2"),
            k=5,
        )
        base = merge_kb(base, extra)

    return base
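
if __name__ == "__main__":
    # Minimal smoke test / usage sketch. With no config file and no Chroma index
    # available this falls back to SEED_KNOWLEDGE_BASE; the query is only an example.
    kb = get_knowledge_base(query_for_rag="a água está quente?")
    for term in ("quente", "temperatura", "água"):
        print(f"{term}: {kb.get(term, 0.0):.2f}")
    print(f"terms in KB: {len(kb)}")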