Spaces:
Sleeping
Sleeping
| """ | |
| Cogni-Engine v1 — 24/7 Thinking Loop | |
| The autonomous cognitive process that never stops. | |
| 8 phases of continuous reasoning that make the AI smarter over time. | |
| Phases: | |
| 1. INGEST — Scan /data/ folder, parse new JSONL files | |
| 2. CONNECT — Find hidden connections between nodes via similarity | |
| 3. INFER — Transitive & analogical inference to discover new knowledge | |
| 4. ABSTRACT — Cluster similar nodes into higher-level abstractions | |
| 5. STRENGTHEN/WEAKEN — Reinforce used edges, decay unused ones | |
| 6. COMPRESS — Merge redundant nodes, prune dead edges | |
| 7. VALIDATE — Check logical consistency, resolve contradictions | |
| 8. SELF-QUESTION — Generate and answer internal questions to find gaps | |
| """ | |
| import os | |
| import json | |
| import time | |
| import threading | |
| import traceback | |
| from typing import List, Dict, Optional, Tuple | |
| import numpy as np | |
| import config | |
| import utils | |
| from knowledge import KnowledgeGraph, Node, Edge, ReasoningChain | |
| # ═══════════════════════════════════════════════════════════ | |
| # THINKER ENGINE | |
| # ═══════════════════════════════════════════════════════════ | |
| class Thinker: | |
| """ | |
| Autonomous thinking engine. | |
| Runs in a background thread, continuously processing | |
| and enriching the knowledge graph. | |
| """ | |
| def __init__(self, graph: KnowledgeGraph): | |
| self.graph = graph | |
| self._thread: Optional[threading.Thread] = None | |
| self._running = False | |
| self._paused = False | |
| # Thinking state | |
| self._cycle_count = 0 | |
| self._total_cycles = 0 | |
| self._current_phase = "init" | |
| self._phase_index = 0 | |
| self._interval = config.THINKING_INTERVAL_FAST | |
| self._operations_this_cycle = 0 | |
| # File tracking | |
| self._file_checksums: Dict[str, str] = {} | |
| # Phase definitions (ordered) | |
| self._phases = [ | |
| ("ingest", self._phase_ingest), | |
| ("connect", self._phase_connect), | |
| ("infer", self._phase_infer), | |
| ("abstract", self._phase_abstract), | |
| ("strengthen", self._phase_strengthen_weaken), | |
| ("compress", self._phase_compress), | |
| ("validate", self._phase_validate), | |
| ("self_question", self._phase_self_question), | |
| ] | |
| # Metrics | |
| self._metrics = { | |
| "nodes_ingested": 0, | |
| "edges_ingested": 0, | |
| "connections_found": 0, | |
| "inferences_made": 0, | |
| "abstractions_created": 0, | |
| "edges_reinforced": 0, | |
| "edges_decayed": 0, | |
| "nodes_merged": 0, | |
| "edges_pruned": 0, | |
| "nodes_pruned": 0, | |
| "contradictions_resolved": 0, | |
| "self_questions_asked": 0, | |
| "self_questions_answered": 0, | |
| } | |
| # ─────────────────────────────────────────────────── | |
| # LIFECYCLE | |
| # ─────────────────────────────────────────────────── | |
| def start(self): | |
| """Start the thinking loop in a background thread.""" | |
| if self._running: | |
| print("[THINKER] Already running.") | |
| return | |
| # Load previous state | |
| self._load_state() | |
| self._running = True | |
| self._thread = threading.Thread( | |
| target=self._thinking_loop, | |
| name="CogniThinker", | |
| daemon=True | |
| ) | |
| self._thread.start() | |
| print(f"[THINKER] Started. Resuming from cycle {self._total_cycles}.") | |
| def stop(self): | |
| """Stop the thinking loop gracefully.""" | |
| if not self._running: | |
| return | |
| print("[THINKER] Stopping...") | |
| self._running = False | |
| if self._thread: | |
| self._thread.join(timeout=30) | |
| self._save_state() | |
| print("[THINKER] Stopped.") | |
| def pause(self): | |
| """Pause thinking (for heavy API load).""" | |
| self._paused = True | |
| def resume(self): | |
| """Resume thinking.""" | |
| self._paused = False | |
| def is_running(self) -> bool: | |
| return self._running | |
| def current_phase(self) -> str: | |
| return self._current_phase | |
| def total_cycles(self) -> int: | |
| return self._total_cycles | |
| def metrics(self) -> dict: | |
| return dict(self._metrics) | |
| def get_status(self) -> dict: | |
| """Get detailed thinker status.""" | |
| return { | |
| "running": self._running, | |
| "paused": self._paused, | |
| "current_phase": self._current_phase, | |
| "cycle_count": self._cycle_count, | |
| "total_cycles": self._total_cycles, | |
| "interval_seconds": self._interval, | |
| "operations_last_cycle": self._operations_this_cycle, | |
| "metrics": dict(self._metrics) | |
| } | |
| # ─────────────────────────────────────────────────── | |
| # STATE PERSISTENCE | |
| # ─────────────────────────────────────────────────── | |
| def _load_state(self): | |
| """Load thinking state from DB.""" | |
| state = self.graph.memory.load_thinking_state() | |
| self._total_cycles = state.get("total_cycles", 0) | |
| self._cycle_count = state.get("current_cycle", 0) | |
| self._current_phase = state.get("phase", "init") | |
| saved_metrics = state.get("metrics", {}) | |
| if saved_metrics: | |
| for key in self._metrics: | |
| if key in saved_metrics: | |
| self._metrics[key] = saved_metrics[key] | |
| # Load file checksums | |
| self._file_checksums = self.graph.memory.load_file_checksums() | |
| def _save_state(self): | |
| """Save thinking state to DB.""" | |
| self.graph.memory.save_thinking_state({ | |
| "current_cycle": self._cycle_count, | |
| "total_cycles": self._total_cycles, | |
| "cursor_position": "", | |
| "phase": self._current_phase, | |
| "metrics": dict(self._metrics) | |
| }) | |
| # ─────────────────────────────────────────────────── | |
| # MAIN LOOP | |
| # ─────────────────────────────────────────────────── | |
| def _thinking_loop(self): | |
| """ | |
| Main thinking loop. Runs continuously until stopped. | |
| Cycles through all 8 phases, adapting speed based on activity. | |
| """ | |
| print("[THINKER] Thinking loop started.") | |
| while self._running: | |
| try: | |
| # Wait if paused | |
| if self._paused: | |
| time.sleep(1) | |
| continue | |
| # Execute current phase | |
| self._operations_this_cycle = 0 | |
| phase_name, phase_func = self._phases[self._phase_index] | |
| self._current_phase = phase_name | |
| try: | |
| phase_func() | |
| except Exception as e: | |
| print(f"[THINKER] Error in phase '{phase_name}': {e}") | |
| if config.LOG_THINKING_DETAILS: | |
| traceback.print_exc() | |
| # Advance to next phase | |
| self._phase_index = (self._phase_index + 1) % len(self._phases) | |
| # Count cycles (one full rotation = 1 cycle) | |
| if self._phase_index == 0: | |
| self._cycle_count += 1 | |
| self._total_cycles += 1 | |
| # Periodic state save | |
| if self._total_cycles % config.SYNC_INTERVAL_CYCLES == 0: | |
| self._save_state() | |
| self.graph.sync() | |
| # Adaptive speed | |
| self._adapt_speed() | |
| # Log progress periodically | |
| if self._total_cycles % 100 == 0 and config.LOG_THINKING_DETAILS: | |
| stats = self.graph.get_stats() | |
| score = self.graph.get_intelligence_score() | |
| print( | |
| f"[THINKER] Cycle {self._total_cycles}: " | |
| f"nodes={stats['total_nodes']}, " | |
| f"edges={stats['total_edges']}, " | |
| f"inferred={stats['inferred_edges']}, " | |
| f"score={score:.2f}" | |
| ) | |
| # Sleep between phases | |
| time.sleep(self._interval / len(self._phases)) | |
| except Exception as e: | |
| print(f"[THINKER] Loop error: {e}") | |
| traceback.print_exc() | |
| time.sleep(5) # Recovery pause | |
| print("[THINKER] Thinking loop ended.") | |
| def _adapt_speed(self): | |
| """Adjust thinking speed based on activity.""" | |
| if self._operations_this_cycle > config.THINKING_STABILITY_THRESHOLD: | |
| # Active: think faster | |
| self._interval = max( | |
| config.THINKING_INTERVAL_FAST, | |
| self._interval * 0.9 | |
| ) | |
| else: | |
| # Stable: think slower | |
| self._interval = min( | |
| config.THINKING_INTERVAL_SLOW, | |
| self._interval * 1.1 | |
| ) | |
| # ═══════════════════════════════════════════════════ | |
| # PHASE 1: INGEST | |
| # ═══════════════════════════════════════════════════ | |
| def _phase_ingest(self): | |
| """ | |
| Scan /data/ folder for new or changed JSONL files. | |
| Parse entries and create nodes + edges in graph. | |
| """ | |
| if not os.path.exists(config.DATA_DIR): | |
| return | |
| files = [] | |
| for fname in os.listdir(config.DATA_DIR): | |
| if any(fname.endswith(ext) for ext in config.SUPPORTED_DATA_EXTENSIONS): | |
| files.append(fname) | |
| if not files: | |
| return | |
| for fname in files: | |
| filepath = os.path.join(config.DATA_DIR, fname) | |
| try: | |
| with open(filepath, 'r', encoding='utf-8') as f: | |
| content = f.read() | |
| except Exception as e: | |
| print(f"[THINKER/INGEST] Error reading {fname}: {e}") | |
| continue | |
| # Check if file has changed | |
| checksum = utils.hash_file_content(content) | |
| if self._file_checksums.get(fname) == checksum: | |
| continue # File unchanged, skip | |
| print(f"[THINKER/INGEST] Processing file: {fname}") | |
| lines = content.strip().split('\n') | |
| processed = 0 | |
| for line_num, line in enumerate(lines): | |
| if processed >= config.MAX_LINES_PER_INGEST: | |
| break | |
| line = line.strip() | |
| if not line: | |
| continue | |
| try: | |
| entry = json.loads(line) | |
| except json.JSONDecodeError: | |
| continue | |
| self._ingest_entry(entry, source=fname) | |
| processed += 1 | |
| # Mark file as processed | |
| self._file_checksums[fname] = checksum | |
| self.graph.memory.save_file_checksum(fname, checksum, processed) | |
| self._operations_this_cycle += processed | |
| print(f"[THINKER/INGEST] Processed {processed} entries from {fname}") | |
| def _ingest_entry(self, entry: dict, source: str = "data"): | |
| """ | |
| Ingest a single data entry into the knowledge graph. | |
| Creates nodes and edges based on entry type and fields. | |
| """ | |
| entry_type = entry.get("type", "fact") | |
| content = entry.get("content", "").strip() | |
| if not content: | |
| return | |
| tags = entry.get("tags", []) | |
| confidence = entry.get("confidence", config.DATA_KNOWLEDGE_CONFIDENCE) | |
| domain = entry.get("domain", "") | |
| related = entry.get("related", []) | |
| # ── Create main content node ── | |
| main_node = self.graph.add_node( | |
| content=content, | |
| node_type=self._map_entry_type_to_node_type(entry_type), | |
| source="data", | |
| weight=confidence, | |
| tags=tags | |
| ) | |
| if not main_node: | |
| return | |
| self._metrics["nodes_ingested"] += 1 | |
| # ── Handle domain as a concept node ── | |
| if domain: | |
| domain_node = self.graph.add_node( | |
| content=domain, | |
| node_type="concept", | |
| source="data", | |
| weight=0.8 | |
| ) | |
| if domain_node: | |
| self.graph.add_edge( | |
| from_id=main_node.id, | |
| to_id=domain_node.id, | |
| relation="part_of", | |
| confidence=0.8, | |
| source="data" | |
| ) | |
| self._metrics["edges_ingested"] += 1 | |
| # ── Handle related topics ── | |
| for rel_topic in related: | |
| rel_node = self.graph.add_node( | |
| content=rel_topic, | |
| node_type="concept", | |
| source="data", | |
| weight=0.7 | |
| ) | |
| if rel_node: | |
| self.graph.add_edge( | |
| from_id=main_node.id, | |
| to_id=rel_node.id, | |
| relation="related_to", | |
| confidence=0.7, | |
| source="data" | |
| ) | |
| self._metrics["edges_ingested"] += 1 | |
| # ── Type-specific handling ── | |
| self._ingest_type_specific(entry, main_node, entry_type) | |
| def _ingest_type_specific(self, entry: dict, main_node: Node, entry_type: str): | |
| """Handle type-specific fields for data entries.""" | |
| # ── relation type: explicit from/to ── | |
| if entry_type == "relation": | |
| from_content = entry.get("from", "") | |
| to_content = entry.get("to", "") | |
| relation = entry.get("relation", "related_to") | |
| if from_content and to_content: | |
| from_node = self.graph.add_node( | |
| content=from_content, node_type="entity", source="data" | |
| ) | |
| to_node = self.graph.add_node( | |
| content=to_content, node_type="entity", source="data" | |
| ) | |
| if from_node and to_node: | |
| self.graph.add_edge( | |
| from_id=from_node.id, | |
| to_id=to_node.id, | |
| relation=relation, | |
| confidence=entry.get("confidence", 0.9), | |
| source="data" | |
| ) | |
| self._metrics["edges_ingested"] += 1 | |
| # ── definition / term: term node + defined_as edge ── | |
| elif entry_type in ("definition", "term"): | |
| term = entry.get("term", "") | |
| if term: | |
| term_node = self.graph.add_node( | |
| content=term, node_type="entity", source="data" | |
| ) | |
| if term_node: | |
| self.graph.add_edge( | |
| from_id=term_node.id, | |
| to_id=main_node.id, | |
| relation="defined_as", | |
| confidence=0.95, | |
| source="data" | |
| ) | |
| self._metrics["edges_ingested"] += 1 | |
| # ── cause_effect: cause → effect ── | |
| elif entry_type == "cause_effect": | |
| cause = entry.get("cause", "") | |
| effect = entry.get("effect", "") | |
| if cause and effect: | |
| cause_node = self.graph.add_node( | |
| content=cause, node_type="concept", source="data" | |
| ) | |
| effect_node = self.graph.add_node( | |
| content=effect, node_type="concept", source="data" | |
| ) | |
| if cause_node and effect_node: | |
| self.graph.add_edge( | |
| from_id=cause_node.id, | |
| to_id=effect_node.id, | |
| relation="causes", | |
| confidence=entry.get("confidence", 0.85), | |
| source="data" | |
| ) | |
| self._metrics["edges_ingested"] += 1 | |
| # ── hierarchy: parent → children ── | |
| elif entry_type == "hierarchy": | |
| parent = entry.get("parent", "") | |
| children = entry.get("children", []) | |
| if parent and children: | |
| parent_node = self.graph.add_node( | |
| content=parent, node_type="concept", source="data" | |
| ) | |
| if parent_node: | |
| for child in children: | |
| child_node = self.graph.add_node( | |
| content=child, node_type="entity", source="data" | |
| ) | |
| if child_node: | |
| self.graph.add_edge( | |
| from_id=child_node.id, | |
| to_id=parent_node.id, | |
| relation="is_a", | |
| confidence=0.9, | |
| source="data" | |
| ) | |
| self._metrics["edges_ingested"] += 1 | |
| # ── comparison: subject_a ↔ subject_b ── | |
| elif entry_type == "comparison": | |
| subj_a = entry.get("subject_a", "") | |
| subj_b = entry.get("subject_b", "") | |
| if subj_a and subj_b: | |
| node_a = self.graph.add_node( | |
| content=subj_a, node_type="entity", source="data" | |
| ) | |
| node_b = self.graph.add_node( | |
| content=subj_b, node_type="entity", source="data" | |
| ) | |
| if node_a and node_b: | |
| self.graph.add_edge( | |
| from_id=node_a.id, to_id=node_b.id, | |
| relation="related_to", confidence=0.8, source="data" | |
| ) | |
| self.graph.add_edge( | |
| from_id=node_b.id, to_id=node_a.id, | |
| relation="related_to", confidence=0.8, source="data" | |
| ) | |
| self._metrics["edges_ingested"] += 2 | |
| # ── qa: question → answer ── | |
| elif entry_type == "qa": | |
| question = entry.get("question", "") | |
| answer = entry.get("answer", "") | |
| if question and answer: | |
| q_node = self.graph.add_node( | |
| content=question, node_type="concept", source="data" | |
| ) | |
| a_node = self.graph.add_node( | |
| content=answer, node_type="fact", source="data" | |
| ) | |
| if q_node and a_node: | |
| self.graph.add_edge( | |
| from_id=q_node.id, to_id=a_node.id, | |
| relation="defined_as", confidence=0.9, source="data" | |
| ) | |
| self._metrics["edges_ingested"] += 1 | |
| # ── synonym: bidirectional synonym_of ── | |
| elif entry_type == "synonym": | |
| terms = entry.get("terms", []) | |
| for i in range(len(terms)): | |
| for j in range(i + 1, len(terms)): | |
| node_i = self.graph.add_node( | |
| content=terms[i], node_type="entity", source="data" | |
| ) | |
| node_j = self.graph.add_node( | |
| content=terms[j], node_type="entity", source="data" | |
| ) | |
| if node_i and node_j: | |
| self.graph.add_edge( | |
| from_id=node_i.id, to_id=node_j.id, | |
| relation="synonym_of", confidence=0.9, source="data" | |
| ) | |
| self.graph.add_edge( | |
| from_id=node_j.id, to_id=node_i.id, | |
| relation="synonym_of", confidence=0.9, source="data" | |
| ) | |
| self._metrics["edges_ingested"] += 2 | |
| # ── process / procedure: sequential steps ── | |
| elif entry_type in ("process", "procedure"): | |
| steps = entry.get("steps", []) | |
| title = entry.get("title", "") | |
| if title: | |
| title_node = self.graph.add_node( | |
| content=title, node_type="concept", source="data" | |
| ) | |
| if title_node: | |
| self.graph.add_edge( | |
| from_id=main_node.id, to_id=title_node.id, | |
| relation="defined_as", confidence=0.85, source="data" | |
| ) | |
| prev_step_node = None | |
| for step_text in steps: | |
| step_node = self.graph.add_node( | |
| content=step_text, node_type="fact", source="data" | |
| ) | |
| if step_node: | |
| self.graph.add_edge( | |
| from_id=step_node.id, to_id=main_node.id, | |
| relation="part_of", confidence=0.8, source="data" | |
| ) | |
| if prev_step_node: | |
| self.graph.add_edge( | |
| from_id=prev_step_node.id, to_id=step_node.id, | |
| relation="follows", confidence=0.9, source="data" | |
| ) | |
| self._metrics["edges_ingested"] += 1 | |
| prev_step_node = step_node | |
| self._metrics["edges_ingested"] += 1 | |
| # ── quote: author + content ── | |
| elif entry_type == "quote": | |
| author = entry.get("author", "") | |
| if author: | |
| author_node = self.graph.add_node( | |
| content=author, node_type="entity", source="data" | |
| ) | |
| if author_node: | |
| self.graph.add_edge( | |
| from_id=main_node.id, to_id=author_node.id, | |
| relation="created_by", confidence=0.9, source="data" | |
| ) | |
| self._metrics["edges_ingested"] += 1 | |
| # ── event: actors, location, date ── | |
| elif entry_type == "event": | |
| actors = entry.get("actors", []) | |
| location = entry.get("location", "") | |
| for actor in actors: | |
| actor_node = self.graph.add_node( | |
| content=actor, node_type="entity", source="data" | |
| ) | |
| if actor_node: | |
| self.graph.add_edge( | |
| from_id=actor_node.id, to_id=main_node.id, | |
| relation="related_to", confidence=0.85, source="data" | |
| ) | |
| self._metrics["edges_ingested"] += 1 | |
| if location: | |
| loc_node = self.graph.add_node( | |
| content=location, node_type="entity", source="data" | |
| ) | |
| if loc_node: | |
| self.graph.add_edge( | |
| from_id=main_node.id, to_id=loc_node.id, | |
| relation="located_in", confidence=0.85, source="data" | |
| ) | |
| self._metrics["edges_ingested"] += 1 | |
| # ── analogy: subject ↔ analogy ── | |
| elif entry_type == "analogy": | |
| subject = entry.get("subject", "") | |
| analogy_text = entry.get("analogy", "") | |
| if subject and analogy_text: | |
| subj_node = self.graph.add_node( | |
| content=subject, node_type="concept", source="data" | |
| ) | |
| ana_node = self.graph.add_node( | |
| content=analogy_text, node_type="concept", source="data" | |
| ) | |
| if subj_node and ana_node: | |
| self.graph.add_edge( | |
| from_id=subj_node.id, to_id=ana_node.id, | |
| relation="analogous_to", confidence=0.75, source="data" | |
| ) | |
| self._metrics["edges_ingested"] += 1 | |
| # ── Paragraph: extract keywords as connected entities ── | |
| elif entry_type == "paragraph": | |
| keywords = utils.extract_keywords(entry.get("content", ""), max_keywords=10) | |
| for kw in keywords: | |
| kw_node = self.graph.add_node( | |
| content=kw, node_type="concept", source="data", | |
| weight=0.6 | |
| ) | |
| if kw_node: | |
| self.graph.add_edge( | |
| from_id=main_node.id, to_id=kw_node.id, | |
| relation="related_to", confidence=0.6, source="data" | |
| ) | |
| self._metrics["edges_ingested"] += 1 | |
| def _map_entry_type_to_node_type(self, entry_type: str) -> str: | |
| """Map data entry type to graph node type.""" | |
| type_map = { | |
| "fact": "fact", | |
| "definition": "definition", | |
| "explanation": "fact", | |
| "description": "fact", | |
| "property": "fact", | |
| "statistic": "fact", | |
| "measurement": "fact", | |
| "term": "definition", | |
| "abbreviation": "definition", | |
| "jargon": "definition", | |
| "slang": "definition", | |
| "idiom": "definition", | |
| "synonym": "entity", | |
| "antonym": "entity", | |
| "quote": "fact", | |
| "rule": "fact", | |
| "example": "fact", | |
| "analogy": "concept", | |
| "opinion": "fact", | |
| "paragraph": "fact", | |
| "relation": "fact", | |
| "cause_effect": "fact", | |
| "comparison": "fact", | |
| "hierarchy": "concept", | |
| "composition": "concept", | |
| "dependency": "fact", | |
| "contradiction": "fact", | |
| "timeline": "fact", | |
| "process": "fact", | |
| "procedure": "fact", | |
| "event": "fact", | |
| "history": "fact", | |
| "change": "fact", | |
| "qa": "fact", | |
| } | |
| if entry_type.startswith("custom_"): | |
| return "fact" | |
| return type_map.get(entry_type, "fact") | |
| # ═══════════════════════════════════════════════════ | |
| # PHASE 2: CONNECT | |
| # ═══════════════════════════════════════════════════ | |
| def _phase_connect(self): | |
| """ | |
| Find hidden connections between nodes. | |
| Nodes with high vector similarity but no edge → create edge. | |
| Focuses on least-connected nodes first. | |
| """ | |
| candidates = self.graph.get_least_connected_nodes( | |
| limit=config.THINKING_BATCH_SIZE | |
| ) | |
| connections_made = 0 | |
| for node in candidates: | |
| if connections_made >= config.THINKING_BATCH_SIZE: | |
| break | |
| # Find similar nodes | |
| similar = self.graph.find_similar_to_node( | |
| node.id, | |
| top_k=10, | |
| min_similarity=config.SIMILARITY_THRESHOLD | |
| ) | |
| for similar_node, similarity in similar: | |
| # Skip if edge already exists (either direction) | |
| if self.graph.edge_exists(node.id, similar_node.id): | |
| continue | |
| if self.graph.edge_exists(similar_node.id, node.id): | |
| continue | |
| # Create new connection | |
| edge = self.graph.add_edge( | |
| from_id=node.id, | |
| to_id=similar_node.id, | |
| relation="similar_to", | |
| weight=similarity, | |
| confidence=similarity * 0.9, | |
| source="inferred" | |
| ) | |
| if edge: | |
| connections_made += 1 | |
| self._operations_this_cycle += 1 | |
| if connections_made > 0: | |
| self._metrics["connections_found"] += connections_made | |
| if config.LOG_THINKING_DETAILS: | |
| print(f"[THINKER/CONNECT] Found {connections_made} new connections") | |
| # ═══════════════════════════════════════════════════ | |
| # PHASE 3: INFER | |
| # ═══════════════════════════════════════════════════ | |
| def _phase_infer(self): | |
| """ | |
| Transitive and analogical inference. | |
| If A→B and B→C, maybe A→C. | |
| Discovers knowledge not present in original data. | |
| """ | |
| inferences_made = 0 | |
| # ── Transitive Inference ── | |
| inferences_made += self._transitive_inference() | |
| # ── Analogical Inference ── | |
| inferences_made += self._analogical_inference() | |
| if inferences_made > 0: | |
| self._metrics["inferences_made"] += inferences_made | |
| self._operations_this_cycle += inferences_made | |
| if config.LOG_THINKING_DETAILS: | |
| print(f"[THINKER/INFER] Made {inferences_made} inferences") | |
| def _transitive_inference(self) -> int: | |
| """ | |
| If A→B and B→C exist, infer A→C with decayed confidence. | |
| Limited per cycle to prevent explosion. | |
| """ | |
| count = 0 | |
| # Sample a batch of nodes to check | |
| node_ids = list(self.graph.nodes.keys()) | |
| if len(node_ids) > config.THINKING_BATCH_SIZE: | |
| sample_indices = np.random.choice( | |
| len(node_ids), config.THINKING_BATCH_SIZE, replace=False | |
| ) | |
| node_ids = [node_ids[i] for i in sample_indices] | |
| for node_a_id in node_ids: | |
| if count >= config.MAX_INFERENCES_PER_CYCLE: | |
| break | |
| edges_ab = self.graph.get_edges_from(node_a_id) | |
| for edge_ab in edges_ab: | |
| if count >= config.MAX_INFERENCES_PER_CYCLE: | |
| break | |
| node_b_id = edge_ab.to_node | |
| edges_bc = self.graph.get_edges_from(node_b_id) | |
| for edge_bc in edges_bc: | |
| node_c_id = edge_bc.to_node | |
| # Skip self-loops and existing edges | |
| if node_c_id == node_a_id: | |
| continue | |
| if self.graph.edge_exists(node_a_id, node_c_id): | |
| continue | |
| # Calculate inferred confidence | |
| inferred_confidence = ( | |
| edge_ab.confidence * | |
| edge_bc.confidence * | |
| config.INFERENCE_DECAY | |
| ) | |
| if inferred_confidence < config.INFERENCE_CONFIDENCE_MIN: | |
| continue | |
| # Determine inferred relation | |
| inferred_relation = self._infer_relation( | |
| edge_ab.relation, edge_bc.relation | |
| ) | |
| # Create inferred edge | |
| edge = self.graph.add_edge( | |
| from_id=node_a_id, | |
| to_id=node_c_id, | |
| relation=inferred_relation, | |
| weight=inferred_confidence, | |
| confidence=inferred_confidence, | |
| source="inferred" | |
| ) | |
| if edge: | |
| count += 1 | |
| if count >= config.MAX_INFERENCES_PER_CYCLE: | |
| break | |
| return count | |
| def _analogical_inference(self) -> int: | |
| """ | |
| If A relates to B like C relates to ?, find ? using vector arithmetic. | |
| A - B ≈ C - ? → ? ≈ C - A + B | |
| """ | |
| count = 0 | |
| # Find pairs with strong, specific relations | |
| strong_edges = [ | |
| e for e in self.graph.edges.values() | |
| if e.confidence > 0.7 and e.relation not in ("similar_to", "related_to") | |
| ] | |
| if len(strong_edges) < 2: | |
| return 0 | |
| # Sample pairs to compare | |
| sample_size = min(20, len(strong_edges)) | |
| sampled = np.random.choice(len(strong_edges), sample_size, replace=False) | |
| for i in sampled: | |
| if count >= config.MAX_INFERENCES_PER_CYCLE // 4: | |
| break | |
| edge = strong_edges[i] | |
| node_a = self.graph.get_node(edge.from_node) | |
| node_b = self.graph.get_node(edge.to_node) | |
| if not node_a or not node_b: | |
| continue | |
| # Find nodes similar to A (potential C candidates) | |
| similar_to_a = self.graph.find_similar_to_node( | |
| node_a.id, top_k=5, | |
| min_similarity=config.ANALOGICAL_SIMILARITY_MIN | |
| ) | |
| for node_c, sim_ac in similar_to_a: | |
| if node_c.id == node_b.id: | |
| continue | |
| # Vector arithmetic: ? ≈ C - A + B | |
| target_vector = utils.normalize( | |
| node_c.vector - node_a.vector + node_b.vector | |
| ) | |
| # Find nearest to target vector | |
| candidates = self.graph.find_similar_nodes( | |
| target_vector, top_k=3, | |
| min_similarity=config.ANALOGICAL_SIMILARITY_MIN, | |
| exclude_ids={node_a.id, node_b.id, node_c.id} | |
| ) | |
| for candidate_node, sim_score in candidates: | |
| if self.graph.edge_exists(node_c.id, candidate_node.id, edge.relation): | |
| continue | |
| inferred_confidence = sim_ac * sim_score * config.INFERENCE_DECAY | |
| if inferred_confidence < config.INFERENCE_CONFIDENCE_MIN: | |
| continue | |
| new_edge = self.graph.add_edge( | |
| from_id=node_c.id, | |
| to_id=candidate_node.id, | |
| relation=edge.relation, | |
| weight=inferred_confidence, | |
| confidence=inferred_confidence, | |
| source="inferred" | |
| ) | |
| if new_edge: | |
| count += 1 | |
| break # One analogy per C | |
| return count | |
| def _infer_relation(self, rel_ab: str, rel_bc: str) -> str: | |
| """Determine relation type for transitive inference A→C from A→B→C.""" | |
| # Same relation → same | |
| if rel_ab == rel_bc: | |
| return rel_ab | |
| # Specific known transitive patterns | |
| transitive_map = { | |
| ("is_a", "is_a"): "is_a", | |
| ("part_of", "part_of"): "part_of", | |
| ("is_a", "has"): "has", | |
| ("is_a", "located_in"): "located_in", | |
| ("part_of", "located_in"): "located_in", | |
| ("is_a", "used_for"): "used_for", | |
| ("causes", "causes"): "causes", | |
| ("follows", "follows"): "follows", | |
| ("requires", "requires"): "requires", | |
| ("instance_of", "is_a"): "instance_of", | |
| } | |
| return transitive_map.get((rel_ab, rel_bc), "inferred_relation") | |
| # ═══════════════════════════════════════════════════ | |
| # PHASE 4: ABSTRACT | |
| # ═══════════════════════════════════════════════════ | |
| def _phase_abstract(self): | |
| """ | |
| Cluster similar nodes into abstraction nodes. | |
| Creates higher-level concepts from concrete instances. | |
| Recursive: abstractions can be abstracted further. | |
| """ | |
| # Skip if graph is too small | |
| if len(self.graph.nodes) < config.CLUSTER_MIN_SIZE * 2: | |
| return | |
| abstractions_created = 0 | |
| # ── Level 1: Concrete → Abstraction ── | |
| abstractions_created += self._create_abstractions( | |
| source_types=["entity", "fact", "concept"], | |
| abstraction_type="abstraction" | |
| ) | |
| # ── Level 2+: Abstraction → Meta-Abstraction ── | |
| if self._total_cycles % (config.COMPRESS_INTERVAL * 2) == 0: | |
| existing_abstractions = self.graph.get_nodes_by_type("abstraction") | |
| if len(existing_abstractions) >= config.CLUSTER_MIN_SIZE * 2: | |
| abstractions_created += self._create_abstractions( | |
| source_types=["abstraction"], | |
| abstraction_type="meta_abstraction" | |
| ) | |
| if abstractions_created > 0: | |
| self._metrics["abstractions_created"] += abstractions_created | |
| self._operations_this_cycle += abstractions_created | |
| if config.LOG_THINKING_DETAILS: | |
| print(f"[THINKER/ABSTRACT] Created {abstractions_created} abstractions") | |
| def _create_abstractions( | |
| self, | |
| source_types: List[str], | |
| abstraction_type: str | |
| ) -> int: | |
| """Create abstraction nodes from clusters of source-typed nodes.""" | |
| # Gather source nodes | |
| source_nodes = [] | |
| for stype in source_types: | |
| source_nodes.extend(self.graph.get_nodes_by_type(stype)) | |
| if len(source_nodes) < config.CLUSTER_MIN_SIZE: | |
| return 0 | |
| # Build vector matrix for clustering | |
| vectors = np.array( | |
| [n.vector for n in source_nodes], | |
| dtype=np.float32 | |
| ) | |
| node_ids = [n.id for n in source_nodes] | |
| # Find natural clusters | |
| clusters = utils.find_natural_clusters( | |
| vectors, | |
| similarity_threshold=config.CLUSTER_SIMILARITY_INTRA | |
| ) | |
| count = 0 | |
| for cluster_indices in clusters: | |
| if count >= config.THINKING_BATCH_SIZE // 2: | |
| break | |
| # Check if this cluster already has an abstraction | |
| member_ids = [node_ids[i] for i in cluster_indices] | |
| already_abstracted = False | |
| for mid in member_ids: | |
| for edge in self.graph.get_edges_from(mid): | |
| if edge.relation == "instance_of": | |
| already_abstracted = True | |
| break | |
| if already_abstracted: | |
| break | |
| if already_abstracted: | |
| continue | |
| # Compute centroid | |
| cluster_vectors = vectors[cluster_indices] | |
| centroid = utils.vector_mean(list(cluster_vectors)) | |
| # Generate label from common keywords | |
| all_content = " ".join( | |
| self.graph.nodes[node_ids[i]].content | |
| for i in cluster_indices | |
| if node_ids[i] in self.graph.nodes | |
| ) | |
| keywords = utils.extract_keywords(all_content, max_keywords=5) | |
| label = " + ".join(keywords[:3]) if keywords else "abstract_concept" | |
| # Check depth limit | |
| current_depth = 0 | |
| if abstraction_type == "meta_abstraction": | |
| for i in cluster_indices: | |
| nid = node_ids[i] | |
| depth = self.graph._get_abstraction_depth(nid) | |
| current_depth = max(current_depth, depth) | |
| if current_depth >= config.MAX_ABSTRACTION_DEPTH: | |
| continue | |
| # Create abstraction node | |
| abs_node = self.graph.add_node( | |
| content=f"[{abstraction_type}] {label}", | |
| node_type=abstraction_type, | |
| source="inferred", | |
| vector=utils.normalize(centroid), | |
| weight=config.ABSTRACTION_MIN_CONFIDENCE | |
| ) | |
| if not abs_node: | |
| continue | |
| # Link members to abstraction | |
| for i in cluster_indices: | |
| member_id = node_ids[i] | |
| self.graph.add_edge( | |
| from_id=member_id, | |
| to_id=abs_node.id, | |
| relation="instance_of", | |
| weight=0.8, | |
| confidence=0.8, | |
| source="inferred" | |
| ) | |
| count += 1 | |
| return count | |
| # ═══════════════════════════════════════════════════ | |
| # PHASE 5: STRENGTHEN / WEAKEN | |
| # ═══════════════════════════════════════════════════ | |
| def _phase_strengthen_weaken(self): | |
| """ | |
| Strengthen edges that are frequently used. | |
| Weaken edges that haven't been used. | |
| Nodes with more connections get slight weight boost. | |
| """ | |
| # ── Weaken unused edges (periodic) ── | |
| if self._total_cycles % config.WEIGHT_DECAY_INTERVAL_CYCLES == 0: | |
| decay_count = 0 | |
| edges = list(self.graph.edges.values()) | |
| for edge in edges: | |
| if edge.used_count == 0 and edge.source == "inferred": | |
| self.graph.decay_edge(edge.id) | |
| decay_count += 1 | |
| self._metrics["edges_decayed"] += decay_count | |
| self._operations_this_cycle += decay_count | |
| if config.LOG_THINKING_DETAILS and decay_count > 0: | |
| print(f"[THINKER/WEAKEN] Decayed {decay_count} unused edges") | |
| # ── Boost well-connected nodes ── | |
| nodes = list(self.graph.nodes.values()) | |
| sample_size = min(config.THINKING_BATCH_SIZE, len(nodes)) | |
| if sample_size == 0: | |
| return | |
| sampled = np.random.choice(len(nodes), sample_size, replace=False) | |
| reinforced = 0 | |
| for idx in sampled: | |
| node = nodes[idx] | |
| if node.connections > 3: | |
| bonus = config.NODE_WEIGHT_CONNECTION_BONUS * min(node.connections, 20) | |
| new_weight = min(node.weight + bonus, config.WEIGHT_MAX) | |
| if new_weight != node.weight: | |
| self.graph.update_node_weight(node.id, new_weight) | |
| reinforced += 1 | |
| self._metrics["edges_reinforced"] += reinforced | |
| self._operations_this_cycle += reinforced | |
| # ═══════════════════════════════════════════════════ | |
| # PHASE 6: COMPRESS | |
| # ═══════════════════════════════════════════════════ | |
| def _phase_compress(self): | |
| """ | |
| Merge redundant nodes. | |
| Prune dead edges. | |
| Prune orphan nodes. | |
| Keep the graph efficient and clean. | |
| """ | |
| if self._total_cycles % config.COMPRESS_INTERVAL != 0: | |
| return | |
| # ── Merge redundant nodes ── | |
| redundant_pairs = self.graph.find_redundant_pairs(limit=10) | |
| merged = 0 | |
| for id_keep, id_remove, similarity in redundant_pairs: | |
| if self.graph.merge_nodes(id_keep, id_remove): | |
| merged += 1 | |
| # ── Prune weak edges ── | |
| pruned_edges = self.graph.prune_weak_edges() | |
| # ── Prune orphan nodes ── | |
| pruned_nodes = self.graph.prune_orphan_nodes() | |
| self._metrics["nodes_merged"] += merged | |
| self._metrics["edges_pruned"] += pruned_edges | |
| self._metrics["nodes_pruned"] += pruned_nodes | |
| self._operations_this_cycle += merged + pruned_edges + pruned_nodes | |
| total_ops = merged + pruned_edges + pruned_nodes | |
| if config.LOG_THINKING_DETAILS and total_ops > 0: | |
| print( | |
| f"[THINKER/COMPRESS] Merged {merged} nodes, " | |
| f"pruned {pruned_edges} edges, {pruned_nodes} orphan nodes" | |
| ) | |
| # ═══════════════════════════════════════════════════ | |
| # PHASE 7: VALIDATE | |
| # ═══════════════════════════════════════════════════ | |
| def _phase_validate(self): | |
| """ | |
| Check logical consistency of the graph. | |
| Resolve contradictions. | |
| Detect and break circular inferences. | |
| """ | |
| if self._total_cycles % config.VALIDATE_INTERVAL != 0: | |
| return | |
| resolved = 0 | |
| # ── Detect contradictions ── | |
| resolved += self._resolve_contradictions() | |
| # ── Detect circular inferences ── | |
| resolved += self._break_circular_inferences() | |
| if resolved > 0: | |
| self._metrics["contradictions_resolved"] += resolved | |
| self._operations_this_cycle += resolved | |
| if config.LOG_THINKING_DETAILS: | |
| print(f"[THINKER/VALIDATE] Resolved {resolved} issues") | |
| def _resolve_contradictions(self) -> int: | |
| """ | |
| Find and resolve contradictions. | |
| If A→B (positive) and A→¬B (opposite_of) exist, keep higher confidence. | |
| """ | |
| resolved = 0 | |
| # Sample nodes to check | |
| node_ids = list(self.graph.nodes.keys()) | |
| sample_size = min(config.THINKING_BATCH_SIZE, len(node_ids)) | |
| if sample_size == 0: | |
| return 0 | |
| sampled = np.random.choice(len(node_ids), sample_size, replace=False) | |
| for idx in sampled: | |
| node_id = node_ids[idx] | |
| edges_out = self.graph.get_edges_from(node_id) | |
| # Group edges by target | |
| target_edges: Dict[str, List[Edge]] = {} | |
| for edge in edges_out: | |
| key = edge.to_node | |
| if key not in target_edges: | |
| target_edges[key] = [] | |
| target_edges[key].append(edge) | |
| # Check for contradictory relations to same target | |
| for target_id, edges in target_edges.items(): | |
| if len(edges) < 2: | |
| continue | |
| # Check for opposing relations | |
| contradictory_pairs = { | |
| ("causes", "prevents"), | |
| ("is_a", "opposite_of"), | |
| ("synonym_of", "opposite_of"), | |
| ("requires", "prevents"), | |
| } | |
| for i in range(len(edges)): | |
| for j in range(i + 1, len(edges)): | |
| pair = (edges[i].relation, edges[j].relation) | |
| reverse_pair = (edges[j].relation, edges[i].relation) | |
| if pair in contradictory_pairs or reverse_pair in contradictory_pairs: | |
| # Keep higher confidence, remove lower | |
| if edges[i].confidence >= edges[j].confidence: | |
| self.graph.remove_edge(edges[j].id) | |
| else: | |
| self.graph.remove_edge(edges[i].id) | |
| resolved += 1 | |
| return resolved | |
| def _break_circular_inferences(self) -> int: | |
| """ | |
| Detect inference chains that loop back on themselves. | |
| Break the weakest link in each cycle. | |
| """ | |
| broken = 0 | |
| # Sample inferred edges | |
| inferred_edges = [ | |
| e for e in self.graph.edges.values() | |
| if e.source == "inferred" | |
| ] | |
| sample_size = min(config.THINKING_BATCH_SIZE, len(inferred_edges)) | |
| if sample_size == 0: | |
| return 0 | |
| sampled_indices = np.random.choice( | |
| len(inferred_edges), sample_size, replace=False | |
| ) | |
| for idx in sampled_indices: | |
| edge = inferred_edges[idx] | |
| # Check if this edge creates a cycle | |
| # Simple: does a path exist from to_node back to from_node? | |
| paths = self.graph.find_paths( | |
| edge.to_node, edge.from_node, | |
| max_depth=4, max_paths=1 | |
| ) | |
| if paths: | |
| # Cycle detected — remove weakest edge in cycle | |
| cycle_path = [edge.from_node, edge.id] + paths[0] | |
| weakest_edge_id = None | |
| weakest_weight = float('inf') | |
| for item_id in cycle_path: | |
| if item_id in self.graph.edges: | |
| e = self.graph.edges[item_id] | |
| if e.weight < weakest_weight and e.source == "inferred": | |
| weakest_weight = e.weight | |
| weakest_edge_id = e.id | |
| if weakest_edge_id: | |
| self.graph.remove_edge(weakest_edge_id) | |
| broken += 1 | |
| return broken | |
| # ═══════════════════════════════════════════════════ | |
| # PHASE 8: SELF-QUESTION | |
| # ═══════════════════════════════════════════════════ | |
| def _phase_self_question(self): | |
| """ | |
| Generate internal questions to fill knowledge gaps. | |
| Ask: "What connects X to Y?" where X and Y are distant but possibly related. | |
| If a new chain is found, save it. | |
| """ | |
| if self._total_cycles % config.SELF_QUESTION_INTERVAL != 0: | |
| return | |
| if len(self.graph.nodes) < 10: | |
| return | |
| questions_asked = 0 | |
| questions_answered = 0 | |
| # Strategy 1: Find disconnected clusters and try to bridge them | |
| questions_answered += self._bridge_disconnected() | |
| questions_asked += 3 | |
| # Strategy 2: Explore high-weight nodes that lack certain relation types | |
| questions_answered += self._fill_relation_gaps() | |
| questions_asked += 3 | |
| # Strategy 3: Challenge existing weak inferences | |
| questions_answered += self._challenge_weak_inferences() | |
| questions_asked += 2 | |
| self._metrics["self_questions_asked"] += questions_asked | |
| self._metrics["self_questions_answered"] += questions_answered | |
| self._operations_this_cycle += questions_answered | |
| if config.LOG_THINKING_DETAILS and questions_answered > 0: | |
| print( | |
| f"[THINKER/SELF-Q] Asked {questions_asked} questions, " | |
| f"answered {questions_answered}" | |
| ) | |
| def _bridge_disconnected(self) -> int: | |
| """Try to find connections between disconnected subgraphs.""" | |
| connected = 0 | |
| # Pick two random nodes that have no path between them | |
| node_ids = list(self.graph.nodes.keys()) | |
| if len(node_ids) < 10: | |
| return 0 | |
| for _ in range(3): | |
| idx = np.random.choice(len(node_ids), 2, replace=False) | |
| id_a, id_b = node_ids[idx[0]], node_ids[idx[1]] | |
| node_a = self.graph.get_node(id_a) | |
| node_b = self.graph.get_node(id_b) | |
| if not node_a or not node_b: | |
| continue | |
| # Are they already connected? | |
| paths = self.graph.find_paths(id_a, id_b, max_depth=4, max_paths=1) | |
| if paths: | |
| continue | |
| # Can we connect them via vector similarity? | |
| sim = utils.cosine_similarity(node_a.vector, node_b.vector) | |
| if sim > config.SIMILARITY_THRESHOLD * 0.8: | |
| # They're somewhat similar but not connected → connect | |
| edge = self.graph.add_edge( | |
| from_id=id_a, | |
| to_id=id_b, | |
| relation="inferred_relation", | |
| weight=sim * 0.7, | |
| confidence=sim * 0.6, | |
| source="inferred" | |
| ) | |
| if edge: | |
| connected += 1 | |
| return connected | |
| def _fill_relation_gaps(self) -> int: | |
| """Find high-weight nodes missing common relations and try to fill them.""" | |
| filled = 0 | |
| # Get well-known nodes | |
| important_nodes = sorted( | |
| self.graph.nodes.values(), | |
| key=lambda n: n.weight * n.connections, | |
| reverse=True | |
| )[:20] | |
| common_relations = ["is_a", "part_of", "has", "used_for", "related_to"] | |
| for node in important_nodes[:5]: | |
| existing_relations = set() | |
| for edge in self.graph.get_edges_from(node.id): | |
| existing_relations.add(edge.relation) | |
| for relation in common_relations: | |
| if relation in existing_relations: | |
| continue | |
| # Can we find a target for this relation via similarity? | |
| # Look for nodes that commonly have this relation | |
| candidates = self.graph.find_similar_to_node( | |
| node.id, top_k=5, | |
| min_similarity=config.SIMILARITY_THRESHOLD | |
| ) | |
| for candidate, sim in candidates: | |
| # Check if candidate has this relation type outgoing | |
| candidate_rels = [ | |
| e.relation for e in self.graph.get_edges_from(candidate.id) | |
| ] | |
| if relation in candidate_rels: | |
| # This candidate has the relation → node might too | |
| for edge in self.graph.get_edges_from(candidate.id): | |
| if edge.relation == relation: | |
| target = self.graph.get_node(edge.to_node) | |
| if target and not self.graph.edge_exists( | |
| node.id, target.id, relation | |
| ): | |
| confidence = sim * edge.confidence * config.INFERENCE_DECAY | |
| if confidence >= config.INFERENCE_CONFIDENCE_MIN: | |
| new_edge = self.graph.add_edge( | |
| from_id=node.id, | |
| to_id=target.id, | |
| relation=relation, | |
| weight=confidence, | |
| confidence=confidence, | |
| source="inferred" | |
| ) | |
| if new_edge: | |
| filled += 1 | |
| break | |
| break # One fill per missing relation | |
| if filled >= 5: | |
| break | |
| return filled | |
| def _challenge_weak_inferences(self) -> int: | |
| """ | |
| Re-examine weak inferred edges. | |
| If supporting evidence exists, strengthen. | |
| If contradicting evidence exists, remove. | |
| """ | |
| improved = 0 | |
| weak_edges = self.graph.get_weakest_edges( | |
| limit=20, source_filter="inferred" | |
| ) | |
| for edge in weak_edges: | |
| from_node = self.graph.get_node(edge.from_node) | |
| to_node = self.graph.get_node(edge.to_node) | |
| if not from_node or not to_node: | |
| continue | |
| # Check if there's additional evidence | |
| # (other paths between these nodes) | |
| paths = self.graph.find_paths( | |
| edge.from_node, edge.to_node, | |
| max_depth=4, max_paths=3 | |
| ) | |
| # Filter paths that don't use this edge | |
| alternative_paths = [ | |
| p for p in paths | |
| if edge.id not in p | |
| ] | |
| if alternative_paths: | |
| # Multiple paths support this edge → strengthen | |
| support_factor = 1.0 + 0.05 * len(alternative_paths) | |
| new_weight = min( | |
| edge.weight * support_factor, | |
| config.WEIGHT_MAX | |
| ) | |
| self.graph.edges[edge.id].weight = new_weight | |
| self.graph.edges[edge.id].confidence = min( | |
| edge.confidence * support_factor, 1.0 | |
| ) | |
| self.graph.edges[edge.id].mark_dirty() | |
| self.graph.memory.save_edge(edge.to_dict()) | |
| improved += 1 | |
| else: | |
| # No alternative support → further weaken | |
| if edge.weight < config.PRUNE_WEIGHT_THRESHOLD * 2: | |
| self.graph.remove_edge(edge.id) | |
| improved += 1 | |
| return improved | |
| # ═══════════════════════════════════════════════════ | |
| # USER KNOWLEDGE EXTRACTION | |
| # ═══════════════════════════════════════════════════ | |
| def extract_from_user_message(self, message: str): | |
| """ | |
| Extract knowledge from a user's chat message. | |
| Called by brain.py after processing a user request. | |
| Does NOT store the raw message — only extracted knowledge. | |
| """ | |
| if not message or len(message.strip()) < 10: | |
| return | |
| message = message.strip() | |
| # Extract keywords | |
| keywords = utils.extract_keywords(message, max_keywords=15) | |
| if len(keywords) < 2: | |
| return | |
| # Extract entities | |
| entities = utils.extract_entities_simple(message) | |
| # Create entity nodes | |
| entity_nodes = [] | |
| for entity in entities[:5]: | |
| node = self.graph.add_node( | |
| content=entity, | |
| node_type="entity", | |
| source="user_chat", | |
| weight=config.USER_KNOWLEDGE_CONFIDENCE | |
| ) | |
| if node: | |
| entity_nodes.append(node) | |
| # Create concept nodes from keywords not already entities | |
| entity_lower = {e.lower() for e in entities} | |
| for kw in keywords: | |
| if kw.lower() not in entity_lower: | |
| node = self.graph.add_node( | |
| content=kw, | |
| node_type="concept", | |
| source="user_chat", | |
| weight=config.USER_KNOWLEDGE_CONFIDENCE * 0.7 | |
| ) | |
| # If message contains informational content, create fact node | |
| if len(message) > 30 and any( | |
| p in message.lower() for p in [ | |
| "adalah", "merupakan", "yaitu", "ialah", | |
| "is", "are", "means", "defined" | |
| ] | |
| ): | |
| fact_node = self.graph.add_node( | |
| content=message[:500], | |
| node_type="fact", | |
| source="user_chat", | |
| weight=config.USER_KNOWLEDGE_CONFIDENCE | |
| ) | |
| # Connect fact to entities mentioned | |
| if fact_node: | |
| for en in entity_nodes: | |
| self.graph.add_edge( | |
| from_id=fact_node.id, | |
| to_id=en.id, | |
| relation="related_to", | |
| confidence=config.USER_KNOWLEDGE_CONFIDENCE * 0.8, | |
| source="user_chat" | |
| ) |