namish10 committed on
Commit
eba543b
·
verified ·
1 Parent(s): 22147a6

Upload app/agents/knowledge_graph_agent.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app/agents/knowledge_graph_agent.py +419 -0
app/agents/knowledge_graph_agent.py ADDED
@@ -0,0 +1,419 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Knowledge Graph Agent with GraphRAG
3
+
4
+ Manages the user's knowledge graph using GraphRAG:
5
+ - Nodes: concepts, doubts, topics, resources
6
+ - Edges: relationships, dependencies, associations
7
+ - GraphRAG for retrieval and generation
8
+ """
9
+
10
+ from typing import Dict, List, Any, Optional
11
+ from dataclasses import dataclass, field
12
+ from datetime import datetime
13
+ import json
14
+
15
+
16
@dataclass
class GraphNode:
    """A single node of the knowledge graph.

    Only ``node_id``, ``node_type`` and ``label`` are required; the remaining
    fields receive fresh per-instance defaults.
    """

    # Unique key under which the node is stored in the graph.
    node_id: str
    # Category of the node (the agent in this file uses 'Concept', 'Doubt'
    # and 'Resource').
    node_type: str
    # Human-readable name shown for the node.
    label: str
    # Free-form attributes; a new dict is created for every instance.
    properties: Dict[str, Any] = field(default_factory=dict)
    # Optional embedding vector; None when no embedding has been attached.
    embeddings: Optional[List[float]] = None
    # Creation time taken from datetime.now() (naive local time).
    created_at: datetime = field(default_factory=datetime.now)
25
+
26
+
27
@dataclass
class GraphEdge:
    """A directed, typed and weighted edge between two graph nodes.

    ``edge_id``, ``source_id``, ``target_id`` and ``relation_type`` are
    required; the remaining fields receive fresh per-instance defaults.
    """

    # Unique key under which the edge is stored in the graph.
    edge_id: str
    # node_id of the node the edge starts from.
    source_id: str
    # node_id of the node the edge points to.
    target_id: str
    # Name of the relation (e.g. 'part_of', 'prerequisite_of').
    relation_type: str
    # Strength of the relation; defaults to 1.0.
    weight: float = 1.0
    # Free-form attributes; a new dict is created for every instance.
    properties: Dict[str, Any] = field(default_factory=dict)
    # Creation time taken from datetime.now() (naive local time).
    created_at: datetime = field(default_factory=datetime.now)
37
+
38
+
39
@dataclass
class Ontology:
    """Domain ontology: the entity and relation types a graph may use.

    Both lists default to empty and are created fresh for every instance.
    """

    # Entity type descriptors; the agent in this file stores dicts with
    # 'name' and 'description' keys.
    entity_types: List[Dict[str, str]] = field(default_factory=list)
    # Relation type descriptors, in the same 'name'/'description' shape.
    relation_types: List[Dict[str, str]] = field(default_factory=list)
44
+
45
+
46
class KnowledgeGraphAgent:
    """
    In-memory knowledge graph for one user, with keyword-based retrieval.

    The graph is held in two dicts keyed by id: ``nodes`` (GraphNode) and
    ``edges`` (GraphEdge). The agent offers:
    - adding doubts, resources and topics (concept nodes are created on demand)
    - keyword retrieval over node labels and properties
    - shortest-path search between two concepts
    - per-concept mastery and overall statistics
    - export / import of the whole graph as plain dicts
    """

    def __init__(self, user_id: str, config: Optional[Dict] = None):
        """Create an empty graph for ``user_id`` with the default ontology."""
        self.user_id = user_id
        self.config = config or {}

        self.nodes: Dict[str, GraphNode] = {}
        self.edges: Dict[str, GraphEdge] = {}

        # The creation timestamp keeps ids of successive graphs distinct.
        self.graph_id = f"cf_graph_{user_id}_{datetime.now().timestamp()}"

        self._initialize_default_ontology()

    def _initialize_default_ontology(self):
        """Initialize default learning ontology"""
        self.ontology = Ontology(
            entity_types=[
                {'name': 'Concept', 'description': 'A learning concept or topic'},
                {'name': 'Doubt', 'description': 'A question or confusion point'},
                {'name': 'Resource', 'description': 'Learning resource or material'},
                {'name': 'Topic', 'description': 'Main subject area'},
                {'name': 'Skill', 'description': 'Developed skill or competency'},
            ],
            relation_types=[
                {'name': 'prerequisite_of', 'description': 'Is prerequisite for'},
                {'name': 'related_to', 'description': 'Is related to'},
                {'name': 'part_of', 'description': 'Is part of'},
                {'name': 'helps_understand', 'description': 'Helps understand'},
                {'name': 'contrasts_with', 'description': 'Contrasts with'},
            ],
        )

    @staticmethod
    def _concept_id(name: str) -> str:
        """Return the node id used for the concept called ``name``."""
        return f"concept_{name.lower().replace(' ', '_')}"

    def _resolve_node_id(self, ref: str) -> str:
        """Translate a node id or a concept name into a node id.

        An id that already exists in the graph (e.g. ``doubt_…`` or
        ``resource_…``) is returned unchanged. Anything else is treated as a
        concept name, except that an unknown ``concept_…`` string is kept
        as-is when its normalized form is not a known node either.
        """
        if ref in self.nodes:
            return ref
        candidate = self._concept_id(ref)
        if candidate in self.nodes or not ref.startswith('concept_'):
            return candidate
        return ref

    def add_doubt_to_graph(self, doubt_data: Dict) -> "GraphNode":
        """Add a captured doubt to the knowledge graph.

        Creates a 'Doubt' node from ``doubt_data`` and, for every entry of
        ``conceptTags``, makes sure the concept node exists and links it to
        the doubt with a ``part_of`` edge (concept -> doubt).

        Returns:
            The newly created doubt node.
        """
        node_id = f"doubt_{doubt_data.get('id', datetime.now().timestamp())}"

        # `or` also covers keys that are present but explicitly None.
        concept_tags = doubt_data.get('conceptTags') or []
        page = doubt_data.get('page') or {}
        label = doubt_data.get('formattedTitle') or doubt_data.get('rawText') or ''

        node = GraphNode(
            node_id=node_id,
            node_type='Doubt',
            label=label,
            properties={
                'raw_text': doubt_data.get('rawText', ''),
                'summary': doubt_data.get('formattedSummary', ''),
                'doubt_type': doubt_data.get('doubtType', 'concept'),
                'concepts': concept_tags,
                'url': page.get('url', ''),
                'mastered': doubt_data.get('mastered', False),
                'review_count': doubt_data.get('reviewCount', 0),
            },
        )
        self.nodes[node_id] = node

        for concept in concept_tags:
            self._ensure_concept_node(concept)
            self._add_edge(source=concept, target=node_id, relation='part_of')

        return node

    def _ensure_concept_node(self, concept: str) -> "GraphNode":
        """Return the concept node for ``concept``, creating it if missing."""
        concept_id = self._concept_id(concept)

        existing = self.nodes.get(concept_id)
        if existing is not None:
            return existing

        node = GraphNode(
            node_id=concept_id,
            node_type='Concept',
            label=concept,
            properties={
                'mastery_level': 0.0,
                'importance': 0.5,
                'last_reviewed': None,
            },
        )
        self.nodes[concept_id] = node
        return node

    def _add_edge(
        self,
        source: str,
        target: str,
        relation: str,
        weight: float = 1.0
    ) -> Optional["GraphEdge"]:
        """Add an edge between two existing nodes.

        ``source`` and ``target`` may each be a node id or a concept name.

        Returns:
            The stored edge, or None when either endpoint is not in the graph.
        """
        source_id = self._resolve_node_id(source)
        target_id = self._resolve_node_id(target)

        if source_id not in self.nodes or target_id not in self.nodes:
            return None

        # Built from the resolved ids so that differently spelled references
        # to the same nodes ("Python" / "python") map onto a single edge.
        edge_id = f"edge_{source_id}_{target_id}_{relation}"

        edge = GraphEdge(
            edge_id=edge_id,
            source_id=source_id,
            target_id=target_id,
            relation_type=relation,
            weight=weight,
        )
        self.edges[edge_id] = edge
        return edge

    def add_resource(self, resource_data: Dict) -> "GraphNode":
        """Add a learning resource to the graph.

        Creates a 'Resource' node and links every listed topic to it with a
        ``part_of`` edge (concept -> resource), creating concept nodes as
        needed.

        Returns:
            The newly created resource node.
        """
        node_id = f"resource_{resource_data.get('id', datetime.now().timestamp())}"
        topics = resource_data.get('topics') or []

        node = GraphNode(
            node_id=node_id,
            node_type='Resource',
            label=resource_data.get('title', 'Untitled Resource'),
            properties={
                'url': resource_data.get('url', ''),
                'type': resource_data.get('type', 'webpage'),
                'topics': topics,
                'difficulty': resource_data.get('difficulty', 0.5),
            },
        )
        self.nodes[node_id] = node

        for topic in topics:
            self._ensure_concept_node(topic)
            self._add_edge(topic, node_id, 'part_of')

        return node

    def add_topic(self, topic: str, parent: Optional[str] = None) -> "GraphNode":
        """Add a topic (stored as a concept node) to the graph.

        When ``parent`` is given, the parent concept is created if needed and
        an edge ``topic --prerequisite_of--> parent`` is recorded.
        NOTE(review): confirm that this edge direction is the intended one.

        Returns:
            The concept node for ``topic``.
        """
        node = self._ensure_concept_node(topic)

        if parent:
            self._ensure_concept_node(parent)
            self._add_edge(topic, parent, 'prerequisite_of')

        return node

    def graphrag_retrieve(
        self,
        query: str,
        top_k: int = 5
    ) -> List[Dict]:
        """
        Find the nodes that best match ``query`` by keyword.

        Each whitespace-separated, lower-cased query term adds 1.0 to a node's
        score when it occurs in the label and 0.5 when it occurs in the
        stringified properties dict. Mastered doubts are down-weighted by 0.8.
        Nodes with a zero score are dropped.

        Returns:
            Up to ``top_k`` dicts (highest score first) with the node's id,
            type, label, properties, score and up to three related nodes.
            Empty when the query has no terms or ``top_k`` is not positive.
        """
        query_terms = query.lower().split()
        if not query_terms or top_k <= 0:
            return []

        scored = []
        for node in self.nodes.values():
            label_lower = (node.label or '').lower()
            # NOTE: str(dict) also contains the property *keys*, so a term
            # such as "url" matches every node that has that key.
            props_lower = str(node.properties).lower()

            score = 0.0
            for term in query_terms:
                if term in label_lower:
                    score += 1.0
                if term in props_lower:
                    score += 0.5

            if node.node_type == 'Doubt' and node.properties.get('mastered'):
                score *= 0.8

            if score > 0:
                scored.append((score, node))

        # list.sort is stable: equally scored nodes keep insertion order.
        scored.sort(key=lambda item: item[0], reverse=True)

        return [
            {
                'node_id': node.node_id,
                'type': node.node_type,
                'label': node.label,
                'properties': node.properties,
                'score': score,
                'related': self._get_related_nodes(node.node_id, limit=3),
            }
            for score, node in scored[:top_k]
        ]

    def _get_related_nodes(self, node_id: str, limit: int = 3) -> List[Dict]:
        """Return up to ``limit`` direct neighbours of ``node_id``.

        Edges are followed in both directions, in insertion order. Endpoints
        that are missing from ``self.nodes`` are skipped.
        """
        related: List[Dict] = []
        if limit <= 0:
            return related

        for edge in self.edges.values():
            if edge.source_id == node_id:
                other = self.nodes.get(edge.target_id)
            elif edge.target_id == node_id:
                other = self.nodes.get(edge.source_id)
            else:
                continue

            if other is None:
                continue

            related.append({
                'node_id': other.node_id,
                'type': other.node_type,
                'label': other.label,
                'relation': edge.relation_type,
            })
            if len(related) >= limit:
                break

        return related

    def find_learning_path(
        self,
        from_topic: str,
        to_topic: str
    ) -> List[str]:
        """Find a shortest path between two topics using BFS.

        Edges are treated as undirected. Edge endpoints that are not present
        in ``self.nodes`` are ignored.

        Returns:
            The labels of the nodes along the path, start to goal inclusive,
            or an empty list when either topic is unknown or no path exists.
        """
        start = self._concept_id(from_topic)
        goal = self._concept_id(to_topic)

        if start not in self.nodes or goal not in self.nodes:
            return []

        # One pass over the edges instead of rescanning them per visited node.
        adjacency: Dict[str, List[str]] = {}
        for edge in self.edges.values():
            adjacency.setdefault(edge.source_id, []).append(edge.target_id)
            adjacency.setdefault(edge.target_id, []).append(edge.source_id)

        # Level-by-level BFS; `parents` doubles as the visited set.
        parents: Dict[str, Optional[str]] = {start: None}
        frontier = [start]
        while frontier and goal not in parents:
            next_frontier = []
            for current in frontier:
                for neighbor in adjacency.get(current, ()):
                    if neighbor in parents or neighbor not in self.nodes:
                        continue
                    parents[neighbor] = current
                    next_frontier.append(neighbor)
            frontier = next_frontier

        if goal not in parents:
            return []

        # Walk the parent links back from the goal, then flip the result.
        labels = []
        cursor: Optional[str] = goal
        while cursor is not None:
            labels.append(self.nodes[cursor].label)
            cursor = parents[cursor]
        labels.reverse()
        return labels

    def get_topic_mastery(self) -> Dict[str, float]:
        """Calculate a mastery value for each concept, keyed by label.

        Mastery is ``1 - (doubts linked to the concept / all doubts)``; when
        the graph holds no doubts at all every concept gets 0.0.
        """
        total_doubts = sum(
            1 for node in self.nodes.values() if node.node_type == 'Doubt'
        )

        mastery: Dict[str, float] = {}
        for node_id, node in self.nodes.items():
            if node.node_type != 'Concept':
                continue
            if total_doubts > 0:
                mastery[node.label] = 1.0 - (self._get_doubt_count(node_id) / total_doubts)
            else:
                mastery[node.label] = 0.0

        return mastery

    def _get_doubt_count(self, concept_id: str) -> int:
        """Count ``part_of`` edges leading from ``concept_id`` to a Doubt node."""
        count = 0
        for edge in self.edges.values():
            if edge.source_id != concept_id or edge.relation_type != 'part_of':
                continue
            target = self.nodes.get(edge.target_id)
            if target is not None and target.node_type == 'Doubt':
                count += 1
        return count

    def get_graph_stats(self) -> Dict:
        """Return node/edge totals, per-type counts and topic mastery."""
        node_types: Dict[str, int] = {}
        for node in self.nodes.values():
            node_types[node.node_type] = node_types.get(node.node_type, 0) + 1

        relation_types: Dict[str, int] = {}
        for edge in self.edges.values():
            relation_types[edge.relation_type] = relation_types.get(edge.relation_type, 0) + 1

        return {
            'graph_id': self.graph_id,
            'total_nodes': len(self.nodes),
            'total_edges': len(self.edges),
            'node_types': node_types,
            'relation_types': relation_types,
            'mastery': self.get_topic_mastery(),
        }

    def export_graph(self) -> Dict:
        """Export the graph as plain dicts and lists for persistence.

        Embeddings, edge properties and creation times are not exported.
        Property dicts and ontology lists are shallow-copied so that changing
        the export does not change the live graph.
        """
        return {
            'graph_id': self.graph_id,
            'nodes': [
                {
                    'node_id': n.node_id,
                    'node_type': n.node_type,
                    'label': n.label,
                    'properties': dict(n.properties),
                }
                for n in self.nodes.values()
            ],
            'edges': [
                {
                    'edge_id': e.edge_id,
                    'source_id': e.source_id,
                    'target_id': e.target_id,
                    'relation_type': e.relation_type,
                    'weight': e.weight,
                }
                for e in self.edges.values()
            ],
            'ontology': {
                'entity_types': list(self.ontology.entity_types),
                'relation_types': list(self.ontology.relation_types),
            },
        }

    def import_graph(self, graph_data: Dict):
        """Replace the current graph with one produced by ``export_graph``.

        All input is converted before any state is touched, so malformed data
        raises (e.g. KeyError for a missing required key) without leaving a
        half-imported graph behind. The ontology is replaced only when
        ``graph_data`` carries one.
        """
        nodes: Dict[str, GraphNode] = {}
        for node_data in graph_data.get('nodes', []):
            node = GraphNode(
                node_id=node_data['node_id'],
                node_type=node_data['node_type'],
                label=node_data['label'],
                # Copy so the graph does not alias the caller's dict.
                properties=dict(node_data.get('properties') or {}),
            )
            nodes[node.node_id] = node

        edges: Dict[str, GraphEdge] = {}
        for edge_data in graph_data.get('edges', []):
            edge = GraphEdge(
                edge_id=edge_data['edge_id'],
                source_id=edge_data['source_id'],
                target_id=edge_data['target_id'],
                relation_type=edge_data['relation_type'],
                weight=edge_data.get('weight', 1.0),
            )
            edges[edge.edge_id] = edge

        ontology = None
        ontology_data = graph_data.get('ontology')
        if ontology_data:
            ontology = Ontology(
                entity_types=list(ontology_data.get('entity_types', [])),
                relation_types=list(ontology_data.get('relation_types', [])),
            )

        # Everything parsed - commit. The dicts are updated in place so that
        # existing references to self.nodes / self.edges stay valid.
        self.graph_id = graph_data.get('graph_id', self.graph_id)
        self.nodes.clear()
        self.nodes.update(nodes)
        self.edges.clear()
        self.edges.update(edges)
        if ontology is not None:
            self.ontology = ontology

    async def sync_to_zep(self):
        """Sync graph to Zep Cloud for advanced GraphRAG.

        Placeholder: not implemented yet, currently a no-op.
        """
        pass

    async def sync_to_graph(self):
        """Sync current state.

        Placeholder: not implemented yet, currently a no-op.
        """
        pass