vietexob committed on
Commit
110ce02
·
1 Parent(s): 5bfc72c

Added LightRAG KG

Browse files
Files changed (4) hide show
  1. app.py +36 -23
  2. knowledge_graph.html +155 -0
  3. llm_graph.py +61 -62
  4. visualize.py +110 -0
app.py CHANGED
@@ -1,21 +1,23 @@
1
- # import spaces
2
  import os
3
  import spacy
4
  import pickle
5
  import random
6
  import logging
7
- import rapidjson
8
  import asyncio
 
9
 
10
  import gradio as gr
11
  import networkx as nx
12
 
 
13
  from llm_graph import LLMGraph, MODEL_LIST
 
14
  from pyvis.network import Network
15
  from spacy import displacy
16
  from spacy.tokens import Span
17
 
18
  logging.basicConfig(level=logging.INFO)
 
19
 
20
  # Constants
21
  TITLE = "🌐 Text2Graph: Extract Knowledge Graphs from Natural Language"
@@ -35,6 +37,9 @@ EXAMPLE_CACHE_FILE = os.path.join(CACHE_DIR, "first_example_cache.pkl")
35
  # Create cache directory if it doesn't exist
36
  os.makedirs(CACHE_DIR, exist_ok=True)
37
 
 
 
 
38
  def get_random_light_color():
39
  """
40
  Color utilities
@@ -57,19 +62,17 @@ def handle_text(text=""):
57
 
58
  return " ".join(text.split())
59
 
60
- # @spaces.GPU
61
- async def extract_kg(text="", model=None):
62
  """
63
  Extract knowledge graph from text
64
  """
65
 
66
  # Reject missing text or model
67
- if not text or not model:
68
  raise gr.Error("⚠️ Both text and model must be provided!")
69
  try:
70
- model_instance = LLMGraph(model=model)
71
- result = await model_instance.extract(text)
72
-
73
  return rapidjson.loads(result)
74
  except Exception as e:
75
  raise gr.Error(f"❌ Extraction error: {str(e)}")
@@ -223,17 +226,19 @@ def create_graph(json_data):
223
  allow-top-navigation-by-user-activation allow-downloads" allowfullscreen=""
224
  allowpaymentrequest="" frameborder="0" srcdoc='{html}'></iframe>"""
225
 
226
- async def process_and_visualize(text, model, progress=gr.Progress()):
227
  """
228
  Process text and visualize knowledge graph and entities
229
  """
230
 
231
- if not text or not model:
232
  raise gr.Error("⚠️ Both text and model must be provided!")
233
 
234
  # Check if we're processing the first example for caching
235
  is_first_example = text == EXAMPLES[0][0]
236
-
 
 
237
  # Try to load from cache if it's the first example
238
  if is_first_example and os.path.exists(EXAMPLE_CACHE_FILE):
239
  try:
@@ -249,7 +254,7 @@ async def process_and_visualize(text, model, progress=gr.Progress()):
249
 
250
  # Continue with normal processing if cache fails
251
  progress(0, desc="Starting extraction...")
252
- json_data = await extract_kg(text, model)
253
 
254
  progress(0.5, desc="Creating entity visualization...")
255
  entities_viz = create_custom_entity_viz(json_data, text)
@@ -301,7 +306,7 @@ EXAMPLES = [
301
  les buis et à arroser les rosiers, perpétuant ainsi une tradition d'excellence horticole qui fait la fierté de la capitale française.""")],
302
  ]
303
 
304
- async def generate_first_example_cache():
305
  """
306
  Generate cache for the first example if it doesn't exist when the app starts
307
  """
@@ -312,10 +317,10 @@ async def generate_first_example_cache():
312
 
313
  try:
314
  text = EXAMPLES[0][0]
315
- model = MODEL_LIST[0] if MODEL_LIST else None
316
 
317
  # Extract data
318
- json_data = await extract_kg(text, model)
319
  entities_viz = create_custom_entity_viz(json_data, text)
320
  graph_html = create_graph(json_data)
321
 
@@ -360,7 +365,7 @@ def create_ui():
360
  """
361
 
362
  # Try to generate/load the first example cache
363
- first_example_cache = asyncio.run(generate_first_example_cache())
364
 
365
  with gr.Blocks(css=CUSTOM_CSS, title=TITLE) as demo:
366
  # Header
@@ -430,14 +435,14 @@ def create_ui():
430
  )
431
 
432
  # Set initial values from cache if available
433
- if first_example_cache:
434
  # Use this to set initial values when the app loads
435
  demo.load(
436
  lambda: [
437
- first_example_cache["graph_html"],
438
- first_example_cache["entities_viz"],
439
- first_example_cache["json_data"],
440
- first_example_cache["stats"]
441
  ],
442
  inputs=None,
443
  outputs=[output_graph, output_entity_viz, output_json, stats_output]
@@ -450,5 +455,13 @@ def create_ui():
450
 
451
  return demo
452
 
453
- demo = create_ui()
454
- demo.launch(share=False)
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import spacy
3
  import pickle
4
  import random
5
  import logging
 
6
  import asyncio
7
+ import rapidjson
8
 
9
  import gradio as gr
10
  import networkx as nx
11
 
12
+ # from dotenv import load_dotenv
13
  from llm_graph import LLMGraph, MODEL_LIST
14
+
15
  from pyvis.network import Network
16
  from spacy import displacy
17
  from spacy.tokens import Span
18
 
19
  logging.basicConfig(level=logging.INFO)
20
+ # load_dotenv()
21
 
22
  # Constants
23
  TITLE = "🌐 Text2Graph: Extract Knowledge Graphs from Natural Language"
 
37
  # Create cache directory if it doesn't exist
38
  os.makedirs(CACHE_DIR, exist_ok=True)
39
 
40
+ # Initialize the LLMGraph model
41
+ model = LLMGraph()
42
+
43
  def get_random_light_color():
44
  """
45
  Color utilities
 
62
 
63
  return " ".join(text.split())
64
 
65
+ def extract_kg(text="", model_name=None):
 
66
  """
67
  Extract knowledge graph from text
68
  """
69
 
70
  # Reject missing text or model name
71
+ if not text or not model_name:
72
  raise gr.Error("⚠️ Both text and model must be provided!")
73
  try:
74
+ result = model.extract(text, model_name)
75
+
 
76
  return rapidjson.loads(result)
77
  except Exception as e:
78
  raise gr.Error(f"❌ Extraction error: {str(e)}")
 
226
  allow-top-navigation-by-user-activation allow-downloads" allowfullscreen=""
227
  allowpaymentrequest="" frameborder="0" srcdoc='{html}'></iframe>"""
228
 
229
+ def process_and_visualize(text, model_name, progress=gr.Progress()):
230
  """
231
  Process text and visualize knowledge graph and entities
232
  """
233
 
234
+ if not text or not model_name:
235
  raise gr.Error("⚠️ Both text and model must be provided!")
236
 
237
  # Check if we're processing the first example for caching
238
  is_first_example = text == EXAMPLES[0][0]
239
+
240
+ asyncio.run(model.initialize_rag()) # Ensure RAG is initialized
241
+
242
  # Try to load from cache if it's the first example
243
  if is_first_example and os.path.exists(EXAMPLE_CACHE_FILE):
244
  try:
 
254
 
255
  # Continue with normal processing if cache fails
256
  progress(0, desc="Starting extraction...")
257
+ json_data = extract_kg(text, model_name)
258
 
259
  progress(0.5, desc="Creating entity visualization...")
260
  entities_viz = create_custom_entity_viz(json_data, text)
 
306
  les buis et à arroser les rosiers, perpétuant ainsi une tradition d'excellence horticole qui fait la fierté de la capitale française.""")],
307
  ]
308
 
309
+ def generate_first_example():
310
  """
311
  Generate cache for the first example if it doesn't exist when the app starts
312
  """
 
317
 
318
  try:
319
  text = EXAMPLES[0][0]
320
+ model_name = MODEL_LIST[0] if MODEL_LIST else None
321
 
322
  # Extract data
323
+ json_data = extract_kg(text, model_name)
324
  entities_viz = create_custom_entity_viz(json_data, text)
325
  graph_html = create_graph(json_data)
326
 
 
365
  """
366
 
367
  # Try to generate/load the first example cache
368
+ first_example = generate_first_example()
369
 
370
  with gr.Blocks(css=CUSTOM_CSS, title=TITLE) as demo:
371
  # Header
 
435
  )
436
 
437
  # Set initial values from cache if available
438
+ if first_example:
439
  # Use this to set initial values when the app loads
440
  demo.load(
441
  lambda: [
442
+ first_example["graph_html"],
443
+ first_example["entities_viz"],
444
+ first_example["json_data"],
445
+ first_example["stats"]
446
  ],
447
  inputs=None,
448
  outputs=[output_graph, output_entity_viz, output_json, stats_output]
 
455
 
456
  return demo
457
 
458
+ def main():
459
+ """
460
+ Main function to run the Gradio app
461
+ """
462
+
463
+ demo = create_ui()
464
+ demo.launch(share=False)
465
+
466
+ if __name__ == "__main__":
467
+ main()
knowledge_graph.html ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <html>
2
+ <head>
3
+ <meta charset="utf-8">
4
+
5
+ <script src="lib/bindings/utils.js"></script>
6
+ <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/vis-network/9.1.2/dist/dist/vis-network.min.css" integrity="sha512-WgxfT5LWjfszlPHXRmBWHkV2eceiWTOBvrKCNbdgDYTHrT2AeLCGbF4sZlZw3UMN3WtL0tGUoIAKsu8mllg/XA==" crossorigin="anonymous" referrerpolicy="no-referrer" />
7
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/vis-network/9.1.2/dist/vis-network.min.js" integrity="sha512-LnvoEWDFrqGHlHmDD2101OrLcbsfkrzoSpvtSQtxK3RMnRV0eOkhhBN2dXHKRrUU8p2DGRTk35n4O8nWSVe1mQ==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>
8
+
9
+
10
+ <center>
11
+ <h1></h1>
12
+ </center>
13
+
14
+ <!-- <link rel="stylesheet" href="../node_modules/vis/dist/vis.min.css" type="text/css" />
15
+ <script type="text/javascript" src="../node_modules/vis/dist/vis.js"> </script>-->
16
+ <link
17
+ href="https://cdn.jsdelivr.net/npm/bootstrap@5.0.0-beta3/dist/css/bootstrap.min.css"
18
+ rel="stylesheet"
19
+ integrity="sha384-eOJMYsd53ii+scO/bJGFsiCZc+5NDVN2yr8+0RDqr0Ql0h+rP48ckxlpbzKgwra6"
20
+ crossorigin="anonymous"
21
+ />
22
+ <script
23
+ src="https://cdn.jsdelivr.net/npm/bootstrap@5.0.0-beta3/dist/js/bootstrap.bundle.min.js"
24
+ integrity="sha384-JEW9xMcG8R+pH31jmWH6WWP0WintQrMb4s7ZOdauHnUtxwoG2vI5DkLtS3qm9Ekf"
25
+ crossorigin="anonymous"
26
+ ></script>
27
+
28
+
29
+ <center>
30
+ <h1></h1>
31
+ </center>
32
+ <style type="text/css">
33
+
34
+ #mynetwork {
35
+ width: 100%;
36
+ height: 100vh;
37
+ background-color: #f8fafc;
38
+ border: 1px solid lightgray;
39
+ position: relative;
40
+ float: left;
41
+ }
42
+
43
+
44
+
45
+
46
+
47
+
48
+ </style>
49
+ </head>
50
+
51
+
52
+ <body>
53
+ <div class="card" style="width: 100%">
54
+
55
+
56
+ <div id="mynetwork" class="card-body"></div>
57
+ </div>
58
+
59
+
60
+
61
+
62
+ <script type="text/javascript">
63
+
64
+ // initialize global variables.
65
+ var edges;
66
+ var nodes;
67
+ var allNodes;
68
+ var allEdges;
69
+ var nodeColors;
70
+ var originalNodes;
71
+ var network;
72
+ var container;
73
+ var options, data;
74
+ var filter = {
75
+ item : '',
76
+ property : '',
77
+ value : []
78
+ };
79
+
80
+
81
+
82
+
83
+
84
+ // This method is responsible for drawing the graph, returns the drawn network
85
+ function drawGraph() {
86
+ var container = document.getElementById('mynetwork');
87
+
88
+
89
+
90
+ // parsing and collecting nodes and edges from the python
91
+ nodes = new vis.DataSet([{"color": {"background": "#e0e7ff", "border": "#6366f1", "highlight": {"background": "#c7d2fe", "border": "#4f46e5"}}, "created_at": 1756577670, "description": "Aerosmith is a legendary rock band that has been active for 54 years and has officially announced their retirement from touring.", "entity_id": "Aerosmith", "entity_type": "organization", "file_path": "unknown_source", "font": {"color": "#1e293b", "size": 14}, "id": "Aerosmith", "label": "Aerosmith", "shape": "dot", "size": 20, "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "Aerosmith is a legendary rock band that has been active for 54 years and has officially announced their retirement from touring."}, {"color": {"background": "#e0e7ff", "border": "#6366f1", "highlight": {"background": "#c7d2fe", "border": "#4f46e5"}}, "created_at": 1756577670, "description": "Steven Tyler is the lead singer of Aerosmith who suffered an unrecoverable vocal cord injury, leading to the band\u0027s retirement from touring.", "entity_id": "Steven Tyler", "entity_type": "person", "file_path": "unknown_source", "font": {"color": "#1e293b", "size": 14}, "id": "Steven Tyler", "label": "Steven Tyler", "shape": "dot", "size": 20, "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "Steven Tyler is the lead singer of Aerosmith who suffered an unrecoverable vocal cord injury, leading to the band\u0027s retirement from touring."}, {"color": {"background": "#e0e7ff", "border": "#6366f1", "highlight": {"background": "#c7d2fe", "border": "#4f46e5"}}, "created_at": 1756577670, "description": "Vocal cord injury refers to the unrecoverable injury suffered by Steven Tyler that caused Aerosmith to retire from touring.", "entity_id": "Vocal Cord Injury", "entity_type": "category", "file_path": "unknown_source", "font": {"color": "#1e293b", "size": 14}, "id": "Vocal Cord Injury", "label": "Vocal Cord Injury", "shape": "dot", "size": 20, "source_id": 
"chunk-150cfba3862e116efcee671d872955be", "title": "Vocal cord injury refers to the unrecoverable injury suffered by Steven Tyler that caused Aerosmith to retire from touring."}, {"color": {"background": "#e0e7ff", "border": "#6366f1", "highlight": {"background": "#c7d2fe", "border": "#4f46e5"}}, "created_at": 1756577670, "description": "Retirement from touring is the event announced by Aerosmith after 54 years, prompted by Steven Tyler\u0027s vocal cord injury.", "entity_id": "Retirement from Touring", "entity_type": "event", "file_path": "unknown_source", "font": {"color": "#1e293b", "size": 14}, "id": "Retirement from Touring", "label": "Retirement from Touring", "shape": "dot", "size": 20, "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "Retirement from touring is the event announced by Aerosmith after 54 years, prompted by Steven Tyler\u0027s vocal cord injury."}, {"color": {"background": "#e0e7ff", "border": "#6366f1", "highlight": {"background": "#c7d2fe", "border": "#4f46e5"}}, "created_at": 1756577670, "description": "September 2023 is the time when Steven Tyler suffered a fractured larynx.", "entity_id": "September 2023", "entity_type": "event", "file_path": "unknown_source", "font": {"color": "#1e293b", "size": 14}, "id": "September 2023", "label": "September 2023", "shape": "dot", "size": 20, "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "September 2023 is the time when Steven Tyler suffered a fractured larynx."}, {"color": {"background": "#e0e7ff", "border": "#6366f1", "highlight": {"background": "#c7d2fe", "border": "#4f46e5"}}, "created_at": 1756577670, "description": "Fractured larynx is the specific injury Steven Tyler suffered in September 2023, which was unsuccessfully treated.", "entity_id": "Fractured Larynx", "entity_type": "category", "file_path": "unknown_source", "font": {"color": "#1e293b", "size": 14}, "id": "Fractured Larynx", "label": "Fractured Larynx", "shape": "dot", "size": 20, "source_id": 
"chunk-150cfba3862e116efcee671d872955be", "title": "Fractured larynx is the specific injury Steven Tyler suffered in September 2023, which was unsuccessfully treated."}, {"color": {"background": "#e0e7ff", "border": "#6366f1", "highlight": {"background": "#c7d2fe", "border": "#4f46e5"}}, "created_at": 1756577670, "description": "Unsuccessful treatment refers to the medical efforts to heal Steven Tyler\u0027s fractured larynx that did not result in recovery.", "entity_id": "Unsuccessful Treatment", "entity_type": "category", "file_path": "unknown_source", "font": {"color": "#1e293b", "size": 14}, "id": "Unsuccessful Treatment", "label": "Unsuccessful Treatment", "shape": "dot", "size": 20, "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "Unsuccessful treatment refers to the medical efforts to heal Steven Tyler\u0027s fractured larynx that did not result in recovery."}]);
92
+ edges = new vis.DataSet([{"color": {"color": "#6366f1", "highlight": "#4f46e5"}, "created_at": 1756577684, "description": "Steven Tyler is the lead singer of Aerosmith, whose vocal injury led to the band\u0027s retirement from touring.", "file_path": "unknown_source", "font": {"color": "#4b5563", "face": "Arial", "size": 12}, "from": "Aerosmith", "keywords": "band membership,cause of retirement", "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "Steven Tyler is the lead singer of Aerosmith, whose vocal injury led to the band\u0027s retirement from touring.", "to": "Steven Tyler", "width": 4}, {"color": {"color": "#6366f1", "highlight": "#4f46e5"}, "created_at": 1756577685, "description": "Aerosmith\u0027s retirement from touring is due to Steven Tyler\u0027s unrecoverable vocal cord injury.", "file_path": "unknown_source", "font": {"color": "#4b5563", "face": "Arial", "size": 12}, "from": "Aerosmith", "keywords": "cause of retirement,health impact", "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "Aerosmith\u0027s retirement from touring is due to Steven Tyler\u0027s unrecoverable vocal cord injury.", "to": "Vocal Cord Injury", "width": 4}, {"color": {"color": "#6366f1", "highlight": "#4f46e5"}, "created_at": 1756577687, "description": "Aerosmith officially announced their retirement from touring after 54 years.", "file_path": "unknown_source", "font": {"color": "#4b5563", "face": "Arial", "size": 12}, "from": "Aerosmith", "keywords": "band decision,career milestone", "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "Aerosmith officially announced their retirement from touring after 54 years.", "to": "Retirement from Touring", "width": 4}, {"color": {"color": "#6366f1", "highlight": "#4f46e5"}, "created_at": 1756577685, "description": "Steven Tyler suffered a fractured larynx in September 2023.", "file_path": "unknown_source", "font": {"color": "#4b5563", "face": "Arial", "size": 12}, "from": "Steven Tyler", 
"keywords": "injury timing,medical event", "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "Steven Tyler suffered a fractured larynx in September 2023.", "to": "September 2023", "width": 4}, {"color": {"color": "#6366f1", "highlight": "#4f46e5"}, "created_at": 1756577687, "description": "The vocal cord injury is the medical condition affecting Steven Tyler that caused Aerosmith\u0027s retirement.", "file_path": "unknown_source", "font": {"color": "#4b5563", "face": "Arial", "size": 12}, "from": "Steven Tyler", "keywords": "cause-effect,medical condition", "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "The vocal cord injury is the medical condition affecting Steven Tyler that caused Aerosmith\u0027s retirement.", "to": "Vocal Cord Injury", "width": 4}, {"color": {"color": "#6366f1", "highlight": "#4f46e5"}, "created_at": 1756577690, "description": "The fractured larynx is the specific injury Steven Tyler suffered, leading to unsuccessful treatment and vocal cord damage.", "file_path": "unknown_source", "font": {"color": "#4b5563", "face": "Arial", "size": 12}, "from": "Steven Tyler", "keywords": "injury detail,medical diagnosis", "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "The fractured larynx is the specific injury Steven Tyler suffered, leading to unsuccessful treatment and vocal cord damage.", "to": "Fractured Larynx", "width": 4}, {"color": {"color": "#6366f1", "highlight": "#4f46e5"}, "created_at": 1756577691, "description": "Steven Tyler underwent months of unsuccessful treatment for his fractured larynx.", "file_path": "unknown_source", "font": {"color": "#4b5563", "face": "Arial", "size": 12}, "from": "Steven Tyler", "keywords": "health outcome,medical treatment", "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "Steven Tyler underwent months of unsuccessful treatment for his fractured larynx.", "to": "Unsuccessful Treatment", "width": 4}, {"color": {"color": "#6366f1", "highlight": 
"#4f46e5"}, "created_at": 1756577691, "description": "The vocal cord injury is a result of the fractured larynx suffered by Steven Tyler.", "file_path": "unknown_source", "font": {"color": "#4b5563", "face": "Arial", "size": 12}, "from": "Vocal Cord Injury", "keywords": "injury relationship,medical causation", "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "The vocal cord injury is a result of the fractured larynx suffered by Steven Tyler.", "to": "Fractured Larynx", "width": 4}, {"color": {"color": "#6366f1", "highlight": "#4f46e5"}, "created_at": 1756577693, "description": "The unsuccessful treatment was aimed at healing the fractured larynx suffered by Steven Tyler.", "file_path": "unknown_source", "font": {"color": "#4b5563", "face": "Arial", "size": 12}, "from": "Fractured Larynx", "keywords": "injury focus,medical intervention", "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "The unsuccessful treatment was aimed at healing the fractured larynx suffered by Steven Tyler.", "to": "Unsuccessful Treatment", "width": 4}]);
93
+
94
+ nodeColors = {};
95
+ allNodes = nodes.get({ returnType: "Object" });
96
+ for (nodeId in allNodes) {
97
+ nodeColors[nodeId] = allNodes[nodeId].color;
98
+ }
99
+ allEdges = edges.get({ returnType: "Object" });
100
+ // adding nodes and edges to the graph
101
+ data = {nodes: nodes, edges: edges};
102
+
103
+ var options = {
104
+ "configure": {
105
+ "enabled": false
106
+ },
107
+ "edges": {
108
+ "color": {
109
+ "inherit": true
110
+ },
111
+ "smooth": {
112
+ "enabled": true,
113
+ "type": "dynamic"
114
+ }
115
+ },
116
+ "interaction": {
117
+ "dragNodes": true,
118
+ "hideEdgesOnDrag": false,
119
+ "hideNodesOnDrag": false
120
+ },
121
+ "physics": {
122
+ "enabled": true,
123
+ "stabilization": {
124
+ "enabled": true,
125
+ "fit": true,
126
+ "iterations": 1000,
127
+ "onlyDynamicEdges": false,
128
+ "updateInterval": 50
129
+ }
130
+ }
131
+ };
132
+
133
+
134
+
135
+
136
+
137
+
138
+ network = new vis.Network(container, data, options);
139
+
140
+
141
+
142
+
143
+
144
+
145
+
146
+
147
+
148
+
149
+ return network;
150
+
151
+ }
152
+ drawGraph();
153
+ </script>
154
+ </body>
155
+ </html>
llm_graph.py CHANGED
@@ -28,8 +28,8 @@ AZURE_EMBEDDING_API_VERSION = os.environ["AZURE_EMBEDDING_API_VERSION"]
28
  WORKING_DIR = "./cache"
29
 
30
  MODEL_LIST = [
31
- "OpenAI/GPT-4.1-mini",
32
  "EmergentMethods/Phi-3-mini-128k-instruct-graph",
 
33
  ]
34
 
35
  class LLMGraph:
@@ -37,68 +37,52 @@ class LLMGraph:
37
  A class to interact with LLMs for knowledge graph extraction.
38
  """
39
 
40
- async def _initialize_rag(self, embedding_dimension=3072):
41
  """
42
  Initialize the LightRAG instance with the specified embedding dimension.
43
  """
44
 
45
- rag = LightRAG(
46
- working_dir=WORKING_DIR,
47
- llm_model_func=self._llm_model_func,
48
- embedding_func=EmbeddingFunc(
49
- embedding_dim=embedding_dimension,
50
- max_token_size=8192,
51
- func=self._embedding_func,
52
- ),
53
- )
 
54
 
55
- await rag.initialize_storages()
56
- await initialize_pipeline_status()
57
 
58
- return rag
 
 
 
59
 
60
- async def _get_rag(self):
61
- """
62
- Get or initialize the RAG instance (lazy loading).
63
- """
64
-
65
- if self.rag is None:
66
- self.rag = await self._initialize_rag()
67
 
68
- return self.rag
69
 
70
- def __init__(self, model="OpenAI/GPT-4.1-mini"):
71
  """
72
  Initialize the LLMGraph with a specified model.
73
  """
74
 
75
- if model not in MODEL_LIST:
76
- raise ValueError(f"Model must be one of {MODEL_LIST}")
77
-
78
- self.model_name = model
79
-
80
- if model == MODEL_LIST[0]:
81
- # Use Azure OpenAI for GPT-4.1-mini
82
- self.llm_client = AzureOpenAI(
83
- api_key=AZURE_OPENAI_API_KEY,
84
- api_version=AZURE_OPENAI_API_VERSION,
85
- azure_endpoint=AZURE_OPENAI_ENDPOINT,
86
- )
87
-
88
- self.emb_client = AzureOpenAI(
89
- api_key=AZURE_OPENAI_API_KEY,
90
- api_version=AZURE_EMBEDDING_API_VERSION,
91
- azure_endpoint=AZURE_OPENAI_ENDPOINT,
92
- )
93
-
94
- self.rag = None # Initialize as None for lazy loading
95
- else:
96
- # Use Hugging Face Inference API for Phi-3-mini-128k-instruct-graph
97
- self.hf_client = InferenceClient(
98
- model=endpoint_url,
99
- token=api_token
100
- )
101
 
 
 
102
  def _generate(self, messages):
103
  """
104
  Generate a response from the model based on the provided messages.
@@ -167,22 +151,22 @@ class LLMGraph:
167
 
168
  return messages
169
 
170
- async def extract(self, text):
171
  """
172
  Extract knowledge graph from text
173
  """
174
 
175
- generated_text = ""
176
 
177
- if self.model_name == MODEL_LIST[0]:
178
- # Use LightRAG with Azure OpenAI
179
- rag = await self._get_rag()
180
- rag.insert(text)
181
- else:
182
  # Use Hugging Face Inference API with Phi-3-mini-128k-instruct-graph
183
  messages = self._get_messages(text)
184
  generated_text = self._generate(messages)
185
-
 
 
 
 
186
  return generated_text
187
 
188
  async def _llm_model_func(self, prompt, system_prompt=None, history_messages=[], **kwargs) -> str:
@@ -190,17 +174,20 @@ class LLMGraph:
190
  Call the Azure OpenAI chat completion endpoint with the given prompt and optional system prompt and history messages.
191
  """
192
 
193
- messages = []
 
 
 
 
194
 
 
195
  if system_prompt:
196
  messages.append({"role": "system", "content": system_prompt})
197
-
198
  if history_messages:
199
  messages.extend(history_messages)
200
-
201
  messages.append({"role": "user", "content": prompt})
202
 
203
- chat_completion = self.llm_client.chat.completions.create(
204
  model=AZURE_OPENAI_DEPLOYMENT,
205
  messages=messages,
206
  temperature=kwargs.get("temperature", 0),
@@ -215,7 +202,19 @@ class LLMGraph:
215
  Call the Azure OpenAI embeddings endpoint with the given texts.
216
  """
217
 
218
- embedding = self.emb_client.embeddings.create(model=AZURE_EMBEDDING_DEPLOYMENT, input=texts)
 
 
 
 
 
 
219
  embeddings = [item.embedding for item in embedding.data]
220
 
221
  return np.array(embeddings)
 
 
 
 
 
 
 
28
  WORKING_DIR = "./cache"
29
 
30
  MODEL_LIST = [
 
31
  "EmergentMethods/Phi-3-mini-128k-instruct-graph",
32
+ "OpenAI/GPT-4.1-mini",
33
  ]
34
 
35
  class LLMGraph:
 
37
  A class to interact with LLMs for knowledge graph extraction.
38
  """
39
 
40
+ async def initialize_rag(self, embedding_dimension=3072):
41
  """
42
  Initialize the LightRAG instance with the specified embedding dimension.
43
  """
44
 
45
+ if self.rag is None:
46
+ self.rag = LightRAG(
47
+ working_dir=WORKING_DIR,
48
+ llm_model_func=self._llm_model_func,
49
+ embedding_func=EmbeddingFunc(
50
+ embedding_dim=embedding_dimension,
51
+ max_token_size=8192,
52
+ func=self._embedding_func,
53
+ ),
54
+ )
55
 
56
+ await self.rag.initialize_storages()
57
+ await initialize_pipeline_status()
58
 
59
+ # async def test_responses(self):
60
+ # """
61
+ # Test the LLM and embedding functions.
62
+ # """
63
 
64
+ # result = await self._llm_model_func("How are you?")
65
+ # print("Response from llm_model_func: ", result)
66
+
67
+ # result = await self._embedding_func(["How are you?"])
68
+ # print("Result of embedding_func: ", result.shape)
69
+ # print("Dimension of embedding: ", result.shape[1])
 
70
 
71
+ # return True
72
 
73
+ def __init__(self):
74
  """
75
  Initialize the LLMGraph with the Hugging Face inference client; the LightRAG instance is created lazily.
76
  """
77
 
78
+ # Hugging Face Inference API for Phi-3-mini-128k-instruct-graph
79
+ self.hf_client = InferenceClient(
80
+ model=endpoint_url,
81
+ token=api_token
82
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
 
84
+ self.rag = None # Lazy loading of RAG instance
85
+
86
  def _generate(self, messages):
87
  """
88
  Generate a response from the model based on the provided messages.
 
151
 
152
  return messages
153
 
154
+ def extract(self, text, model_name=MODEL_LIST[0]) -> str:
155
  """
156
  Extract knowledge graph from text
157
  """
158
 
159
+ generated_text = "This is a placeholder response."
160
 
161
+ if model_name == MODEL_LIST[0]:
 
 
 
 
162
  # Use Hugging Face Inference API with Phi-3-mini-128k-instruct-graph
163
  messages = self._get_messages(text)
164
  generated_text = self._generate(messages)
165
+ else:
166
+ # Use LightRAG with Azure OpenAI
167
+ self.rag.insert(text) # Insert the text into the RAG storage
168
+ # TODO: Extract JSON format of the knowledge graph
169
+
170
  return generated_text
171
 
172
  async def _llm_model_func(self, prompt, system_prompt=None, history_messages=[], **kwargs) -> str:
 
174
  Call the Azure OpenAI chat completion endpoint with the given prompt and optional system prompt and history messages.
175
  """
176
 
177
+ llm_client = AzureOpenAI(
178
+ api_key=AZURE_OPENAI_API_KEY,
179
+ api_version=AZURE_OPENAI_API_VERSION,
180
+ azure_endpoint=AZURE_OPENAI_ENDPOINT,
181
+ )
182
 
183
+ messages = []
184
  if system_prompt:
185
  messages.append({"role": "system", "content": system_prompt})
 
186
  if history_messages:
187
  messages.extend(history_messages)
 
188
  messages.append({"role": "user", "content": prompt})
189
 
190
+ chat_completion = llm_client.chat.completions.create(
191
  model=AZURE_OPENAI_DEPLOYMENT,
192
  messages=messages,
193
  temperature=kwargs.get("temperature", 0),
 
202
  Call the Azure OpenAI embeddings endpoint with the given texts.
203
  """
204
 
205
+ emb_client = AzureOpenAI(
206
+ api_key=AZURE_OPENAI_API_KEY,
207
+ api_version=AZURE_EMBEDDING_API_VERSION,
208
+ azure_endpoint=AZURE_OPENAI_ENDPOINT,
209
+ )
210
+
211
+ embedding = emb_client.embeddings.create(model=AZURE_EMBEDDING_DEPLOYMENT, input=texts)
212
  embeddings = [item.embedding for item in embedding.data]
213
 
214
  return np.array(embeddings)
215
+
216
+ # if __name__ == "__main__":
217
+ # # Initialize the LLMGraph model
218
+ # model = LLMGraph()
219
+ # asyncio.run(model.initialize_rag()) # Ensure RAG is initialized
220
+ # print("LLMGraph model initialized.")
visualize.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import networkx as nx
2
+ import rapidjson
3
+ import warnings
4
+ import os
5
+
6
+ from pyvis.network import Network
7
+
8
+ warnings.filterwarnings("ignore")
9
+
10
+ # Load the GraphML file
11
+ file_path = "./cache/graph_chunk_entity_relation.graphml"
12
+
13
+ assert os.path.exists(file_path), f"File {file_path} does not exist."
14
+ G = nx.read_graphml(file_path)
15
+
16
+ def create_graph(json_data):
17
+ """
18
+ Create interactive knowledge graph using pyvis
19
+ """
20
+
21
+ G = nx.Graph()
22
+
23
+ # Add nodes with tooltips and error handling for missing keys
24
+ for node in json_data['nodes']:
25
+ # Get node type with fallback
26
+ type = node.get("type", "Entity")
27
+
28
+ # Get detailed type with fallback
29
+ detailed_type = node.get("detailed_type", type)
30
+
31
+ # Use node ID and type info for the tooltip
32
+ G.add_node(node['id'], title=f"{type}: {detailed_type}")
33
+
34
+ # Add edges with labels
35
+ for edge in json_data['edges']:
36
+ # Check if the required keys exist
37
+ if 'from' in edge and 'to' in edge:
38
+ label = edge.get('label', 'related')
39
+ G.add_edge(edge['from'], edge['to'], title=label, label=label)
40
+
41
+ # Create network visualization
42
+ network = Network(
43
+ width="100%",
44
+ height="100vh",
45
+ notebook=False,
46
+ bgcolor="#f8fafc",
47
+ font_color="#1e293b"
48
+ )
49
+
50
+ # Configure network display
51
+ network.from_nx(G)
52
+
53
+ # Customize node appearance
54
+ for node in network.nodes:
55
+ node['color'] = {'background': '#e0e7ff', 'border': '#6366f1', 'highlight': {'background': '#c7d2fe', 'border': '#4f46e5'}}
56
+ node['font'] = {'size': 14, 'color': '#1e293b'}
57
+ node['shape'] = 'dot'
58
+ node['size'] = 20
59
+
60
+ # Customize edge appearance
61
+ for edge in network.edges:
62
+ edge['width'] = 4
63
+ edge['color'] = {'color': '#6366f1', 'highlight': '#4f46e5'}
64
+ edge['font'] = {'size': 12, 'color': '#4b5563', 'face': 'Arial'}
65
+
66
+ # Save and display the network
67
+ filename_out = "knowledge_graph.html"
68
+ network.show(filename_out)
69
+ print(f"Knowledge graph saved to {filename_out}")
70
+
71
+ # Convert the graph to node-link data format
72
+ js_graph = nx.node_link_data(G)
73
+ js_data = rapidjson.loads(rapidjson.dumps(js_graph))
74
+ # print(js_data)
75
+
76
+ create_graph(js_data)
77
+
78
+ # # Create a Pyvis network
79
+ # network = Network(width="100%",
80
+ # height="100vh",
81
+ # notebook=True,
82
+ # bgcolor="#f8fafc",
83
+ # font_color="#1e293b")
84
+
85
+ # # Convert NetworkX graph to Pyvis network
86
+ # network.from_nx(G)
87
+
88
+ # # Add colors and title to nodes
89
+ # for node in network.nodes:
90
+ # if "description" in node:
91
+ # node["title"] = node["description"]
92
+
93
+ # node['color'] = {'background': '#e0e7ff', 'border': '#6366f1', 'highlight': {'background': '#c7d2fe', 'border': '#4f46e5'}}
94
+ # node['font'] = {'size': 14, 'color': '#1e293b'}
95
+ # node['shape'] = 'dot'
96
+ # node['size'] = 20
97
+
98
+ # # Add title to edges
99
+ # for edge in network.edges:
100
+ # if "description" in edge:
101
+ # edge["title"] = edge["description"]
102
+
103
+ # edge['width'] = 4
104
+ # edge['color'] = {'color': '#6366f1', 'highlight': '#4f46e5'}
105
+ # edge['font'] = {'size': 12, 'color': '#4b5563', 'face': 'Arial'}
106
+
107
+ # # Save and display the network
108
+ # filename_out = "knowledge_graph.html"
109
+ # network.show(filename_out)
110
+ # print(f"Knowledge graph saved to {filename_out}")