Added LightRAG KG
- app.py +36 -23
- knowledge_graph.html +155 -0
- llm_graph.py +61 -62
- visualize.py +110 -0
app.py
CHANGED

@@ -1,21 +1,23 @@
-# import spaces
 import os
 import spacy
 import pickle
 import random
 import logging
-import rapidjson
 import asyncio
+import rapidjson

 import gradio as gr
 import networkx as nx

+# from dotenv import load_dotenv
 from llm_graph import LLMGraph, MODEL_LIST
+
 from pyvis.network import Network
 from spacy import displacy
 from spacy.tokens import Span

 logging.basicConfig(level=logging.INFO)
+# load_dotenv()

 # Constants
 TITLE = "🌐 Text2Graph: Extract Knowledge Graphs from Natural Language"

@@ -35,6 +37,9 @@ EXAMPLE_CACHE_FILE = os.path.join(CACHE_DIR, "first_example_cache.pkl")
 # Create cache directory if it doesn't exist
 os.makedirs(CACHE_DIR, exist_ok=True)

+# Initialize the LLMGraph model
+model = LLMGraph()
+
 def get_random_light_color():
     """
     Color utilities

@@ -57,19 +62,17 @@ def handle_text(text=""):

     return " ".join(text.split())

-
-async def extract_kg(text="", model=None):
+def extract_kg(text="", model_name=None):
     """
     Extract knowledge graph from text
     """

     # Catch empty text
-    if not text or not model:
+    if not text or not model_name:
         raise gr.Error("⚠️ Both text and model must be provided!")
     try:
+        result = model.extract(text, model_name)
+
         return rapidjson.loads(result)
     except Exception as e:
         raise gr.Error(f"❌ Extraction error: {str(e)}")

@@ -223,17 +226,19 @@ def create_graph(json_data):
     allow-top-navigation-by-user-activation allow-downloads" allowfullscreen=""
     allowpaymentrequest="" frameborder="0" srcdoc='{html}'></iframe>"""

-async def process_and_visualize(text, model, progress=gr.Progress()):
+def process_and_visualize(text, model_name, progress=gr.Progress()):
     """
     Process text and visualize knowledge graph and entities
     """

-    if not text or not model:
+    if not text or not model_name:
         raise gr.Error("⚠️ Both text and model must be provided!")

     # Check if we're processing the first example for caching
     is_first_example = text == EXAMPLES[0][0]
+
+    asyncio.run(model.initialize_rag())  # Ensure RAG is initialized
+
     # Try to load from cache if it's the first example
     if is_first_example and os.path.exists(EXAMPLE_CACHE_FILE):
         try:

@@ -249,7 +254,7 @@ async def process_and_visualize(text, model, progress=gr.Progress()):

     # Continue with normal processing if cache fails
     progress(0, desc="Starting extraction...")
-    json_data = await extract_kg(text, model)
+    json_data = extract_kg(text, model_name)

     progress(0.5, desc="Creating entity visualization...")
     entities_viz = create_custom_entity_viz(json_data, text)

@@ -301,7 +306,7 @@ EXAMPLES = [
     les buis et à arroser les rosiers, perpétuant ainsi une tradition d'excellence horticole qui fait la fierté de la capitale française.""")],
 ]

-async def generate_first_example_cache():
+def generate_first_example():
     """
     Generate cache for the first example if it doesn't exist when the app starts
     """

@@ -312,10 +317,10 @@ async def generate_first_example_cache():

     try:
         text = EXAMPLES[0][0]
+        model_name = MODEL_LIST[0] if MODEL_LIST else None

         # Extract data
+        json_data = extract_kg(text, model_name)
         entities_viz = create_custom_entity_viz(json_data, text)
         graph_html = create_graph(json_data)

@@ -360,7 +365,7 @@ def create_ui():
     """

     # Try to generate/load the first example cache
+    first_example = generate_first_example()

     with gr.Blocks(css=CUSTOM_CSS, title=TITLE) as demo:
         # Header

@@ -430,14 +435,14 @@ def create_ui():
         )

         # Set initial values from cache if available
+        if first_example:
             # Use this to set initial values when the app loads
             demo.load(
                 lambda: [
+                    first_example["graph_html"],
+                    first_example["entities_viz"],
+                    first_example["json_data"],
+                    first_example["stats"]
                 ],
                 inputs=None,
                 outputs=[output_graph, output_entity_viz, output_json, stats_output]

@@ -450,5 +455,13 @@ def create_ui():

     return demo

+def main():
+    """
+    Main function to run the Gradio app
+    """
+
+    demo = create_ui()
+    demo.launch(share=False)
+
+if __name__ == "__main__":
+    main()
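Note on the async change in app.py: `extract_kg` and `process_and_visualize` drop their `async` qualifiers, and the one remaining coroutine, `model.initialize_rag()`, is driven with `asyncio.run()` from inside the synchronous handler. Below is a minimal sketch of that pattern (`DummyModel` and `handler` are illustrative stand-ins, not part of the commit); keep in mind that `asyncio.run()` raises RuntimeError when called from a thread that is already running an event loop:

import asyncio

class DummyModel:
    def __init__(self):
        self.rag = None  # nothing async happens at construction time

    async def initialize_rag(self):
        # stand-in for the real async storage setup in llm_graph.py
        if self.rag is None:
            await asyncio.sleep(0)
            self.rag = object()

model = DummyModel()

def handler(text):
    # synchronous Gradio-style handler driving the async initializer;
    # asyncio.run() spins up and tears down a fresh event loop per call
    asyncio.run(model.initialize_rag())
    return text.upper()

print(handler("hello"))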
knowledge_graph.html
ADDED

@@ -0,0 +1,155 @@
<html>
<head>
<meta charset="utf-8">

<script src="lib/bindings/utils.js"></script>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/vis-network/9.1.2/dist/dist/vis-network.min.css" integrity="sha512-WgxfT5LWjfszlPHXRmBWHkV2eceiWTOBvrKCNbdgDYTHrT2AeLCGbF4sZlZw3UMN3WtL0tGUoIAKsu8mllg/XA==" crossorigin="anonymous" referrerpolicy="no-referrer" />
<script src="https://cdnjs.cloudflare.com/ajax/libs/vis-network/9.1.2/dist/vis-network.min.js" integrity="sha512-LnvoEWDFrqGHlHmDD2101OrLcbsfkrzoSpvtSQtxK3RMnRV0eOkhhBN2dXHKRrUU8p2DGRTk35n4O8nWSVe1mQ==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>

<center>
<h1></h1>
</center>

<!-- <link rel="stylesheet" href="../node_modules/vis/dist/vis.min.css" type="text/css" />
<script type="text/javascript" src="../node_modules/vis/dist/vis.js"> </script>-->
<link
    href="https://cdn.jsdelivr.net/npm/bootstrap@5.0.0-beta3/dist/css/bootstrap.min.css"
    rel="stylesheet"
    integrity="sha384-eOJMYsd53ii+scO/bJGFsiCZc+5NDVN2yr8+0RDqr0Ql0h+rP48ckxlpbzKgwra6"
    crossorigin="anonymous"
/>
<script
    src="https://cdn.jsdelivr.net/npm/bootstrap@5.0.0-beta3/dist/js/bootstrap.bundle.min.js"
    integrity="sha384-JEW9xMcG8R+pH31jmWH6WWP0WintQrMb4s7ZOdauHnUtxwoG2vI5DkLtS3qm9Ekf"
    crossorigin="anonymous"
></script>

<center>
<h1></h1>
</center>
<style type="text/css">

    #mynetwork {
        width: 100%;
        height: 100vh;
        background-color: #f8fafc;
        border: 1px solid lightgray;
        position: relative;
        float: left;
    }

</style>
</head>

<body>
    <div class="card" style="width: 100%">

        <div id="mynetwork" class="card-body"></div>
    </div>

    <script type="text/javascript">

        // initialize global variables.
        var edges;
        var nodes;
        var allNodes;
        var allEdges;
        var nodeColors;
        var originalNodes;
        var network;
        var container;
        var options, data;
        var filter = {
            item : '',
            property : '',
            value : []
        };

        // This method is responsible for drawing the graph, returns the drawn network
        function drawGraph() {
            var container = document.getElementById('mynetwork');

            // parsing and collecting nodes and edges from the python
            nodes = new vis.DataSet([{"color": {"background": "#e0e7ff", "border": "#6366f1", "highlight": {"background": "#c7d2fe", "border": "#4f46e5"}}, "created_at": 1756577670, "description": "Aerosmith is a legendary rock band that has been active for 54 years and has officially announced their retirement from touring.", "entity_id": "Aerosmith", "entity_type": "organization", "file_path": "unknown_source", "font": {"color": "#1e293b", "size": 14}, "id": "Aerosmith", "label": "Aerosmith", "shape": "dot", "size": 20, "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "Aerosmith is a legendary rock band that has been active for 54 years and has officially announced their retirement from touring."}, {"color": {"background": "#e0e7ff", "border": "#6366f1", "highlight": {"background": "#c7d2fe", "border": "#4f46e5"}}, "created_at": 1756577670, "description": "Steven Tyler is the lead singer of Aerosmith who suffered an unrecoverable vocal cord injury, leading to the band\u0027s retirement from touring.", "entity_id": "Steven Tyler", "entity_type": "person", "file_path": "unknown_source", "font": {"color": "#1e293b", "size": 14}, "id": "Steven Tyler", "label": "Steven Tyler", "shape": "dot", "size": 20, "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "Steven Tyler is the lead singer of Aerosmith who suffered an unrecoverable vocal cord injury, leading to the band\u0027s retirement from touring."}, {"color": {"background": "#e0e7ff", "border": "#6366f1", "highlight": {"background": "#c7d2fe", "border": "#4f46e5"}}, "created_at": 1756577670, "description": "Vocal cord injury refers to the unrecoverable injury suffered by Steven Tyler that caused Aerosmith to retire from touring.", "entity_id": "Vocal Cord Injury", "entity_type": "category", "file_path": "unknown_source", "font": {"color": "#1e293b", "size": 14}, "id": "Vocal Cord Injury", "label": "Vocal Cord Injury", "shape": "dot", "size": 20, "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "Vocal cord injury refers to the unrecoverable injury suffered by Steven Tyler that caused Aerosmith to retire from touring."}, {"color": {"background": "#e0e7ff", "border": "#6366f1", "highlight": {"background": "#c7d2fe", "border": "#4f46e5"}}, "created_at": 1756577670, "description": "Retirement from touring is the event announced by Aerosmith after 54 years, prompted by Steven Tyler\u0027s vocal cord injury.", "entity_id": "Retirement from Touring", "entity_type": "event", "file_path": "unknown_source", "font": {"color": "#1e293b", "size": 14}, "id": "Retirement from Touring", "label": "Retirement from Touring", "shape": "dot", "size": 20, "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "Retirement from touring is the event announced by Aerosmith after 54 years, prompted by Steven Tyler\u0027s vocal cord injury."}, {"color": {"background": "#e0e7ff", "border": "#6366f1", "highlight": {"background": "#c7d2fe", "border": "#4f46e5"}}, "created_at": 1756577670, "description": "September 2023 is the time when Steven Tyler suffered a fractured larynx.", "entity_id": "September 2023", "entity_type": "event", "file_path": "unknown_source", "font": {"color": "#1e293b", "size": 14}, "id": "September 2023", "label": "September 2023", "shape": "dot", "size": 20, "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "September 2023 is the time when Steven Tyler suffered a fractured larynx."}, {"color": {"background": "#e0e7ff", "border": "#6366f1", "highlight": {"background": "#c7d2fe", "border": "#4f46e5"}}, "created_at": 1756577670, "description": "Fractured larynx is the specific injury Steven Tyler suffered in September 2023, which was unsuccessfully treated.", "entity_id": "Fractured Larynx", "entity_type": "category", "file_path": "unknown_source", "font": {"color": "#1e293b", "size": 14}, "id": "Fractured Larynx", "label": "Fractured Larynx", "shape": "dot", "size": 20, "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "Fractured larynx is the specific injury Steven Tyler suffered in September 2023, which was unsuccessfully treated."}, {"color": {"background": "#e0e7ff", "border": "#6366f1", "highlight": {"background": "#c7d2fe", "border": "#4f46e5"}}, "created_at": 1756577670, "description": "Unsuccessful treatment refers to the medical efforts to heal Steven Tyler\u0027s fractured larynx that did not result in recovery.", "entity_id": "Unsuccessful Treatment", "entity_type": "category", "file_path": "unknown_source", "font": {"color": "#1e293b", "size": 14}, "id": "Unsuccessful Treatment", "label": "Unsuccessful Treatment", "shape": "dot", "size": 20, "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "Unsuccessful treatment refers to the medical efforts to heal Steven Tyler\u0027s fractured larynx that did not result in recovery."}]);
            edges = new vis.DataSet([{"color": {"color": "#6366f1", "highlight": "#4f46e5"}, "created_at": 1756577684, "description": "Steven Tyler is the lead singer of Aerosmith, whose vocal injury led to the band\u0027s retirement from touring.", "file_path": "unknown_source", "font": {"color": "#4b5563", "face": "Arial", "size": 12}, "from": "Aerosmith", "keywords": "band membership,cause of retirement", "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "Steven Tyler is the lead singer of Aerosmith, whose vocal injury led to the band\u0027s retirement from touring.", "to": "Steven Tyler", "width": 4}, {"color": {"color": "#6366f1", "highlight": "#4f46e5"}, "created_at": 1756577685, "description": "Aerosmith\u0027s retirement from touring is due to Steven Tyler\u0027s unrecoverable vocal cord injury.", "file_path": "unknown_source", "font": {"color": "#4b5563", "face": "Arial", "size": 12}, "from": "Aerosmith", "keywords": "cause of retirement,health impact", "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "Aerosmith\u0027s retirement from touring is due to Steven Tyler\u0027s unrecoverable vocal cord injury.", "to": "Vocal Cord Injury", "width": 4}, {"color": {"color": "#6366f1", "highlight": "#4f46e5"}, "created_at": 1756577687, "description": "Aerosmith officially announced their retirement from touring after 54 years.", "file_path": "unknown_source", "font": {"color": "#4b5563", "face": "Arial", "size": 12}, "from": "Aerosmith", "keywords": "band decision,career milestone", "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "Aerosmith officially announced their retirement from touring after 54 years.", "to": "Retirement from Touring", "width": 4}, {"color": {"color": "#6366f1", "highlight": "#4f46e5"}, "created_at": 1756577685, "description": "Steven Tyler suffered a fractured larynx in September 2023.", "file_path": "unknown_source", "font": {"color": "#4b5563", "face": "Arial", "size": 12}, "from": "Steven Tyler", "keywords": "injury timing,medical event", "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "Steven Tyler suffered a fractured larynx in September 2023.", "to": "September 2023", "width": 4}, {"color": {"color": "#6366f1", "highlight": "#4f46e5"}, "created_at": 1756577687, "description": "The vocal cord injury is the medical condition affecting Steven Tyler that caused Aerosmith\u0027s retirement.", "file_path": "unknown_source", "font": {"color": "#4b5563", "face": "Arial", "size": 12}, "from": "Steven Tyler", "keywords": "cause-effect,medical condition", "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "The vocal cord injury is the medical condition affecting Steven Tyler that caused Aerosmith\u0027s retirement.", "to": "Vocal Cord Injury", "width": 4}, {"color": {"color": "#6366f1", "highlight": "#4f46e5"}, "created_at": 1756577690, "description": "The fractured larynx is the specific injury Steven Tyler suffered, leading to unsuccessful treatment and vocal cord damage.", "file_path": "unknown_source", "font": {"color": "#4b5563", "face": "Arial", "size": 12}, "from": "Steven Tyler", "keywords": "injury detail,medical diagnosis", "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "The fractured larynx is the specific injury Steven Tyler suffered, leading to unsuccessful treatment and vocal cord damage.", "to": "Fractured Larynx", "width": 4}, {"color": {"color": "#6366f1", "highlight": "#4f46e5"}, "created_at": 1756577691, "description": "Steven Tyler underwent months of unsuccessful treatment for his fractured larynx.", "file_path": "unknown_source", "font": {"color": "#4b5563", "face": "Arial", "size": 12}, "from": "Steven Tyler", "keywords": "health outcome,medical treatment", "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "Steven Tyler underwent months of unsuccessful treatment for his fractured larynx.", "to": "Unsuccessful Treatment", "width": 4}, {"color": {"color": "#6366f1", "highlight": "#4f46e5"}, "created_at": 1756577691, "description": "The vocal cord injury is a result of the fractured larynx suffered by Steven Tyler.", "file_path": "unknown_source", "font": {"color": "#4b5563", "face": "Arial", "size": 12}, "from": "Vocal Cord Injury", "keywords": "injury relationship,medical causation", "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "The vocal cord injury is a result of the fractured larynx suffered by Steven Tyler.", "to": "Fractured Larynx", "width": 4}, {"color": {"color": "#6366f1", "highlight": "#4f46e5"}, "created_at": 1756577693, "description": "The unsuccessful treatment was aimed at healing the fractured larynx suffered by Steven Tyler.", "file_path": "unknown_source", "font": {"color": "#4b5563", "face": "Arial", "size": 12}, "from": "Fractured Larynx", "keywords": "injury focus,medical intervention", "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "The unsuccessful treatment was aimed at healing the fractured larynx suffered by Steven Tyler.", "to": "Unsuccessful Treatment", "width": 4}]);

            nodeColors = {};
            allNodes = nodes.get({ returnType: "Object" });
            for (nodeId in allNodes) {
                nodeColors[nodeId] = allNodes[nodeId].color;
            }
            allEdges = edges.get({ returnType: "Object" });
            // adding nodes and edges to the graph
            data = {nodes: nodes, edges: edges};

            var options = {
                "configure": {
                    "enabled": false
                },
                "edges": {
                    "color": {
                        "inherit": true
                    },
                    "smooth": {
                        "enabled": true,
                        "type": "dynamic"
                    }
                },
                "interaction": {
                    "dragNodes": true,
                    "hideEdgesOnDrag": false,
                    "hideNodesOnDrag": false
                },
                "physics": {
                    "enabled": true,
                    "stabilization": {
                        "enabled": true,
                        "fit": true,
                        "iterations": 1000,
                        "onlyDynamicEdges": false,
                        "updateInterval": 50
                    }
                }
            };

            network = new vis.Network(container, data, options);

            return network;
        }
        drawGraph();
    </script>
</body>
</html>
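knowledge_graph.html is generated pyvis output: each entry in the embedded `vis.DataSet` carries LightRAG's entity metadata (`entity_type`, `description`, `source_id`) alongside the styling dictionaries. A rough sketch of how a page like this is produced with pyvis (payloads abbreviated; the colors mirror the ones embedded above, and the exact `show()` signature may vary across pyvis versions):

from pyvis.network import Network

# Build a small vis-network page similar to the file above.
net = Network(width="100%", height="100vh", bgcolor="#f8fafc", font_color="#1e293b")

net.add_node("Aerosmith", label="Aerosmith", shape="dot", size=20,
             title="Aerosmith is a legendary rock band ...",
             color={"background": "#e0e7ff", "border": "#6366f1"})
net.add_node("Steven Tyler", label="Steven Tyler", shape="dot", size=20)
net.add_edge("Aerosmith", "Steven Tyler", width=4,
             title="Steven Tyler is the lead singer of Aerosmith")

net.show("knowledge_graph.html", notebook=False)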
llm_graph.py
CHANGED

@@ -28,8 +28,8 @@ AZURE_EMBEDDING_API_VERSION = os.environ["AZURE_EMBEDDING_API_VERSION"]
 WORKING_DIR = "./cache"

 MODEL_LIST = [
-    "OpenAI/GPT-4.1-mini",
     "EmergentMethods/Phi-3-mini-128k-instruct-graph",
+    "OpenAI/GPT-4.1-mini",
 ]

 class LLMGraph:

@@ -37,68 +37,52 @@ class LLMGraph:
     A class to interact with LLMs for knowledge graph extraction.
     """

+    async def initialize_rag(self, embedding_dimension=3072):
        """
        Initialize the LightRAG instance with the specified embedding dimension.
        """

+        if self.rag is None:
+            self.rag = LightRAG(
+                working_dir=WORKING_DIR,
+                llm_model_func=self._llm_model_func,
+                embedding_func=EmbeddingFunc(
+                    embedding_dim=embedding_dimension,
+                    max_token_size=8192,
+                    func=self._embedding_func,
+                ),
+            )

+            await self.rag.initialize_storages()
+            await initialize_pipeline_status()

+    # async def test_responses(self):
+    #     """
+    #     Test the LLM and embedding functions.
+    #     """

+    #     result = await self._llm_model_func("How are you?")
+    #     print("Response from llm_model_func: ", result)
+
+    #     result = await self._embedding_func(["How are you?"])
+    #     print("Result of embedding_func: ", result.shape)
+    #     print("Dimension of embedding: ", result.shape[1])

+    #     return True

-        self.rag = await self._initialize_rag()

-    def __init__(self, model=MODEL_LIST[0]):
+    def __init__(self):
        """
        Initialize the Phi3InstructGraph with a specified model.
        """

-        if model == MODEL_LIST[0]:
-            # Use Azure OpenAI for GPT-4.1-mini
-            self.llm_client = AzureOpenAI(
-                api_key=AZURE_OPENAI_API_KEY,
-                api_version=AZURE_OPENAI_API_VERSION,
-                azure_endpoint=AZURE_OPENAI_ENDPOINT,
-            )
-
-            self.emb_client = AzureOpenAI(
-                api_key=AZURE_OPENAI_API_KEY,
-                api_version=AZURE_EMBEDDING_API_VERSION,
-                azure_endpoint=AZURE_OPENAI_ENDPOINT,
-            )
-
-            self.rag = None  # Initialize as None for lazy loading
-        else:
-            # Use Hugging Face Inference API for Phi-3-mini-128k-instruct-graph
-            self.hf_client = InferenceClient(
-                model=endpoint_url,
-                token=api_token
-            )
+        # Hugging Face Inference API for Phi-3-mini-128k-instruct-graph
+        self.hf_client = InferenceClient(
+            model=endpoint_url,
+            token=api_token
+        )

+        self.rag = None  # Lazy loading of RAG instance
+
     def _generate(self, messages):
        """
        Generate a response from the model based on the provided messages.

@@ -167,22 +151,22 @@ class LLMGraph:

        return messages

+    def extract(self, text, model_name=MODEL_LIST[0]) -> str:
        """
        Extract knowledge graph from text
        """

-        generated_text = ""
+        generated_text = "This is a placeholder response."

-        if model == MODEL_LIST[0]:
-            # Use LightRAG with Azure OpenAI
-            rag = await self._get_rag()
-            rag.insert(text)
-        else:
+        if model_name == MODEL_LIST[0]:
            # Use Hugging Face Inference API with Phi-3-mini-128k-instruct-graph
            messages = self._get_messages(text)
            generated_text = self._generate(messages)
+        else:
+            # Use LightRAG with Azure OpenAI
+            self.rag.insert(text)  # Insert the text into the RAG storage
+            # TODO: Extract JSON format of the knowledge graph
+
        return generated_text

@@ -190,17 +174,20 @@ class LLMGraph:
        Call the Azure OpenAI chat completion endpoint with the given prompt and optional system prompt and history messages.
        """

+        llm_client = AzureOpenAI(
+            api_key=AZURE_OPENAI_API_KEY,
+            api_version=AZURE_OPENAI_API_VERSION,
+            azure_endpoint=AZURE_OPENAI_ENDPOINT,
+        )

+        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
-
        if history_messages:
            messages.extend(history_messages)
-
        messages.append({"role": "user", "content": prompt})

-        chat_completion = self.llm_client.chat.completions.create(
+        chat_completion = llm_client.chat.completions.create(
            model=AZURE_OPENAI_DEPLOYMENT,
            messages=messages,
            temperature=kwargs.get("temperature", 0),

@@ -215,7 +202,19 @@ class LLMGraph:
        Call the Azure OpenAI embeddings endpoint with the given texts.
        """

+        emb_client = AzureOpenAI(
+            api_key=AZURE_OPENAI_API_KEY,
+            api_version=AZURE_EMBEDDING_API_VERSION,
+            azure_endpoint=AZURE_OPENAI_ENDPOINT,
+        )
+
-        embedding = self.emb_client.embeddings.create(model=AZURE_EMBEDDING_DEPLOYMENT, input=texts)
+        embedding = emb_client.embeddings.create(model=AZURE_EMBEDDING_DEPLOYMENT, input=texts)
        embeddings = [item.embedding for item in embedding.data]

        return np.array(embeddings)
+
+# if __name__ == "__main__":
+#     # Initialize the LLMGraph model
+#     model = LLMGraph()
+#     asyncio.run(model.initialize_rag())  # Ensure RAG is initialized
+#     print("LLMGraph model initialized.")
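The reworked `initialize_rag` is idempotent: the `if self.rag is None` guard makes repeated awaits no-ops, so app.py can call `asyncio.run(model.initialize_rag())` on every request without rebuilding storage. A usage sketch, assuming the Azure and Hugging Face environment variables that llm_graph.py reads at import time are set:

import asyncio
from llm_graph import LLMGraph, MODEL_LIST

async def demo():
    model = LLMGraph()            # sync constructor: only the HF client is created
    await model.initialize_rag()  # first call builds LightRAG and its storages
    await model.initialize_rag()  # second call is a no-op thanks to the guard

    # MODEL_LIST[0] now routes to the Phi-3 Inference API branch of extract()
    print(model.extract("Aerosmith announced their retirement.", MODEL_LIST[0]))

asyncio.run(demo())

One trade-off worth noting: the AzureOpenAI clients are now constructed inside `_llm_model_func` and `_embedding_func` on every call rather than once in `__init__`, which keeps the constructor synchronous but re-creates a client per request.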
visualize.py
ADDED

@@ -0,0 +1,110 @@
import networkx as nx
import rapidjson
import warnings
import os

from pyvis.network import Network

warnings.filterwarnings("ignore")

# Load the GraphML file
file_path = "./cache/graph_chunk_entity_relation.graphml"

assert os.path.exists(file_path), f"File {file_path} does not exist."
G = nx.read_graphml(file_path)

def create_graph(json_data):
    """
    Create interactive knowledge graph using pyvis
    """

    G = nx.Graph()

    # Add nodes with tooltips and error handling for missing keys
    for node in json_data['nodes']:
        # Get node type with fallback
        type = node.get("type", "Entity")

        # Get detailed type with fallback
        detailed_type = node.get("detailed_type", type)

        # Use node ID and type info for the tooltip
        G.add_node(node['id'], title=f"{type}: {detailed_type}")

    # Add edges with labels
    for edge in json_data['edges']:
        # Check if the required keys exist
        if 'from' in edge and 'to' in edge:
            label = edge.get('label', 'related')
            G.add_edge(edge['from'], edge['to'], title=label, label=label)

    # Create network visualization
    network = Network(
        width="100%",
        height="100vh",
        notebook=False,
        bgcolor="#f8fafc",
        font_color="#1e293b"
    )

    # Configure network display
    network.from_nx(G)

    # Customize node appearance
    for node in network.nodes:
        node['color'] = {'background': '#e0e7ff', 'border': '#6366f1', 'highlight': {'background': '#c7d2fe', 'border': '#4f46e5'}}
        node['font'] = {'size': 14, 'color': '#1e293b'}
        node['shape'] = 'dot'
        node['size'] = 20

    # Customize edge appearance
    for edge in network.edges:
        edge['width'] = 4
        edge['color'] = {'color': '#6366f1', 'highlight': '#4f46e5'}
        edge['font'] = {'size': 12, 'color': '#4b5563', 'face': 'Arial'}

    # Save and display the network
    filename_out = "knowledge_graph.html"
    network.show(filename_out)
    print(f"Knowledge graph saved to {filename_out}")

# Convert the graph to node-link data format
js_graph = nx.node_link_data(G)
js_data = rapidjson.loads(rapidjson.dumps(js_graph))
# print(js_data)

create_graph(js_data)

# # Create a Pyvis network
# network = Network(width="100%",
#                   height="100vh",
#                   notebook=True,
#                   bgcolor="#f8fafc",
#                   font_color="#1e293b")

# # Convert NetworkX graph to Pyvis network
# network.from_nx(G)

# # Add colors and title to nodes
# for node in network.nodes:
#     if "description" in node:
#         node["title"] = node["description"]

#     node['color'] = {'background': '#e0e7ff', 'border': '#6366f1', 'highlight': {'background': '#c7d2fe', 'border': '#4f46e5'}}
#     node['font'] = {'size': 14, 'color': '#1e293b'}
#     node['shape'] = 'dot'
#     node['size'] = 20

# # Add title to edges
# for edge in network.edges:
#     if "description" in edge:
#         edge["title"] = edge["description"]

#     edge['width'] = 4
#     edge['color'] = {'color': '#6366f1', 'highlight': '#4f46e5'}
#     edge['font'] = {'size': 12, 'color': '#4b5563', 'face': 'Arial'}

# # Save and display the network
# filename_out = "knowledge_graph.html"
# network.show(filename_out)
# print(f"Knowledge graph saved to {filename_out}")
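One thing to watch in this script: by default `nx.node_link_data()` returns the edge list under the key `links` with `source`/`target` fields, while `create_graph()` reads `json_data['edges']` and expects `from`/`to` keys, so the payload appears to need a remap before the call succeeds. A small sketch of the shape and a possible remapping (the remap itself is illustrative, not part of the commit):

import networkx as nx

G = nx.Graph()
G.add_edge("Aerosmith", "Steven Tyler", label="lead singer")

data = nx.node_link_data(G)
print(data["nodes"])  # [{'id': 'Aerosmith'}, {'id': 'Steven Tyler'}]
print(data["links"])  # [{'label': 'lead singer', 'source': 'Aerosmith', 'target': 'Steven Tyler'}]

# create_graph() expects {'nodes': [...], 'edges': [{'from': ..., 'to': ..., 'label': ...}]},
# so something along these lines would be needed first:
edges = [{"from": l["source"], "to": l["target"], "label": l.get("label", "related")}
         for l in data["links"]]
payload = {"nodes": data["nodes"], "edges": edges}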