phi-knowledge-graph

Running on Zero

App Files Files

xet

Community

vietexob committed on Aug 30

Commit

110ce02

1 Parent(s): 5bfc72c

Added LightRAG KG

Browse files

Files changed (4) hide show

app.py +36 -23
knowledge_graph.html +155 -0
llm_graph.py +61 -62
visualize.py +110 -0

app.py CHANGED Viewed

@@ -1,21 +1,23 @@
-# import spaces
 import os
 import spacy
 import pickle
 import random
 import logging
-import rapidjson
 import asyncio
 import gradio as gr
 import networkx as nx
 from llm_graph import LLMGraph, MODEL_LIST
 from pyvis.network import Network
 from spacy import displacy
 from spacy.tokens import Span
 logging.basicConfig(level=logging.INFO)
 # Constants
 TITLE = "🌐 Text2Graph: Extract Knowledge Graphs from Natural Language"
@@ -35,6 +37,9 @@ EXAMPLE_CACHE_FILE = os.path.join(CACHE_DIR, "first_example_cache.pkl")
 # Create cache directory if it doesn't exist
 os.makedirs(CACHE_DIR, exist_ok=True)
 def get_random_light_color():
     """
     Color utilities
@@ -57,19 +62,17 @@ def handle_text(text=""):
     return " ".join(text.split())
-# @spaces.GPU
-async def extract_kg(text="", model=None):
     """
     Extract knowledge graph from text
     """
     # Catch empty text
-    if not text or not model:
         raise gr.Error("⚠️ Both text and model must be provided!")
     try:
-        model_instance = LLMGraph(model=model)
-        result = await model_instance.extract(text)
         return rapidjson.loads(result)
     except Exception as e:
         raise gr.Error(f"❌ Extraction error: {str(e)}")
@@ -223,17 +226,19 @@ def create_graph(json_data):
         allow-top-navigation-by-user-activation allow-downloads" allowfullscreen=""
         allowpaymentrequest="" frameborder="0" srcdoc='{html}'></iframe>"""
-async def process_and_visualize(text, model, progress=gr.Progress()):
     """
     Process text and visualize knowledge graph and entities
     """
-    if not text or not model:
         raise gr.Error("⚠️ Both text and model must be provided!")
     # Check if we're processing the first example for caching
     is_first_example = text == EXAMPLES[0][0]
     # Try to load from cache if it's the first example
     if is_first_example and os.path.exists(EXAMPLE_CACHE_FILE):
         try:
@@ -249,7 +254,7 @@ async def process_and_visualize(text, model, progress=gr.Progress()):
     # Continue with normal processing if cache fails
     progress(0, desc="Starting extraction...")
-    json_data = await extract_kg(text, model)
     progress(0.5, desc="Creating entity visualization...")
     entities_viz = create_custom_entity_viz(json_data, text)
@@ -301,7 +306,7 @@ EXAMPLES = [
                  les buis et à arroser les rosiers, perpétuant ainsi une tradition d'excellence horticole qui fait la fierté de la capitale française.""")],
 ]
-async def generate_first_example_cache():
     """
     Generate cache for the first example if it doesn't exist when the app starts
     """
@@ -312,10 +317,10 @@ async def generate_first_example_cache():
         try:
             text = EXAMPLES[0][0]
-            model = MODEL_LIST[0] if MODEL_LIST else None
             # Extract data
-            json_data = await extract_kg(text, model)
             entities_viz = create_custom_entity_viz(json_data, text)
             graph_html = create_graph(json_data)
@@ -360,7 +365,7 @@ def create_ui():
     """
     # Try to generate/load the first example cache
-    first_example_cache = asyncio.run(generate_first_example_cache())
     with gr.Blocks(css=CUSTOM_CSS, title=TITLE) as demo:
         # Header
@@ -430,14 +435,14 @@ def create_ui():
         )
         # Set initial values from cache if available
-        if first_example_cache:
             # Use this to set initial values when the app loads
             demo.load(
                 lambda: [
-                    first_example_cache["graph_html"],
-                    first_example_cache["entities_viz"],
-                    first_example_cache["json_data"],
-                    first_example_cache["stats"]
                 ],
                 inputs=None,
                 outputs=[output_graph, output_entity_viz, output_json, stats_output]
@@ -450,5 +455,13 @@ def create_ui():
     return demo
-demo = create_ui()
-demo.launch(share=False)

 import os
 import spacy
 import pickle
 import random
 import logging
 import asyncio
+import rapidjson
 import gradio as gr
 import networkx as nx
+# from dotenv import load_dotenv
 from llm_graph import LLMGraph, MODEL_LIST
 from pyvis.network import Network
 from spacy import displacy
 from spacy.tokens import Span
 logging.basicConfig(level=logging.INFO)
+# load_dotenv()
 # Constants
 TITLE = "🌐 Text2Graph: Extract Knowledge Graphs from Natural Language"
 # Create cache directory if it doesn't exist
 os.makedirs(CACHE_DIR, exist_ok=True)
+# Initialize the LLMGraph model
+model = LLMGraph()
 def get_random_light_color():
     """
     Color utilities
     return " ".join(text.split())
+def extract_kg(text="", model_name=None):
+    """
+    Run the module-level LLMGraph model on the text and return the parsed JSON.
+
+    Raises gr.Error when the text or model name is missing, or when the
+    extraction or the JSON parsing fails.
+    """
+    # Both inputs are mandatory
+    if not (text and model_name):
         raise gr.Error("⚠️ Both text and model must be provided!")
     try:
+        raw_json = model.extract(text, model_name)
+        return rapidjson.loads(raw_json)
+    except Exception as exc:
+        raise gr.Error(f"❌ Extraction error: {exc!s}")
         allow-top-navigation-by-user-activation allow-downloads" allowfullscreen=""
         allowpaymentrequest="" frameborder="0" srcdoc='{html}'></iframe>"""
+def process_and_visualize(text, model_name, progress=gr.Progress()):
     """
     Process text and visualize knowledge graph and entities
     """
+    if not text or not model_name:
         raise gr.Error("⚠️ Both text and model must be provided!")
     # Check if we're processing the first example for caching
     is_first_example = text == EXAMPLES[0][0]
+    asyncio.run(model.initialize_rag())  # Ensure RAG is initialized
     # Try to load from cache if it's the first example
     if is_first_example and os.path.exists(EXAMPLE_CACHE_FILE):
         try:
     # Continue with normal processing if cache fails
     progress(0, desc="Starting extraction...")
+    json_data = extract_kg(text, model_name)
     progress(0.5, desc="Creating entity visualization...")
     entities_viz = create_custom_entity_viz(json_data, text)
                  les buis et à arroser les rosiers, perpétuant ainsi une tradition d'excellence horticole qui fait la fierté de la capitale française.""")],
 ]
+def generate_first_example():
     """
     Generate cache for the first example if it doesn't exist when the app starts
     """
         try:
             text = EXAMPLES[0][0]
+            model_name = MODEL_LIST[0] if MODEL_LIST else None
             # Extract data
+            json_data = extract_kg(text, model_name)
             entities_viz = create_custom_entity_viz(json_data, text)
             graph_html = create_graph(json_data)
     """
     # Try to generate/load the first example cache
+    first_example = generate_first_example()
     with gr.Blocks(css=CUSTOM_CSS, title=TITLE) as demo:
         # Header
         )
         # Set initial values from cache if available
+        if first_example:
             # Use this to set initial values when the app loads
             demo.load(
                 lambda: [
+                    first_example["graph_html"],
+                    first_example["entities_viz"],
+                    first_example["json_data"],
+                    first_example["stats"]
                 ],
                 inputs=None,
                 outputs=[output_graph, output_entity_viz, output_json, stats_output]
     return demo
+def main():
+    """
+    Build the Gradio UI with create_ui() and launch it with share=False
+    """
+    create_ui().launch(share=False)
+if __name__ == "__main__":
+    main()

knowledge_graph.html ADDED Viewed

	@@ -0,0 +1,155 @@

+<html>
+    <head>
+        <meta charset="utf-8">
+            <script src="lib/bindings/utils.js"></script>
+            <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/vis-network/9.1.2/dist/dist/vis-network.min.css" integrity="sha512-WgxfT5LWjfszlPHXRmBWHkV2eceiWTOBvrKCNbdgDYTHrT2AeLCGbF4sZlZw3UMN3WtL0tGUoIAKsu8mllg/XA==" crossorigin="anonymous" referrerpolicy="no-referrer" />
+            <script src="https://cdnjs.cloudflare.com/ajax/libs/vis-network/9.1.2/dist/vis-network.min.js" integrity="sha512-LnvoEWDFrqGHlHmDD2101OrLcbsfkrzoSpvtSQtxK3RMnRV0eOkhhBN2dXHKRrUU8p2DGRTk35n4O8nWSVe1mQ==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>
+<center>
+<h1></h1>
+</center>
+<!-- <link rel="stylesheet" href="../node_modules/vis/dist/vis.min.css" type="text/css" />
+<script type="text/javascript" src="../node_modules/vis/dist/vis.js"> </script>-->
+        <link
+          href="https://cdn.jsdelivr.net/npm/bootstrap@5.0.0-beta3/dist/css/bootstrap.min.css"
+          rel="stylesheet"
+          integrity="sha384-eOJMYsd53ii+scO/bJGFsiCZc+5NDVN2yr8+0RDqr0Ql0h+rP48ckxlpbzKgwra6"
+          crossorigin="anonymous"
+        />
+        <script
+          src="https://cdn.jsdelivr.net/npm/bootstrap@5.0.0-beta3/dist/js/bootstrap.bundle.min.js"
+          integrity="sha384-JEW9xMcG8R+pH31jmWH6WWP0WintQrMb4s7ZOdauHnUtxwoG2vI5DkLtS3qm9Ekf"
+          crossorigin="anonymous"
+        ></script>
+        <center>
+          <h1></h1>
+        </center>
+        <style type="text/css">
+             #mynetwork {
+                 width: 100%;
+                 height: 100vh;
+                 background-color: #f8fafc;
+                 border: 1px solid lightgray;
+                 position: relative;
+                 float: left;
+             }
+        </style>
+    </head>
+    <body>
+        <div class="card" style="width: 100%">
+            <div id="mynetwork" class="card-body"></div>
+        </div>
+        <script type="text/javascript">
+              // initialize global variables.
+              var edges;
+              var nodes;
+              var allNodes;
+              var allEdges;
+              var nodeColors;
+              var originalNodes;
+              var network;
+              var container;
+              var options, data;
+              var filter = {
+                  item : '',
+                  property : '',
+                  value : []
+              };
+              // This method is responsible for drawing the graph, returns the drawn network
+              function drawGraph() {
+                  var container = document.getElementById('mynetwork');
+                  // parsing and collecting nodes and edges from the python
+                  nodes = new vis.DataSet([{"color": {"background": "#e0e7ff", "border": "#6366f1", "highlight": {"background": "#c7d2fe", "border": "#4f46e5"}}, "created_at": 1756577670, "description": "Aerosmith is a legendary rock band that has been active for 54 years and has officially announced their retirement from touring.", "entity_id": "Aerosmith", "entity_type": "organization", "file_path": "unknown_source", "font": {"color": "#1e293b", "size": 14}, "id": "Aerosmith", "label": "Aerosmith", "shape": "dot", "size": 20, "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "Aerosmith is a legendary rock band that has been active for 54 years and has officially announced their retirement from touring."}, {"color": {"background": "#e0e7ff", "border": "#6366f1", "highlight": {"background": "#c7d2fe", "border": "#4f46e5"}}, "created_at": 1756577670, "description": "Steven Tyler is the lead singer of Aerosmith who suffered an unrecoverable vocal cord injury, leading to the band\u0027s retirement from touring.", "entity_id": "Steven Tyler", "entity_type": "person", "file_path": "unknown_source", "font": {"color": "#1e293b", "size": 14}, "id": "Steven Tyler", "label": "Steven Tyler", "shape": "dot", "size": 20, "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "Steven Tyler is the lead singer of Aerosmith who suffered an unrecoverable vocal cord injury, leading to the band\u0027s retirement from touring."}, {"color": {"background": "#e0e7ff", "border": "#6366f1", "highlight": {"background": "#c7d2fe", "border": "#4f46e5"}}, "created_at": 1756577670, "description": "Vocal cord injury refers to the unrecoverable injury suffered by Steven Tyler that caused Aerosmith to retire from touring.", "entity_id": "Vocal Cord Injury", "entity_type": "category", "file_path": "unknown_source", "font": {"color": "#1e293b", "size": 14}, "id": "Vocal Cord Injury", "label": "Vocal Cord Injury", "shape": "dot", "size": 20, "source_id": 
"chunk-150cfba3862e116efcee671d872955be", "title": "Vocal cord injury refers to the unrecoverable injury suffered by Steven Tyler that caused Aerosmith to retire from touring."}, {"color": {"background": "#e0e7ff", "border": "#6366f1", "highlight": {"background": "#c7d2fe", "border": "#4f46e5"}}, "created_at": 1756577670, "description": "Retirement from touring is the event announced by Aerosmith after 54 years, prompted by Steven Tyler\u0027s vocal cord injury.", "entity_id": "Retirement from Touring", "entity_type": "event", "file_path": "unknown_source", "font": {"color": "#1e293b", "size": 14}, "id": "Retirement from Touring", "label": "Retirement from Touring", "shape": "dot", "size": 20, "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "Retirement from touring is the event announced by Aerosmith after 54 years, prompted by Steven Tyler\u0027s vocal cord injury."}, {"color": {"background": "#e0e7ff", "border": "#6366f1", "highlight": {"background": "#c7d2fe", "border": "#4f46e5"}}, "created_at": 1756577670, "description": "September 2023 is the time when Steven Tyler suffered a fractured larynx.", "entity_id": "September 2023", "entity_type": "event", "file_path": "unknown_source", "font": {"color": "#1e293b", "size": 14}, "id": "September 2023", "label": "September 2023", "shape": "dot", "size": 20, "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "September 2023 is the time when Steven Tyler suffered a fractured larynx."}, {"color": {"background": "#e0e7ff", "border": "#6366f1", "highlight": {"background": "#c7d2fe", "border": "#4f46e5"}}, "created_at": 1756577670, "description": "Fractured larynx is the specific injury Steven Tyler suffered in September 2023, which was unsuccessfully treated.", "entity_id": "Fractured Larynx", "entity_type": "category", "file_path": "unknown_source", "font": {"color": "#1e293b", "size": 14}, "id": "Fractured Larynx", "label": "Fractured Larynx", "shape": "dot", "size": 20, "source_id": 
"chunk-150cfba3862e116efcee671d872955be", "title": "Fractured larynx is the specific injury Steven Tyler suffered in September 2023, which was unsuccessfully treated."}, {"color": {"background": "#e0e7ff", "border": "#6366f1", "highlight": {"background": "#c7d2fe", "border": "#4f46e5"}}, "created_at": 1756577670, "description": "Unsuccessful treatment refers to the medical efforts to heal Steven Tyler\u0027s fractured larynx that did not result in recovery.", "entity_id": "Unsuccessful Treatment", "entity_type": "category", "file_path": "unknown_source", "font": {"color": "#1e293b", "size": 14}, "id": "Unsuccessful Treatment", "label": "Unsuccessful Treatment", "shape": "dot", "size": 20, "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "Unsuccessful treatment refers to the medical efforts to heal Steven Tyler\u0027s fractured larynx that did not result in recovery."}]);
+                  edges = new vis.DataSet([{"color": {"color": "#6366f1", "highlight": "#4f46e5"}, "created_at": 1756577684, "description": "Steven Tyler is the lead singer of Aerosmith, whose vocal injury led to the band\u0027s retirement from touring.", "file_path": "unknown_source", "font": {"color": "#4b5563", "face": "Arial", "size": 12}, "from": "Aerosmith", "keywords": "band membership,cause of retirement", "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "Steven Tyler is the lead singer of Aerosmith, whose vocal injury led to the band\u0027s retirement from touring.", "to": "Steven Tyler", "width": 4}, {"color": {"color": "#6366f1", "highlight": "#4f46e5"}, "created_at": 1756577685, "description": "Aerosmith\u0027s retirement from touring is due to Steven Tyler\u0027s unrecoverable vocal cord injury.", "file_path": "unknown_source", "font": {"color": "#4b5563", "face": "Arial", "size": 12}, "from": "Aerosmith", "keywords": "cause of retirement,health impact", "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "Aerosmith\u0027s retirement from touring is due to Steven Tyler\u0027s unrecoverable vocal cord injury.", "to": "Vocal Cord Injury", "width": 4}, {"color": {"color": "#6366f1", "highlight": "#4f46e5"}, "created_at": 1756577687, "description": "Aerosmith officially announced their retirement from touring after 54 years.", "file_path": "unknown_source", "font": {"color": "#4b5563", "face": "Arial", "size": 12}, "from": "Aerosmith", "keywords": "band decision,career milestone", "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "Aerosmith officially announced their retirement from touring after 54 years.", "to": "Retirement from Touring", "width": 4}, {"color": {"color": "#6366f1", "highlight": "#4f46e5"}, "created_at": 1756577685, "description": "Steven Tyler suffered a fractured larynx in September 2023.", "file_path": "unknown_source", "font": {"color": "#4b5563", "face": "Arial", "size": 12}, "from": "Steven 
Tyler", "keywords": "injury timing,medical event", "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "Steven Tyler suffered a fractured larynx in September 2023.", "to": "September 2023", "width": 4}, {"color": {"color": "#6366f1", "highlight": "#4f46e5"}, "created_at": 1756577687, "description": "The vocal cord injury is the medical condition affecting Steven Tyler that caused Aerosmith\u0027s retirement.", "file_path": "unknown_source", "font": {"color": "#4b5563", "face": "Arial", "size": 12}, "from": "Steven Tyler", "keywords": "cause-effect,medical condition", "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "The vocal cord injury is the medical condition affecting Steven Tyler that caused Aerosmith\u0027s retirement.", "to": "Vocal Cord Injury", "width": 4}, {"color": {"color": "#6366f1", "highlight": "#4f46e5"}, "created_at": 1756577690, "description": "The fractured larynx is the specific injury Steven Tyler suffered, leading to unsuccessful treatment and vocal cord damage.", "file_path": "unknown_source", "font": {"color": "#4b5563", "face": "Arial", "size": 12}, "from": "Steven Tyler", "keywords": "injury detail,medical diagnosis", "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "The fractured larynx is the specific injury Steven Tyler suffered, leading to unsuccessful treatment and vocal cord damage.", "to": "Fractured Larynx", "width": 4}, {"color": {"color": "#6366f1", "highlight": "#4f46e5"}, "created_at": 1756577691, "description": "Steven Tyler underwent months of unsuccessful treatment for his fractured larynx.", "file_path": "unknown_source", "font": {"color": "#4b5563", "face": "Arial", "size": 12}, "from": "Steven Tyler", "keywords": "health outcome,medical treatment", "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "Steven Tyler underwent months of unsuccessful treatment for his fractured larynx.", "to": "Unsuccessful Treatment", "width": 4}, {"color": {"color": "#6366f1", 
"highlight": "#4f46e5"}, "created_at": 1756577691, "description": "The vocal cord injury is a result of the fractured larynx suffered by Steven Tyler.", "file_path": "unknown_source", "font": {"color": "#4b5563", "face": "Arial", "size": 12}, "from": "Vocal Cord Injury", "keywords": "injury relationship,medical causation", "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "The vocal cord injury is a result of the fractured larynx suffered by Steven Tyler.", "to": "Fractured Larynx", "width": 4}, {"color": {"color": "#6366f1", "highlight": "#4f46e5"}, "created_at": 1756577693, "description": "The unsuccessful treatment was aimed at healing the fractured larynx suffered by Steven Tyler.", "file_path": "unknown_source", "font": {"color": "#4b5563", "face": "Arial", "size": 12}, "from": "Fractured Larynx", "keywords": "injury focus,medical intervention", "source_id": "chunk-150cfba3862e116efcee671d872955be", "title": "The unsuccessful treatment was aimed at healing the fractured larynx suffered by Steven Tyler.", "to": "Unsuccessful Treatment", "width": 4}]);
+                  nodeColors = {};
+                  allNodes = nodes.get({ returnType: "Object" });
+                  for (nodeId in allNodes) {
+                    nodeColors[nodeId] = allNodes[nodeId].color;
+                  }
+                  allEdges = edges.get({ returnType: "Object" });
+                  // adding nodes and edges to the graph
+                  data = {nodes: nodes, edges: edges};
+                  var options = {
+    "configure": {
+        "enabled": false
+    },
+    "edges": {
+        "color": {
+            "inherit": true
+        },
+        "smooth": {
+            "enabled": true,
+            "type": "dynamic"
+        }
+    },
+    "interaction": {
+        "dragNodes": true,
+        "hideEdgesOnDrag": false,
+        "hideNodesOnDrag": false
+    },
+    "physics": {
+        "enabled": true,
+        "stabilization": {
+            "enabled": true,
+            "fit": true,
+            "iterations": 1000,
+            "onlyDynamicEdges": false,
+            "updateInterval": 50
+        }
+    }
+};
+                  network = new vis.Network(container, data, options);
+                  return network;
+              }
+              drawGraph();
+        </script>
+    </body>
+</html>

llm_graph.py CHANGED Viewed

@@ -28,8 +28,8 @@ AZURE_EMBEDDING_API_VERSION = os.environ["AZURE_EMBEDDING_API_VERSION"]
 WORKING_DIR = "./cache"
 MODEL_LIST = [
-  "OpenAI/GPT-4.1-mini",
   "EmergentMethods/Phi-3-mini-128k-instruct-graph",
 ]
 class LLMGraph:
@@ -37,68 +37,52 @@ class LLMGraph:
     A class to interact with LLMs for knowledge graph extraction.
     """
-    async def _initialize_rag(self, embedding_dimension=3072):
         """
         Initialize the LightRAG instance with the specified embedding dimension.
         """
-        rag = LightRAG(
-            working_dir=WORKING_DIR,
-            llm_model_func=self._llm_model_func,
-            embedding_func=EmbeddingFunc(
-                embedding_dim=embedding_dimension,
-                max_token_size=8192,
-                func=self._embedding_func,
-            ),
-        )
-        await rag.initialize_storages()
-        await initialize_pipeline_status()
-        return rag
-    async def _get_rag(self):
-        """
-        Get or initialize the RAG instance (lazy loading).
-        """
-        if self.rag is None:
-            self.rag = await self._initialize_rag()
-        return self.rag
-    def __init__(self, model="OpenAI/GPT-4.1-mini"):
         """
         Initialize the Phi3InstructGraph with a specified model.
         """
-        if model not in MODEL_LIST:
-            raise ValueError(f"Model must be one of {MODEL_LIST}")
-        self.model_name = model
-        if model == MODEL_LIST[0]:
-            # Use Azure OpenAI for GPT-4.1-mini
-            self.llm_client = AzureOpenAI(
-                api_key=AZURE_OPENAI_API_KEY,
-                api_version=AZURE_OPENAI_API_VERSION,
-                azure_endpoint=AZURE_OPENAI_ENDPOINT,
-            )
-            self.emb_client = AzureOpenAI(
-                api_key=AZURE_OPENAI_API_KEY,
-                api_version=AZURE_EMBEDDING_API_VERSION,
-                azure_endpoint=AZURE_OPENAI_ENDPOINT,
-            )
-            self.rag = None  # Initialize as None for lazy loading
-        else:
-            # Use Hugging Face Inference API for Phi-3-mini-128k-instruct-graph
-            self.hf_client = InferenceClient(
-                model=endpoint_url,
-                token=api_token
-            )
     def _generate(self, messages):
         """
         Generate a response from the model based on the provided messages.
@@ -167,22 +151,22 @@ class LLMGraph:
         return messages
-    async def extract(self, text):
         """
         Extract knowledge graph from text
         """
-        generated_text = ""
-        if self.model_name == MODEL_LIST[0]:
-            # Use LightRAG with Azure OpenAI
-            rag = await self._get_rag()
-            rag.insert(text)
-        else:
             # Use Hugging Face Inference API with Phi-3-mini-128k-instruct-graph
             messages = self._get_messages(text)
             generated_text = self._generate(messages)
         return generated_text
     async def _llm_model_func(self, prompt, system_prompt=None, history_messages=[], **kwargs) -> str:
@@ -190,17 +174,20 @@ class LLMGraph:
         Call the Azure OpenAI chat completion endpoint with the given prompt and optional system prompt and history messages.
         """
-        messages = []
         if system_prompt:
             messages.append({"role": "system", "content": system_prompt})
         if history_messages:
             messages.extend(history_messages)
         messages.append({"role": "user", "content": prompt})
-        chat_completion = self.llm_client.chat.completions.create(
             model=AZURE_OPENAI_DEPLOYMENT,
             messages=messages,
             temperature=kwargs.get("temperature", 0),
@@ -215,7 +202,19 @@ class LLMGraph:
         Call the Azure OpenAI embeddings endpoint with the given texts.
         """
-        embedding = self.emb_client.embeddings.create(model=AZURE_EMBEDDING_DEPLOYMENT, input=texts)
         embeddings = [item.embedding for item in embedding.data]
         return np.array(embeddings)

 WORKING_DIR = "./cache"
 MODEL_LIST = [
   "EmergentMethods/Phi-3-mini-128k-instruct-graph",
+  "OpenAI/GPT-4.1-mini",
 ]
 class LLMGraph:
     A class to interact with LLMs for knowledge graph extraction.
     """
+    async def initialize_rag(self, embedding_dimension=3072):
+        """
+        Initialize the LightRAG instance with the specified embedding dimension.
+
+        Does nothing when self.rag is already set. Otherwise builds a LightRAG
+        instance under WORKING_DIR, wired to this object's LLM and embedding
+        functions, and awaits its storage and pipeline-status initialization.
+
+        Args:
+            embedding_dimension: Value passed to EmbeddingFunc as embedding_dim.
+        """
+        if self.rag is not None:
+            return
+        rag = LightRAG(
+            working_dir=WORKING_DIR,
+            llm_model_func=self._llm_model_func,
+            embedding_func=EmbeddingFunc(
+                embedding_dim=embedding_dimension,
+                max_token_size=8192,
+                func=self._embedding_func,
+            ),
+        )
+        await rag.initialize_storages()
+        await initialize_pipeline_status()
+        # Publish the instance only after setup succeeded; assigning it earlier
+        # would make a failed attempt look initialized and skip any retry.
+        self.rag = rag
+    # async def test_responses(self):
+    #     """
+    #     Test the LLM and embedding functions.
+    #     """
+    #     result = await self._llm_model_func("How are you?")
+    #     print("Response from llm_model_func: ", result)
+    #     result = await self._embedding_func(["How are you?"])
+    #     print("Result of embedding_func: ", result.shape)
+    #     print("Dimension of embedding: ", result.shape[1])
+    #     return True
+    def __init__(self):
+        """
+        Create the Hugging Face inference client and leave the RAG instance unset.
+        """
+        # Filled in later by initialize_rag()
+        self.rag = None
+        # Hugging Face Inference API for Phi-3-mini-128k-instruct-graph
+        self.hf_client = InferenceClient(model=endpoint_url, token=api_token)
     def _generate(self, messages):
         """
         Generate a response from the model based on the provided messages.
         return messages
+    def extract(self, text, model_name=MODEL_LIST[0]) -> str:
+        """
+        Extract knowledge graph from text
+
+        Args:
+            text: Input text to process.
+            model_name: MODEL_LIST[0] selects the Hugging Face path; any other
+                value selects the LightRAG path.
+
+        Returns:
+            The generated text for the Hugging Face path, or a placeholder
+            string for the LightRAG path (JSON extraction is still a TODO).
+
+        Raises:
+            RuntimeError: If the LightRAG path is requested while self.rag is
+                still None, i.e. before initialize_rag() has been awaited.
+        """
+        if model_name == MODEL_LIST[0]:
             # Use Hugging Face Inference API with Phi-3-mini-128k-instruct-graph
             messages = self._get_messages(text)
+            return self._generate(messages)
+        # Use LightRAG with Azure OpenAI
+        if self.rag is None:
+            # Fail with an actionable message instead of an AttributeError on None
+            raise RuntimeError(
+                "LightRAG is not initialized; await initialize_rag() before calling extract()."
+            )
+        self.rag.insert(text) # Insert the text into the RAG storage
+        # TODO: Extract JSON format of the knowledge graph
+        return "This is a placeholder response."
     async def _llm_model_func(self, prompt, system_prompt=None, history_messages=[], **kwargs) -> str:
         Call the Azure OpenAI chat completion endpoint with the given prompt and optional system prompt and history messages.
         """
+        llm_client = AzureOpenAI(
+            api_key=AZURE_OPENAI_API_KEY,
+            api_version=AZURE_OPENAI_API_VERSION,
+            azure_endpoint=AZURE_OPENAI_ENDPOINT,
+        )
+        messages = []
         if system_prompt:
             messages.append({"role": "system", "content": system_prompt})
         if history_messages:
             messages.extend(history_messages)
         messages.append({"role": "user", "content": prompt})
+        chat_completion = llm_client.chat.completions.create(
             model=AZURE_OPENAI_DEPLOYMENT,
             messages=messages,
             temperature=kwargs.get("temperature", 0),
         Call the Azure OpenAI embeddings endpoint with the given texts.
         """
+        emb_client = AzureOpenAI(
+            api_key=AZURE_OPENAI_API_KEY,
+            api_version=AZURE_EMBEDDING_API_VERSION,
+            azure_endpoint=AZURE_OPENAI_ENDPOINT,
+        )
+        embedding = emb_client.embeddings.create(model=AZURE_EMBEDDING_DEPLOYMENT, input=texts)
         embeddings = [item.embedding for item in embedding.data]
         return np.array(embeddings)
+# if __name__ == "__main__":
+#     # Initialize the LLMGraph model
+#     model = LLMGraph()
+#     asyncio.run(model.initialize_rag())  # Ensure RAG is initialized
+#     print("LLMGraph model initialized.")

visualize.py ADDED Viewed

	@@ -0,0 +1,110 @@

+import networkx as nx
+import rapidjson
+import warnings
+import os
+from pyvis.network import Network
+warnings.filterwarnings("ignore")
+# Load the GraphML file
+file_path = "./cache/graph_chunk_entity_relation.graphml"
+# Raise explicitly: an assert would be stripped when Python runs with -O
+if not os.path.exists(file_path):
+    raise FileNotFoundError(f"File {file_path} does not exist.")
+G = nx.read_graphml(file_path)
+def create_graph(json_data):
+    """
+    Create interactive knowledge graph using pyvis
+
+    Args:
+        json_data: Dict with a 'nodes' list (each node needs an 'id'; 'type'
+            and 'detailed_type' are optional) and an edge list stored under
+            'edges' or, as in networkx node-link data, under 'links'. Edge
+            endpoints are read from 'from'/'to' or 'source'/'target'.
+
+    Returns:
+        None. The network is passed to network.show("knowledge_graph.html").
+    """
+    graph = nx.Graph()
+    # Add nodes with tooltips, falling back to defaults for missing keys
+    for node in json_data['nodes']:
+        node_type = node.get("type", "Entity")
+        detailed_type = node.get("detailed_type", node_type)
+        # Use node ID and type info for the tooltip
+        graph.add_node(node['id'], title=f"{node_type}: {detailed_type}")
+    # nx.node_link_data() names the edge list 'links' and the endpoints
+    # 'source'/'target', so accept that layout next to 'edges'/'from'/'to'.
+    if 'edges' in json_data:
+        edge_list = json_data['edges']
+    else:
+        edge_list = json_data.get('links', [])
+    # Add edges with labels
+    for edge in edge_list:
+        source = edge.get('from', edge.get('source'))
+        target = edge.get('to', edge.get('target'))
+        # Skip edges that do not name both endpoints
+        if source is None or target is None:
+            continue
+        label = edge.get('label', 'related')
+        graph.add_edge(source, target, title=label, label=label)
+    # Create network visualization
+    network = Network(
+        width="100%",
+        height="100vh",
+        notebook=False,
+        bgcolor="#f8fafc",
+        font_color="#1e293b"
+    )
+    # Configure network display
+    network.from_nx(graph)
+    # Customize node appearance
+    for node in network.nodes:
+        node['color'] = {'background': '#e0e7ff', 'border': '#6366f1', 'highlight': {'background': '#c7d2fe', 'border': '#4f46e5'}}
+        node['font'] = {'size': 14, 'color': '#1e293b'}
+        node['shape'] = 'dot'
+        node['size'] = 20
+    # Customize edge appearance
+    for edge in network.edges:
+        edge['width'] = 4
+        edge['color'] = {'color': '#6366f1', 'highlight': '#4f46e5'}
+        edge['font'] = {'size': 12, 'color': '#4b5563', 'face': 'Arial'}
+    # Save and display the network
+    # NOTE(review): some pyvis releases default show() to notebook mode -
+    # confirm this call works for a Network created with notebook=False.
+    filename_out = "knowledge_graph.html"
+    network.show(filename_out)
+    print(f"Knowledge graph saved to {filename_out}")
+# Convert the graph to node-link data format
+# NOTE(review): nx.node_link_data() is documented to name the edge list
+# 'links' with 'source'/'target' endpoints by default - confirm that
+# create_graph() reads that layout.
+js_graph = nx.node_link_data(G)
+# Round-trip through rapidjson so the data passed on is JSON-decoded
+js_data = rapidjson.loads(rapidjson.dumps(js_graph))
+# print(js_data)
+create_graph(js_data)
+# # Create a Pyvis network
+# network = Network(width="100%",
+#                   height="100vh",
+#                   notebook=True,
+#                   bgcolor="#f8fafc",
+#                   font_color="#1e293b")
+# # Convert NetworkX graph to Pyvis network
+# network.from_nx(G)
+# # Add colors and title to nodes
+# for node in network.nodes:
+#     if "description" in node:
+#         node["title"] = node["description"]
+#     node['color'] = {'background': '#e0e7ff', 'border': '#6366f1', 'highlight': {'background': '#c7d2fe', 'border': '#4f46e5'}}
+#     node['font'] = {'size': 14, 'color': '#1e293b'}
+#     node['shape'] = 'dot'
+#     node['size'] = 20
+# # Add title to edges
+# for edge in network.edges:
+#     if "description" in edge:
+#         edge["title"] = edge["description"]
+#     edge['width'] = 4
+#     edge['color'] = {'color': '#6366f1', 'highlight': '#4f46e5'}
+#     edge['font'] = {'size': 12, 'color': '#4b5563', 'face': 'Arial'}
+# # Save and display the network
+# filename_out = "knowledge_graph.html"
+# network.show(filename_out)
+# print(f"Knowledge graph saved to {filename_out}")