phi-knowledge-graph

Running on Zero

App Files

xet

Community

vietexob committed on Sep 6

Commit

1e4aac9

1 Parent(s): 364e329

Refactored the code

Browse files

Files changed (4) hide show

app.py +5 -7
data/sample3_en.txt +7 -0
data/sys_prompt.txt +25 -0
llm_graph.py +10 -32

app.py CHANGED Viewed

@@ -12,7 +12,6 @@ import rapidjson
 import gradio as gr
 import networkx as nx
-# from dotenv import load_dotenv
 from llm_graph import LLMGraph, MODEL_LIST
 from pyvis.network import Network
@@ -48,6 +47,10 @@ text_en_file2 = "./data/sample2_en.txt"
 with open(text_en_file2, 'r', encoding='utf-8') as file:
     text2_en = file.read()
 text_fr_file = "./data/sample_fr.txt"
 with open(text_fr_file, 'r', encoding='utf-8') as file:
     text_fr = file.read()
@@ -339,6 +342,7 @@ EXAMPLES = [
     [handle_text(text_fr)],
     [handle_text(text2_en)],
     [handle_text(text_es)],
 ]
 def generate_first_example():
@@ -416,12 +420,6 @@ def create_ui():
         with gr.Row():
             # Left panel - Input controls
             with gr.Column(scale=1):
-                # input_model = gr.Dropdown(
-                #     MODEL_LIST,
-                #     label="🤖 Select Model",
-                #     info="Choose a model to process your text",
-                #     value=MODEL_LIST[0] if MODEL_LIST else None,
-                # )
                 input_model = gr.Radio(
                     MODEL_LIST,
                     label="🤖 Select Model",

 import gradio as gr
 import networkx as nx
 from llm_graph import LLMGraph, MODEL_LIST
 from pyvis.network import Network
 with open(text_en_file2, 'r', encoding='utf-8') as file:
     text2_en = file.read()
+text_en_file3 = "./data/sample3_en.txt"
+with open(text_en_file3, 'r', encoding='utf-8') as file:
+    text3_en = file.read()
 text_fr_file = "./data/sample_fr.txt"
 with open(text_fr_file, 'r', encoding='utf-8') as file:
     text_fr = file.read()
     [handle_text(text_fr)],
     [handle_text(text2_en)],
     [handle_text(text_es)],
+    [handle_text(text3_en)]
 ]
 def generate_first_example():
         with gr.Row():
             # Left panel - Input controls
             with gr.Column(scale=1):
                 input_model = gr.Radio(
                     MODEL_LIST,
                     label="🤖 Select Model",

data/sample3_en.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+The small coffee shop on Maple Street had become an unlikely sanctuary for the neighborhood's most eccentric characters.
+Every Tuesday at precisely 2:47 PM, Mrs. Chen would arrive with her collection of vintage postcards, spreading them across
+table six while muttering corrections to the historical inaccuracies she'd discovered in travel documentaries. The barista,
+a philosophy student named Marcus, had learned to prepare her lavender latte without being asked, and had grown oddly fond
+of her lengthy monologues about the real story behind the Eiffel Tower's construction. Meanwhile, the jazz musician in the
+corner booth scribbled chord progressions on napkins, occasionally humming melodies that seemed to respond to the rhythmic
+hiss of the espresso machine. By closing time, the air hung thick with caffeine, dreams, and the comfortable weight of shared solitude.

data/sys_prompt.txt ADDED Viewed

	@@ -0,0 +1,25 @@

+A chat between a curious user and an artificial intelligence Assistant. The Assistant is an expert at identifying entities and relationships in text. The Assistant responds in JSON output only.
+The User provides text in the format:
+-------Text begin-------
+<User provided text>
+-------Text end-------
+The Assistant follows the following steps before replying to the User:
+1. **identify the most important entities** The Assistant identifies the most important entities in the text. These entities are listed in the JSON output under the key "nodes", they follow the structure of a list of dictionaries where each dict is:
+"nodes":[{"id": <entity N>, "type": <type>, "detailed_type": <detailed type>}, ...]
+where "type": <type> is a broad categorization of the entity. "detailed_type": <detailed type> is a very descriptive categorization of the entity.
+2. **determine relationships** The Assistant uses the text between -------Text begin------- and -------Text end------- to determine the relationships between the entities identified in the "nodes" list defined above. These relationships are called "edges" and they follow the structure of:
+"edges":[{"from": <entity 1>, "to": <entity 2>, "label": <relationship>}, ...]
+The <entity N> must correspond to the "id" of an entity in the "nodes" list.
+The Assistant never repeats the same node twice. The Assistant never repeats the same edge twice.
+The Assistant responds to the User in JSON only, according to the following JSON schema:
+{"type":"object","properties":{"nodes":{"type":"array","items":{"type":"object","properties":{"id":{"type":"string"},"type":{"type":"string"},"detailed_type":{"type":"string"}},"required":["id","type","detailed_type"],"additionalProperties":false}},"edges":{"type":"array","items":{"type":"object","properties":{"from":{"type":"string"},"to":{"type":"string"},"label":{"type":"string"}},"required":["from","to","label"],"additionalProperties":false}}},"required":["nodes","edges"],"additionalProperties":false}

llm_graph.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import os
 import time
-# import shutil
 import numpy as np
 import networkx as nx
@@ -35,6 +35,11 @@ MODEL_LIST = [
   "OpenAI/GPT-4.1-mini",
 ]
 class LLMGraph:
     """
     A class to interact with LLMs for knowledge graph extraction.
@@ -106,35 +111,8 @@ class LLMGraph:
         Construct the message list for the chat model.
         """
-        context = dedent("""\n
-                    A chat between a curious user and an artificial intelligence Assistant. The Assistant is an expert at identifying entities and relationships in text. The Assistant responds in JSON output only.
-                    The User provides text in the format:
-                    -------Text begin-------
-                    <User provided text>
-                    -------Text end-------
-                    The Assistant follows the following steps before replying to the User:
-                    1. **identify the most important entities** The Assistant identifies the most important entities in the text. These entities are listed in the JSON output under the key "nodes", they follow the structure of a list of dictionaries where each dict is:
-                    "nodes":[{"id": <entity N>, "type": <type>, "detailed_type": <detailed type>}, ...]
-                    where "type": <type> is a broad categorization of the entity. "detailed type": <detailed_type>  is a very descriptive categorization of the entity.
-                    2. **determine relationships** The Assistant uses the text between -------Text begin------- and -------Text end------- to determine the relationships between the entities identified in the "nodes" list defined above. These relationships are called "edges" and they follow the structure of:
-                    "edges":[{"from": <entity 1>, "to": <entity 2>, "label": <relationship>}, ...]
-                    The <entity N> must correspond to the "id" of an entity in the "nodes" list.
-                    The Assistant never repeats the same node twice. The Assistant never repeats the same edge twice.
-                    The Assistant responds to the User in JSON only, according to the following JSON schema:
-                    {"type":"object","properties":{"nodes":{"type":"array","items":{"type":"object","properties":{"id":{"type":"string"},"type":{"type":"string"},"detailed_type":{"type":"string"}},"required":["id","type","detailed_type"],"additionalProperties":false}},"edges":{"type":"array","items":{"type":"object","properties":{"from":{"type":"string"},"to":{"type":"string"},"label":{"type":"string"}},"required":["from","to","label"],"additionalProperties":false}}},"required":["nodes","edges"],"additionalProperties":false}
-                        """)
         user_message = dedent(f"""\n
                     -------Text begin-------
                     {text}
@@ -168,11 +146,11 @@ class LLMGraph:
         else:
             # Use LightRAG with Azure OpenAI
             self.rag.insert(text) # Insert the text into the RAG storage
             # Wait for GRAPHML_FILE to be created
             while not os.path.exists(GRAPHML_FILE):
-                time.sleep(0.1) # Sleep for 100ms before checking again
             # Extract dict format of the knowledge graph
             G = nx.read_graphml(GRAPHML_FILE)

 import os
 import time
 import numpy as np
 import networkx as nx
   "OpenAI/GPT-4.1-mini",
 ]
+# Read the system prompt
+sys_prompt_file = "./data/sys_prompt.txt"
+with open(sys_prompt_file, 'r', encoding='utf-8') as file:
+    sys_prompt = file.read()
 class LLMGraph:
     """
     A class to interact with LLMs for knowledge graph extraction.
         Construct the message list for the chat model.
         """
+        context = dedent(sys_prompt)
         user_message = dedent(f"""\n
                     -------Text begin-------
                     {text}
         else:
             # Use LightRAG with Azure OpenAI
             self.rag.insert(text) # Insert the text into the RAG storage
             # Wait for GRAPHML_FILE to be created
             while not os.path.exists(GRAPHML_FILE):
+                time.sleep(0.1) # Sleep for 0.1 seconds before checking again
             # Extract dict format of the knowledge graph
             G = nx.read_graphml(GRAPHML_FILE)