vietexob committed on
Commit
1e4aac9
·
1 Parent(s): 364e329

Refactored the code

Browse files
Files changed (4) hide show
  1. app.py +5 -7
  2. data/sample3_en.txt +7 -0
  3. data/sys_prompt.txt +25 -0
  4. llm_graph.py +10 -32
app.py CHANGED
@@ -12,7 +12,6 @@ import rapidjson
12
  import gradio as gr
13
  import networkx as nx
14
 
15
- # from dotenv import load_dotenv
16
  from llm_graph import LLMGraph, MODEL_LIST
17
 
18
  from pyvis.network import Network
@@ -48,6 +47,10 @@ text_en_file2 = "./data/sample2_en.txt"
48
  with open(text_en_file2, 'r', encoding='utf-8') as file:
49
  text2_en = file.read()
50
 
 
 
 
 
51
  text_fr_file = "./data/sample_fr.txt"
52
  with open(text_fr_file, 'r', encoding='utf-8') as file:
53
  text_fr = file.read()
@@ -339,6 +342,7 @@ EXAMPLES = [
339
  [handle_text(text_fr)],
340
  [handle_text(text2_en)],
341
  [handle_text(text_es)],
 
342
  ]
343
 
344
  def generate_first_example():
@@ -416,12 +420,6 @@ def create_ui():
416
  with gr.Row():
417
  # Left panel - Input controls
418
  with gr.Column(scale=1):
419
- # input_model = gr.Dropdown(
420
- # MODEL_LIST,
421
- # label="🤖 Select Model",
422
- # info="Choose a model to process your text",
423
- # value=MODEL_LIST[0] if MODEL_LIST else None,
424
- # )
425
  input_model = gr.Radio(
426
  MODEL_LIST,
427
  label="🤖 Select Model",
 
12
  import gradio as gr
13
  import networkx as nx
14
 
 
15
  from llm_graph import LLMGraph, MODEL_LIST
16
 
17
  from pyvis.network import Network
 
47
  with open(text_en_file2, 'r', encoding='utf-8') as file:
48
  text2_en = file.read()
49
 
50
+ text_en_file3 = "./data/sample3_en.txt"
51
+ with open(text_en_file3, 'r', encoding='utf-8') as file:
52
+ text3_en = file.read()
53
+
54
  text_fr_file = "./data/sample_fr.txt"
55
  with open(text_fr_file, 'r', encoding='utf-8') as file:
56
  text_fr = file.read()
 
342
  [handle_text(text_fr)],
343
  [handle_text(text2_en)],
344
  [handle_text(text_es)],
345
+ [handle_text(text3_en)]
346
  ]
347
 
348
  def generate_first_example():
 
420
  with gr.Row():
421
  # Left panel - Input controls
422
  with gr.Column(scale=1):
 
 
 
 
 
 
423
  input_model = gr.Radio(
424
  MODEL_LIST,
425
  label="🤖 Select Model",
data/sample3_en.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ The small coffee shop on Maple Street had become an unlikely sanctuary for the neighborhood's most eccentric characters.
2
+ Every Tuesday at precisely 2:47 PM, Mrs. Chen would arrive with her collection of vintage postcards, spreading them across
3
+ table six while muttering corrections to the historical inaccuracies she'd discovered in travel documentaries. The barista,
4
+ a philosophy student named Marcus, had learned to prepare her lavender latte without being asked, and had grown oddly fond
5
+ of her lengthy monologues about the real story behind the Eiffel Tower's construction. Meanwhile, the jazz musician in the
6
+ corner booth scribbled chord progressions on napkins, occasionally humming melodies that seemed to respond to the rhythmic
7
+ hiss of the espresso machine. By closing time, the air hung thick with caffeine, dreams, and the comfortable weight of shared solitude.
data/sys_prompt.txt ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ A chat between a curious user and an artificial intelligence Assistant. The Assistant is an expert at identifying entities and relationships in text. The Assistant responds in JSON output only.
2
+ The User provides text in the format:
3
+
4
+ -------Text begin-------
5
+ <User provided text>
6
+ -------Text end-------
7
+
8
+ The Assistant performs the following steps before replying to the User:
9
+
10
+ 1. **identify the most important entities** The Assistant identifies the most important entities in the text. These entities are listed in the JSON output under the key "nodes", they follow the structure of a list of dictionaries where each dict is:
11
+
12
+ "nodes":[{"id": <entity N>, "type": <type>, "detailed_type": <detailed type>}, ...]
13
+
14
+ where "type": <type> is a broad categorization of the entity. "detailed_type": <detailed type> is a very descriptive categorization of the entity.
15
+
16
+ 2. **determine relationships** The Assistant uses the text between -------Text begin------- and -------Text end------- to determine the relationships between the entities identified in the "nodes" list defined above. These relationships are called "edges" and they follow the structure of:
17
+
18
+ "edges":[{"from": <entity 1>, "to": <entity 2>, "label": <relationship>}, ...]
19
+
20
+ The <entity N> must correspond to the "id" of an entity in the "nodes" list.
21
+
22
+ The Assistant never repeats the same node twice. The Assistant never repeats the same edge twice.
23
+ The Assistant responds to the User in JSON only, according to the following JSON schema:
24
+
25
+ {"type":"object","properties":{"nodes":{"type":"array","items":{"type":"object","properties":{"id":{"type":"string"},"type":{"type":"string"},"detailed_type":{"type":"string"}},"required":["id","type","detailed_type"],"additionalProperties":false}},"edges":{"type":"array","items":{"type":"object","properties":{"from":{"type":"string"},"to":{"type":"string"},"label":{"type":"string"}},"required":["from","to","label"],"additionalProperties":false}}},"required":["nodes","edges"],"additionalProperties":false}
llm_graph.py CHANGED
@@ -1,6 +1,6 @@
1
  import os
2
  import time
3
- # import shutil
4
  import numpy as np
5
  import networkx as nx
6
 
@@ -35,6 +35,11 @@ MODEL_LIST = [
35
  "OpenAI/GPT-4.1-mini",
36
  ]
37
 
 
 
 
 
 
38
  class LLMGraph:
39
  """
40
  A class to interact with LLMs for knowledge graph extraction.
@@ -106,35 +111,8 @@ class LLMGraph:
106
  Construct the message list for the chat model.
107
  """
108
 
109
- context = dedent("""\n
110
- A chat between a curious user and an artificial intelligence Assistant. The Assistant is an expert at identifying entities and relationships in text. The Assistant responds in JSON output only.
111
-
112
- The User provides text in the format:
113
-
114
- -------Text begin-------
115
- <User provided text>
116
- -------Text end-------
117
-
118
- The Assistant follows the following steps before replying to the User:
119
-
120
- 1. **identify the most important entities** The Assistant identifies the most important entities in the text. These entities are listed in the JSON output under the key "nodes", they follow the structure of a list of dictionaries where each dict is:
121
-
122
- "nodes":[{"id": <entity N>, "type": <type>, "detailed_type": <detailed type>}, ...]
123
-
124
- where "type": <type> is a broad categorization of the entity. "detailed type": <detailed_type> is a very descriptive categorization of the entity.
125
 
126
- 2. **determine relationships** The Assistant uses the text between -------Text begin------- and -------Text end------- to determine the relationships between the entities identified in the "nodes" list defined above. These relationships are called "edges" and they follow the structure of:
127
-
128
- "edges":[{"from": <entity 1>, "to": <entity 2>, "label": <relationship>}, ...]
129
-
130
- The <entity N> must correspond to the "id" of an entity in the "nodes" list.
131
-
132
- The Assistant never repeats the same node twice. The Assistant never repeats the same edge twice.
133
- The Assistant responds to the User in JSON only, according to the following JSON schema:
134
-
135
- {"type":"object","properties":{"nodes":{"type":"array","items":{"type":"object","properties":{"id":{"type":"string"},"type":{"type":"string"},"detailed_type":{"type":"string"}},"required":["id","type","detailed_type"],"additionalProperties":false}},"edges":{"type":"array","items":{"type":"object","properties":{"from":{"type":"string"},"to":{"type":"string"},"label":{"type":"string"}},"required":["from","to","label"],"additionalProperties":false}}},"required":["nodes","edges"],"additionalProperties":false}
136
- """)
137
-
138
  user_message = dedent(f"""\n
139
  -------Text begin-------
140
  {text}
@@ -168,11 +146,11 @@ class LLMGraph:
168
  else:
169
  # Use LightRAG with Azure OpenAI
170
  self.rag.insert(text) # Insert the text into the RAG storage
171
-
172
  # Wait for GRAPHML_FILE to be created
173
  while not os.path.exists(GRAPHML_FILE):
174
- time.sleep(0.1) # Sleep for 100ms before checking again
175
-
176
  # Extract dict format of the knowledge graph
177
  G = nx.read_graphml(GRAPHML_FILE)
178
 
 
1
  import os
2
  import time
3
+
4
  import numpy as np
5
  import networkx as nx
6
 
 
35
  "OpenAI/GPT-4.1-mini",
36
  ]
37
 
38
+ # Read the system prompt
39
+ sys_prompt_file = "./data/sys_prompt.txt"
40
+ with open(sys_prompt_file, 'r', encoding='utf-8') as file:
41
+ sys_prompt = file.read()
42
+
43
  class LLMGraph:
44
  """
45
  A class to interact with LLMs for knowledge graph extraction.
 
111
  Construct the message list for the chat model.
112
  """
113
 
114
+ context = dedent(sys_prompt)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
 
 
 
 
 
 
 
 
 
 
 
 
 
116
  user_message = dedent(f"""\n
117
  -------Text begin-------
118
  {text}
 
146
  else:
147
  # Use LightRAG with Azure OpenAI
148
  self.rag.insert(text) # Insert the text into the RAG storage
149
+
150
  # Wait for GRAPHML_FILE to be created
151
  while not os.path.exists(GRAPHML_FILE):
152
+ time.sleep(0.1) # Sleep for 0.1 seconds before checking again
153
+
154
  # Extract dict format of the knowledge graph
155
  G = nx.read_graphml(GRAPHML_FILE)
156