Spaces:
Running
on
Zero
Running
on
Zero
Refactored the code
Browse files
- app.py +5 -7
- data/sample3_en.txt +7 -0
- data/sys_prompt.txt +25 -0
- llm_graph.py +10 -32
app.py
CHANGED
@@ -12,7 +12,6 @@ import rapidjson
|
|
12 |
import gradio as gr
|
13 |
import networkx as nx
|
14 |
|
15 |
-
# from dotenv import load_dotenv
|
16 |
from llm_graph import LLMGraph, MODEL_LIST
|
17 |
|
18 |
from pyvis.network import Network
|
@@ -48,6 +47,10 @@ text_en_file2 = "./data/sample2_en.txt"
|
|
48 |
with open(text_en_file2, 'r', encoding='utf-8') as file:
|
49 |
text2_en = file.read()
|
50 |
|
|
|
|
|
|
|
|
|
51 |
text_fr_file = "./data/sample_fr.txt"
|
52 |
with open(text_fr_file, 'r', encoding='utf-8') as file:
|
53 |
text_fr = file.read()
|
@@ -339,6 +342,7 @@ EXAMPLES = [
|
|
339 |
[handle_text(text_fr)],
|
340 |
[handle_text(text2_en)],
|
341 |
[handle_text(text_es)],
|
|
|
342 |
]
|
343 |
|
344 |
def generate_first_example():
|
@@ -416,12 +420,6 @@ def create_ui():
|
|
416 |
with gr.Row():
|
417 |
# Left panel - Input controls
|
418 |
with gr.Column(scale=1):
|
419 |
-
# input_model = gr.Dropdown(
|
420 |
-
# MODEL_LIST,
|
421 |
-
# label="🤖 Select Model",
|
422 |
-
# info="Choose a model to process your text",
|
423 |
-
# value=MODEL_LIST[0] if MODEL_LIST else None,
|
424 |
-
# )
|
425 |
input_model = gr.Radio(
|
426 |
MODEL_LIST,
|
427 |
label="🤖 Select Model",
|
|
|
12 |
import gradio as gr
|
13 |
import networkx as nx
|
14 |
|
|
|
15 |
from llm_graph import LLMGraph, MODEL_LIST
|
16 |
|
17 |
from pyvis.network import Network
|
|
|
47 |
with open(text_en_file2, 'r', encoding='utf-8') as file:
|
48 |
text2_en = file.read()
|
49 |
|
50 |
+
text_en_file3 = "./data/sample3_en.txt"
|
51 |
+
with open(text_en_file3, 'r', encoding='utf-8') as file:
|
52 |
+
text3_en = file.read()
|
53 |
+
|
54 |
text_fr_file = "./data/sample_fr.txt"
|
55 |
with open(text_fr_file, 'r', encoding='utf-8') as file:
|
56 |
text_fr = file.read()
|
|
|
342 |
[handle_text(text_fr)],
|
343 |
[handle_text(text2_en)],
|
344 |
[handle_text(text_es)],
|
345 |
+
[handle_text(text3_en)]
|
346 |
]
|
347 |
|
348 |
def generate_first_example():
|
|
|
420 |
with gr.Row():
|
421 |
# Left panel - Input controls
|
422 |
with gr.Column(scale=1):
|
|
|
|
|
|
|
|
|
|
|
|
|
423 |
input_model = gr.Radio(
|
424 |
MODEL_LIST,
|
425 |
label="🤖 Select Model",
|
data/sample3_en.txt
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
The small coffee shop on Maple Street had become an unlikely sanctuary for the neighborhood's most eccentric characters.
|
2 |
+
Every Tuesday at precisely 2:47 PM, Mrs. Chen would arrive with her collection of vintage postcards, spreading them across
|
3 |
+
table six while muttering corrections to the historical inaccuracies she'd discovered in travel documentaries. The barista,
|
4 |
+
a philosophy student named Marcus, had learned to prepare her lavender latte without being asked, and had grown oddly fond
|
5 |
+
of her lengthy monologues about the real story behind the Eiffel Tower's construction. Meanwhile, the jazz musician in the
|
6 |
+
corner booth scribbled chord progressions on napkins, occasionally humming melodies that seemed to respond to the rhythmic
|
7 |
+
hiss of the espresso machine. By closing time, the air hung thick with caffeine, dreams, and the comfortable weight of shared solitude.
|
data/sys_prompt.txt
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
A chat between a curious user and an artificial intelligence Assistant. The Assistant is an expert at identifying entities and relationships in text. The Assistant responds in JSON output only.
|
2 |
+
The User provides text in the format:
|
3 |
+
|
4 |
+
-------Text begin-------
|
5 |
+
<User provided text>
|
6 |
+
-------Text end-------
|
7 |
+
|
8 |
+
The Assistant follows the following steps before replying to the User:
|
9 |
+
|
10 |
+
1. **identify the most important entities** The Assistant identifies the most important entities in the text. These entities are listed in the JSON output under the key "nodes", they follow the structure of a list of dictionaries where each dict is:
|
11 |
+
|
12 |
+
"nodes":[{"id": <entity N>, "type": <type>, "detailed_type": <detailed type>}, ...]
|
13 |
+
|
14 |
+
where "type": <type> is a broad categorization of the entity. "detailed_type": <detailed type> is a very descriptive categorization of the entity.
|
15 |
+
|
16 |
+
2. **determine relationships** The Assistant uses the text between -------Text begin------- and -------Text end------- to determine the relationships between the entities identified in the "nodes" list defined above. These relationships are called "edges" and they follow the structure of:
|
17 |
+
|
18 |
+
"edges":[{"from": <entity 1>, "to": <entity 2>, "label": <relationship>}, ...]
|
19 |
+
|
20 |
+
The <entity N> must correspond to the "id" of an entity in the "nodes" list.
|
21 |
+
|
22 |
+
The Assistant never repeats the same node twice. The Assistant never repeats the same edge twice.
|
23 |
+
The Assistant responds to the User in JSON only, according to the following JSON schema:
|
24 |
+
|
25 |
+
{"type":"object","properties":{"nodes":{"type":"array","items":{"type":"object","properties":{"id":{"type":"string"},"type":{"type":"string"},"detailed_type":{"type":"string"}},"required":["id","type","detailed_type"],"additionalProperties":false}},"edges":{"type":"array","items":{"type":"object","properties":{"from":{"type":"string"},"to":{"type":"string"},"label":{"type":"string"}},"required":["from","to","label"],"additionalProperties":false}}},"required":["nodes","edges"],"additionalProperties":false}
|
llm_graph.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
import os
|
2 |
import time
|
3 |
-
|
4 |
import numpy as np
|
5 |
import networkx as nx
|
6 |
|
@@ -35,6 +35,11 @@ MODEL_LIST = [
|
|
35 |
"OpenAI/GPT-4.1-mini",
|
36 |
]
|
37 |
|
|
|
|
|
|
|
|
|
|
|
38 |
class LLMGraph:
|
39 |
"""
|
40 |
A class to interact with LLMs for knowledge graph extraction.
|
@@ -106,35 +111,8 @@ class LLMGraph:
|
|
106 |
Construct the message list for the chat model.
|
107 |
"""
|
108 |
|
109 |
-
context = dedent(
|
110 |
-
A chat between a curious user and an artificial intelligence Assistant. The Assistant is an expert at identifying entities and relationships in text. The Assistant responds in JSON output only.
|
111 |
-
|
112 |
-
The User provides text in the format:
|
113 |
-
|
114 |
-
-------Text begin-------
|
115 |
-
<User provided text>
|
116 |
-
-------Text end-------
|
117 |
-
|
118 |
-
The Assistant follows the following steps before replying to the User:
|
119 |
-
|
120 |
-
1. **identify the most important entities** The Assistant identifies the most important entities in the text. These entities are listed in the JSON output under the key "nodes", they follow the structure of a list of dictionaries where each dict is:
|
121 |
-
|
122 |
-
"nodes":[{"id": <entity N>, "type": <type>, "detailed_type": <detailed type>}, ...]
|
123 |
-
|
124 |
-
where "type": <type> is a broad categorization of the entity. "detailed type": <detailed_type> is a very descriptive categorization of the entity.
|
125 |
|
126 |
-
2. **determine relationships** The Assistant uses the text between -------Text begin------- and -------Text end------- to determine the relationships between the entities identified in the "nodes" list defined above. These relationships are called "edges" and they follow the structure of:
|
127 |
-
|
128 |
-
"edges":[{"from": <entity 1>, "to": <entity 2>, "label": <relationship>}, ...]
|
129 |
-
|
130 |
-
The <entity N> must correspond to the "id" of an entity in the "nodes" list.
|
131 |
-
|
132 |
-
The Assistant never repeats the same node twice. The Assistant never repeats the same edge twice.
|
133 |
-
The Assistant responds to the User in JSON only, according to the following JSON schema:
|
134 |
-
|
135 |
-
{"type":"object","properties":{"nodes":{"type":"array","items":{"type":"object","properties":{"id":{"type":"string"},"type":{"type":"string"},"detailed_type":{"type":"string"}},"required":["id","type","detailed_type"],"additionalProperties":false}},"edges":{"type":"array","items":{"type":"object","properties":{"from":{"type":"string"},"to":{"type":"string"},"label":{"type":"string"}},"required":["from","to","label"],"additionalProperties":false}}},"required":["nodes","edges"],"additionalProperties":false}
|
136 |
-
""")
|
137 |
-
|
138 |
user_message = dedent(f"""\n
|
139 |
-------Text begin-------
|
140 |
{text}
|
@@ -168,11 +146,11 @@ class LLMGraph:
|
|
168 |
else:
|
169 |
# Use LightRAG with Azure OpenAI
|
170 |
self.rag.insert(text) # Insert the text into the RAG storage
|
171 |
-
|
172 |
# Wait for GRAPHML_FILE to be created
|
173 |
while not os.path.exists(GRAPHML_FILE):
|
174 |
-
time.sleep(0.1) # Sleep for
|
175 |
-
|
176 |
# Extract dict format of the knowledge graph
|
177 |
G = nx.read_graphml(GRAPHML_FILE)
|
178 |
|
|
|
1 |
import os
|
2 |
import time
|
3 |
+
|
4 |
import numpy as np
|
5 |
import networkx as nx
|
6 |
|
|
|
35 |
"OpenAI/GPT-4.1-mini",
|
36 |
]
|
37 |
|
38 |
+
# Read the system prompt
|
39 |
+
sys_prompt_file = "./data/sys_prompt.txt"
|
40 |
+
with open(sys_prompt_file, 'r', encoding='utf-8') as file:
|
41 |
+
sys_prompt = file.read()
|
42 |
+
|
43 |
class LLMGraph:
|
44 |
"""
|
45 |
A class to interact with LLMs for knowledge graph extraction.
|
|
|
111 |
Construct the message list for the chat model.
|
112 |
"""
|
113 |
|
114 |
+
context = dedent(sys_prompt)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
115 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
116 |
user_message = dedent(f"""\n
|
117 |
-------Text begin-------
|
118 |
{text}
|
|
|
146 |
else:
|
147 |
# Use LightRAG with Azure OpenAI
|
148 |
self.rag.insert(text) # Insert the text into the RAG storage
|
149 |
+
|
150 |
# Wait for GRAPHML_FILE to be created
|
151 |
while not os.path.exists(GRAPHML_FILE):
|
152 |
+
time.sleep(0.1) # Sleep for 0.1 seconds before checking again
|
153 |
+
|
154 |
# Extract dict format of the knowledge graph
|
155 |
G = nx.read_graphml(GRAPHML_FILE)
|
156 |
|