Prakhar Bhandari
committed on
Commit
·
0beb8e1
1
Parent(s):
b77d203
updated prompt and function for traffic law wiki
Browse files
- kg_builder/src/__pycache__/api_connections.cpython-39.pyc +0 -0
- kg_builder/src/__pycache__/knowledge_graph_builder.cpython-39.pyc +0 -0
- kg_builder/src/__pycache__/models.cpython-39.pyc +0 -0
- kg_builder/src/__pycache__/utils.cpython-39.pyc +0 -0
- kg_builder/src/api_connections.py +62 -2
- kg_builder/src/knowledge_graph_builder.py +1 -1
kg_builder/src/__pycache__/api_connections.cpython-39.pyc
CHANGED
Binary files a/kg_builder/src/__pycache__/api_connections.cpython-39.pyc and b/kg_builder/src/__pycache__/api_connections.cpython-39.pyc differ
|
|
kg_builder/src/__pycache__/knowledge_graph_builder.cpython-39.pyc
CHANGED
Binary files a/kg_builder/src/__pycache__/knowledge_graph_builder.cpython-39.pyc and b/kg_builder/src/__pycache__/knowledge_graph_builder.cpython-39.pyc differ
|
|
kg_builder/src/__pycache__/models.cpython-39.pyc
CHANGED
Binary files a/kg_builder/src/__pycache__/models.cpython-39.pyc and b/kg_builder/src/__pycache__/models.cpython-39.pyc differ
|
|
kg_builder/src/__pycache__/utils.cpython-39.pyc
CHANGED
Binary files a/kg_builder/src/__pycache__/utils.cpython-39.pyc and b/kg_builder/src/__pycache__/utils.cpython-39.pyc differ
|
|
kg_builder/src/api_connections.py
CHANGED
@@ -40,10 +40,70 @@ def get_extraction_chain(
|
|
40 |
):
|
41 |
if category == "Chemotherapy":
|
42 |
# Chemotherapy-specific prompt
|
43 |
-
prompt_text = ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
elif category == "Traffic Law":
|
45 |
# Traffic Law-specific prompt
|
46 |
-
prompt_text = "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
else:
|
48 |
raise ValueError("Unknown category")
|
49 |
|
|
|
40 |
):
|
41 |
if category == "Chemotherapy":
|
42 |
# Chemotherapy-specific prompt
|
43 |
+
prompt_text = f"""# Knowledge Graph Instructions for GPT-4
|
44 |
+
## 1. Overview
|
45 |
+
You are a sophisticated algorithm tailored for parsing Wikipedia pages to construct a knowledge graph about chemotherapy and related cancer treatments.
|
46 |
+
- **Nodes** symbolize entities such as medical conditions, drugs, symptoms, treatments, and associated medical concepts.
|
47 |
+
- The goal is to create a precise and comprehensible knowledge graph, serving as a reliable resource for medical practitioners and scholarly research.
|
48 |
+
|
49 |
+
## 2. Labeling Nodes
|
50 |
+
- **Consistency**: Utilize uniform labels for node types to maintain clarity.
|
51 |
+
- For instance, consistently label drugs as **"Drug"**, symptoms as **"Symptom"**, and treatments as **"Treatment"**.
|
52 |
+
- **Node IDs**: Apply descriptive, legible identifiers for node IDs, sourced directly from the text.
|
53 |
+
{'- **Allowed Node Labels:**' + ", ".join(['Drug', 'Symptom', 'Treatment', 'MedicalCondition', 'ResearchStudy']) if allowed_nodes else ""}
|
54 |
+
{'- **Allowed Relationship Types**:' + ", ".join(['Treats', 'Causes', 'Researches', 'Recommends']) if allowed_rels else ""}
|
55 |
+
|
56 |
+
## 3. Handling Numerical Data and Dates
|
57 |
+
- Integrate numerical data and dates as attributes of the corresponding nodes.
|
58 |
+
- **No Isolated Nodes for Dates/Numbers**: Directly associate dates and numerical figures as attributes with pertinent nodes.
|
59 |
+
- **Property Format**: Follow a straightforward key-value pattern for properties, with keys in camelCase, for example, `approvedYear`, `dosageAmount`.
|
60 |
+
|
61 |
+
## 4. Coreference Resolution
|
62 |
+
- **Entity Consistency**: Guarantee uniform identification of each entity across the graph.
|
63 |
+
- For example, if "Methotrexate" and "MTX" reference the same medication, uniformly apply "Methotrexate" as the node ID.
|
64 |
+
|
65 |
+
## 5. Relationship Naming Conventions
|
66 |
+
- **Clarity and Standardization**: Utilize clear and standardized relationship names, preferring uppercase with underscores for readability.
|
67 |
+
- For instance, use "HAS_SIDE_EFFECT" instead of "HASSIDEEFFECT", use "CAN_RESULT_FROM" instead of "CANRESULTFROM" etc. You keep making the same mistakes of storing the relationships without the "_" in between the words. Any further similar errors will lead to termination.
|
68 |
+
- **Relevance and Specificity**: Choose relationship names that accurately reflect the connection between nodes, such as "INHIBITS" or "ACTIVATES" for interactions between substances.
|
69 |
+
|
70 |
+
## 6. Strict Compliance
|
71 |
+
Rigorous adherence to these instructions is essential. Failure to comply with the specified formatting and labeling norms will necessitate output revision or discard.
|
72 |
+
"""
|
73 |
+
|
74 |
elif category == "Traffic Law":
|
75 |
# Traffic Law-specific prompt
|
76 |
+
prompt_text = f"""# Knowledge Graph Instructions for GPT-4
|
77 |
+
## 1. Overview
|
78 |
+
You are a sophisticated algorithm tailored for parsing Wikipedia pages to construct a knowledge graph about traffic laws and regulations in the United States.
|
79 |
+
- **Nodes** symbolize entities such as types of traffic violations, penalties, driving regulations, and relevant legal statutes.
|
80 |
+
- The goal is to create a precise and comprehensible knowledge graph, serving as a reliable resource for legal professionals, law enforcement agencies, and the general public.
|
81 |
+
|
82 |
+
## 2. Labeling Nodes
|
83 |
+
- **Consistency**: Utilize uniform labels for node types to maintain clarity.
|
84 |
+
- For instance, consistently label violations as **"Violation"**, penalties as **"Penalty"**, and statutes as **"Statute"**.
|
85 |
+
- **Node IDs**: Apply descriptive, legible identifiers for node IDs, sourced directly from the text.
|
86 |
+
{'- **Allowed Node Labels:**' + ", ".join(['Violation', 'Penalty', 'Statute', 'VehicleType', 'LegalDocument']) if allowed_nodes else ""}
|
87 |
+
{'- **Allowed Relationship Types**:' + ", ".join(['Violates', 'Penalizes', 'Governs', 'Cites']) if allowed_rels else ""}
|
88 |
+
|
89 |
+
## 3. Handling Numerical Data and Dates
|
90 |
+
- Integrate numerical data and dates as attributes of the corresponding nodes.
|
91 |
+
- **No Isolated Nodes for Dates/Numbers**: Directly associate dates and numerical figures as attributes with pertinent nodes.
|
92 |
+
- **Property Format**: Follow a straightforward key-value pattern for properties, with keys in camelCase, for example, `fineAmount`, `lawEffectiveDate`.
|
93 |
+
|
94 |
+
## 4. Coreference Resolution
|
95 |
+
- **Entity Consistency**: Guarantee uniform identification of each entity across the graph.
|
96 |
+
- For example, if "Vehicle Code 22350" and "Speed Law" reference the same statute, uniformly apply "Vehicle Code 22350" as the node ID.
|
97 |
+
|
98 |
+
## 5. Relationship Naming Conventions
|
99 |
+
- **Clarity and Standardization**: Utilize clear and standardized relationship names, preferring uppercase with underscores for readability.
|
100 |
+
- For instance, use "IS_PENALIZED_BY" instead of "ISPENALIZEDBY", use "IS_GOVERNED_BY" instead of "ISGOVERNEDBY" etc. You keep making the same mistakes of storing the relationships without the "_" in between the words. Any further similar errors will lead to termination.
|
101 |
+
- **Relevance and Specificity**: Choose relationship names that accurately reflect the connection between nodes, such as "REQUIRES" or "PROHIBITS" for legal requirements or prohibitions.
|
102 |
+
|
103 |
+
## 6. Strict Compliance
|
104 |
+
Rigorous adherence to these instructions is essential. Failure to comply with the specified formatting and labeling norms will necessitate output revision or discard.
|
105 |
+
"""
|
106 |
+
|
107 |
else:
|
108 |
raise ValueError("Unknown category")
|
109 |
|
kg_builder/src/knowledge_graph_builder.py
CHANGED
@@ -28,7 +28,7 @@ def extract_and_store_graph(
|
|
28 |
|
29 |
graph = get_graph_connection(category)
|
30 |
# Extract graph data using OpenAI functions
|
31 |
-
extract_chain = get_extraction_chain(nodes, rels)
|
32 |
data = extract_chain.invoke(document.page_content)['function']
|
33 |
# Construct a graph document
|
34 |
graph_document = GraphDocument(
|
|
|
28 |
|
29 |
graph = get_graph_connection(category)
|
30 |
# Extract graph data using OpenAI functions
|
31 |
+
extract_chain = get_extraction_chain(category, nodes, rels)
|
32 |
data = extract_chain.invoke(document.page_content)['function']
|
33 |
# Construct a graph document
|
34 |
graph_document = GraphDocument(
|