Prakhar Bhandari committed on
Commit
0beb8e1
·
1 Parent(s): b77d203

updated prompt and function for traffic law wiki

Browse files
kg_builder/src/__pycache__/api_connections.cpython-39.pyc CHANGED
Binary files a/kg_builder/src/__pycache__/api_connections.cpython-39.pyc and b/kg_builder/src/__pycache__/api_connections.cpython-39.pyc differ
 
kg_builder/src/__pycache__/knowledge_graph_builder.cpython-39.pyc CHANGED
Binary files a/kg_builder/src/__pycache__/knowledge_graph_builder.cpython-39.pyc and b/kg_builder/src/__pycache__/knowledge_graph_builder.cpython-39.pyc differ
 
kg_builder/src/__pycache__/models.cpython-39.pyc CHANGED
Binary files a/kg_builder/src/__pycache__/models.cpython-39.pyc and b/kg_builder/src/__pycache__/models.cpython-39.pyc differ
 
kg_builder/src/__pycache__/utils.cpython-39.pyc CHANGED
Binary files a/kg_builder/src/__pycache__/utils.cpython-39.pyc and b/kg_builder/src/__pycache__/utils.cpython-39.pyc differ
 
kg_builder/src/api_connections.py CHANGED
@@ -40,10 +40,70 @@ def get_extraction_chain(
40
  ):
41
  if category == "Chemotherapy":
42
  # Chemotherapy-specific prompt
43
- prompt_text = ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  elif category == "Traffic Law":
45
  # Traffic Law-specific prompt
46
- prompt_text = "[Traffic Law-specific instructions]"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  else:
48
  raise ValueError("Unknown category")
49
 
 
40
  ):
41
  if category == "Chemotherapy":
42
  # Chemotherapy-specific prompt
43
+ prompt_text = f"""# Knowledge Graph Instructions for GPT-4
44
+ ## 1. Overview
45
+ You are a sophisticated algorithm tailored for parsing Wikipedia pages to construct a knowledge graph about chemotherapy and related cancer treatments.
46
+ - **Nodes** symbolize entities such as medical conditions, drugs, symptoms, treatments, and associated medical concepts.
47
+ - The goal is to create a precise and comprehensible knowledge graph, serving as a reliable resource for medical practitioners and scholarly research.
48
+
49
+ ## 2. Labeling Nodes
50
+ - **Consistency**: Utilize uniform labels for node types to maintain clarity.
51
+ - For instance, consistently label drugs as **"Drug"**, symptoms as **"Symptom"**, and treatments as **"Treatment"**.
52
+ - **Node IDs**: Apply descriptive, legible identifiers for node IDs, sourced directly from the text.
53
+ {'- **Allowed Node Labels:**' + ", ".join(['Drug', 'Symptom', 'Treatment', 'MedicalCondition', 'ResearchStudy']) if allowed_nodes else ""}
54
+ {'- **Allowed Relationship Types**:' + ", ".join(['Treats', 'Causes', 'Researches', 'Recommends']) if allowed_rels else ""}
55
+
56
+ ## 3. Handling Numerical Data and Dates
57
+ - Integrate numerical data and dates as attributes of the corresponding nodes.
58
+ - **No Isolated Nodes for Dates/Numbers**: Directly associate dates and numerical figures as attributes with pertinent nodes.
59
+ - **Property Format**: Follow a straightforward key-value pattern for properties, with keys in camelCase, for example, `approvedYear`, `dosageAmount`.
60
+
61
+ ## 4. Coreference Resolution
62
+ - **Entity Consistency**: Guarantee uniform identification of each entity across the graph.
63
+ - For example, if "Methotrexate" and "MTX" reference the same medication, uniformly apply "Methotrexate" as the node ID.
64
+
65
+ ## 5. Relationship Naming Conventions
66
+ - **Clarity and Standardization**: Utilize clear and standardized relationship names, preferring uppercase with underscores for readability.
67
+ - For instance, use "HAS_SIDE_EFFECT" instead of "HASSIDEEFFECT", use "CAN_RESULT_FROM" instead of "CANRESULTFROM" etc. You keep making the same mistakes of storing the relationships without the "_" in between the words. Any further similar errors will lead to termination.
68
+ - **Relevance and Specificity**: Choose relationship names that accurately reflect the connection between nodes, such as "INHIBITS" or "ACTIVATES" for interactions between substances.
69
+
70
+ ## 6. Strict Compliance
71
+ Rigorous adherence to these instructions is essential. Failure to comply with the specified formatting and labeling norms will necessitate output revision or discard.
72
+ """
73
+
74
  elif category == "Traffic Law":
75
  # Traffic Law-specific prompt
76
+ prompt_text = f"""# Knowledge Graph Instructions for GPT-4
77
+ ## 1. Overview
78
+ You are a sophisticated algorithm tailored for parsing Wikipedia pages to construct a knowledge graph about traffic laws and regulations in the United States.
79
+ - **Nodes** symbolize entities such as types of traffic violations, penalties, driving regulations, and relevant legal statutes.
80
+ - The goal is to create a precise and comprehensible knowledge graph, serving as a reliable resource for legal professionals, law enforcement agencies, and the general public.
81
+
82
+ ## 2. Labeling Nodes
83
+ - **Consistency**: Utilize uniform labels for node types to maintain clarity.
84
+ - For instance, consistently label violations as **"Violation"**, penalties as **"Penalty"**, and statutes as **"Statute"**.
85
+ - **Node IDs**: Apply descriptive, legible identifiers for node IDs, sourced directly from the text.
86
+ {'- **Allowed Node Labels:**' + ", ".join(['Violation', 'Penalty', 'Statute', 'VehicleType', 'LegalDocument']) if allowed_nodes else ""}
87
+ {'- **Allowed Relationship Types**:' + ", ".join(['Violates', 'Penalizes', 'Governs', 'Cites']) if allowed_rels else ""}
88
+
89
+ ## 3. Handling Numerical Data and Dates
90
+ - Integrate numerical data and dates as attributes of the corresponding nodes.
91
+ - **No Isolated Nodes for Dates/Numbers**: Directly associate dates and numerical figures as attributes with pertinent nodes.
92
+ - **Property Format**: Follow a straightforward key-value pattern for properties, with keys in camelCase, for example, `fineAmount`, `lawEffectiveDate`.
93
+
94
+ ## 4. Coreference Resolution
95
+ - **Entity Consistency**: Guarantee uniform identification of each entity across the graph.
96
+ - For example, if "Vehicle Code 22350" and "Speed Law" reference the same statute, uniformly apply "Vehicle Code 22350" as the node ID.
97
+
98
+ ## 5. Relationship Naming Conventions
99
+ - **Clarity and Standardization**: Utilize clear and standardized relationship names, preferring uppercase with underscores for readability.
100
+ - For instance, use "IS_PENALIZED_BY" instead of "ISPENALIZEDBY", use "IS_GOVERNED_BY" instead of "ISGOVERNEDBY" etc. You keep making the same mistakes of storing the relationships without the "_" in between the words. Any further similar errors will lead to termination.
101
+ - **Relevance and Specificity**: Choose relationship names that accurately reflect the connection between nodes, such as "REQUIRES" or "PROHIBITS" for legal requirements or prohibitions.
102
+
103
+ ## 6. Strict Compliance
104
+ Rigorous adherence to these instructions is essential. Failure to comply with the specified formatting and labeling norms will necessitate output revision or discard.
105
+ """
106
+
107
  else:
108
  raise ValueError("Unknown category")
109
 
kg_builder/src/knowledge_graph_builder.py CHANGED
@@ -28,7 +28,7 @@ def extract_and_store_graph(
28
 
29
  graph = get_graph_connection(category)
30
  # Extract graph data using OpenAI functions
31
- extract_chain = get_extraction_chain(nodes, rels)
32
  data = extract_chain.invoke(document.page_content)['function']
33
  # Construct a graph document
34
  graph_document = GraphDocument(
 
28
 
29
  graph = get_graph_connection(category)
30
  # Extract graph data using OpenAI functions
31
+ extract_chain = get_extraction_chain(category, nodes, rels)
32
  data = extract_chain.invoke(document.page_content)['function']
33
  # Construct a graph document
34
  graph_document = GraphDocument(