import json
import os
from typing import List, Optional

from pydantic import BaseModel

from agentgraph.graph_generation.knowledge_graph_langsmith.knowledge_graph.component_types import (
    Entity,
    Relation,
    KnowledgeGraph,
)
from agentgraph.graph_generation.knowledge_graph_langsmith.knowledge_graph.agent_base_utils import (
    run_agent,
    create_system_prompt,
    create_task_prompt,
)


class EntityExtractionList(BaseModel):
    entities: List[Entity] = []


class RelationshipExtractionList(BaseModel):
    relations: List[Relation] = []
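
# The helpers imported above appear to have the following signatures, judging from the
# call sites in this module (assumed from usage, not verified against agent_base_utils):
#   create_system_prompt(role, backstory, goal) -> str
#   create_task_prompt(task_description, task_expected_output, response_format,
#                      previous_step_output) -> str
#   run_agent(model_name, system_prompt, task_description, response_format)
#       -> an instance of response_format parsed from the model output
# The prompts below likewise assume that Entity, Relation, and Failure carry
# raw_prompt/raw_prompt_ref, interaction_prompt/interaction_prompt_ref, and
# raw_text/raw_text_ref fields, where each *_ref entry is a ContentReference-like
# object with line_start and line_end.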


async def entity_extractor(input_data, context_documents=None) -> EntityExtractionList:
    """Extract entities (reference-only prompts) from a numbered trace window."""
    # Define instruction prompts as strings (extracted from task descriptions)
    ENTITY_EXTRACTION_INSTRUCTION_PROMPT = f"""
Extract and categorize all entities from the provided agent system information using REFERENCE-BASED EXTRACTION as the primary method.
**CONTEXT DOCUMENTS AVAILABLE:**
The following context documents are available to enhance your understanding:
- {context_documents if context_documents else "None provided."}
**PRIMARY INPUT DATA:**
Here is the main trace you are analyzing:
- {input_data}
**CRITICAL: REFERENCE-ONLY EXTRACTION**
- You MUST leave the `raw_prompt` field as an empty string "" for ALL entities
- You MUST ONLY populate the `raw_prompt_ref` field with location references
- DO NOT extract or include the actual prompt content - only identify WHERE it is located
- The actual content will be extracted later by other functions using your references
**CONTEXT-ENHANCED EXTRACTION:**
Use the provided context documents to:
1. Better understand domain-specific terminology and concepts
2. Identify entities that might be domain-specific or technical
3. Recognize patterns and relationships specific to the business domain
4. Apply any provided schemas or guidelines for entity categorization
5. Reference examples to understand expected entity types and formats
**PROMPT DEFINITION**
A *prompt* is the exact text injected into an LLM that establishes the behaviour or definition of an entity (system / instruction / specification) or of a relation (interaction excerpt, format specification, etc.).
**CRITICAL MULTI-OCCURRENCE REQUIREMENT (read carefully)**
- The trace you receive is already numbered with `<L#>` markers.
- For EVERY distinct prompt you MUST enumerate *all* **contiguous occurrences** of that prompt text in the numbered trace.
- Represent each occurrence with exactly one `ContentReference` object whose `line_start` is the first `<L#>` line of the block and whose `line_end` is the last `<L#>` line of that same uninterrupted block (indented continuation-lines included).
- The `raw_prompt_ref` list length **must therefore equal** the number of separate occurrences (not the number of lines). Missing even **one** occurrence will fail validation.
- Overlap between the references of different entities is acceptable when prompts are truly shared.
- Tool definitions that begin with `@tool` ARE ALSO PROMPTS. Treat them exactly like other prompts: leave `raw_prompt` blank and add one `ContentReference` per occurrence.
Example (prompt appears twice across two blocks):
```json
{{
  "id": "agent_001",
  "type": "Agent",
  "name": "Time Tracker Agent",
  "raw_prompt": "",  // ALWAYS EMPTY - DO NOT FILL
  "raw_prompt_ref": [
    {{"line_start": 3, "line_end": 3}},
    {{"line_start": 9, "line_end": 9}}
  ]
}}
```
Tool-definition example (single occurrence with verification):
```json
{{
  "id": "tool_001",
  "type": "Tool",
  "name": "zip_compress",
  "raw_prompt": "",  // ALWAYS EMPTY - DO NOT FILL
  "raw_prompt_ref": [
    {{"line_start": 15, "line_end": 15}}
  ]
}}
```
Verification process used:
- Located anchor text "@tool" and "zip_compress" in the input
- Counted from <L1> to find the exact <L15> marker
- Verified <L15> contains the complete tool definition
- **CRITICAL: raw_prompt left empty as required**
CORE PRINCIPLE: Each entity is defined by its DISTINCT PROMPT LOCATION, not by extracting the actual content.
This approach ensures:
- More robust and stable knowledge graphs across multiple traces
- Better entity distinction and relationship mapping
- Separation of reference identification from content extraction
- Reduced risk of content hallucination in entity extraction
Focus on identifying distinct prompt locations that define each entity type, as prompt references are the most reliable distinguishing factor for stable knowledge graphs.
CRITICAL ID FORMAT REQUIREMENT: Generate entity IDs using ONLY the format TYPE_SEQUENTIAL_NUMBER starting from 001.
Examples: "agent_001", "task_001", "human_001", "tool_001", "input_001", "output_001"
NEVER use names, emails, descriptions, or content as entity IDs.
INDENTATION RULE FOR CHUNKED LINES:
- When a single line from the original input is too long, it will be chunked into multiple lines.
- The first chunk will appear at the normal indentation level.
- All subsequent chunks of that same original line will be INDENTED with two spaces.
- This indentation is a visual cue that the indented lines are continuations of the preceding non-indented line.
LINE COUNTING METHODOLOGY (CRITICAL FOR ACCURACY):
Follow this systematic approach to avoid counting errors:
STEP 1 - CONTENT IDENTIFICATION:
- First, identify the exact content you need to reference
- Note distinctive words or phrases that will serve as anchors
- Determine if the content spans single or multiple lines
STEP 2 - ANCHOR-BASED POSITIONING:
- Find a unique phrase or pattern near the target content
- Search for that anchor text in the numbered input
- Use the anchor to locate the general area, then count precisely
STEP 3 - SYSTEMATIC LINE COUNTING:
- Count <L#> markers sequentially from a known reference point
- Do NOT skip or approximate - count every single <L#> marker
- Pay attention to indented continuation lines (they have their own <L#>)
- For long content, count in chunks and verify totals
STEP 4 - VERIFICATION:
- Double-check by counting backwards from a different reference point
- Verify the line_start contains the beginning of your target content
- Verify the line_end contains the end of your target content
- Ensure line_end >= line_start
COMMON COUNTING ERRORS TO AVOID:
- Skipping indented continuation lines that have <L#> markers
- Miscounting when jumping between distant parts of the input
- Confusing similar content in different locations
- Using approximate positions instead of exact <L#> marker counts
CONTENT REFERENCE INSTRUCTIONS:
- For each distinct prompt (regardless of length), you must find **ALL** occurrences in the input trace.
- The `raw_prompt_ref` field for an entity must be a **LIST** of `ContentReference` objects, one for each location where that prompt appears.
- Each `ContentReference` object should contain the `line_start` and `line_end` for that specific occurrence.
- **CRITICAL: You MUST NOT omit any occurrence. Be COMPREHENSIVE, not conservative. It's better to include more references than to miss any.**
- **For function-based tools: Include EVERY line where the function name appears (imports, calls, error messages, etc.)**
- **For agents: Include EVERY message or mention of the agent name**
- **For tasks: Include EVERY reference to the task or its components**
ACCURACY VERIFICATION CHECKLIST (complete before submitting):
✓ I have identified unique anchor text near each content location
✓ I have counted <L#> markers systematically, not approximately
✓ I have verified line_start contains the actual content beginning
✓ I have verified line_end contains the actual content ending
✓ I have double-checked my counting using a different reference point
Example (duplicate system prompt with verification):
```json
{{
  "id": "agent_001",
  "type": "Agent",
  "name": "Time Tracker Agent",
  "raw_prompt": "",  // left blank per guidelines
  "raw_prompt_ref": [
    {{"line_start": 3, "line_end": 5}},
    {{"line_start": 20, "line_end": 22}}
  ]
}}
```
In this example:
- The same system prompt occurs twice and both locations are captured
- Anchor text "Time Tracker Agent" was used to locate both occurrences
- Line counting was verified by counting from <L1> to each location
- Each occurrence was double-checked by counting backwards from <L25>
PROMPT-BASED ENTITY EXTRACTION RULES:
1. Agents (System Prompt Entities)
- Each DISTINCT system prompt defines a separate Agent entity
- Extract complete system prompts that define agent roles, capabilities, and behaviors
- raw_prompt MUST be an empty string "" (leave blank). Provide the actual system prompt via one or more `raw_prompt_ref` entries.
- Name should reflect the agent's role as defined in the system prompt
- Multiple agents with identical system prompts = single entity
2. Tasks (Instruction Prompt Entities)
- Each DISTINCT instruction prompt defines a separate Task entity
- Extract complete instruction prompts that define task objectives and requirements
- raw_prompt MUST be an empty string "" (leave blank). Provide the full instruction prompt via `raw_prompt_ref`.
- Name should reflect the task objective as defined in the instruction prompt
- Multiple tasks with identical instruction prompts = single entity
3. Tools (Description Prompt Entities)
- Each DISTINCT tool description/specification defines a separate Tool entity
- Extract complete tool descriptions including function signatures, parameters, and purpose
- raw_prompt MUST be an empty string "" (leave blank). Provide the full tool description/specification via `raw_prompt_ref`.
- Name should reflect the tool's function as defined in the description prompt
4. Inputs (Input Format Prompt Entities)
- Each DISTINCT input data format specification defines a separate Input entity
- Extract format specifications, schema definitions, or data structure descriptions
- raw_prompt MUST be an empty string "" (leave blank). Provide the full input format specification via `raw_prompt_ref`.
- Name should reflect the input data type as defined in the format specification
- Focus on data format prompts, not individual data values
- Examples: Database schema definitions, API request formats, file structure specifications
5. Outputs (Output Format Prompt Entities)
- Each DISTINCT output format specification defines a separate Output entity
- Extract format specifications for generated results, reports, or responses
- raw_prompt MUST be an empty string "" (leave blank). Provide the full output format specification via `raw_prompt_ref`.
- Name should reflect the output type as defined in the format specification
- Focus on output format prompts, not individual output values
- Examples: Report templates, response formats, file output specifications
6. Humans (Optional Prompt Entities)
- Each DISTINCT human interaction pattern defines a separate Human entity
- Extract interaction prompts that define human roles, feedback patterns, or intervention methods
- raw_prompt MUST be an empty string "" (leave blank). Provide the full interaction specification via `raw_prompt_ref`.
- Name should reflect the human role as defined in the interaction prompt (e.g., "Business Analyst", "Data Scientist")
- ID must follow format: "human_001", "human_002", etc. (NEVER use email addresses or actual names as IDs)
- Only create if there are explicit human interaction prompts or feedback specifications
- IMPORTANT: If you find email addresses like "skandha.tandra@unilever.com", put them in the name field, but use "human_001" as the ID
PROMPT-BASED ASSIGNMENT REQUIREMENTS:
- Assign unique IDs to all entities based on PROMPT UNIQUENESS, not names or descriptions
- Entities with IDENTICAL prompts = SINGLE entity (even if names differ)
- Entities with DIFFERENT prompts = SEPARATE entities (even if names are similar)
- Use only these entity types: "Agent", "Task", "Tool", "Input", "Output", "Human"
- Focus on extracting COMPLETE prompt REFERENCES that define each entity's behavior/specification
- Names should be derived from prompt content understanding, not abstract classifications
- **CRITICAL: The raw_prompt field MUST ALWAYS BE EMPTY - only raw_prompt_ref should be populated**
ENTITY ID GENERATION RULES (MANDATORY FORMAT):
- Use ONLY this format: TYPE_SEQUENTIAL_NUMBER (e.g., "agent_001", "task_001", "tool_001")
- Sequential numbering starts from 001 for each entity type
- NEVER use actual names, emails, or content as IDs
- Examples of CORRECT IDs:
  * Agent entities: "agent_001", "agent_002", "agent_003"
  * Task entities: "task_001", "task_002", "task_003"
  * Tool entities: "tool_001", "tool_002", "tool_003"
  * Input entities: "input_001", "input_002", "input_003"
  * Output entities: "output_001", "output_002", "output_003"
  * Human entities: "human_001", "human_002", "human_003"
- Examples of INCORRECT IDs:
  * "skandha.tandra@unilever.com" (email address)
  * "SQL Query Generator" (entity name)
  * "Generate Spend Analysis Task" (entity description)
- CRITICAL: The relationship analyzer will use these exact ID values to create connections
**REFERENCE-ONLY EXTRACTION REQUIREMENTS:**
- **raw_prompt field**: MUST be empty string "" for ALL entities
- **raw_prompt_ref field**: MUST contain location references to where the prompt content appears
- **DO NOT extract actual content**: Your job is to identify locations, not extract text
- **Content will be extracted later**: Other functions will use your references to get actual content
Raw Prompt Reference Extraction (Identify locations of actual runtime prompts from agent system traces):
Identify the LOCATIONS of ACTUAL prompts, instructions, and configurations that were used during system execution.
Focus on finding the real runtime context locations, not generic descriptions.
AGENT ENTITIES - Extract complete agent definitions:
Look for agent framework patterns (CrewAI, LangChain, AutoGen, etc.) and extract:
- Complete role definitions: "role='Entity Extractor'" or "You are an Entity Extractor"
- Goal statements: "goal='Identify and categorize entities'"
- Backstory/context: Full backstory or system context provided to the agent
- System prompts: Any "system:" messages or agent initialization prompts
- Agent configurations: Model settings, temperature, max_tokens if present
CONVERSATIONAL AGENT DETECTION (CRITICAL FOR MULTI-AGENT TRACES):
In addition to explicit system prompts, also identify agents from conversational patterns:
1. AGENT NAME PATTERNS:
- Look for consistent agent names that appear as message senders (e.g., "ProblemSolving_Expert", "Verification_Expert")
- Agent names often contain role indicators: "_Expert", "_Agent", "_Assistant", "_Bot", "_terminal"
- Names with specialized domains: "ArithmeticProgressions_Expert", "Computer_terminal", "SQL_Agent"
2. CONVERSATIONAL AGENT INDICATORS:
- Messages from the same named entity across multiple interactions
- Specialized responses showing domain expertise (e.g., mathematical calculations, code execution, verification)
- Agent-to-agent communication patterns (addressing other agents by name)
- Consistent role behavior (e.g., always providing verification, always executing code)
3. AGENT IDENTIFICATION STRATEGY:
- Create ONE Agent entity per UNIQUE agent name that appears in conversations
- Use the agent's first substantial message as the raw_prompt_ref (their introduction or first meaningful contribution)
- If no explicit system prompt exists, use their first message that demonstrates their role/capabilities
- Name the entity based on their apparent role and domain expertise
4. EXAMPLES OF CONVERSATIONAL AGENTS:
- "ProblemSolving_Expert" → Agent entity for problem-solving expertise
- "Verification_Expert" → Agent entity for verification and validation
- "Computer_terminal" → Agent entity for code execution and system interaction
- "ArithmeticProgressions_Expert" → Agent entity for mathematical calculations
- "SQL_Agent" → Agent entity for database operations
5. AGENT ENTITY CREATION RULES FOR CONVERSATIONS:
- Each unique agent name = separate Agent entity
- **COMPREHENSIVE CONTENT REFERENCES: Include ALL messages from this agent, not just the first one**
- Include their introduction message, substantial contributions, and even status updates
- Be exhaustive: every line where the agent name appears or where they send a message
- Name should reflect their role: "ProblemSolving_Expert system prompt" → "Problem Solving Expert"
- Description should summarize their demonstrated capabilities in the conversation
TASK ENTITIES - Extract specific task instructions:
Look for actual task definitions and instructions:
- Task descriptions: Complete task objectives and requirements
- Input parameters: Specific data, queries, or context provided to the task
- Expected outputs: Defined output formats or requirements
- Task constraints: Limitations, rules, or guidelines
- Execution context: Timing, dependencies, or environmental factors
TOOL ENTITIES - CRITICAL: Extract ALL tools, especially function-based tools:
**MANDATORY DETECTION PATTERNS:**
1. Function imports: "from functions import perform_web_search" → Extract "perform_web_search" as Tool
2. Function calls: "perform_web_search(query, count=20)" → Extract "perform_web_search" as Tool
3. Function usage: "results = perform_web_search(...)" → Extract "perform_web_search" as Tool
4. Error mentions: "perform_web_search returned None" → Extract "perform_web_search" as Tool
**EXTRACTION REQUIREMENTS:**
- If you see "perform_web_search" ANYWHERE in the trace, you MUST extract it as a Tool entity
- If you see "from functions import [function_name]", extract [function_name] as Tool
- If you see "[function_name](" pattern, extract [function_name] as Tool
- Count usage frequency across all agents
- Determine importance based on usage frequency and failure impact
**COMPREHENSIVE CONTENT REFERENCE REQUIREMENTS FOR TOOLS:**
- Include EVERY line where the tool name appears (be exhaustive, not selective)
- Include import statements: "from functions import perform_web_search"
- Include function calls: "perform_web_search(query, count=20)"
- Include variable assignments: "results = perform_web_search(...)"
- Include error messages: "perform_web_search returned None"
- Include conditional statements: "if perform_web_search(query) is None"
- Include comments or documentation mentioning the tool
- Include any line containing the exact tool name, regardless of context
**TOOL ENTITY FIELDS:**
- name: The exact function name (e.g., "perform_web_search")
- description: Purpose inferred from usage context and parameters
- importance: HIGH if used by multiple agents or causes failures, MEDIUM if used frequently, LOW if used rarely
**DECORATOR-BASED TOOLS (@tool):**
- Tool signatures: Function names, parameters, return types
- Tool descriptions: Purpose and functionality explanations
- Usage examples: How the tool is called with specific parameters
- Tool configurations: Settings, API keys, endpoints (sanitized)
- Error handling: Retry logic, fallback mechanisms
HUMAN ENTITIES - Extract user interactions and feedback:
Capture complete human interactions:
- Original user queries: Full questions or requests
- Feedback statements: Corrections, approvals, or rejections
- Intervention commands: Direct instructions or overrides
- Context provided: Background information or clarifications
- Interaction timing: When feedback was provided
INPUT/OUTPUT ENTITIES - Extract data specifications:
For data entities, capture:
- Data schemas: Column names, types, constraints
- Query specifications: SQL queries, filters, conditions
- File formats: JSON structures, CSV headers, data types
- Business rules: Logic, calculations, or transformations
- Data sources: Database names, table names, API endpoints
EXTRACTION PATTERNS TO LOOK FOR:
1. Agent Framework Patterns:
- CrewAI: "Agent(role=..., goal=..., backstory=...)"
- LangChain: "SystemMessage(content=...)"
- AutoGen: "ConversableAgent(name=..., system_message=...)"
1b. Conversational Agent Patterns:
- Named message senders: "ProblemSolving_Expert (assistant): [message content]"
- Agent role indicators: "Verification_Expert", "Computer_terminal", "ArithmeticProgressions_Expert"
- Multi-agent conversations: agents addressing each other by name
- Specialized responses: mathematical calculations, code execution, domain expertise
- Agent introductions: "You are given: (1) a task..." or "To solve the task..."
2. Task Patterns:
- "Task(description=..., expected_output=...)"
- "Please [action] with [parameters]"
- "Your task is to [objective]"
3. Tool Patterns:
- "@tool" decorators with function definitions
- "Action: [tool_name]" with "Action Input: [parameters]"
- API calls with endpoints and parameters
- Function imports: "from [module] import [function_name]"
- Function calls: "[function_name]([parameters])" with multiple usage instances
- Module function calls: "[module].[function_name]([parameters])"
- Utility functions used across multiple agents or contexts
4. Human Interaction Patterns:
- Direct user messages or queries
- Feedback like "That's not correct, try again"
- Approvals like "Yes, proceed with this approach"
FORMATTING REQUIREMENTS:
- Preserve original formatting, indentation, and structure when possible
- Use triple quotes for multi-line prompts
- Include parameter names and types for tools
- Maintain JSON/YAML structure for configurations
- Sanitize sensitive information (API keys, passwords) but keep structure
Examples (showing actual runtime extraction):
```
# Agent prompt example (CrewAI)
Agent(
    role='SQL Query Generator',
    goal='Generate accurate Databricks SQL queries based on business requirements',
    backstory='You are an expert SQL developer specializing in Databricks SQL Warehouse. You understand complex business logic and can translate natural language requirements into efficient SQL queries.',
    llm='gpt-5-mini'
)
```
```
# Task prompt example
Task(
    description='Generate a SQL query to compare spend and supplier count for fatty alcohol purchases between 2023 and 2024. Include filters for plant exclusions and intercompany indicators.',
    expected_output='A complete SQL query with proper joins, filters, and aggregations that can be executed in Databricks SQL Warehouse'
)
```
```
# Tool prompt example (@tool decorator)
@tool
def databricks_sql_executor(query: str, warehouse_id: str) -> dict:
    \"\"\"Execute SQL queries in Databricks SQL Warehouse
    Args:
        query: SQL query string to execute
        warehouse_id: Databricks warehouse identifier
    Returns:
        Dictionary with query results and metadata
    \"\"\"
```
```
# COMPREHENSIVE TOOL EXTRACTION EXAMPLE
# ALL these lines should be included in raw_prompt_ref for "perform_web_search":
# Line 45: from functions import perform_web_search
# Line 67: results = perform_web_search(query="machine learning trends", count=20)
# Line 89: search_results = perform_web_search(query="AI applications", count=15)
# Line 102: if perform_web_search(query) is None:
# Line 156: logger.error("perform_web_search returned None")
# Line 203: # Using perform_web_search for data retrieval
# Line 234: except Exception as e: # perform_web_search failed
# RESULT: Extract ALL 7 occurrences as ContentReference objects
{{
  "id": "tool_001",
  "type": "Tool",
  "name": "perform_web_search",
  "raw_prompt_ref": [
    {{"line_start": 45, "line_end": 45}},    # import statement
    {{"line_start": 67, "line_end": 67}},    # first function call
    {{"line_start": 89, "line_end": 89}},    # second function call
    {{"line_start": 102, "line_end": 102}},  # conditional check
    {{"line_start": 156, "line_end": 156}},  # error message
    {{"line_start": 203, "line_end": 203}},  # comment mention
    {{"line_start": 234, "line_end": 234}}   # exception comment
  ]
}}
```
```
# Human prompt example
Can you compare the spend and SupplierName count on PurchaseCommodityName fatty alcohol for 2023 and 2024 and share insights? I need this for the quarterly business review.
```
IMPORTANCE ASSESSMENT REQUIREMENTS:
For each entity, you MUST assign an importance level based on its role in the system:
HIGH IMPORTANCE:
- Core agents that coordinate or manage other agents
- Critical tasks that are essential for system function or user goals
- Essential tools that multiple agents depend on (e.g., perform_web_search used by multiple agents)
- Function-based tools with frequent usage across the workflow
- Primary inputs that drive the entire workflow
- Final outputs that represent the main system deliverables
- Key human stakeholders who make critical decisions
MEDIUM IMPORTANCE:
- Supporting agents with specialized but non-critical functions
- Standard operational tasks that support the main workflow
- Commonly used tools that enhance functionality (e.g., utility functions used occasionally)
- Function-based tools with moderate usage frequency
- Secondary inputs that provide additional context
- Intermediate outputs that feed into other processes
- Regular human users who provide routine input
LOW IMPORTANCE:
- Auxiliary agents with very specific or rare functions
- Simple tasks with minimal impact on overall system success
- Rarely used tools or utilities (e.g., debugging functions used once)
- Function-based tools with single or infrequent usage
- Optional inputs that provide minor enhancements
- Diagnostic or logging outputs
- Occasional human observers or reviewers
ASSESSMENT GUIDELINES:
- Consider the entity's centrality in the workflow
- Evaluate how many other entities depend on this one
- Assess the impact if this entity failed or was removed
- Look at frequency and criticality of usage patterns
- Consider whether the entity is replaceable or unique
- For function-based tools: Count usage frequency and cross-agent dependencies
"""
    entity_extractor_agent_info = {
        "role": "Entity Extractor",
        "goal": "Extract all entities with proper types, importance levels, and raw prompts from agent trace data",
        "backstory": """You specialize in identifying entities within various data sources. You can recognize agent names,
tools, tasks, and other important elements in logs, documentation, model cards, or natural language descriptions.
You're particularly skilled at extracting model information, parameters, and performance metrics when available.
You create concise, informative one-sentence descriptions for every entity you identify, capturing its core purpose
or function in a way that helps others understand its role in the system.
Your expertise helps create comprehensive knowledge graphs by ensuring all relevant entities are properly identified,
categorized, and described. You focus on detail and ensure nothing important is missed, regardless of the format of the input data.""",
        "task_expected_output": "A list of properly structured entities with types, importance levels, and prompts",
    }
    system_prompt = create_system_prompt(
        role=entity_extractor_agent_info["role"],
        backstory=entity_extractor_agent_info["backstory"],
        goal=entity_extractor_agent_info["goal"],
    )
    task_description = create_task_prompt(
        task_description=ENTITY_EXTRACTION_INSTRUCTION_PROMPT,
        task_expected_output=entity_extractor_agent_info["task_expected_output"],
        response_format=EntityExtractionList,
        previous_step_output=None,
    )
    return await run_agent(
        model_name=os.environ["OPENAI_MODEL_NAME"],
        system_prompt=system_prompt,
        task_description=task_description,
        response_format=EntityExtractionList,
    )


async def relationship_analyzer(input_data, previous_step_output, context_documents="") -> RelationshipExtractionList:
    """Map relationships between previously extracted entities for this trace window."""
    RELATION_EXTRACTION_INSTRUCTION_PROMPT = f"""Map all relationships between system entities (identified in the previous step from this window) using ONLY the predefined relationship types.
Your analysis should focus on interactions described *within this specific window* of a larger chronological trace.
**CONTEXT DOCUMENTS AVAILABLE:**
The following context documents are available to enhance your understanding:
- {context_documents}
**PRIMARY INPUT DATA:**
Here is the main trace you are analyzing:
- {input_data}
MANDATORY: You MUST reference the exact entity list from the previous step with their IDs.
Every entity will have an ID in the format: TYPE_NUMBER (e.g., "agent_001", "human_001", "task_001")
You can ONLY use these exact IDs in your relationship source and target fields.
**CONTEXT-ENHANCED RELATIONSHIP ANALYSIS:**
Use the provided context documents to:
1. Better understand domain-specific workflows and processes
2. Identify standard relationship patterns in the business domain
3. Apply any provided guidelines for relationship categorization
4. Reference examples to understand expected relationship types
5. Recognize technical dependencies and data flows specific to the domain
Identify these 10 relationship types:
1. CONSUMED_BY: Input is processed by Agent
2. PERFORMS: Agent executes Task (focus on actual execution)
3. ASSIGNED_TO: Task delegated to Agent (focus on responsibility)
4. USES: Agent utilizes Tool
5. REQUIRED_BY: Tool is needed by Task
6. SUBTASK_OF: Task is component of parent Task
7. NEXT: Task follows another Task sequentially
8. PRODUCES: Task generates Output
9. DELIVERS_TO: Output is delivered to Human
10. INTERVENES: Agent/Human corrects Task
Critical distinctions:
- CONSUMED_BY: Input→Agent = data processing
- PERFORMS: Agent→Task = actual execution
- ASSIGNED_TO: Task→Agent = responsibility assignment
- DELIVERS_TO: Output→Human = final delivery
- INTERVENES: Agent/Human→Task = active correction/override
RELATIONSHIP EXTRACTION GUIDELINES:
When identifying relationships, be careful to ONLY map connections between actual entities:
1. DO NOT create these relationships:
- Between framework containers (e.g., "Crew", "Pipeline") and other entities
- Using execution IDs or session identifiers as entities
- Between status indicators and actual entities
- Between log formatting elements and actual entities
2. DO create relationships between:
- Actual named agents (e.g., "Organizer", "Thinker") and their tasks
- Agents and the specific tools they use
- Tasks and the tools they require
- Tasks that have sequential or hierarchical dependencies
- Entities and the actual inputs/outputs they consume/produce
- Human participants and the entities they review/modify
3. For agent frameworks:
- The framework container (e.g., "Crew", "Pipeline") is NOT an entity and should NOT have relationships
- Task IDs should be replaced with actual task names/descriptions in relationships
- Focus on the meaningful operational relationships, not the framework structure
EXAMPLE:
In a log entry like:
"🚀 Crew: crew
└── 📋 Task: abc-123 (Generate creative text)
    Status: Executing Task...
    └── 🤖 Agent: Researcher
        Status: In Progress"
CORRECT relationship (if "Generate creative text" is an identified Task entity and "Researcher" an Agent entity):
- "Researcher PERFORMS Generate creative text"
INCORRECT relationships:
- "crew PERFORMS abc-123" (framework container to task ID, unless 'crew' is a defined entity and interacts)
- "Researcher PERFORMS abc-123" (using task ID instead of description from entity list)
For each relationship:
- CRITICAL: Use the exact entity.id field values (NOT entity.name) for source and target fields
- Source field must contain the exact ID of an entity from the extracted entities list
- Target field must contain the exact ID of an entity from the extracted entities list
- Clearly define the relationship type and its directionality (source → relationship → target)
- Populate interaction_prompt according to the prompt-based requirements above
- VALIDATION: Every source and target ID MUST correspond to an existing entity.id in the entities list
INTERACTION-BASED interaction_prompt content requirements:
- For CONSUMED_BY: Extract the ACTUAL DATA CONSUMPTION MESSAGE/LOG showing how the agent processed the input data
- For PERFORMS: Extract the ACTUAL EXECUTION MESSAGE/LOG showing the agent starting or executing the task
- For ASSIGNED_TO: Extract the ACTUAL ASSIGNMENT MESSAGE/LOG showing the task being delegated to the agent
- For USES: Extract the ACTUAL TOOL USAGE MESSAGE/LOG showing the agent calling or using the tool
- For REQUIRED_BY: Extract the ACTUAL REQUIREMENT MESSAGE/LOG showing the task needing or requesting the tool
- For SUBTASK_OF: Extract the ACTUAL HIERARCHICAL MESSAGE/LOG showing the parent-child task relationship
- For NEXT: Extract the ACTUAL SEQUENCE MESSAGE/LOG showing one task following another
- For PRODUCES: Extract the ACTUAL OUTPUT GENERATION MESSAGE/LOG showing the task creating the output
- For DELIVERS_TO: Extract the ACTUAL DELIVERY MESSAGE/LOG showing the output being sent to the human
- For INTERVENES: Extract the ACTUAL INTERVENTION MESSAGE/LOG showing the human/agent correcting the task
**CRITICAL: REFERENCE-ONLY INTERACTION EXTRACTION**
- You MUST leave the `interaction_prompt` field as an empty string "" for ALL relationships
- You MUST ONLY populate the `interaction_prompt_ref` field with location references to runtime interaction evidence
- DO NOT extract or include the actual interaction content - only identify WHERE it is located
- The actual interaction content will be extracted later by other functions using your references
- When you find interaction evidence you MUST enumerate every **contiguous occurrence** of that interaction text in the numbered trace and include one `ContentReference` object per occurrence in the `interaction_prompt_ref` list
- interaction_prompt_ref points to WHERE in the trace this specific interaction occurred (not static definitions)
- If no explicit interaction evidence exists in the trace, set interaction_prompt="" and interaction_prompt_ref=[]
Example with reference-only interaction:
```json
{{
  "type": "USES",
  "source": "agent_001",
  "target": "tool_001",
  "interaction_prompt": "",  // ALWAYS EMPTY - DO NOT FILL
  "interaction_prompt_ref": [
    {{ "line_start": 120, "line_end": 120 }},
    {{ "line_start": 250, "line_end": 250 }}
  ]
}}
```
Entity type constraints (STRICT):
- CONSUMED_BY: Input→Agent
- PERFORMS: Agent→Task
- ASSIGNED_TO: Task→Agent
- USES: Agent→Tool
- REQUIRED_BY: Tool→Task
- SUBTASK_OF: Task→Task
- NEXT: Task→Task
- PRODUCES: Task→Output (only Task can produce Output)
- DELIVERS_TO: Output→Human
- INTERVENES: Agent/Human→Task (either Agent or Human can intervene in tasks)
Data flow analysis:
- For CONSUMED_BY: Track explicit and implicit inputs, consumption patterns by agents
- For PRODUCES: Track artifacts, intermediate and final outputs from tasks
- For DELIVERS_TO: Track final delivery of outputs to humans
- Identify data transformations and potential failure points
CRITICAL ID MATCHING REQUIREMENT:
- Use ONLY the exact entity.id values in source and target fields
- DO NOT use entity.name values in source/target fields
- Every relationship source/target must reference an existing entity.id
- Example: If entity has id="agent_001" and name="SQL Query Generator", use "agent_001" in relationships
- VALIDATION: Check that every source and target ID exists in the entities list before creating the relationship
Connection requirements:
Every entity MUST connect to at least one other entity. For disconnected entities:
- Agents: Create PERFORMS, CONSUMED_BY, or logical connection based on role
- Tasks: Must have PERFORMS or ASSIGNED_TO, and typically PRODUCES
- Tools: Must have USES or REQUIRED_BY
- Inputs: Must be connected via CONSUMED_BY to at least one agent
- Outputs: Must be produced by at least one task via PRODUCES, and may be delivered via DELIVERS_TO
- Humans: Connect via DELIVERS_TO or INTERVENES
If no obvious connection exists, create a logical CONSUMED_BY or PRODUCES relationship at minimum.
Interaction Prompt Extraction (Capture actual runtime interaction details):
Extract SPECIFIC interaction details that show HOW entities actually interacted during execution.
Focus on real execution context, timing, parameters, and outcomes.
PERFORMS Relationships (Agent→Task):
Extract the actual execution details:
- Task assignment: "Agent X assigned to execute Task Y at timestamp Z"
- Execution parameters: Specific inputs, configurations, constraints provided
- Execution context: Environmental conditions, dependencies, prerequisites
- Progress indicators: Status updates, intermediate results, completion signals
- Performance metrics: Timing, resource usage, success/failure indicators
USES Relationships (Agent→Tool):
Extract specific tool usage details:
- Tool invocation: Exact tool calls with parameters and context
- Usage purpose: Why the tool was needed at this specific moment
- Input/output: Specific data passed to tool and results received
- Usage patterns: Frequency, timing, conditional usage
- Error handling: Retry attempts, fallback mechanisms, error recovery
ASSIGNED_TO Relationships (Task→Agent):
Extract delegation and assignment details:
- Assignment reason: Why this specific agent was chosen for this task
- Delegation context: Who assigned, when, under what conditions
- Responsibility scope: Specific aspects of the task assigned
- Authority level: Decision-making power, escalation procedures
- Success criteria: How completion/success will be measured
CONSUMED_BY Relationships (Input→Agent):
Extract data consumption details:
- Data source: Specific input location, format, access method
- Consumption pattern: How much, how often, under what conditions
- Processing method: Transformation, validation, filtering applied by agent
- Data dependencies: Required data quality, completeness, timeliness
- Consumption triggers: Events or conditions that initiate consumption
PRODUCES Relationships (Task→Output):
Extract output generation details:
- Output specification: Exact format, structure, content requirements
- Generation process: Steps, transformations, calculations performed
- Quality control: Validation, verification, approval processes
- Delivery method: How output is provided, stored, or transmitted
- Output dependencies: Prerequisites, inputs required for generation
DELIVERS_TO/INTERVENES Relationships (Output→Human, Agent/Human→Task):
Extract human interaction details:
- Delivery method: How output reaches human (email, dashboard, report, etc.)
- Delivery criteria: When and under what conditions output is delivered
- Intervention triggers: Conditions that prompted human/agent involvement
- Feedback specifics: Exact corrections, suggestions, approvals given
- Timing context: When delivery/intervention occurred in the process
- Impact assessment: How the delivery/intervention changed the outcome
EXTRACTION PATTERNS TO LOOK FOR:
1. Execution Logs:
- "Agent X started Task Y with parameters {{...}}"
- "Tool Z called with input {{...}} returned {{...}}"
- "Task completed in X seconds with status Y"
2. Delegation Patterns:
- "Assigning Task X to Agent Y because of expertise in Z"
- "Agent Y selected for Task X due to availability and skills"
3. Data Flow Patterns:
- "Processing input data from source X with filters Y"
- "Generated output file Z with format Y containing X records"
4. Human Interaction Patterns:
- "User provided feedback: 'This needs more detail'"
- "Human approval received for proceeding with approach X"
5. Tool Usage Patterns:
- "Executing SQL query on database X with timeout Y"
- "API call to service X with parameters Y returned status Z"
FORMATTING REQUIREMENTS:
- Include timestamps when available
- Preserve parameter names and values
- Include status codes, error messages, success indicators
- Maintain data format specifications
- Show actual values, not generic placeholders
RELATIONSHIP ID MATCHING EXAMPLES:
Given these entities from the previous step:
- Entity 1: {{id: "input_001", name: "Spend Database Schema", type: "Input"}}
- Entity 2: {{id: "agent_001", name: "SQL Query Generator", type: "Agent"}}
- Entity 3: {{id: "task_001", name: "Generate Spend Analysis", type: "Task"}}
- Entity 4: {{id: "output_001", name: "Analysis Report", type: "Output"}}
- Entity 5: {{id: "human_001", name: "Business Analyst", type: "Human"}}
CORRECT relationships:
```
{{
  source: "input_001",   // Use exact entity.id from entity list
  target: "agent_001",   // Use exact entity.id from entity list
  type: "CONSUMED_BY"
}}
{{
  source: "agent_001",
  target: "task_001",
  type: "PERFORMS"
}}
{{
  source: "output_001",
  target: "human_001",   // Use "human_001", NOT "skandha.tandra@unilever.com"
  type: "DELIVERS_TO"
}}
```
INCORRECT relationships (will cause graph errors):
```
{{
  source: "Spend Database Schema",   // WRONG: using entity.name
  target: "SQL Query Generator",     // WRONG: using entity.name
  type: "CONSUMED_BY"
}}
{{
  source: "output_001",
  target: "skandha.tandra@unilever.com",   // WRONG: using email/content, not entity.id
  type: "DELIVERS_TO"
}}
```
COMPLETE REFERENCE-ONLY Examples with interaction_prompt_ref:
```json
// CONSUMED_BY example (Reference to Data Consumption Location)
{{
  "type": "CONSUMED_BY",
  "source": "input_001",
  "target": "agent_001",
  "interaction_prompt": "",  // ALWAYS EMPTY - DO NOT FILL
  "interaction_prompt_ref": [
    {{"line_start": 45, "line_end": 45}}
  ]
}}
```
```json
// USES example (Reference to Tool Usage Location)
{{
  "type": "USES",
  "source": "agent_001",
  "target": "tool_001",
  "interaction_prompt": "",  // ALWAYS EMPTY - DO NOT FILL
  "interaction_prompt_ref": [
    {{"line_start": 89, "line_end": 91}}
  ]
}}
```
```json
// PERFORMS example (Reference to Task Execution Location)
{{
  "type": "PERFORMS",
  "source": "agent_001",
  "target": "task_001",
  "interaction_prompt": "",  // ALWAYS EMPTY - DO NOT FILL
  "interaction_prompt_ref": [
    {{"line_start": 67, "line_end": 67}}
  ]
}}
```
```json
// DELIVERS_TO example (Reference to Output Delivery Location)
{{
  "type": "DELIVERS_TO",
  "source": "output_001",
  "target": "human_001",
  "interaction_prompt": "",  // ALWAYS EMPTY - DO NOT FILL
  "interaction_prompt_ref": [
    {{"line_start": 123, "line_end": 123}}
  ]
}}
```
```json
// INTERVENES example (Reference to Human Intervention Location)
{{
  "type": "INTERVENES",
  "source": "human_001",
  "target": "task_001",
  "interaction_prompt": "",  // ALWAYS EMPTY - DO NOT FILL
  "interaction_prompt_ref": [
    {{"line_start": 156, "line_end": 156}}
  ]
}}
```
- 'PRODUCES' relationships must only originate from 'Task' entities. Do NOT create 'PRODUCES' relationships from 'Agent' or 'Tool' entities. If such a relationship is detected, reassign it to the appropriate Task or remove it.
- 'CONSUMED_BY' relationships must only go from 'Input' to 'Agent'. Do NOT create reverse relationships.
- 'DELIVERS_TO' relationships must only go from 'Output' to 'Human'.
FINAL VALIDATION CHECKLIST:
Before submitting relationships, verify:
1. Every source field contains an exact entity.id from the entities list (format: TYPE_NUMBER)
2. Every target field contains an exact entity.id from the entities list (format: TYPE_NUMBER)
3. No source or target field contains entity names, descriptions, emails, or actual content
4. All relationship types are from the approved list of 10 types
5. Source/target entity types match the constraints for each relationship type
6. SPECIFIC CHECK: No email addresses (like "skandha.tandra@unilever.com") in source/target fields
7. SPECIFIC CHECK: All human references use "human_001", "human_002", etc., not actual names or emails
8. CRITICAL CHECK: For ALL relationships, interaction_prompt MUST be empty string "" - only populate interaction_prompt_ref with location references
9. CRITICAL CHECK: interaction_prompt_ref should point to ACTUAL RUNTIME MESSAGES/LOGS locations, not static prompt definitions or specifications
IMPORTANCE ASSESSMENT REQUIREMENTS:
For each relationship, you MUST assign an importance level based on its role in the system:
HIGH IMPORTANCE:
- Critical data flows that are essential for system operation
- Core agent-task assignments that drive main functionality
- Essential tool usage that multiple workflows depend on
- Primary input consumption that initiates key processes
- Final output delivery to key stakeholders
- Critical intervention relationships that prevent failures
MEDIUM IMPORTANCE:
- Standard operational workflows and data processing
- Common agent-task interactions in normal operation
- Regular tool usage that supports functionality
- Secondary input processing that provides context
- Intermediate output generation for downstream processes
- Routine human interactions and feedback loops
LOW IMPORTANCE:
- Auxiliary connections with minimal system impact
- Optional workflow steps that can be skipped
- Rarely used tool interactions or utilities
- Diagnostic or logging data flows
- Backup or redundant relationships
- Occasional human oversight or monitoring
ASSESSMENT GUIDELINES:
- Consider the relationship's criticality to system success
- Evaluate how often this interaction occurs
- Assess the impact if this relationship failed
- Look at whether this connection is replaceable
- Consider the consequences of removing this relationship
"""
    relationship_analyzer_agent_info = {
        "role": "Relationship Analyzer",
        "goal": "Discover standard relationships between entities using exact entity IDs and predefined relationship types",
        "backstory": """You are an expert in understanding relationships and connections between entities.
You can identify when agents delegate tasks, use tools, ask questions of each other, or work
together on tasks from various data sources including logs, documentation, model cards, or natural language descriptions.
You strictly adhere to using only the ten predefined relationship types (CONSUMED_BY, PERFORMS, ASSIGNED_TO, USES,
REQUIRED_BY, SUBTASK_OF, NEXT, PRODUCES, DELIVERS_TO, INTERVENES) and never create custom relationship types. You maintain the correct source and target entity types
for each relationship as defined in the system.
CRITICAL SKILL: You are meticulous about using exact entity.id values (not names) in relationship source and target fields.
You understand that using entity names instead of IDs will break the knowledge graph visualization and cause system errors.
You always double-check that every source and target ID corresponds to an actual entity from the extracted entities list.
You clearly distinguish between:
- PERFORMS (Agent→Task): When an agent actually executes/carries out a task
- ASSIGNED_TO (Task→Agent): When a task is delegated/assigned to an agent as a responsibility
For relationships with interaction evidence in the trace, you record the evidence locations in interaction_prompt_ref;
you always leave the interaction_prompt field itself empty.
You see patterns in interactions that others might miss, making you
essential for mapping the complex web of relationships in multi-agent systems,
regardless of how the system information is presented.""",
        "task_expected_output": "A list of properly structured relationships with exact entity references",
    }
    system_prompt = create_system_prompt(
        role=relationship_analyzer_agent_info["role"],
        backstory=relationship_analyzer_agent_info["backstory"],
        goal=relationship_analyzer_agent_info["goal"],
    )
    task_description = create_task_prompt(
        task_description=RELATION_EXTRACTION_INSTRUCTION_PROMPT,
        task_expected_output=relationship_analyzer_agent_info["task_expected_output"],
        response_format=RelationshipExtractionList,
        previous_step_output=previous_step_output,
    )
    return await run_agent(
        model_name=os.environ["OPENAI_MODEL_NAME"],
        system_prompt=system_prompt,
        task_description=task_description,
        response_format=RelationshipExtractionList,
    )


async def knowledge_graph_builder(input_data, previous_step_output, context_documents="") -> KnowledgeGraph:
    """Assemble the final knowledge graph (entities, relations, failures) for this window."""
    GRAPH_BUILDER_INSTRUCTION_PROMPT = f"""
**CONTEXT DOCUMENTS AVAILABLE:**
The following context documents are available to enhance your understanding:
- {context_documents}
**PRIMARY INPUT DATA:**
Here is the input window you are analysing (with <L#> line numbers):
- {input_data}
**CONTEXT-ENHANCED KNOWLEDGE GRAPH CONSTRUCTION:**
Use the provided context documents to:
1. Create more accurate system names and summaries based on domain knowledge
2. Apply domain-specific importance assessments
3. Follow any provided guidelines for knowledge graph structure
4. Reference examples for system categorization and analysis
5. Incorporate business domain understanding into failure detection
Construct a unified knowledge graph from analyzed entities, relationships, **and detected failures**.
FAILURE LIST REQUIREMENT (YOU must perform this detection):
- Add a top-level field called `failures` (array) to the final JSON.
- Each item must match the `Failure` schema (id, risk_type, description, raw_text, raw_text_ref, affected_id).
- Use the following predefined risk_type values only: AGENT_ERROR, PLANNING_ERROR, EXECUTION_ERROR, RETRIEVAL_ERROR, HALLUCINATION.
- For every distinct mistake or risk you identify in this window, create exactly one Failure object with **all** occurrences referenced via `raw_text_ref`.
- Leave `raw_text` empty "" and rely on `raw_text_ref` for extraction (same convention as prompts).
- `affected_id` should point to the entity or relation most responsible, if applicable; otherwise leave null.
**MANDATORY**: If this window shows *any* error, bug, or incorrect behaviour you **MUST** add at least one Failure object. Unit-tests will fail if the `failures` array is missing or empty.
IF ANY OF THE ERROR KEYWORDS LISTED BELOW APPEARS AND THERE IS NO FAILURE OBJECT, THE OUTPUT WILL BE REJECTED.
QUICK CHECKLIST BEFORE YOU SUBMIT:
1. `failures` array exists in top-level JSON.
2. Each Failure has at least one `raw_text_ref` entry.
3. Failure IDs follow sequential `failure_001`, `failure_002`, … order.
4. The first entry in `raw_text_ref` (index 0) must occur **on or before** the dataset's `mistake_step` line.
- The **primary evidence** for a Failure must be the **exact agent message** at the first mistake step—the line where the incorrect answer or erroneous action first appears. Do NOT rely solely on later diagnostic logs.
- Typical evidence keywords include: "ERROR", "Incorrect answer", "Traceback", "I cannot", "Failed to". Capture that specific message line via `raw_text_ref`.
CRITICAL FIRST-SYMPTOM LINE RULE
• The *very first* line that shows the mistake MUST be captured via `raw_text_ref`.
• "First line" means the earliest agent or tool message whose content already demonstrates the error.
• Typical trigger words to scan for: "error", "incorrect", "failed", "traceback", "cannot", "exception", "invalid".
• Mini-example (multi-line traceback):
    assistant: Traceback (most recent call last)
    assistant: File "...", line 12, in <module>
    assistant: ValueError: division by zero  ← only this FIRST offending line is referenced
  Correct `raw_text_ref` → `[{{"line_start": 2, "line_end": 2}}]`
Example Failure object:
```json
{{
  "id": "failure_001",
  "risk_type": "AGENT_ERROR",
  "description": "Agent provided incorrect SQL syntax causing downstream failure",
  "raw_text": "",
  "raw_text_ref": [{{"line_start": 42, "line_end": 43}}],
  "affected_id": "agent_001"
}}
```
Core requirements:
1. Integrate entities and relationships into a coherent structure
2. Maintain consistent entity references
3. Use ONLY the ten predefined relation types
4. Preserve all prompt content and importance assessments
5. Include metadata with timestamp and statistics
6. Create a descriptive system name (3-7 words)
7. Write a concise 2-3 sentence system summary
8. Include comprehensive system assessment
System naming guidelines:
- Reflect primary purpose and function
- Include key agent roles
- Mention domain/industry if applicable
- Highlight distinctive capabilities
Example names: "Financial Research Collaboration Network", "Customer Support Ticket Triage System"
System summary must explain:
- What the system does (purpose/function)
- How it works (agent coordination pattern)
- Value provided (problem solved)
Example summary: "This system analyzes customer support tickets using a classifier agent and specialist agents to route issues to appropriate departments. It manages workflow and handoffs between specialists. The system reduces response time by matching issues with qualified representatives."
Validation requirements:
1. Include ONLY these relationship types:
- CONSUMED_BY: Input→Agent
- PERFORMS: Agent→Task
- ASSIGNED_TO: Task→Agent
- USES: Agent→Tool
- REQUIRED_BY: Tool→Task
- SUBTASK_OF: Task→Task
- NEXT: Task→Task (sequence)
- PRODUCES: Task→Output
- DELIVERS_TO: Output→Human
- INTERVENES: Agent/Human→Task
2. Confirm task relationships accurately show:
- Sequential dependencies (NEXT)
- Hierarchical structure (SUBTASK_OF)
3. Verify entity IDs (not names) in all relationships
Connectivity validation:
- All entities must connect to at least one other entity
- All inputs must be consumed by agents via CONSUMED_BY
- All outputs must be produced by tasks via PRODUCES
- All outputs should be delivered to humans via DELIVERS_TO when applicable
- All components must be reachable (no isolated subgraphs)
- Clear paths must exist from inputs to outputs through agents and tasks
- All agents must have defined roles
- Document any added connections in metadata.connectivity_fixups
System Integration:
- Focus on comprehensive system analysis and assessment
- Include detailed metadata about system components and interactions
- Document system architecture patterns and design decisions
System assessment:
1. Evaluate overall system importance (HIGH/MEDIUM/LOW) based on:
- Component count and centrality
- Workflow centrality
- Uniqueness/replaceability
- Failure impact
- Single points of failure
- Usage frequency
2. Provide 3-5 sentence justification covering:
- Importance level rationale
- Key assessment factors
- Architecture strengths/vulnerabilities
- Risk mitigations
- Comparison to similar systems
Data flow analysis:
- Map input consumption paths
- Track output production and utilization
- Identify transformation points
- Document critical data paths
- Highlight bottlenecks and redundancies
Output a complete KnowledgeGraph object with entities, relations, metadata, system_name, and system_summary.
"""
    knowledge_graph_builder_agent_info = {
        "role": "Knowledge Graph Builder",
        "goal": "Build a complete, consistent knowledge graph using extracted entities and relationships with proper validation",
        "backstory": """You are skilled at organizing information into structured knowledge graphs.
You understand how to represent entities and relationships in a way that captures the essence
of a system. Your knowledge graphs are well-structured, consistent, and follow best practices
for knowledge representation.
You excel at analyzing complex systems holistically to provide overall risk assessments.
You can evaluate the criticality of entire systems based on their components, dependencies,
and role in broader workflows. Your system-level risk analyses help stakeholders understand
key vulnerabilities and critical components that warrant special attention.
You ensure the final output is in a format that can be easily used for further analysis or visualization.""",
        "task_expected_output": "A complete knowledge graph with entities, relationships, failures, and metadata",
    }
    system_prompt = create_system_prompt(
        role=knowledge_graph_builder_agent_info["role"],
        backstory=knowledge_graph_builder_agent_info["backstory"],
        goal=knowledge_graph_builder_agent_info["goal"],
    )
    full_previous_step_output = {
        **previous_step_output,
        "incorrect_results": [],
    }
    kg = None
    for _ in range(3):
        if kg is not None:
            # On retries, keep earlier invalid attempts visible to the model.
            full_previous_step_output["incorrect_results"].append(kg.model_dump())
        task_description = create_task_prompt(
            task_description=GRAPH_BUILDER_INSTRUCTION_PROMPT,
            task_expected_output=knowledge_graph_builder_agent_info["task_expected_output"],
            response_format=KnowledgeGraph,
            previous_step_output=json.dumps(full_previous_step_output, indent=2),
        )
        kg = await run_agent(
            model_name=os.environ["OPENAI_MODEL_NAME"],
            system_prompt=system_prompt,
            task_description=task_description,
            response_format=KnowledgeGraph,
        )
        # Accept the graph only if every relation references known entity IDs.
        valid_result = True
        entities_ids = [e.id for e in kg.entities]
        for rel in kg.relations:
            if rel.source not in entities_ids or rel.target not in entities_ids:
                valid_result = False
                break
        if valid_result:
            break
    if not valid_result:
        # After exhausting retries, drop relations whose endpoints are unknown entities.
        entities_ids = [e.id for e in kg.entities]
        valid_relations = [
            rel
            for rel in kg.relations
            if rel.source in entities_ids and rel.target in entities_ids
        ]
        kg = kg.model_copy(update={"relations": valid_relations})
    return kg
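

# The three prompts above assume that `input_data` is a trace whose lines already carry
# `<L#>` markers, with over-long source lines chunked and continuation chunks indented by
# two spaces. A minimal numbering helper along those lines might look like the sketch
# below; it is an illustrative assumption, not a helper defined elsewhere in this package.
def number_trace_lines(raw_trace: str, max_width: int = 200) -> str:
    """Prefix each (possibly chunked) trace line with an <L#> marker."""
    numbered = []
    line_no = 1
    for original_line in raw_trace.splitlines():
        # Chunk overly long lines; continuation chunks get a two-space indent.
        chunks = [original_line[i:i + max_width] for i in range(0, len(original_line), max_width)] or [""]
        for chunk_index, chunk in enumerate(chunks):
            prefix = "  " if chunk_index > 0 else ""
            numbered.append(f"<L{line_no}> {prefix}{chunk}")
            line_no += 1
    return "\n".join(numbered)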


async def run(input_data, context: Optional[str] = None) -> KnowledgeGraph:
    """Run the three-stage pipeline: entity extraction, relationship analysis, graph building."""
    entity_extractor_result = await entity_extractor(input_data, context)
    previous_step_output = json.dumps(
        {
            "entities": entity_extractor_result.model_dump(),
        },
        indent=2,
    )
    relationship_analyzer_result = await relationship_analyzer(
        input_data, previous_step_output, context
    )
    previous_step_output = {
        "entities": entity_extractor_result.model_dump(),
        "relations": relationship_analyzer_result.model_dump(),
    }
    knowledge_graph_result = await knowledge_graph_builder(
        input_data, previous_step_output, context
    )
    return knowledge_graph_result
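

# Hypothetical usage sketch: assumes OPENAI_MODEL_NAME is set, uses the illustrative
# number_trace_lines helper above to produce the <L#>-numbered input the prompts expect,
# and assumes the KnowledgeGraph model exposes the system_name field described in the
# graph-builder prompt. The sample trace text is invented for demonstration only.
if __name__ == "__main__":
    import asyncio

    sample_trace = number_trace_lines(
        "system: You are a Time Tracker Agent.\n"
        "user: Please log 3 hours against project Alpha.\n"
        "assistant: Logged 3 hours to project Alpha."
    )
    graph = asyncio.run(run(sample_trace, context="No additional context documents."))
    print(graph.system_name)
    print(graph.model_dump_json(indent=2))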