Preformu / layers /intent_parser.py
Kevinshh's picture
Upload 3 files
e335526 verified
"""
Intent Parser - Layer 1: User Intent Understanding
This module implements the first layer of the three-layer architecture.
It uses LLM to understand user's natural language goals and converts them
to structured AnalysisIntent objects.
CRITICAL BOUNDARIES:
- ✅ LLM is used ONLY for semantic understanding
- ❌ NO numerical calculations
- ❌ NO regulatory decisions
- ❌ NO data sufficiency checks
"""
import json
import re
from typing import Dict, Any, Optional, Tuple, List
from dataclasses import asdict
from schemas.analysis_intent import (
AnalysisIntent,
AnalysisType,
AnalysisPurpose,
UserPreferences,
HardConstraints,
ExtractedDataSummary,
)
from prompts.intent_prompts import IntentPrompts
from layers.model_invoker import ModelInvoker
class IntentParser:
    """
    Layer 1: User Intent Understanding.

    Converts natural language analysis goals into structured AnalysisIntent.
    Uses LLM purely for semantic understanding - no calculations or decisions.

    If the LLM call is unsuccessful, or its response cannot be turned into an
    intent, a keyword-based default intent with low confidence is returned
    instead of raising.
    """

    # "<number><month unit>": 月 / 个月 / M / m / mo / month(s).
    # The negative lookahead keeps quantities such as "10mg", "5ml" or "3min"
    # from being read as month timepoints.
    _TIMEPOINT_RE = re.compile(
        r'(\d+)\s*(?:个?月|m(?:onths?|os?)?(?![a-z]))',
        re.IGNORECASE,
    )

    def __init__(self, model_invoker: Optional[ModelInvoker] = None):
        """
        Initialize the intent parser.

        Args:
            model_invoker: LLM invoker instance. Creates new one if not provided.
        """
        self.model_invoker = model_invoker or ModelInvoker()

    def parse(
        self,
        user_goal: str,
        data_summary: Optional[ExtractedDataSummary] = None
    ) -> AnalysisIntent:
        """
        Parse user's natural language goal into structured intent.

        Args:
            user_goal: User's raw analysis goal text
            data_summary: Optional summary of extracted data. An empty
                ExtractedDataSummary is used when omitted.

        Returns:
            AnalysisIntent structure for Layer 2. When the LLM call is not
            successful or its response cannot be parsed, a default intent
            built by ``_create_default_intent`` is returned.
        """
        if data_summary is None:
            data_summary = ExtractedDataSummary()

        # Get prompts
        cqa_text = ", ".join(data_summary.cqa_list) if data_summary.cqa_list else "未知"
        max_timepoint = (
            max(data_summary.available_timepoints)
            if data_summary.available_timepoints
            else 0
        )
        system_prompt, user_prompt = IntentPrompts.get_intent_prompt(
            user_goal=user_goal,
            n_batches=len(data_summary.batch_ids),
            n_conditions=len(data_summary.conditions),
            cqa_list=cqa_text,
            max_timepoint=max_timepoint
        )

        # Call LLM
        response = self.model_invoker.invoke(
            system_prompt=system_prompt,
            user_prompt=user_prompt,
            temperature=0.1  # Low temperature for consistent parsing
        )
        if not response.success:
            # Fallback to default intent
            return self._create_default_intent(user_goal, data_summary)

        # Parse LLM response
        try:
            parsed = self._extract_json(response.content)
            return self._build_intent(user_goal, parsed, data_summary)
        except Exception as e:
            # Deliberate best-effort boundary: any failure while interpreting
            # the LLM output degrades to the default intent, and the error
            # text is recorded in the intent's ambiguities.
            return self._create_default_intent(user_goal, data_summary, str(e))

    def _extract_json(self, text: str) -> Dict[str, Any]:
        """
        Extract a JSON object from an LLM response.

        Handles three response shapes: JSON inside a markdown code block,
        a bare JSON object, and a JSON object surrounded by explanatory prose.

        Args:
            text: Raw LLM response text.

        Returns:
            The decoded JSON object.

        Raises:
            ValueError: If no ``{...}`` span is found. ``json.JSONDecodeError``
                (a ValueError subclass) is raised when a span is found but is
                not valid JSON.
        """
        candidates = []
        # Prefer the content of a markdown code block when there is one ...
        fenced = re.search(r'```(?:json)?\s*([\s\S]*?)\s*```', text)
        if fenced:
            candidates.append(fenced.group(1))
        # ... but still consider the whole response, in case the fence holds
        # something other than the JSON payload.
        candidates.append(text)

        for candidate in candidates:
            start = candidate.find('{')
            end = candidate.rfind('}')
            if start != -1 and end > start:
                # Outermost brace span; for a response that is already a bare
                # object this is the whole (stripped) string.
                return json.loads(candidate[start:end + 1])

        raise ValueError(f"Could not extract valid JSON from: {text[:200]}")

    @staticmethod
    def _value_or_default(parsed: Dict[str, Any], key: str, default: Any) -> Any:
        """Return ``parsed[key]``, or ``default`` when the key is missing or null."""
        value = parsed.get(key)
        return default if value is None else value

    @staticmethod
    def _to_float(value: Any, default: float) -> float:
        """
        Coerce a JSON value to a number, falling back to ``default``.

        int/float values are returned unchanged, numeric strings are
        converted with ``float``; booleans, NaN, None and anything else
        yield ``default``.
        """
        if isinstance(value, bool):
            # bool is an int subclass, but is not a meaningful numeric answer.
            return default
        if isinstance(value, str):
            try:
                value = float(value.strip())
            except ValueError:
                return default
        if isinstance(value, (int, float)):
            # NaN is the only value that is not equal to itself.
            return default if value != value else value
        return default

    def _build_intent(
        self,
        raw_goal: str,
        parsed: Dict[str, Any],
        data_summary: ExtractedDataSummary
    ) -> AnalysisIntent:
        """
        Build AnalysisIntent from parsed JSON.

        Missing keys and explicit JSON nulls both fall back to the defaults
        below; unknown enum values fall back to TREND_ASSESSMENT /
        RD_REFERENCE.

        Args:
            raw_goal: User's raw analysis goal text.
            parsed: JSON object decoded from the LLM response.
            data_summary: Summary of extracted data, stored on the intent.

        Returns:
            The assembled AnalysisIntent.
        """
        # Map analysis type
        analysis_type_str = self._value_or_default(
            parsed, "analysis_type", "trend_assessment"
        )
        try:
            analysis_type = AnalysisType(analysis_type_str)
        except ValueError:
            analysis_type = AnalysisType.TREND_ASSESSMENT

        # Map purpose
        purpose_str = self._value_or_default(parsed, "purpose", "rd_reference")
        try:
            purpose = AnalysisPurpose(purpose_str)
        except ValueError:
            purpose = AnalysisPurpose.RD_REFERENCE

        # Build preferences
        target_timepoints = self._value_or_default(
            parsed, "target_timepoints", [24, 36]
        )
        if isinstance(target_timepoints, (int, float)) and not isinstance(
            target_timepoints, bool
        ):
            # A single number instead of a list: treat it as a one-item list.
            target_timepoints = [target_timepoints]
        preferences = UserPreferences(
            allow_extrapolation=self._value_or_default(
                parsed, "allow_extrapolation", True
            ),
            target_timepoints=target_timepoints,
            required_confidence=self._to_float(
                parsed.get("required_confidence"), 0.95
            )
        )

        # Build constraints
        spec_limit = self._to_float(parsed.get("specification_limit"), 0.5)
        if not spec_limit > 0:
            spec_limit = 0.5  # Default
        constraints = HardConstraints(
            purpose=purpose,
            primary_cqa=self._value_or_default(parsed, "primary_cqa", "总杂质"),
            specification_limit=spec_limit
        )

        # A bare string would otherwise be iterated character by character
        # when the ambiguities are listed for the user.
        ambiguities = self._value_or_default(parsed, "ambiguities", [])
        if isinstance(ambiguities, str):
            ambiguities = [ambiguities]

        # Build intent
        return AnalysisIntent(
            raw_goal=raw_goal,
            analysis_type=analysis_type,
            preferences=preferences,
            constraints=constraints,
            data_summary=data_summary,
            parse_confidence=self._to_float(parsed.get("parse_confidence"), 0.5),
            ambiguities=ambiguities
        )

    def _create_default_intent(
        self,
        raw_goal: str,
        data_summary: ExtractedDataSummary,
        error_msg: Optional[str] = None
    ) -> AnalysisIntent:
        """
        Create a default intent when parsing fails.

        The analysis type is guessed from keywords in the goal text and the
        target timepoints from "<number> + month unit" mentions; everything
        else uses the schema defaults.

        Args:
            raw_goal: User's raw analysis goal text.
            data_summary: Summary of extracted data, stored on the intent.
            error_msg: Optional parse error text; when given it is recorded
                as an ambiguity.

        Returns:
            An AnalysisIntent with parse_confidence 0.3.
        """
        # Try simple keyword matching for analysis type
        goal_lower = raw_goal.lower()
        if any(kw in goal_lower for kw in ["预测", "货架期", "shelf"]):
            analysis_type = AnalysisType.SHELF_LIFE_PREDICTION
        elif any(kw in goal_lower for kw in ["对比", "筛选", "最优", "比较"]):
            analysis_type = AnalysisType.BATCH_COMPARISON
        elif any(kw in goal_lower for kw in ["风险", "超标", "合规"]):
            analysis_type = AnalysisType.RISK_EVALUATION
        else:
            analysis_type = AnalysisType.TREND_ASSESSMENT

        # Extract timepoints if mentioned
        timepoints = [24, 36]  # Default
        tp_match = self._TIMEPOINT_RE.findall(raw_goal)
        if tp_match:
            timepoints = [int(t) for t in tp_match]

        ambiguities = []
        if error_msg:
            ambiguities.append(f"LLM解析失败: {error_msg}")

        return AnalysisIntent(
            raw_goal=raw_goal,
            analysis_type=analysis_type,
            preferences=UserPreferences(
                target_timepoints=timepoints
            ),
            constraints=HardConstraints(),
            data_summary=data_summary,
            parse_confidence=0.3,  # Low confidence for fallback
            ambiguities=ambiguities
        )

    def parse_with_confirmation(
        self,
        user_goal: str,
        data_summary: Optional[ExtractedDataSummary] = None
    ) -> Tuple[AnalysisIntent, bool, str]:
        """
        Parse intent and determine if user confirmation is needed.

        Args:
            user_goal: User's raw analysis goal text
            data_summary: Optional summary of extracted data

        Returns:
            Tuple of (intent, needs_confirmation, confirmation_prompt).
            The prompt is an empty string when the intent reports high
            confidence and no confirmation is needed.
        """
        intent = self.parse(user_goal, data_summary)
        if intent.is_high_confidence():
            return intent, False, ""

        # Build confirmation prompt
        confirmation_parts = [
            "请确认以下解析结果:",
            f"- 分析类型: {intent.analysis_type.value}",
            f"- 目标时间点: {intent.preferences.target_timepoints}",
            f"- 主要CQA: {intent.constraints.primary_cqa}",
            f"- 分析目的: {intent.constraints.purpose.value}",
        ]
        if intent.ambiguities:
            confirmation_parts.append("\n⚠️ 存在以下不确定项:")
            for amb in intent.ambiguities:
                confirmation_parts.append(f"  - {amb}")

        confirmation_prompt = "\n".join(confirmation_parts)
        return intent, True, confirmation_prompt