| """
|
| Intent Parser - Layer 1: User Intent Understanding
|
|
|
| This module implements the first layer of the three-layer architecture.
|
| It uses an LLM to understand the user's natural-language goals and converts them
|
| to structured AnalysisIntent objects.
|
|
|
| CRITICAL BOUNDARIES:
|
| - ✅ LLM is used ONLY for semantic understanding
|
| - ❌ NO numerical calculations
|
| - ❌ NO regulatory decisions
|
| - ❌ NO data sufficiency checks
|
| """
|
|
|
| import json
|
| import re
|
| from typing import Dict, Any, Optional, Tuple, List
|
| from dataclasses import asdict
|
|
|
| from schemas.analysis_intent import (
|
| AnalysisIntent,
|
| AnalysisType,
|
| AnalysisPurpose,
|
| UserPreferences,
|
| HardConstraints,
|
| ExtractedDataSummary,
|
| )
|
| from prompts.intent_prompts import IntentPrompts
|
| from layers.model_invoker import ModelInvoker
|
|
|
|
|
class IntentParser:
    """
    Layer 1: User Intent Understanding.

    Converts natural language analysis goals into structured AnalysisIntent.
    Uses LLM purely for semantic understanding - no calculations or decisions.

    Everything the LLM returns is treated as untrusted JSON: values are
    type-checked and coerced before they are placed into the intent, and
    anything unusable falls back to the documented default instead of
    discarding the whole LLM answer.

    Annotations that name project types are written as strings so that the
    class body can be defined without evaluating those types.
    """

    # First markdown code fence in a reply, optionally tagged ``json``.
    _FENCE_RE = re.compile(r'```(?:json)?\s*([\s\S]*?)\s*```')

    # "<number> months" in Chinese or English: 24月, 24个月, 24M, 24 months,
    # 24mo.  The negative lookahead stops unit strings such as "5mg",
    # "10 mL" or "3 min" from being read as month counts.
    _TIMEPOINT_RE = re.compile(
        r'(\d+)\s*(?:个?月|m(?:onths?|os?)?(?![A-Za-z]))',
        re.IGNORECASE,
    )

    # Fallback values used whenever the LLM omits a field or returns
    # something that cannot be interpreted.
    _DEFAULT_TIMEPOINTS = (24, 36)
    _DEFAULT_SPEC_LIMIT = 0.5
    _DEFAULT_REQUIRED_CONFIDENCE = 0.95
    _DEFAULT_PARSE_CONFIDENCE = 0.5
    _DEFAULT_PRIMARY_CQA = "总杂质"

    def __init__(self, model_invoker: 'Optional[ModelInvoker]' = None):
        """
        Initialize the intent parser.

        Args:
            model_invoker: LLM invoker instance. Creates new one if not provided.
        """
        self.model_invoker = model_invoker or ModelInvoker()

    def parse(
        self,
        user_goal: str,
        data_summary: 'Optional[ExtractedDataSummary]' = None
    ) -> 'AnalysisIntent':
        """
        Parse user's natural language goal into structured intent.

        The goal and a short description of the available data are sent to
        the LLM.  If the invocation reports failure, or its reply cannot be
        turned into an intent, a keyword-based default intent is returned
        instead of raising.

        Args:
            user_goal: User's raw analysis goal text
            data_summary: Optional summary of extracted data

        Returns:
            AnalysisIntent structure for Layer 2
        """
        if data_summary is None:
            data_summary = ExtractedDataSummary()

        cqas = data_summary.cqa_list
        timepoints = data_summary.available_timepoints
        system_prompt, user_prompt = IntentPrompts.get_intent_prompt(
            user_goal=user_goal,
            n_batches=len(data_summary.batch_ids),
            n_conditions=len(data_summary.conditions),
            cqa_list=", ".join(cqas) if cqas else "未知",
            max_timepoint=max(timepoints) if timepoints else 0,
        )

        response = self.model_invoker.invoke(
            system_prompt=system_prompt,
            user_prompt=user_prompt,
            temperature=0.1,
        )
        if not response.success:
            return self._create_default_intent(user_goal, data_summary)

        try:
            parsed = self._extract_json(response.content)
            return self._build_intent(user_goal, parsed, data_summary)
        except Exception as exc:
            # Deliberate best-effort boundary: any failure while reading the
            # reply degrades to the default intent, with the reason recorded
            # in its ambiguities.
            return self._create_default_intent(user_goal, data_summary, str(exc))

    def _extract_json(self, text: str) -> Dict[str, Any]:
        """
        Extract a JSON object from an LLM response.

        The content of the first markdown code fence is tried first, then the
        full text.  In each candidate, decoding starts at the first ``{`` and
        stops at the end of that object, so prose before or after the JSON is
        tolerated.

        Args:
            text: Raw LLM response text.

        Returns:
            The decoded JSON object.

        Raises:
            ValueError: If no candidate yields a JSON object.
        """
        candidates = []
        fenced = self._FENCE_RE.search(text)
        if fenced:
            candidates.append(fenced.group(1))
        candidates.append(text)

        decoder = json.JSONDecoder()
        for candidate in candidates:
            start = candidate.find("{")
            if start == -1:
                continue
            try:
                obj, _ = decoder.raw_decode(candidate[start:])
            except json.JSONDecodeError:
                continue
            if isinstance(obj, dict):
                return obj

        raise ValueError(f"Could not extract valid JSON from: {text[:200]}")

    @staticmethod
    def _to_float(value: Any) -> Optional[float]:
        """Return ``value`` as a finite float, or None if it is not numeric.

        Accepts ints, floats and numeric strings (a trailing ``%`` is
        ignored).  Booleans, None, NaN and infinities are rejected.
        """
        if value is None or isinstance(value, bool):
            return None
        if isinstance(value, str):
            value = value.strip().rstrip("%").strip()
        try:
            number = float(value)
        except (TypeError, ValueError, OverflowError):
            return None
        if number != number or number in (float("inf"), float("-inf")):
            return None
        return number

    @staticmethod
    def _coerce_timepoints(value: Any, default: Any) -> list:
        """Normalize an LLM-supplied time point field to a list of numbers.

        ``None`` yields a copy of ``default``; a scalar is wrapped in a list;
        non-numeric entries are dropped.  If a non-empty input contains no
        usable number, ``default`` is used.  An explicit empty list is kept.
        """
        if value is None:
            return list(default)
        items = list(value) if isinstance(value, (list, tuple)) else [value]
        cleaned = []
        for item in items:
            number = IntentParser._to_float(item)
            if number is None:
                continue
            # Keep whole months as ints, exactly as JSON integers arrive.
            cleaned.append(int(number) if number == int(number) else number)
        if items and not cleaned:
            return list(default)
        return cleaned

    @classmethod
    def _extract_timepoints(cls, raw_goal: str) -> list:
        """Find month counts mentioned in the goal text.

        Returns the distinct matches in order of appearance, or the default
        time points when the text mentions none.
        """
        found = [int(token) for token in cls._TIMEPOINT_RE.findall(raw_goal)]
        if not found:
            return list(cls._DEFAULT_TIMEPOINTS)
        return list(dict.fromkeys(found))

    def _build_intent(
        self,
        raw_goal: str,
        parsed: Dict[str, Any],
        data_summary: 'ExtractedDataSummary'
    ) -> 'AnalysisIntent':
        """
        Build AnalysisIntent from parsed JSON.

        Every field is validated individually; a missing, null or malformed
        value falls back to its default so that one bad field does not throw
        away the rest of the LLM answer.

        Args:
            raw_goal: The user's original goal text.
            parsed: JSON object decoded from the LLM response.
            data_summary: Summary of the extracted data.

        Returns:
            The assembled AnalysisIntent.
        """
        try:
            analysis_type = AnalysisType(parsed.get("analysis_type", "trend_assessment"))
        except ValueError:
            analysis_type = AnalysisType.TREND_ASSESSMENT

        try:
            purpose = AnalysisPurpose(parsed.get("purpose", "rd_reference"))
        except ValueError:
            purpose = AnalysisPurpose.RD_REFERENCE

        # Accept real booleans and the strings "true"/"false"; anything else
        # (including null) keeps the permissive default.
        allow_flag = parsed.get("allow_extrapolation")
        if isinstance(allow_flag, str):
            allow_flag = {"true": True, "false": False}.get(allow_flag.strip().lower())
        allow_extrapolation = allow_flag if isinstance(allow_flag, bool) else True

        required_confidence = self._to_float(parsed.get("required_confidence"))
        if required_confidence is None or not 0 < required_confidence <= 1:
            required_confidence = self._DEFAULT_REQUIRED_CONFIDENCE

        preferences = UserPreferences(
            allow_extrapolation=allow_extrapolation,
            target_timepoints=self._coerce_timepoints(
                parsed.get("target_timepoints"), self._DEFAULT_TIMEPOINTS
            ),
            required_confidence=required_confidence,
        )

        # A non-numeric limit (e.g. "0.5%") used to raise TypeError on the
        # comparison below; it is now coerced first.
        spec_limit = self._to_float(parsed.get("specification_limit"))
        if spec_limit is None or spec_limit <= 0:
            spec_limit = self._DEFAULT_SPEC_LIMIT

        primary_cqa = parsed.get("primary_cqa")
        if not (isinstance(primary_cqa, str) and primary_cqa.strip()):
            primary_cqa = self._DEFAULT_PRIMARY_CQA

        constraints = HardConstraints(
            purpose=purpose,
            primary_cqa=primary_cqa,
            specification_limit=spec_limit,
        )

        parse_confidence = self._to_float(parsed.get("parse_confidence"))
        if parse_confidence is None or not 0 <= parse_confidence <= 1:
            parse_confidence = self._DEFAULT_PARSE_CONFIDENCE

        # A bare string would otherwise be iterated character by character
        # when the confirmation prompt is assembled.
        raw_ambiguities = parsed.get("ambiguities")
        if raw_ambiguities is None:
            ambiguities = []
        elif isinstance(raw_ambiguities, str):
            ambiguities = [raw_ambiguities] if raw_ambiguities.strip() else []
        elif isinstance(raw_ambiguities, (list, tuple)):
            ambiguities = [str(item) for item in raw_ambiguities if item is not None]
        else:
            ambiguities = [str(raw_ambiguities)]

        return AnalysisIntent(
            raw_goal=raw_goal,
            analysis_type=analysis_type,
            preferences=preferences,
            constraints=constraints,
            data_summary=data_summary,
            parse_confidence=parse_confidence,
            ambiguities=ambiguities,
        )

    def _create_default_intent(
        self,
        raw_goal: str,
        data_summary: 'ExtractedDataSummary',
        error_msg: Optional[str] = None
    ) -> 'AnalysisIntent':
        """
        Create a default intent when parsing fails.

        The analysis type is guessed from keywords in the goal (first
        matching rule wins), target time points are taken from month
        mentions in the goal text, and the parse confidence is fixed at 0.3.

        Args:
            raw_goal: The user's original goal text.
            data_summary: Summary of the extracted data.
            error_msg: Optional failure reason, recorded as an ambiguity.

        Returns:
            A low-confidence AnalysisIntent with default constraints.
        """
        goal_lower = raw_goal.lower()

        # Ordered rules: earlier entries take precedence.
        keyword_rules = (
            (("预测", "货架期", "shelf"), AnalysisType.SHELF_LIFE_PREDICTION),
            (("对比", "筛选", "最优", "比较"), AnalysisType.BATCH_COMPARISON),
            (("风险", "超标", "合规"), AnalysisType.RISK_EVALUATION),
        )
        analysis_type = AnalysisType.TREND_ASSESSMENT
        for keywords, candidate in keyword_rules:
            if any(kw in goal_lower for kw in keywords):
                analysis_type = candidate
                break

        ambiguities = []
        if error_msg:
            ambiguities.append(f"LLM解析失败: {error_msg}")

        return AnalysisIntent(
            raw_goal=raw_goal,
            analysis_type=analysis_type,
            preferences=UserPreferences(
                target_timepoints=self._extract_timepoints(raw_goal)
            ),
            constraints=HardConstraints(),
            data_summary=data_summary,
            parse_confidence=0.3,
            ambiguities=ambiguities,
        )

    def parse_with_confirmation(
        self,
        user_goal: str,
        data_summary: 'Optional[ExtractedDataSummary]' = None
    ) -> 'Tuple[AnalysisIntent, bool, str]':
        """
        Parse intent and determine if user confirmation is needed.

        Confirmation is skipped when ``intent.is_high_confidence()`` is true;
        otherwise a prompt summarizing the parsed fields and any recorded
        ambiguities is built.

        Args:
            user_goal: User's raw analysis goal text
            data_summary: Optional summary of extracted data

        Returns:
            Tuple of (intent, needs_confirmation, confirmation_prompt)
        """
        intent = self.parse(user_goal, data_summary)

        if intent.is_high_confidence():
            return intent, False, ""

        confirmation_parts = [
            "请确认以下解析结果:",
            f"- 分析类型: {intent.analysis_type.value}",
            f"- 目标时间点: {intent.preferences.target_timepoints}",
            f"- 主要CQA: {intent.constraints.primary_cqa}",
            f"- 分析目的: {intent.constraints.purpose.value}",
        ]

        if intent.ambiguities:
            confirmation_parts.append("\n⚠️ 存在以下不确定项:")
            confirmation_parts.extend(f" - {amb}" for amb in intent.ambiguities)

        return intent, True, "\n".join(confirmation_parts)
|
|
|