Spaces:

Kevinshh
/

Preformu

Running

App Files Files Community

Preformu / layers /intent_parser.py

Kevinshh

Upload 3 files

e335526 verified 3 months ago

raw

history blame contribute delete

8.59 kB

	"""
	Intent Parser - Layer 1: User Intent Understanding

	This module implements the first layer of the three-layer architecture.
	It uses an LLM to understand the user's natural-language goals and converts
	them into structured AnalysisIntent objects.

	CRITICAL BOUNDARIES:
	- ✅ LLM is used ONLY for semantic understanding
	- ❌ NO numerical calculations
	- ❌ NO regulatory decisions
	- ❌ NO data sufficiency checks
	"""

	import json
	import re
	from typing import Dict, Any, Optional, Tuple, List
	from dataclasses import asdict

	from schemas.analysis_intent import (
	AnalysisIntent,
	AnalysisType,
	AnalysisPurpose,
	UserPreferences,
	HardConstraints,
	ExtractedDataSummary,
	)
	from prompts.intent_prompts import IntentPrompts
	from layers.model_invoker import ModelInvoker


	class IntentParser:
	    """
	    Layer 1: User Intent Understanding.

	    Converts natural language analysis goals into structured AnalysisIntent.
	    Uses LLM purely for semantic understanding - no calculations or decisions.

	    If the LLM call is unsuccessful, or its reply cannot be turned into an
	    intent, the parser falls back to keyword matching on the raw goal text
	    and reports a low parse confidence.
	    """

	    # Values substituted when the LLM reply omits a field or sets it to null.
	    _DEFAULT_TIMEPOINTS = (24, 36)
	    _DEFAULT_SPEC_LIMIT = 0.5

	    # Body of a Markdown code fence with an optional ``json`` language tag.
	    # The capture group is lazy so it stops at the first closing fence.
	    _FENCED_BLOCK_RE = re.compile(r'```(?:json)?\s*([\s\S]*?)\s*```')
	    # A number followed by a month marker, e.g. "24M", "36 m", "12月".
	    _TIMEPOINT_RE = re.compile(r'(\d+)\s*[Mm月]')

	    def __init__(self, model_invoker: Optional["ModelInvoker"] = None):
	        """
	        Initialize the intent parser.

	        Args:
	            model_invoker: LLM invoker instance. A new ModelInvoker is
	                created when none is provided.
	        """
	        # Explicit None check so that a supplied invoker is never replaced
	        # just because it happens to evaluate as falsy.
	        if model_invoker is None:
	            model_invoker = ModelInvoker()
	        self.model_invoker = model_invoker

	    def parse(
	        self,
	        user_goal: str,
	        data_summary: Optional["ExtractedDataSummary"] = None
	    ) -> "AnalysisIntent":
	        """
	        Parse user's natural language goal into structured intent.

	        Args:
	            user_goal: User's raw analysis goal text.
	            data_summary: Optional summary of extracted data. An empty
	                ExtractedDataSummary is used when omitted.

	        Returns:
	            AnalysisIntent structure for Layer 2. When the LLM call is not
	            successful or its reply cannot be parsed, this is the
	            keyword-based fallback intent instead.
	        """
	        if data_summary is None:
	            data_summary = ExtractedDataSummary()

	        # Get prompts
	        system_prompt, user_prompt = IntentPrompts.get_intent_prompt(
	            user_goal=user_goal,
	            n_batches=len(data_summary.batch_ids),
	            n_conditions=len(data_summary.conditions),
	            cqa_list=", ".join(data_summary.cqa_list) if data_summary.cqa_list else "未知",
	            max_timepoint=max(data_summary.available_timepoints) if data_summary.available_timepoints else 0
	        )

	        # Call LLM
	        response = self.model_invoker.invoke(
	            system_prompt=system_prompt,
	            user_prompt=user_prompt,
	            temperature=0.1  # Low temperature for consistent parsing
	        )

	        if not response.success:
	            # Fallback to default intent
	            return self._create_default_intent(user_goal, data_summary)

	        # Parse LLM response. The broad except is deliberate: any failure
	        # while decoding or building the intent degrades to the fallback
	        # intent, and the error text is surfaced through its ambiguities.
	        try:
	            parsed = self._extract_json(response.content)
	            return self._build_intent(user_goal, parsed, data_summary)
	        except Exception as e:
	            return self._create_default_intent(user_goal, data_summary, str(e))

	    def _extract_json(self, text: str) -> Dict[str, Any]:
	        """
	        Extract a JSON object from an LLM response.

	        The content of the first Markdown code fence is tried first, then the
	        full text. In each candidate the span from the first ``{`` to the
	        last ``}`` is decoded, so JSON surrounded by explanatory prose is
	        still recovered.

	        Args:
	            text: Raw LLM response text.

	        Returns:
	            The decoded JSON object.

	        Raises:
	            ValueError: If no ``{...}`` span is found, or if the span is not
	                valid JSON (json.JSONDecodeError is a ValueError subclass).
	        """
	        candidates = []
	        fenced = self._FENCED_BLOCK_RE.search(text)
	        if fenced:
	            candidates.append(fenced.group(1))
	        candidates.append(text)

	        for candidate in candidates:
	            start = candidate.find('{')
	            end = candidate.rfind('}')
	            if start != -1 and end > start:
	                return json.loads(candidate[start:end + 1])

	        raise ValueError(f"Could not extract valid JSON from: {text[:200]}")

	    @staticmethod
	    def _enum_or_default(enum_cls, raw_value: Any, default: Any) -> Any:
	        """Return ``enum_cls(raw_value)``, or ``default`` if that raises ValueError."""
	        try:
	            return enum_cls(raw_value)
	        except ValueError:
	            return default

	    @staticmethod
	    def _field(parsed: Dict[str, Any], key: str, default: Any) -> Any:
	        """Return ``parsed[key]``, treating a missing key and JSON null alike."""
	        value = parsed.get(key)
	        return default if value is None else value

	    @classmethod
	    def _coerce_spec_limit(cls, raw_value: Any) -> float:
	        """
	        Normalise the specification limit taken from the LLM reply.

	        Numeric strings are converted to float. Anything that is not a
	        finite positive number (None, booleans, unparsable text, NaN,
	        zero, negatives, infinity) yields the default limit.
	        """
	        if isinstance(raw_value, bool):
	            # bool is an int subclass; True/False is not a meaningful limit.
	            return cls._DEFAULT_SPEC_LIMIT
	        if isinstance(raw_value, str):
	            try:
	                raw_value = float(raw_value)
	            except ValueError:
	                return cls._DEFAULT_SPEC_LIMIT
	        if not isinstance(raw_value, (int, float)):
	            return cls._DEFAULT_SPEC_LIMIT
	        # The chained comparison is False for NaN as well.
	        if 0 < raw_value < float("inf"):
	            return raw_value
	        return cls._DEFAULT_SPEC_LIMIT

	    def _build_intent(
	        self,
	        raw_goal: str,
	        parsed: Dict[str, Any],
	        data_summary: "ExtractedDataSummary"
	    ) -> "AnalysisIntent":
	        """
	        Build AnalysisIntent from parsed JSON.

	        Args:
	            raw_goal: The user's original goal text.
	            parsed: JSON object decoded from the LLM reply.
	            data_summary: Summary of the extracted data, passed through.

	        Returns:
	            AnalysisIntent populated from ``parsed``. Fields that are
	            missing or null take their defaults; unrecognised enum values
	            fall back to TREND_ASSESSMENT / RD_REFERENCE.
	        """
	        # Map analysis type
	        analysis_type = self._enum_or_default(
	            AnalysisType,
	            parsed.get("analysis_type", "trend_assessment"),
	            AnalysisType.TREND_ASSESSMENT,
	        )

	        # Map purpose
	        purpose = self._enum_or_default(
	            AnalysisPurpose,
	            parsed.get("purpose", "rd_reference"),
	            AnalysisPurpose.RD_REFERENCE,
	        )

	        # Build preferences
	        preferences = UserPreferences(
	            allow_extrapolation=self._field(parsed, "allow_extrapolation", True),
	            target_timepoints=self._field(
	                parsed, "target_timepoints", list(self._DEFAULT_TIMEPOINTS)
	            ),
	            required_confidence=self._field(parsed, "required_confidence", 0.95)
	        )

	        # Build constraints
	        constraints = HardConstraints(
	            purpose=purpose,
	            primary_cqa=self._field(parsed, "primary_cqa", "总杂质"),
	            specification_limit=self._coerce_spec_limit(
	                parsed.get("specification_limit")
	            )
	        )

	        # A single ambiguity given as a bare string is wrapped in a list so
	        # that later iteration does not walk it character by character.
	        ambiguities = self._field(parsed, "ambiguities", [])
	        if isinstance(ambiguities, str):
	            ambiguities = [ambiguities]

	        # Build intent
	        return AnalysisIntent(
	            raw_goal=raw_goal,
	            analysis_type=analysis_type,
	            preferences=preferences,
	            constraints=constraints,
	            data_summary=data_summary,
	            parse_confidence=self._field(parsed, "parse_confidence", 0.5),
	            ambiguities=ambiguities
	        )

	    def _create_default_intent(
	        self,
	        raw_goal: str,
	        data_summary: "ExtractedDataSummary",
	        error_msg: Optional[str] = None
	    ) -> "AnalysisIntent":
	        """
	        Create a default intent when parsing fails.

	        The analysis type is guessed from keywords in the goal text and the
	        target time points from numbers followed by a month marker.

	        Args:
	            raw_goal: The user's original goal text.
	            data_summary: Summary of the extracted data, passed through.
	            error_msg: Optional description of the parse failure; when
	                given it is recorded as an ambiguity.

	        Returns:
	            AnalysisIntent with default preferences/constraints and a parse
	            confidence of 0.3.
	        """
	        # Try simple keyword matching for analysis type
	        goal_lower = raw_goal.lower()

	        if any(kw in goal_lower for kw in ["预测", "货架期", "shelf"]):
	            analysis_type = AnalysisType.SHELF_LIFE_PREDICTION
	        elif any(kw in goal_lower for kw in ["对比", "筛选", "最优", "比较"]):
	            analysis_type = AnalysisType.BATCH_COMPARISON
	        elif any(kw in goal_lower for kw in ["风险", "超标", "合规"]):
	            analysis_type = AnalysisType.RISK_EVALUATION
	        else:
	            analysis_type = AnalysisType.TREND_ASSESSMENT

	        # Extract timepoints if mentioned; dict.fromkeys drops repeats while
	        # keeping the order in which they appear in the goal.
	        mentioned = self._TIMEPOINT_RE.findall(raw_goal)
	        if mentioned:
	            timepoints = list(dict.fromkeys(int(t) for t in mentioned))
	        else:
	            timepoints = list(self._DEFAULT_TIMEPOINTS)

	        ambiguities = []
	        if error_msg:
	            ambiguities.append(f"LLM解析失败: {error_msg}")

	        return AnalysisIntent(
	            raw_goal=raw_goal,
	            analysis_type=analysis_type,
	            preferences=UserPreferences(
	                target_timepoints=timepoints
	            ),
	            constraints=HardConstraints(),
	            data_summary=data_summary,
	            parse_confidence=0.3,  # Low confidence for fallback
	            ambiguities=ambiguities
	        )

	    def parse_with_confirmation(
	        self,
	        user_goal: str,
	        data_summary: Optional["ExtractedDataSummary"] = None
	    ) -> Tuple["AnalysisIntent", bool, str]:
	        """
	        Parse intent and determine if user confirmation is needed.

	        Args:
	            user_goal: User's raw analysis goal text.
	            data_summary: Optional summary of extracted data.

	        Returns:
	            Tuple of (intent, needs_confirmation, confirmation_prompt). The
	            prompt is an empty string when the intent reports high
	            confidence and no confirmation is needed.
	        """
	        intent = self.parse(user_goal, data_summary)

	        if intent.is_high_confidence():
	            return intent, False, ""

	        # Build confirmation prompt
	        confirmation_parts = [
	            "请确认以下解析结果：",
	            f"- 分析类型: {intent.analysis_type.value}",
	            f"- 目标时间点: {intent.preferences.target_timepoints}",
	            f"- 主要CQA: {intent.constraints.primary_cqa}",
	            f"- 分析目的: {intent.constraints.purpose.value}",
	        ]

	        if intent.ambiguities:
	            confirmation_parts.append("\n⚠️ 存在以下不确定项：")
	            for amb in intent.ambiguities:
	                confirmation_parts.append(f" - {amb}")

	        confirmation_prompt = "\n".join(confirmation_parts)

	        return intent, True, confirmation_prompt