Spaces:

kushalExplores
/

flatmate_rl

Sleeping

App Files Files Community

flatmate_rl / server /episode.py

kushalExplores

Add step-2 GRPO notebook and hidden-flex fix

dbb1ce2 verified 12 days ago

raw

history blame contribute delete

71.2 kB

	"""Deterministic episode logic for Flatmate RL."""

	from __future__ import annotations

	import os
	from copy import deepcopy
	import json
	import re
	from typing import Any

	try:
	from ..models import FlatmateRlAction, FlatmateRlObservation, FlatmateRlState
	from .heuristic_policy import expected_policy_action
	from .scenario_variants import apply_seed_variant
	from .scenarios import POSTS, SCENARIOS
	except ImportError:
	from models import FlatmateRlAction, FlatmateRlObservation, FlatmateRlState
	from server.heuristic_policy import expected_policy_action
	from server.scenario_variants import apply_seed_variant
	from server.scenarios import POSTS, SCENARIOS


	# Tool names offered while the episode is in the buyer phase, grouped by the
	# scenario family that introduced them.
	BUYER_TOOLS = (
	    [
	        "store_user_details",
	        "search_posts",
	        "close_buyer_conversation",
	        "match_location_preference",
	        "get_commute_time",
	        "check_calendar_slots",
	        "shortlist",
	        "contact_poster",
	        "book_viewing",
	    ]
	    # Scenario 1: hidden-budget negotiation
	    + [
	        "propose_price_to_buyer",
	        "propose_price_to_seller",
	        "confirm_negotiated_deal",
	    ]
	    # Scenario 2: slot cancellation waitlist
	    + ["add_to_waitlist", "notify_buyer_slot_freed"]
	    # Scenario 3: multi-visit preference evolution
	    + ["debrief_visit", "filter_new_arrivals"]
	)

	# Tool names offered while the episode is in the seller phase.
	SELLER_TOOLS = [
	    "store_seller_details",
	    "match_location_preference",
	    "check_table_slot_matches",
	    "confirm_seller_match",
	    "offer_matched_listing_to_buyer",
	    "schedule_table_visit",
	]

	# Every tool name known to the simulator, regardless of phase.
	ALL_TOOLS = {*BUYER_TOOLS, *SELLER_TOOLS}

	# Substrings looked for in a lower-cased assistant message to decide which
	# buyer field the assistant is asking about.
	BUYER_FIELD_KEYWORDS = {
	    "budget": ("budget", "rs.", "20,000"),
	    "diet": ("diet", "non-veg", "vegetarian"),
	    "areas": ("area", "andheri", "jogeshwari"),
	    "occupation": ("work", "occupation", "engineer", "job"),
	    "visit_availability": ("visit", "availability", "slot", "time"),
	}

	# Same idea for the seller conversation.
	SELLER_FIELD_KEYWORDS = {
	    "area": ("area", "jogeshwari", "andheri"),
	    "rent": ("rent", "19,500", "19500"),
	    "dietary": ("diet", "non-veg", "veg"),
	    "listing_type": ("2bhk", "1bhk", "room", "share", "household"),
	    "occupation_requirement": (
	        "working professional",
	        "professionals",
	        "occupation",
	        "fit",
	        "flat is for",
	        "who the flat is for",
	    ),
	    "calendar_slots": ("slot", "saturday", "sunday", "time"),
	}

	# Gathered-field name -> key name; presumably the key used inside the buyer
	# profile dict (TODO confirm against the code that consumes this mapping).
	FIELD_TO_PROFILE_KEY = {
	    "budget": "budget_max",
	    "diet": "dietary",
	    "areas": "areas",
	    "occupation": "occupation",
	    "visit_availability": "visit_availability",
	}


	class FlatmateEpisode:
	"""Stateful deterministic simulator for broker-style visit scheduling."""

	def __init__(self, strict_eval_mode: bool | None = None) -> None:
	    """Create an episode with empty bookkeeping; ``reset()`` loads a scenario.

	    Args:
	        strict_eval_mode: Explicit strict-evaluation switch. When ``None``
	            the ``STRICT_EVAL_MODE`` environment variable decides: the
	            values ``1``, ``true``, ``yes`` and ``on`` (any letter case)
	            enable it, anything else disables it.
	    """
	    if strict_eval_mode is not None:
	        self._strict_eval_mode = strict_eval_mode
	    else:
	        env_value = os.getenv("STRICT_EVAL_MODE", "").lower()
	        self._strict_eval_mode = env_value in {"1", "true", "yes", "on"}

	    # Scenario data and public state.
	    self._state = FlatmateRlState()
	    self._scenario: dict[str, Any] = {}
	    self._posts: dict[str, dict[str, Any]] = {}

	    # Tool logs and conversation transcripts.
	    self._tool_results: list[dict[str, Any]] = []
	    self._tool_trace: list[dict[str, Any]] = []
	    self._history: list[dict[str, str]] = []
	    self._buyer_history: list[dict[str, str]] = []
	    self._seller_history: list[dict[str, str]] = []
	    self._violations: list[str] = []

	    # Per-post facts established through tool calls and confirmations.
	    self._matched_posts: dict[str, bool] = {}
	    self._slots_checked: dict[str, list[str]] = {}
	    self._commutes_checked: dict[str, int] = {}
	    self._poster_confirmations: dict[str, str] = {}
	    self._client_confirmations: dict[str, str] = {}
	    self._seller_profile_fit_confirmations: dict[str, bool] = {}
	    self._seller_confirmations: dict[str, str] = {}
	    self._buyer_offer_confirmations: dict[str, str] = {}
	    self._dynamic_post_id: str | None = None

	    # Episode progress, reward and repeated-action tracking.
	    self._searched = False
	    self._done = False
	    self._last_user_message = ""
	    self._total_reward = 0.0
	    self._last_action_signature = ""
	    self._repeated_action_streak = 0
	    self._last_observation: FlatmateRlObservation | None = None

	    # Scenario 1: hidden-budget negotiation state
	    self._negotiation_rounds_buyer: int = 0
	    self._negotiation_rounds_seller: int = 0
	    self._buyer_price_accepted: int | None = None
	    self._seller_price_accepted: int | None = None
	    self._negotiated_deal_closed: bool = False

	    # Scenario 2: slot cancellation waitlist state
	    self._waitlist_active: bool = False
	    self._waitlist_post_id: str = ""
	    self._waitlist_slot: str = ""
	    self._cancellation_fired: bool = False

	    # Scenario 3: multi-visit preference evolution state
	    self._post_arrivals_fired: set[int] = set()
	    self._available_post_ids: list[str] = []

	def reset(self, scenario_id: str | None = None, seed: int | None = None) -> FlatmateRlObservation:
	    """Load a scenario, clear all per-episode bookkeeping and return the first observation.

	    Args:
	        scenario_id: Key into ``SCENARIOS``; a falsy value selects
	            ``"task_visit_single"``.
	        seed: Passed through to ``apply_seed_variant`` together with deep
	            copies of the scenario and its posts.

	    Returns:
	        The "ready" observation carrying the scenario's initial user message.
	    """
	    selected = scenario_id or "task_visit_single"
	    scenario_template = deepcopy(SCENARIOS[selected])
	    post_templates = {
	        post_id: deepcopy(POSTS[post_id])
	        for post_id in scenario_template["task_post_ids"]
	    }
	    self._scenario, self._posts = apply_seed_variant(scenario_template, post_templates, seed)

	    # Tool logs and conversation transcripts.
	    self._tool_results = []
	    self._tool_trace = []
	    self._history = []
	    self._buyer_history = []
	    self._seller_history = []
	    self._violations = []

	    # Per-post facts established through tool calls and confirmations.
	    self._matched_posts = {}
	    self._slots_checked = {}
	    self._commutes_checked = {}
	    self._poster_confirmations = {}
	    self._client_confirmations = {}
	    self._seller_profile_fit_confirmations = {}
	    self._seller_confirmations = {}
	    self._buyer_offer_confirmations = {}
	    self._dynamic_post_id = None

	    # Episode progress, reward and repeated-action tracking.
	    self._searched = False
	    self._done = False
	    self._total_reward = 0.0
	    self._last_action_signature = ""
	    self._repeated_action_streak = 0
	    self._last_observation = None

	    # Scenario-specific state (negotiation, waitlist, multi-visit).
	    self._negotiation_rounds_buyer = 0
	    self._negotiation_rounds_seller = 0
	    self._buyer_price_accepted = None
	    self._seller_price_accepted = None
	    self._negotiated_deal_closed = False
	    self._waitlist_active = False
	    self._waitlist_post_id = ""
	    self._waitlist_slot = ""
	    self._cancellation_fired = False
	    self._post_arrivals_fired = set()

	    # The scenario may expose only a subset of its posts at the start.
	    creation_config = self._scenario.get("scenario_creation_config", {})
	    initial_ids = creation_config.get("initial_post_ids")
	    visible_ids = self._scenario["task_post_ids"] if initial_ids is None else initial_ids
	    self._available_post_ids = list(visible_ids)

	    gathered_fields = self._initial_buyer_fields()
	    initial_message = self._scenario["initial_user_message"]
	    self._last_user_message = initial_message
	    opening_turn = {"role": "user", "content": initial_message}
	    self._history.append(dict(opening_turn))
	    self._buyer_history.append(dict(opening_turn))

	    self._state = FlatmateRlState(
	        scenario_id=selected,
	        phase="buyer",
	        status="ready",
	        gathered_fields=gathered_fields,
	        selected_posts=[],
	        booked_visits=[],
	        buyer_profile_stored=False,
	        seller_profile_stored=False,
	        tool_trace=[],
	        total_reward=0.0,
	        done=False,
	    )
	    return self._observation(
	        status="ready",
	        message="Scenario ready.",
	        current_user_request=initial_message,
	        last_tool_result={},
	        reward=0.0,
	        done=False,
	    )

	def step(self, action: FlatmateRlAction) -> FlatmateRlObservation:
	    """Advance the episode by one agent action.

	    A finished episode returns a zero-reward "completed" observation
	    without touching the state. Otherwise the step counter is bumped, the
	    action is dispatched to the message or tool handler, and the resulting
	    observation is passed through the flow adjustment.
	    """
	    if not self._done:
	        self._state.step_count += 1
	        # Evaluated before the action is handled, so it reflects the
	        # observation the agent was actually looking at.
	        expected_action = self._expected_flow_action()
	        if action.action_type != "assistant_message":
	            observation = self._handle_tool_call(action.tool_name, action.tool_arguments)
	        else:
	            observation = self._handle_assistant_message(action.assistant_message)
	        return self._apply_flow_adjustment(observation, action, expected_action)

	    return self._observation(
	        status="completed",
	        message="Episode is finished. Call reset() to start a new scenario.",
	        current_user_request="",
	        last_tool_result={},
	        reward=0.0,
	        done=True,
	    )

	def state(self) -> FlatmateRlState:
	return self._state

	def _initial_buyer_fields(self) -> list[str]:
	return list(self._scenario["buyer_profile"]["initial_disclosure_fields"])

	def _phase_tools(self) -> list[str]:
	    """List the tools callable in the current phase.

	    The seller phase uses ``SELLER_TOOLS``; every other phase uses
	    ``BUYER_TOOLS``. Once a phase's profile has been stored, its
	    ``store_*_details`` tool is left out of the list.
	    """
	    phase = self._state.phase
	    if phase == "seller":
	        hidden = "store_seller_details" if self._state.seller_profile_stored else None
	        return [tool for tool in SELLER_TOOLS if tool != hidden]

	    buyer_stored = phase == "buyer" and self._state.buyer_profile_stored
	    hidden = "store_user_details" if buyer_stored else None
	    return [tool for tool in BUYER_TOOLS if tool != hidden]

	def _required_fields(self) -> list[str]:
	if self._state.phase == "seller":
	return ["area", "rent", "dietary", "listing_type", "occupation_requirement", "calendar_slots"]
	required = list(self._scenario["ground_truth"]["required_info"])
	if self._state.phase == "buyer":
	return [field for field in required if field != "listing_choices" or self._scenario["task_id"] == "task_visit_multi"]
	return required

	def _remaining_fields(self) -> list[str]:
	gathered = set(self._state.gathered_fields)
	remaining = []
	for field in self._required_fields():
	if field == "listing_choices" and not self._searched:
	continue
	if field not in gathered:
	remaining.append(field)
	return remaining

	def _matches_any_slot(self, candidate: str, slots: list[str]) -> bool:
	normalized = candidate.strip().lower()
	for slot in slots:
	slot_normalized = slot.strip().lower()
	if normalized == slot_normalized:
	return True
	if normalized.endswith("7pm") and slot_normalized in {"today 7pm", "tomorrow 7pm"}:
	return True
	return False

	def _all_buyer_slots(self) -> list[str]:
	profile = self._scenario["buyer_profile"]
	slots = list(profile["visit_availability"])
	if self._scenario["task_id"] == "task_visit_single_hidden_flex":
	if self._state.gathered_fields.count("hidden_flex_revealed"):
	slots.extend(profile["hidden_additional_availability"])
	return slots

	def _record_client_confirmation_for_slot(self, slot: str) -> None:
	for post_id, checked_slots in self._slots_checked.items():
	if slot in checked_slots:
	self._client_confirmations[post_id] = slot
	return

	def _record_violation(self, text: str) -> None:
	if text not in self._violations:
	self._violations.append(text)

	def _expected_flow_action(self) -> FlatmateRlAction | None:
	    """Ask the heuristic policy which action it would take next.

	    Returns ``None`` when there is no previous observation or when the
	    policy yields no action for it.
	    """
	    previous = self._last_observation
	    if previous is None:
	        return None
	    suggestion = expected_policy_action(self._scenario["task_id"], previous.model_dump())
	    return None if suggestion is None else FlatmateRlAction.model_validate(suggestion)

	def _actions_match_expected_flow(self, actual: FlatmateRlAction, expected: FlatmateRlAction \| None) -> bool:
	if expected is None:
	return True
	if actual.action_type != expected.action_type:
	return False
	if actual.action_type == "assistant_message":
	return bool(actual.assistant_message.strip())
	return actual.tool_name == expected.tool_name

	def _describe_action(self, action: FlatmateRlAction \| None) -> str:
	if action is None:
	return "null"
	if action.action_type == "assistant_message":
	return "assistant_message"
	return action.tool_name

	def _missing_required_args(self, action: FlatmateRlAction) -> list[str]:
	if action.action_type != "tool_call":
	return []
	args = action.tool_arguments
	tool_name = action.tool_name
	if tool_name in {"contact_poster", "book_viewing"}:
	return [field for field in ["post_id", "time_text"] if not args.get(field)]
	if tool_name in {"match_location_preference", "get_commute_time", "check_calendar_slots"} and self._state.phase == "buyer":
	return ["post_ids"] if not args.get("post_ids") else []
	return []

	def _is_redundant_successful_tool_call(self, action: FlatmateRlAction) -> bool:
	if action.action_type != "tool_call":
	return False
	current_args = json.dumps(action.tool_arguments or {}, ensure_ascii=False, sort_keys=True)
	for trace in self._tool_trace[-6:-1]:
	if not trace.get("success"):
	continue
	previous_args = json.dumps(trace.get("args") or {}, ensure_ascii=False, sort_keys=True)
	if trace.get("tool") == action.tool_name and previous_args == current_args:
	return True
	return False

	def _book_viewing_violation_category(self, action: FlatmateRlAction) -> tuple[str, str] \| None:
	if action.action_type != "tool_call" or action.tool_name != "book_viewing":
	return None
	post_id = str(action.tool_arguments.get("post_id", ""))
	time_text = str(action.tool_arguments.get("time_text", ""))
	checked_slots = self._slots_checked.get(post_id, [])
	if not checked_slots:
	return "missing_prerequisite", "book_viewing requires a successful check_calendar_slots for that post first"
	if time_text not in checked_slots:
	return "calendar_mismatch", f"book_viewing slot {time_text or '<missing>'} was not returned by check_calendar_slots for {post_id or '<missing>'}"
	if self._poster_confirmations.get(post_id) != time_text or self._client_confirmations.get(post_id) != time_text:
	return "consent_violation", "book_viewing requires both buyer and poster confirmation for the same slot"
	return None

	def _classify_flow_adjustment(
	    self,
	    observation: FlatmateRlObservation,
	    actual_action: FlatmateRlAction,
	    expected_action: FlatmateRlAction | None,
	) -> tuple[str, float | None, bool, str] | None:
	    """Classify the action just taken for reward shaping.

	    Returns:
	        ``(category, replacement_reward, terminate, detail)``. Checks run
	        in this order, first hit wins:

	        1. ``hallucination`` (-1.0, terminates): unknown tool, or a
	           required argument is missing.
	        2. Booking violation from ``_book_viewing_violation_category``
	           (-0.5), skipped when the episode was just ended by the loop
	           detector.
	        3. ``missing_prerequisite`` (-0.5): the tool result message says
	           something "must be called before" or mentions "before closing".
	        4. ``redundant_tool_call`` (-0.05).
	        5. ``on_canonical_path``: 0.1 when the step reward is
	           non-negative, otherwise ``None`` (keep the step reward).
	        6. ``non_canonical_order`` (-0.1).
	    """
	    if actual_action.action_type == "tool_call":
	        tool = actual_action.tool_name
	        if tool not in ALL_TOOLS:
	            return "hallucination", -1.0, True, f"unknown tool {tool}"
	        absent = self._missing_required_args(actual_action)
	        if absent:
	            return "hallucination", -1.0, True, f"{tool} missing required args: {', '.join(absent)}"

	    ended_by_loop = observation.done and "action_loop_detected" in self._violations
	    if not ended_by_loop:
	        booking_violation = self._book_viewing_violation_category(actual_action)
	        if booking_violation is not None:
	            category, detail = booking_violation
	            return category, -0.5, False, detail

	    tool_message = observation.last_tool_result.get("message", "")
	    lowered_message = str(tool_message).lower()
	    if "must be called before" in lowered_message or "before closing" in lowered_message:
	        return "missing_prerequisite", -0.5, False, tool_message

	    if self._is_redundant_successful_tool_call(actual_action):
	        return "redundant_tool_call", -0.05, False, f"repeated successful {actual_action.tool_name} call within last 5 steps"

	    if not self._actions_match_expected_flow(actual_action, expected_action):
	        expected = self._describe_action(expected_action)
	        got = self._describe_action(actual_action)
	        return "non_canonical_order", -0.1, False, f"expected {expected}, got {got}"

	    # Only non-negative step rewards are replaced by the on-path bonus.
	    bonus = 0.1 if float(observation.step_reward) >= 0.0 else None
	    return "on_canonical_path", bonus, False, "matched expected action"

	def _apply_flow_adjustment(
	    self,
	    observation: FlatmateRlObservation,
	    actual_action: FlatmateRlAction,
	    expected_action: FlatmateRlAction | None,
	) -> FlatmateRlObservation:
	    """Rewrite the step observation according to the flow classification.

	    The observation is returned untouched when there is no classification
	    or when the action is on the canonical path with no replacement
	    reward. Otherwise the step reward is replaced (and the running total
	    corrected by the difference), off-path categories are logged as
	    violations, a terminating category marks the episode failed, and a
	    new observation is validated from the amended payload.
	    """
	    adjustment = self._classify_flow_adjustment(observation, actual_action, expected_action)
	    if adjustment is None:
	        return observation

	    category, replacement_reward, terminate, detail = adjustment
	    on_path = category == "on_canonical_path"
	    if on_path and replacement_reward is None:
	        return observation
	    if not on_path:
	        self._record_violation(category)

	    payload = observation.model_dump()
	    previous_reward = float(payload.get("step_reward", 0.0))
	    reward_delta = 0.0
	    if replacement_reward is not None:
	        # The handler already added the old step reward to the total, so
	        # only the difference is applied here.
	        reward_delta = replacement_reward - previous_reward
	        self._total_reward += reward_delta
	        payload["step_reward"] = replacement_reward
	        payload["reward"] = replacement_reward

	    if terminate:
	        self._done = True
	        self._state.done = True
	        self._state.status = "failed"
	        payload["status"] = "failed"
	        payload["done"] = True

	    self._state.total_reward = self._total_reward
	    self._state.tool_trace = deepcopy(self._tool_trace)
	    payload["total_reward"] = self._total_reward
	    payload["violations"] = list(self._violations)
	    payload["message"] = (
	        f"{observation.message} {category}: {detail}.".strip()
	        if reward_delta
	        else observation.message
	    )

	    adjusted = FlatmateRlObservation.model_validate(payload)
	    self._last_observation = adjusted
	    return self._strict_eval_observation(adjusted) if self._strict_eval_mode else adjusted

	def _action_signature(self, action_type: str, content: str = "", tool_name: str = "", arguments: dict[str, Any] \| None = None) -> str:
	if action_type == "assistant_message":
	normalized_message = re.sub(r"\s+", " ", content.strip().lower())
	return f"assistant:{normalized_message}"
	normalized_args = json.dumps(arguments or {}, ensure_ascii=False, sort_keys=True)
	return f"tool:{tool_name}:{normalized_args}"

	def _apply_loop_penalty(self, signature: str, reward: float, message: str, status: str, done: bool) -> tuple[float, str, str, bool]:
	if signature == self._last_action_signature:
	self._repeated_action_streak += 1
	else:
	self._last_action_signature = signature
	self._repeated_action_streak = 1

	if self._repeated_action_streak < 3:
	return reward, message, status, done

	penalty = -0.5 * (self._repeated_action_streak - 2)
	self._record_violation("action_loop_detected")
	reward += penalty
	message = f"{message} Loop penalty applied for repeating the same action {self._repeated_action_streak} times."

	if self._repeated_action_streak >= 4:
	self._done = True
	self._state.done = True
	self._state.status = "failed"
	return reward, "Episode terminated due to repeated identical actions.", "failed", True

	return reward, message, status, done

	def _handle_assistant_message(self, message: str) -> FlatmateRlObservation:
	"""Process an assistant chat message and return the simulated user's reply as an observation.

	The message is logged to the shared history and to the transcript of the
	phase that was active on entry; the reply is logged to the transcript of
	the phase active afterwards. The reply comes from ``_buyer_response`` or
	``_seller_response``, except for the scripted listing-choice reply in the
	``task_visit_multi`` buyer phase. The loop penalty is applied before the
	reward is added to the running total.

	NOTE(review): this copy of the file has lost its indentation, so the exact
	nesting of the branches below (in particular whether ``selected_posts`` and
	``reward = 0.2`` sit under the ``listing_choices`` guard) could not be
	confirmed here - check against the repository.
	"""
	# Remember the phase on entry: the transcript choice for the assistant turn uses it.
	phase_before_message = self._state.phase
	self._history.append({"role": "assistant", "content": message})
	if phase_before_message == "seller":
	self._seller_history.append({"role": "assistant", "content": message})
	else:
	self._buyer_history.append({"role": "assistant", "content": message})
	lowered = message.lower()
	response = ""
	reward = 0.0

	if self._state.phase == "buyer":
	# Multi-listing task: a message that mentions "post_" and asks "which"/"choose" gets a fixed choice.
	if self._scenario["task_id"] == "task_visit_multi" and "post_" in lowered and ("which" in lowered or "choose" in lowered):
	response = "Let’s pursue post_031 and post_052 first."
	if "listing_choices" not in self._state.gathered_fields:
	self._state.gathered_fields.append("listing_choices")
	self._state.selected_posts = ["post_031", "post_052"]
	reward = 0.2
	else:
	response = self._buyer_response(message)
	else:
	response = self._seller_response(message)

	# Record the simulated reply; the phase is re-read here rather than reusing phase_before_message.
	self._last_user_message = response
	self._history.append({"role": "user", "content": response})
	if self._state.phase == "seller":
	self._seller_history.append({"role": "user", "content": response})
	else:
	self._buyer_history.append({"role": "user", "content": response})
	done = self._maybe_finish_from_message()
	status = "completed" if done else "user_response"
	# Repeating the same message verbatim (after whitespace/case normalisation) is penalised.
	reward, response_message, status, done = self._apply_loop_penalty(
	signature=self._action_signature("assistant_message", content=message),
	reward=reward,
	message="User responded.",
	status=status,
	done=done,
	)
	self._total_reward += reward
	return self._observation(
	status=status,
	message=response_message,
	current_user_request=response,
	last_tool_result={},
	reward=reward,
	done=done,
	)

	def _buyer_response(self, message: str) -> str:
	"""Return the simulated buyer's reply to an assistant message.

	Besides producing text, this mutates episode state: it may add entries to
	the gathered fields, record buyer slot confirmations and, in the waitlist
	scenario, fire the one-off cancellation notification.

	NOTE(review): this copy of the file has lost its indentation, so the exact
	nesting of the branches below (for example whether each
	``response_parts.append`` sits under its "not yet gathered" guard) could
	not be confirmed here - check against the repository.
	"""
	lowered = message.lower()
	profile = self._scenario["buyer_profile"]
	task_id = self._scenario["task_id"]

	# Hidden-flex scenario: a message mentioning "saturday" or "sunday" counts as offering alternatives.
	if task_id == "task_visit_single_hidden_flex":
	alternatives_offered = any(slot.lower() in lowered for slot in ["saturday", "sunday"])
	if alternatives_offered and "hidden_flex_revealed" not in self._state.gathered_fields:
	self._state.gathered_fields.append("hidden_flex_revealed")
	if alternatives_offered:
	if "sunday 5pm" in lowered:
	self._record_client_confirmation_for_slot("Sunday 5pm")
	return "I can make Sunday 5pm work, so I confirm Sunday 5pm."
	if "saturday 1pm" in lowered:
	self._record_client_confirmation_for_slot("Saturday 1pm")
	return "Saturday 1pm works for me too, so I confirm Saturday 1pm."

	# Scenario 2: waitlist — fire cancellation notification on first message after add_to_waitlist
	if task_id == "task_slot_cancellation_waitlist":
	if self._waitlist_active and not self._cancellation_fired:
	self._cancellation_fired = True
	freed_slot = self._waitlist_slot
	wl_post = self._waitlist_post_id
	# Make freed slot bookable in subsequent calls
	self._slots_checked[wl_post] = [freed_slot]
	post = self._posts.get(wl_post)
	if post and freed_slot in post.get("pre_booked_slots", []):
	post["pre_booked_slots"].remove(freed_slot)
	return (
	f"Thanks for adding me to the waitlist! "
	f"Oh — I just got a notification that {freed_slot} for {wl_post} has opened up due to a cancellation. "
	f"Can you please book that slot for me?"
	)

	# Scenario 3: multi-visit — return scripted post-visit feedback when agent asks
	if task_id == "task_multi_visit_preference_evolution":
	booked_ids = [v["post_id"] for v in self._state.booked_visits]
	if any(kw in lowered for kw in ["how was", "what did you think", "how did", "liked the flat", "after visiting"]):
	if len(booked_ids) == 1 and booked_ids[0] == "post_023":
	return "The area was really noisy — definitely not what I'm looking for. I need somewhere quieter."
	if len(booked_ids) == 2 and booked_ids[1] == "post_052":
	return "post_052 was nice and quiet, but there is no gym nearby, which is important to me."

	# Confirmation request: accept the first already-checked slot that is named in the message and fits the buyer.
	if "confirm" in lowered:
	for post_id, slots in self._slots_checked.items():
	for slot in slots:
	if slot.lower() in lowered and self._slot_fits_buyer(slot):
	self._client_confirmations[post_id] = slot
	return f"I confirm {slot}."

	# Collect every buyer field whose keywords (BUYER_FIELD_KEYWORDS) occur in the message.
	requested_fields = []
	for field in ["diet", "visit_availability", "occupation", "budget", "areas"]:
	if any(keyword in lowered for keyword in BUYER_FIELD_KEYWORDS[field]):
	requested_fields.append(field)
	if requested_fields:
	response_parts = []
	for field in requested_fields:
	if field == "diet":
	if "diet" not in self._state.gathered_fields:
	self._state.gathered_fields.append("diet")
	response_parts.append("I’m non-vegetarian")
	elif field == "visit_availability":
	if "visit_availability" not in self._state.gathered_fields:
	self._state.gathered_fields.append("visit_availability")
	# Hidden-flex scenario: until the flexibility is revealed only the Tuesday slot is volunteered.
	if self._scenario["task_id"] == "task_visit_single_hidden_flex" and "hidden_flex_revealed" not in self._state.gathered_fields:
	response_parts.append("right now, Tuesday after 6pm is the slot I had in mind")
	else:
	response_parts.append("my visit availability is " + " or ".join(profile["visit_availability"]))
	elif field == "occupation":
	if "occupation" not in self._state.gathered_fields:
	self._state.gathered_fields.append("occupation")
	response_parts.append(f"I work as a {profile['occupation']}")
	elif field == "budget":
	if "budget" not in self._state.gathered_fields:
	self._state.gathered_fields.append("budget")
	response_parts.append(f"my max budget is Rs. {profile['budget_max']}")
	elif field == "areas":
	if "areas" not in self._state.gathered_fields:
	self._state.gathered_fields.append("areas")
	response_parts.append("I prefer " + " or ".join(profile["areas"]))
	if response_parts:
	return ". ".join(response_parts) + "."

	# Nothing recognised: volunteer the first still-missing field when it is diet or visit availability.
	missing = self._remaining_fields()
	if missing:
	next_field = missing[0]
	if next_field == "diet":
	self._state.gathered_fields.append("diet")
	return "I’m non-vegetarian."
	if next_field == "visit_availability":
	self._state.gathered_fields.append("visit_availability")
	return "My visit availability is " + " or ".join(profile["visit_availability"]) + "."
	return "Please continue with suitable options."

	def _seller_response(self, message: str) -> str:
	"""Return the simulated seller's reply to an assistant message.

	Side effects: may add entries to the gathered fields and record a seller
	slot confirmation under the dynamic post id (falling back to
	``"post_dynamic_followup_1"``).

	NOTE(review): this copy of the file has lost its indentation, so the exact
	nesting of the branches below (for example whether each
	``response_parts.append`` sits under its "not yet gathered" guard) could
	not be confirmed here - check against the repository.
	"""
	profile = self._scenario["seller_profile"]
	if not profile:
	return "No seller profile is defined."
	lowered = message.lower()
	# Confirmation request: accept the first seller calendar slot named in the message.
	if "confirm" in lowered:
	for slot in profile["calendar_slots"]:
	if slot.lower() in lowered:
	self._seller_confirmations[self._dynamic_post_id or "post_dynamic_followup_1"] = slot
	return f"Confirmed, {slot} works from the seller side."
	# Collect every seller field whose keywords (SELLER_FIELD_KEYWORDS) occur in the message.
	requested_fields = []
	for field in ["dietary", "occupation_requirement", "area", "rent", "listing_type", "calendar_slots"]:
	if any(keyword in lowered for keyword in SELLER_FIELD_KEYWORDS[field]):
	requested_fields.append(field)
	if requested_fields:
	response_parts = []
	for field in requested_fields:
	if field == "dietary":
	if "dietary" not in self._state.gathered_fields:
	self._state.gathered_fields.append("dietary")
	response_parts.append(f"the household is {profile['dietary']}")
	elif field == "occupation_requirement":
	if "occupation_requirement" not in self._state.gathered_fields:
	self._state.gathered_fields.append("occupation_requirement")
	response_parts.append(f"it’s for {profile['occupation_requirement']}")
	elif field == "area":
	if "area" not in self._state.gathered_fields:
	self._state.gathered_fields.append("area")
	response_parts.append(f"the area is {profile['area']}")
	elif field == "rent":
	if "rent" not in self._state.gathered_fields:
	self._state.gathered_fields.append("rent")
	response_parts.append(f"the rent is Rs. {profile['rent']}")
	elif field == "listing_type":
	if "listing_type" not in self._state.gathered_fields:
	self._state.gathered_fields.append("listing_type")
	response_parts.append(f"it is a {profile['listing_type']}")
	elif field == "calendar_slots":
	if "calendar_slots" not in self._state.gathered_fields:
	self._state.gathered_fields.append("calendar_slots")
	response_parts.append("available slots are " + " or ".join(profile["calendar_slots"]))
	if response_parts:
	return ". ".join(response_parts) + "."
	# No field keyword matched: offer the free-text description if asked, else a generic acknowledgement.
	if "description" in lowered or "about" in lowered:
	return profile["description"] + "."
	return "Yes, those listing details are correct."

	def _slot_fits_buyer(self, slot: str) -> bool:
	visible_slots = list(self._scenario["buyer_profile"]["visit_availability"])
	task_id = self._scenario["task_id"]
	if task_id == "task_visit_single_hidden_flex" and "hidden_flex_revealed" in self._state.gathered_fields:
	visible_slots.extend(self._scenario["buyer_profile"]["hidden_additional_availability"])
	if task_id == "task_visit_single":
	if slot in {"today 7pm", "tomorrow 7pm", "Saturday 11am", "Saturday 4pm"}:
	return True
	if task_id == "task_visit_multi":
	if slot in {"tomorrow 7pm", "Saturday 4pm", "Saturday 11am", "Sunday 2pm", "Sunday 4pm", "Sunday 5pm"}:
	return True
	if task_id == "task_visit_single_seller_followup":
	return slot in {"Saturday 4pm", "Sunday 5pm"}
	if task_id == "task_multi_visit_preference_evolution":
	# Buyer is flexible — accepts any slot from the slots we've checked
	return True
	return self._matches_any_slot(slot, visible_slots)

	def _handle_tool_call(self, tool_name: str, arguments: dict[str, Any]) -> FlatmateRlObservation:
	    """Run a tool, log it to the trace, score it and return the observation.

	    A successful tool result earns 0.1 and a failed one -0.2 before the
	    loop penalty; the final reward is added to the running total.
	    """
	    result = self._execute_tool(tool_name, arguments)
	    self._tool_results.append(result)

	    succeeded = bool(result.get("success"))
	    outcome_message = result.get("message", "")
	    # The phase is read after execution, so a tool that switches phase is
	    # traced under the new phase.
	    trace_entry = {
	        "step": self._state.step_count,
	        "phase": self._state.phase,
	        "tool": tool_name,
	        "args": deepcopy(arguments),
	        "success": succeeded,
	        "message": outcome_message,
	    }
	    self._tool_trace.append(trace_entry)

	    finished = self._done
	    reward, step_message, status, finished = self._apply_loop_penalty(
	        signature=self._action_signature("tool_call", tool_name=tool_name, arguments=arguments),
	        reward=0.1 if succeeded else -0.2,
	        message=outcome_message,
	        status="completed" if finished else "tool_result",
	        done=finished,
	    )
	    self._total_reward += reward
	    return self._observation(
	        status=status,
	        message=step_message,
	        current_user_request=self._last_user_message,
	        last_tool_result=result,
	        reward=reward,
	        done=finished,
	    )

	def _execute_tool(self, tool_name: str, arguments: dict[str, Any]) -> dict[str, Any]:
	    """Validate a tool call against the current phase and dispatch it.

	    A tool outside the phase list fails with a ``tool_not_available``
	    violation, except that repeating an already-completed profile store
	    is reported as a success. Any other tool called before its phase's
	    profile has been stored fails with a ``*_required_before`` violation.
	    Valid calls go to the ``_tool_<name>`` method.
	    """
	    available = self._phase_tools()
	    phase = self._state.phase
	    in_buyer_phase = phase == "buyer"
	    in_seller_phase = phase == "seller"

	    if tool_name not in available:
	        # The store tools leave the phase list once used; calling them
	        # again is harmless and answered with a success.
	        if in_buyer_phase and tool_name == "store_user_details" and self._state.buyer_profile_stored:
	            return {"tool": tool_name, "success": True, "message": "Buyer profile already stored."}
	        if in_seller_phase and tool_name == "store_seller_details" and self._state.seller_profile_stored:
	            return {
	                "tool": tool_name,
	                "success": True,
	                "message": "Seller profile already stored.",
	                "post_id": self._dynamic_post_id,
	            }
	        self._record_violation(f"tool_not_available:{tool_name}")
	        return {"tool": tool_name, "success": False, "message": f"Tool {tool_name} is not available in phase {self._state.phase}."}

	    if in_buyer_phase and not self._state.buyer_profile_stored and tool_name != "store_user_details":
	        self._record_violation(f"store_user_details_required_before:{tool_name}")
	        return {"tool": tool_name, "success": False, "message": f"store_user_details must be called before {tool_name}."}

	    if in_seller_phase and not self._state.seller_profile_stored and tool_name != "store_seller_details":
	        self._record_violation(f"store_seller_details_required_before:{tool_name}")
	        return {"tool": tool_name, "success": False, "message": f"store_seller_details must be called before {tool_name}."}

	    return getattr(self, f"_tool_{tool_name}")(arguments)

	def _tool_store_user_details(self, arguments: dict[str, Any]) -> dict[str, Any]:
	del arguments
	missing = [field for field in ["budget", "diet", "areas", "occupation", "visit_availability"] if field not in self._state.gathered_fields]
	if missing:
	return {"tool": "store_user_details", "success": False, "message": f"Missing buyer fields: {', '.join(missing)}."}
	self._state.buyer_profile_stored = True
	return {"tool": "store_user_details", "success": True, "message": "Buyer profile stored."}

	def _tool_search_posts(self, arguments: dict[str, Any]) -> dict[str, Any]:
	del arguments
	self._searched = True
	results = []
	negotiable_results = []
	rejected_for_slots = []
	buyer = self._scenario["buyer_profile"]
	gathered = set(self._state.gathered_fields)
	task_id = self._scenario["task_id"]
	is_negotiation = bool(self._scenario.get("scenario_creation_config", {}).get("negotiation_config"))

	for post_id in self._available_post_ids:
	post = self._posts.get(post_id)
	if post is None:
	continue
	if post["rent"] > buyer["budget_max"]:
	if is_negotiation and post.get("negotiable"):
	negotiable_results.append(post_id)
	continue
	if post["area"] not in buyer["areas"]:
	continue
	if buyer["dietary"] == "non-veg" and post["diet"] == "veg only":
	continue
	# Multi-visit scenario: filter by discovered amenity preferences
	if task_id == "task_multi_visit_preference_evolution":
	amenities = post.get("amenities", {})
	if "quiet_area" in gathered and not amenities.get("quiet"):
	continue
	if "gym_nearby" in gathered and not amenities.get("gym_nearby"):
	continue
	if task_id == "task_visit_single_seller_followup":
	buyer_slots = set(buyer["visit_availability"])
	if not any(slot in buyer_slots for slot in post["calendar_slots"]):
	rejected_for_slots.append(post_id)
	continue
	results.append(post_id)

	if task_id == "task_visit_single_seller_followup" and not results:
	return {
	"tool": "search_posts",
	"success": True,
	"message": "Found 0 current posts compatible with the buyer's visit availability.",
	"post_ids": [],
	"rejected_for_slot_mismatch": rejected_for_slots,
	}
	if negotiable_results:
	return {
	"tool": "search_posts",
	"success": True,
	"message": (
	f"Found {len(results)} posts within budget and "
	f"{len(negotiable_results)} above budget but open to negotiation."
	),
	"post_ids": results,
	"negotiable_post_ids": negotiable_results,
	}
	return {"tool": "search_posts", "success": True, "message": f"Found {len(results)} matching posts.", "post_ids": results}

	def _tool_close_buyer_conversation(self, arguments: dict[str, Any]) -> dict[str, Any]:
	del arguments
	if self._scenario["task_id"] != "task_visit_single_seller_followup":
	return {
	"tool": "close_buyer_conversation",
	"success": False,
	"message": "Buyer conversation can only be closed this way in seller follow-up scenarios.",
	}
	if not self._searched:
	return {
	"tool": "close_buyer_conversation",
	"success": False,
	"message": "Search existing posts before closing the buyer conversation.",
	}

	buyer_closure = (
	"None of the current listings fit your weekend availability. "
	"I will follow up if a suitable listing comes in."
	)
	seller_message = self._scenario["seller_initial_message"]
	self._history.append({"role": "assistant", "content": buyer_closure})
	self._buyer_history.append({"role": "assistant", "content": buyer_closure})
	self._history.append({"role": "user", "content": seller_message})
	self._seller_history.append({"role": "user", "content": seller_message})
	self._last_user_message = seller_message
	self._state.phase = "seller"
	self._state.gathered_fields = ["area", "rent", "listing_type"]
	return {
	"tool": "close_buyer_conversation",
	"success": True,
	"message": "Buyer conversation closed; seller follow-up started.",
	}

	def _tool_match_location_preference(self, arguments: dict[str, Any]) -> dict[str, Any]:
	post_ids = list(arguments.get("post_ids", []))
	if not post_ids and self._state.phase == "seller" and self._dynamic_post_id:
	post_ids = [self._dynamic_post_id]
	buyer_areas = set(self._scenario["buyer_profile"]["areas"])
	matches = {}
	for post_id in post_ids:
	post = self._resolve_post(post_id)
	if not post:
	matches[post_id] = {"match": False, "reason": "unknown post"}
	continue
	matches[post_id] = {"match": post["area"] in buyer_areas}
	self._matched_posts[post_id] = matches[post_id]["match"]
	return {"tool": "match_location_preference", "success": True, "message": "Location matches evaluated.", "matches": matches}

	def _tool_get_commute_time(self, arguments: dict[str, Any]) -> dict[str, Any]:
	post_ids = list(arguments.get("post_ids", []))
	results = {}
	for post_id in post_ids:
	post = self._resolve_post(post_id)
	if not post:
	results[post_id] = None
	continue
	commute = post["commute_to_goregaon_mins"]
	self._commutes_checked[post_id] = commute
	results[post_id] = commute
	return {"tool": "get_commute_time", "success": True, "message": "Commute times fetched.", "commute_minutes": results}

	def _tool_check_calendar_slots(self, arguments: dict[str, Any]) -> dict[str, Any]:
	post_ids = list(arguments.get("post_ids", []))
	available_by_post: dict[str, list[str]] = {}
	pre_booked_by_post: dict[str, list[str]] = {}
	any_conflicts = False
	for post_id in post_ids:
	post = self._resolve_post(post_id)
	if not post:
	available_by_post[post_id] = []
	continue
	all_slots = list(post["calendar_slots"])
	pre_booked = list(post.get("pre_booked_slots", []))
	available = [s for s in all_slots if s not in pre_booked]
	self._slots_checked[post_id] = available
	available_by_post[post_id] = available
	if pre_booked:
	pre_booked_by_post[post_id] = pre_booked
	any_conflicts = True
	result: dict[str, Any] = {
	"tool": "check_calendar_slots",
	"success": True,
	"message": "Calendar slots fetched. Some slots are already booked by other buyers." if any_conflicts else "Calendar slots fetched.",
	"calendar_slots": available_by_post,
	}
	if any_conflicts:
	result["pre_booked_slots"] = pre_booked_by_post
	return result

	def _tool_shortlist(self, arguments: dict[str, Any]) -> dict[str, Any]:
	post_ids = list(arguments.get("post_ids", []))
	self._state.selected_posts = post_ids
	return {"tool": "shortlist", "success": True, "message": "Posts shortlisted.", "selected_posts": post_ids}

	def _buyer_profile_summary_for_seller(self) -> str:
	profile = self._scenario["buyer_profile"]
	return (
	f"buyer profile: budget up to Rs. {profile['budget_max']}; "
	f"dietary preference {profile['dietary']}; "
	f"preferred areas {', '.join(profile['areas'])}; "
	f"occupation {profile['occupation']}; "
	f"visit availability {', '.join(profile['visit_availability'])}"
	)

	def _tool_contact_poster(self, arguments: dict[str, Any]) -> dict[str, Any]:
	post_id = arguments.get("post_id", "")
	time_text = arguments.get("time_text", "")
	post = self._resolve_post(post_id)
	if not post:
	return {"tool": "contact_poster", "success": False, "message": f"Unknown post {post_id}."}
	slots = self._slots_checked.get(post_id, [])
	if not time_text or time_text not in slots:
	return {"tool": "contact_poster", "success": False, "message": "Time must come from check_calendar_slots."}
	self._seller_history.append(
	{
	"role": "assistant",
	"content": (
	f"Client selected {post_id}. Please review this {self._buyer_profile_summary_for_seller()}. "
	f"Can you confirm the buyer profile is acceptable and that we can visit at {time_text}?"
	),
	}
	)
	self._poster_confirmations[post_id] = time_text
	self._seller_profile_fit_confirmations[post_id] = True
	poster_message = f"Yes, confirmed. The buyer profile is acceptable and {time_text} works for the visit."
	self._seller_history.append({"role": "user", "content": poster_message})
	return {
	"tool": "contact_poster",
	"success": True,
	"message": f"Poster confirmed buyer profile fit and {time_text}.",
	"post_id": post_id,
	"time_text": time_text,
	"buyer_profile_shared": True,
	"seller_profile_fit_confirmed": True,
	}

	def _tool_book_viewing(self, arguments: dict[str, Any]) -> dict[str, Any]:
	post_id = arguments.get("post_id", "")
	time_text = arguments.get("time_text", "")
	if post_id not in self._poster_confirmations or self._poster_confirmations[post_id] != time_text:
	return {"tool": "book_viewing", "success": False, "message": "Poster has not explicitly confirmed this time."}
	if not self._seller_profile_fit_confirmations.get(post_id):
	return {"tool": "book_viewing", "success": False, "message": "Poster has not confirmed the buyer profile fit."}
	if post_id not in self._client_confirmations or self._client_confirmations[post_id] != time_text:
	return {"tool": "book_viewing", "success": False, "message": "Client has not explicitly confirmed this time."}
	if self._scenario["task_id"] == "task_visit_multi" and post_id not in self._state.selected_posts:
	return {"tool": "book_viewing", "success": False, "message": "Client has not chosen this listing."}
	if any(entry["time"] == time_text for entry in self._state.booked_visits):
	return {"tool": "book_viewing", "success": False, "message": "Visit time overlaps an existing booking."}
	self._state.booked_visits.append({"post_id": post_id, "time": time_text})
	# Fire post-arrival events for multi-visit scenario
	if self._scenario["task_id"] == "task_multi_visit_preference_evolution":
	self._apply_post_arrival_event(len(self._state.booked_visits))
	if len(self._state.booked_visits) >= self._scenario["ground_truth"]["required_bookings"]:
	self._done = True
	self._state.done = True
	self._state.status = "completed"
	return {"tool": "book_viewing", "success": True, "message": f"Viewing booked for {post_id} at {time_text}.", "booked_visits": deepcopy(self._state.booked_visits)}

	# ------------------------------------------------------------------ #
	# Scenario 1: Hidden-budget negotiation tools #
	# ------------------------------------------------------------------ #

	def _tool_propose_price_to_buyer(self, arguments: dict[str, Any]) -> dict[str, Any]:
	if self._scenario["task_id"] != "task_negotiation_hidden_budget":
	return {"tool": "propose_price_to_buyer", "success": False, "message": "Not applicable in this scenario."}
	post_id = str(arguments.get("post_id", ""))
	proposed_rent = int(arguments.get("proposed_rent", 0))
	config = self._scenario["scenario_creation_config"].get("negotiation_config", {})
	buyer_ceiling = config.get("buyer_ceiling", 0)
	self._negotiation_rounds_buyer += 1
	if proposed_rent <= buyer_ceiling:
	self._buyer_price_accepted = proposed_rent
	return {
	"tool": "propose_price_to_buyer",
	"success": True,
	"message": f"Buyer accepted Rs. {proposed_rent} for {post_id}.",
	"accepted": True,
	"proposed_rent": proposed_rent,
	}
	hint = " I could stretch a little, but not by much." if self._negotiation_rounds_buyer >= 2 else ""
	return {
	"tool": "propose_price_to_buyer",
	"success": True,
	"message": f"Buyer rejected Rs. {proposed_rent} — still too high.{hint}",
	"accepted": False,
	"proposed_rent": proposed_rent,
	}

	def _tool_propose_price_to_seller(self, arguments: dict[str, Any]) -> dict[str, Any]:
	if self._scenario["task_id"] != "task_negotiation_hidden_budget":
	return {"tool": "propose_price_to_seller", "success": False, "message": "Not applicable in this scenario."}
	post_id = str(arguments.get("post_id", ""))
	proposed_rent = int(arguments.get("proposed_rent", 0))
	config = self._scenario["scenario_creation_config"].get("negotiation_config", {})
	seller_floor = config.get("seller_floor", 0)
	self._negotiation_rounds_seller += 1
	self._seller_history.append(
	{
	"role": "assistant",
	"content": f"The buyer is interested in {post_id}. Would you accept Rs. {proposed_rent}?",
	}
	)
	if proposed_rent >= seller_floor:
	self._seller_price_accepted = proposed_rent
	self._seller_history.append({"role": "user", "content": f"Yes, I can accept Rs. {proposed_rent}."})
	return {
	"tool": "propose_price_to_seller",
	"success": True,
	"message": f"Seller accepted Rs. {proposed_rent} for {post_id}.",
	"accepted": True,
	"proposed_rent": proposed_rent,
	}
	hint = " Maybe a small discount is possible." if self._negotiation_rounds_seller >= 2 else ""
	self._seller_history.append({"role": "user", "content": f"I can't go as low as Rs. {proposed_rent}.{hint}"})
	return {
	"tool": "propose_price_to_seller",
	"success": True,
	"message": f"Seller rejected Rs. {proposed_rent} — can't go that low.{hint}",
	"accepted": False,
	"proposed_rent": proposed_rent,
	}

	def _tool_confirm_negotiated_deal(self, arguments: dict[str, Any]) -> dict[str, Any]:
	if self._scenario["task_id"] != "task_negotiation_hidden_budget":
	return {"tool": "confirm_negotiated_deal", "success": False, "message": "Not applicable in this scenario."}
	post_id = str(arguments.get("post_id", ""))
	agreed_rent = int(arguments.get("agreed_rent", 0))
	if self._buyer_price_accepted != agreed_rent:
	return {
	"tool": "confirm_negotiated_deal",
	"success": False,
	"message": f"Buyer has not yet accepted Rs. {agreed_rent}. Check buyer acceptance first.",
	}
	if self._seller_price_accepted != agreed_rent:
	return {
	"tool": "confirm_negotiated_deal",
	"success": False,
	"message": f"Seller has not yet accepted Rs. {agreed_rent}. Check seller acceptance first.",
	}
	self._negotiated_deal_closed = True
	self._state.booked_visits.append({"post_id": post_id, "time": "negotiated_deal", "agreed_rent": agreed_rent})
	self._done = True
	self._state.done = True
	self._state.status = "completed"
	return {
	"tool": "confirm_negotiated_deal",
	"success": True,
	"message": f"Deal confirmed for {post_id} at Rs. {agreed_rent}. Both buyer and seller have agreed.",
	"agreed_rent": agreed_rent,
	}

	# ------------------------------------------------------------------ #
	# Scenario 2: Slot cancellation waitlist tools #
	# ------------------------------------------------------------------ #

	def _tool_add_to_waitlist(self, arguments: dict[str, Any]) -> dict[str, Any]:
	if self._scenario["task_id"] != "task_slot_cancellation_waitlist":
	return {"tool": "add_to_waitlist", "success": False, "message": "Not applicable in this scenario."}
	post_id = str(arguments.get("post_id", ""))
	post = self._resolve_post(post_id)
	if not post:
	return {"tool": "add_to_waitlist", "success": False, "message": f"Unknown post {post_id}."}
	config = self._scenario["scenario_creation_config"].get("cancellation_event", {})
	self._waitlist_active = True
	self._waitlist_post_id = post_id
	self._waitlist_slot = config.get("freed_slot", "")
	return {
	"tool": "add_to_waitlist",
	"success": True,
	"message": f"Buyer added to waitlist for {post_id}. Will notify if a slot opens up.",
	"post_id": post_id,
	}

	def _tool_notify_buyer_slot_freed(self, arguments: dict[str, Any]) -> dict[str, Any]:
	if self._scenario["task_id"] != "task_slot_cancellation_waitlist":
	return {"tool": "notify_buyer_slot_freed", "success": False, "message": "Not applicable in this scenario."}
	post_id = str(arguments.get("post_id", ""))
	slot = str(arguments.get("slot", self._waitlist_slot))
	if not self._cancellation_fired:
	return {"tool": "notify_buyer_slot_freed", "success": False, "message": "No cancellation event has occurred yet for this post."}
	if post_id != self._waitlist_post_id or slot != self._waitlist_slot:
	return {"tool": "notify_buyer_slot_freed", "success": False, "message": f"Freed slot is {self._waitlist_slot} for {self._waitlist_post_id}, not {slot} for {post_id}."}
	# Buyer is considered to have confirmed this slot
	self._client_confirmations[post_id] = slot
	self._slots_checked[post_id] = [slot]
	return {
	"tool": "notify_buyer_slot_freed",
	"success": True,
	"message": f"Buyer notified and confirmed {slot} for {post_id}. Ready to book.",
	"post_id": post_id,
	"slot": slot,
	}

	# ------------------------------------------------------------------ #
	# Scenario 3: Multi-visit preference evolution tools #
	# ------------------------------------------------------------------ #

	def _tool_debrief_visit(self, arguments: dict[str, Any]) -> dict[str, Any]:
	    """Scan post-visit feedback for preferences the buyer had not stated.

	    Only applicable to ``task_multi_visit_preference_evolution``. The
	    ``user_feedback`` text is lower-cased and searched for noise-related
	    keywords (adds ``quiet_area``) and gym-related keywords (adds
	    ``gym_nearby``). A preference is appended to the gathered fields only
	    when it is not already there.

	    Returns:
	        A tool-result dict; ``discovered_preferences`` lists the preferences
	        recorded by this call and the message spells them out (or
	        "none new").
	    """
	    if self._scenario["task_id"] != "task_multi_visit_preference_evolution":
	        return {"tool": "debrief_visit", "success": False, "message": "Not applicable in this scenario."}
	    post_id = str(arguments.get("post_id", ""))
	    # Substring matching is done on the lower-cased feedback text.
	    user_feedback = str(arguments.get("user_feedback", "")).lower()
	    new_prefs: list[str] = []
	    # NOTE(review): new_prefs is assumed to collect only preferences that were
	    # not already gathered (consistent with the "none new" wording) — confirm.
	    if any(kw in user_feedback for kw in ["noisy", "noise", "loud"]):
	        if "quiet_area" not in self._state.gathered_fields:
	            self._state.gathered_fields.append("quiet_area")
	            new_prefs.append("quiet_area")
	    if any(kw in user_feedback for kw in ["gym", "fitness", "workout"]):
	        if "gym_nearby" not in self._state.gathered_fields:
	            self._state.gathered_fields.append("gym_nearby")
	            new_prefs.append("gym_nearby")
	    pref_str = ", ".join(new_prefs) if new_prefs else "none new"
	    return {
	        "tool": "debrief_visit",
	        "success": True,
	        "message": f"Visit to {post_id} debriefed. Discovered preferences: {pref_str}.",
	        "post_id": post_id,
	        "discovered_preferences": new_prefs,
	    }

	def _tool_filter_new_arrivals(self, arguments: dict[str, Any]) -> dict[str, Any]:
	if self._scenario["task_id"] != "task_multi_visit_preference_evolution":
	return {"tool": "filter_new_arrivals", "success": False, "message": "Not applicable in this scenario."}
	post_ids = list(arguments.get("post_ids", []))
	gathered = set(self._state.gathered_fields)
	buyer = self._scenario["buyer_profile"]
	buyer_areas = set(buyer["areas"])
	budget = buyer["budget_max"]
	relevant: list[str] = []
	irrelevant: list[str] = []
	for post_id in post_ids:
	post = self._posts.get(post_id)
	if not post:
	irrelevant.append(post_id)
	continue
	amenities = post.get("amenities", {})
	if post["area"] not in buyer_areas or post["rent"] > budget:
	irrelevant.append(post_id)
	continue
	if "quiet_area" in gathered and not amenities.get("quiet"):
	irrelevant.append(post_id)
	continue
	if "gym_nearby" in gathered and not amenities.get("gym_nearby"):
	irrelevant.append(post_id)
	continue
	relevant.append(post_id)
	return {
	"tool": "filter_new_arrivals",
	"success": True,
	"message": (
	f"Filtered {len(post_ids)} new listings: "
	f"{len(relevant)} relevant, {len(irrelevant)} irrelevant given current preferences."
	),
	"relevant_post_ids": relevant,
	"irrelevant_post_ids": irrelevant,
	}

	def _apply_post_arrival_event(self, visit_number: int) -> None:
	    """Inject new posts into the available pool after a visit milestone (Scenario 3).

	    Looks through the scenario's ``post_arrival_events`` for an event whose
	    ``after_visit`` equals ``visit_number`` and that has not fired yet, then
	    copies its posts from the ``POSTS`` catalogue into the episode and lists
	    them as available.
	    """
	    config = self._scenario.get("scenario_creation_config", {})
	    for event in config.get("post_arrival_events", []):
	        # The fired set is keyed by visit number, so each milestone is applied at most once.
	        if event["after_visit"] == visit_number and visit_number not in self._post_arrivals_fired:
	            self._post_arrivals_fired.add(visit_number)
	            for new_post_id in event["new_post_ids"]:
	                # Deep-copy so later changes to the episode's post do not touch the shared catalogue.
	                if new_post_id in POSTS and new_post_id not in self._posts:
	                    self._posts[new_post_id] = deepcopy(POSTS[new_post_id])
	                # NOTE(review): assumed to be a sibling check, so an id is listed as
	                # available even when it was not copied above — confirm.
	                if new_post_id not in self._available_post_ids:
	                    self._available_post_ids.append(new_post_id)

	def _tool_store_seller_details(self, arguments: dict[str, Any]) -> dict[str, Any]:
	del arguments
	missing = [field for field in ["area", "rent", "dietary", "listing_type", "occupation_requirement", "calendar_slots"] if field not in self._state.gathered_fields]
	if missing:
	return {"tool": "store_seller_details", "success": False, "message": f"Missing seller fields: {', '.join(missing)}."}
	self._state.seller_profile_stored = True
	self._dynamic_post_id = "post_dynamic_followup_1"
	seller = self._scenario["seller_profile"]
	self._posts[self._dynamic_post_id] = {
	"id": self._dynamic_post_id,
	"area": seller["area"],
	"rent": seller["rent"],
	"diet": seller["dietary"],
	"type": seller["listing_type"],
	"commute_to_goregaon_mins": seller["commute_to_goregaon_mins"],
	"constraints": list(seller["constraints"]),
	"calendar_slots": list(seller["calendar_slots"]),
	"description": seller["description"],
	}
	return {"tool": "store_seller_details", "success": True, "message": "Seller profile stored.", "post_id": self._dynamic_post_id}

	def _tool_check_table_slot_matches(self, arguments: dict[str, Any]) -> dict[str, Any]:
	post_ids = list(arguments.get("post_ids", []))
	if not post_ids and self._state.phase == "seller" and self._dynamic_post_id:
	post_ids = [self._dynamic_post_id]
	buyer_slots = set(self._scenario["buyer_profile"]["visit_availability"])
	matches = {}
	for post_id in post_ids:
	post = self._resolve_post(post_id)
	if not post:
	matches[post_id] = []
	continue
	overlap = [slot for slot in post["calendar_slots"] if slot in buyer_slots]
	matches[post_id] = overlap
	self._slots_checked[post_id] = list(post["calendar_slots"])
	return {"tool": "check_table_slot_matches", "success": True, "message": "Buyer-seller slot overlap checked.", "slot_matches": matches}

	def _infer_followup_post_and_time(self, arguments: dict[str, Any]) -> tuple[str, str]:
	    """Work out which post and time a seller follow-up tool call refers to.

	    The post id comes from ``arguments["post_id"]``, else the dynamic post
	    id, else ``"post_dynamic_followup_1"``. The time comes from
	    ``arguments["time_text"]``; when that is missing, a ``slot_matches``
	    dict and then a ``calendar_slots`` list in ``arguments`` are tried.

	    Returns:
	        ``(post_id, time_text)``; ``time_text`` is ``""`` when nothing
	        could be inferred.
	    """
	    post_id = str(arguments.get("post_id") or self._dynamic_post_id or "post_dynamic_followup_1")
	    time_text = str(arguments.get("time_text") or "")

	    if not time_text:
	        # Accept the result shape of check_table_slot_matches passed back as an argument.
	        slot_matches = arguments.get("slot_matches")
	        if isinstance(slot_matches, dict):
	            for key, value in slot_matches.items():
	                # An explicit post_id argument wins over the slot_matches key.
	                if not arguments.get("post_id"):
	                    post_id = str(key)
	                # NOTE(review): assumed to stop at the first entry with a non-empty
	                # slot list — confirm the intended position of the break.
	                if isinstance(value, list) and value:
	                    time_text = str(value[0])
	                    break
	    if not time_text:
	        # Last resort: the first entry of a plain calendar_slots list.
	        calendar_slots = arguments.get("calendar_slots")
	        if isinstance(calendar_slots, list) and calendar_slots:
	            time_text = str(calendar_slots[0])

	    return post_id, time_text

	def _tool_confirm_seller_match(self, arguments: dict[str, Any]) -> dict[str, Any]:
	post_id, time_text = self._infer_followup_post_and_time(arguments)
	post = self._resolve_post(post_id)
	if not post or time_text not in post["calendar_slots"]:
	return {"tool": "confirm_seller_match", "success": False, "message": "Selected seller slot is invalid."}
	self._seller_history.append({"role": "assistant", "content": f"Can we confirm {time_text} for {post_id}?"})
	self._seller_confirmations[post_id] = time_text
	self._seller_history.append({"role": "user", "content": f"Confirmed, {time_text} works from the seller side."})
	return {"tool": "confirm_seller_match", "success": True, "message": f"Seller confirmed {time_text}.", "post_id": post_id, "time_text": time_text}

	def _tool_offer_matched_listing_to_buyer(self, arguments: dict[str, Any]) -> dict[str, Any]:
	post_id, time_text = self._infer_followup_post_and_time(arguments)
	if self._seller_confirmations.get(post_id) != time_text:
	return {"tool": "offer_matched_listing_to_buyer", "success": False, "message": "Seller has not confirmed this slot yet."}
	self._buyer_offer_confirmations[post_id] = time_text
	return {"tool": "offer_matched_listing_to_buyer", "success": True, "message": f"Buyer confirmed {time_text} for {post_id}.", "post_id": post_id, "time_text": time_text}

	def _tool_schedule_table_visit(self, arguments: dict[str, Any]) -> dict[str, Any]:
	post_id, time_text = self._infer_followup_post_and_time(arguments)
	if self._seller_confirmations.get(post_id) != time_text:
	return {"tool": "schedule_table_visit", "success": False, "message": "Seller confirmation missing for this slot."}
	if self._buyer_offer_confirmations.get(post_id) != time_text:
	return {"tool": "schedule_table_visit", "success": False, "message": "Buyer confirmation missing for this slot."}
	self._state.booked_visits.append({"post_id": post_id, "time": time_text})
	self._done = True
	self._state.done = True
	self._state.status = "completed"
	return {"tool": "schedule_table_visit", "success": True, "message": f"Viewing booked for {post_id} at {time_text}.", "booked_visits": deepcopy(self._state.booked_visits)}

	def _resolve_post(self, post_id: str) -> dict[str, Any] \| None:
	return self._posts.get(post_id)

	def _maybe_finish_from_message(self) -> bool:
	if len(self._state.booked_visits) >= self._scenario["ground_truth"]["required_bookings"]:
	self._done = True
	self._state.done = True
	self._state.status = "completed"
	return True
	return False

	def _profile_stored(self) -> bool:
	return self._state.seller_profile_stored if self._state.phase == "seller" else self._state.buyer_profile_stored

	def _prerequisites_satisfied(self) -> dict[str, bool]:
	return {
	"details_stored": self._profile_stored(),
	"posts_searched": self._searched,
	"location_matched": any(self._matched_posts.values()),
	"slots_checked": bool(self._slots_checked),
	"buyer_confirmed": bool(self._client_confirmations or self._buyer_offer_confirmations),
	"poster_confirmed": bool(self._poster_confirmations or self._seller_confirmations),
	}

	def _tool_arguments_summary(self, arguments: dict[str, Any]) -> dict[str, Any]:
	summary: dict[str, Any] = {}
	for key, value in arguments.items():
	if isinstance(value, list):
	summary[key] = value if len(value) <= 3 else [*value[:3], f"... {len(value) - 3} more"]
	elif isinstance(value, dict):
	summary[key] = f"{len(value)} keys"
	else:
	summary[key] = value
	return summary

	def _recent_tool_calls(self) -> list[dict[str, Any]]:
	return [
	{
	"tool_name": trace.get("tool", ""),
	"tool_arguments_summary": self._tool_arguments_summary(dict(trace.get("args") or {})),
	"success": bool(trace.get("success")),
	}
	for trace in self._tool_trace[-5:]
	]

	def _sanitize_tool_result(self, result: dict[str, Any]) -> dict[str, Any]:
	sanitized = deepcopy(result)
	sanitized.pop("stored_profile", None)
	return sanitized

	def _feedback_summary(self, status: str, message: str, last_tool_result: dict[str, Any]) -> str:
	tool_name = str(last_tool_result.get("tool", ""))
	tool_message = str(last_tool_result.get("message", "")).strip()
	success = bool(last_tool_result.get("success"))

	if tool_name == "store_user_details" and "Missing buyer fields:" in tool_message:
	missing = tool_message.split("Missing buyer fields:", 1)[1].strip()
	return f"store_user_details failed: missing fields {missing}."
	if tool_name == "store_seller_details" and "Missing seller fields:" in tool_message:
	missing = tool_message.split("Missing seller fields:", 1)[1].strip()
	return f"store_seller_details failed: missing fields {missing}."
	if tool_name == "search_posts" and success and not last_tool_result.get("post_ids"):
	return "search_posts returned 0 results."
	if tool_name == "search_posts" and success:
	return f"search_posts returned {len(last_tool_result.get('post_ids', []))} result(s)."
	if tool_name == "store_seller_details" and success:
	post_id = str(last_tool_result.get("post_id", ""))
	return f"Seller profile stored{(' as ' + post_id) if post_id else ''}."
	if tool_name == "confirm_negotiated_deal" and success:
	return f"Deal confirmed at Rs. {last_tool_result.get('agreed_rent', '?')}."
	if tool_name == "add_to_waitlist" and success:
	return f"Buyer added to waitlist for {last_tool_result.get('post_id', '?')}."
	if tool_name == "notify_buyer_slot_freed" and success:
	return f"Buyer notified of freed slot {last_tool_result.get('slot', '?')} — ready to book."
	if tool_name == "debrief_visit" and success:
	prefs = last_tool_result.get("discovered_preferences", [])
	return f"debrief_visit succeeded. Discovered: {', '.join(prefs) if prefs else 'no new preferences'}."
	if tool_name == "filter_new_arrivals" and success:
	rel = last_tool_result.get("relevant_post_ids", [])
	return f"filter_new_arrivals: {len(rel)} relevant listing(s) found."
	if tool_name in {"match_location_preference", "check_table_slot_matches", "confirm_seller_match",
	"offer_matched_listing_to_buyer", "check_calendar_slots", "contact_poster",
	"propose_price_to_buyer", "propose_price_to_seller", "shortlist"} and success:
	return f"{tool_name} succeeded."
	if tool_name == "book_viewing" and success:
	return "Viewing booked."
	if "action_loop_detected" in self._violations:
	return "Loop detected: identical action repeated. Try a different action."
	if self._state.phase == "buyer" and not self._state.buyer_profile_stored:
	missing = self._remaining_fields()
	if missing:
	return f"Missing buyer fields: {', '.join(missing)}."
	if self._state.phase == "seller" and not self._state.seller_profile_stored:
	missing = self._remaining_fields()
	if missing:
	return f"Missing seller fields: {', '.join(missing)}."
	if message:
	return message
	if status == "ready":
	return "Scenario started."
	return ""

	def _strict_eval_observation(self, observation: FlatmateRlObservation) -> FlatmateRlObservation:
	    """Return a copy of ``observation`` with scenario metadata and progress fields blanked.

	    Scenario identity, gathered and remaining fields, violations, the tool
	    trace and both reward fields are reset to empty values, and every tool
	    result is passed through ``_sanitize_tool_result``. The result is
	    re-validated into a new observation object.
	    """
	    payload = observation.model_dump()
	    payload.update(
	        scenario_id="",
	        scenario_label="",
	        difficulty="",
	        gathered_fields=[],
	        remaining_required_fields=[],
	        violations=[],
	        tool_trace=[],
	        step_reward=0.0,
	        total_reward=0.0,
	    )
	    payload["last_tool_result"] = self._sanitize_tool_result(payload["last_tool_result"])
	    sanitized_results = []
	    for tool_result in payload["tool_results"]:
	        sanitized_results.append(self._sanitize_tool_result(tool_result))
	    payload["tool_results"] = sanitized_results
	    return FlatmateRlObservation.model_validate(payload)

	def _observation(
	    self,
	    *,
	    status: str,
	    message: str,
	    current_user_request: str,
	    last_tool_result: dict[str, Any],
	    reward: float,
	    done: bool,
	) -> FlatmateRlObservation:
	    """Build the observation for the current step and remember it.

	    Syncs status, tool trace and total reward onto ``self._state``, then
	    assembles a ``FlatmateRlObservation`` from the episode's state. Mutable
	    structures are deep-copied or re-listed so the observation does not
	    share them with the episode.

	    Args:
	        status: Status string stored on the state and in the observation.
	        message: Message for this step.
	        current_user_request: The request the agent should address.
	        last_tool_result: Result dict of the most recent tool call.
	        reward: Reward for this step; emitted as both ``step_reward`` and ``reward``.
	        done: Whether the episode has ended.

	    Returns:
	        The full observation, or its blanked variant when strict evaluation
	        mode is on. ``self._last_observation`` always keeps the full one.
	    """
	    # Bring the state object up to date before anything is read back from it.
	    self._state.status = status
	    self._state.tool_trace = deepcopy(self._tool_trace)
	    self._state.total_reward = self._total_reward
	    observation = FlatmateRlObservation(
	        status=status,
	        scenario_id=self._scenario["task_id"],
	        scenario_label=self._scenario["label"],
	        difficulty=self._scenario["difficulty"],
	        phase=self._state.phase,
	        current_user_request=current_user_request,
	        last_user_message=self._last_user_message,
	        conversation_history=deepcopy(self._history),
	        buyer_conversation_history=deepcopy(self._buyer_history),
	        seller_conversation_history=deepcopy(self._seller_history),
	        last_tool_result=deepcopy(last_tool_result),
	        tool_results=deepcopy(self._tool_results),
	        tool_trace=deepcopy(self._tool_trace),
	        available_tools=self._phase_tools(),
	        prerequisites_satisfied=self._prerequisites_satisfied(),
	        recent_tool_calls=self._recent_tool_calls(),
	        gathered_fields=list(self._state.gathered_fields),
	        remaining_required_fields=self._remaining_fields(),
	        selected_posts=list(self._state.selected_posts),
	        booked_visits=deepcopy(self._state.booked_visits),
	        profile_stored=self._profile_stored(),
	        buyer_profile_stored=self._state.buyer_profile_stored,
	        seller_profile_stored=self._state.seller_profile_stored,
	        violations=list(self._violations),
	        step_reward=reward,
	        total_reward=self._total_reward,
	        message=message,
	        feedback_summary=self._feedback_summary(status, message, last_tool_result),
	        reward=reward,
	        done=done,
	    )
	    # The unredacted observation is kept even when the caller gets the strict view.
	    self._last_observation = observation
	    if self._strict_eval_mode:
	        return self._strict_eval_observation(observation)
	    return observation