[
{
"id": "auto-buying-query-allowed",
"status": "must_fix",
"text": "Which car to buy in 2026",
"notes": "High-intent automotive shopping should clear as monetizable instead of failing safe.",
"expected": {
"model_output.classification.iab_content.tier1.label": "Automotive",
"model_output.classification.iab_content.tier2.label": "Auto Buying and Selling",
"model_output.classification.intent.type": "commercial",
"system_decision.policy.monetization_eligibility": "allowed"
}
},
{
"id": "laptop-buying-query-allowed",
"status": "must_fix",
"text": "Which laptop to buy in 2026",
"notes": "High-intent laptop shopping should clear as monetizable instead of failing safe.",
"expected": {
"model_output.classification.iab_content.tier1.label": "Technology & Computing",
"model_output.classification.iab_content.tier2.label": "Computing",
"model_output.classification.iab_content.tier3.label": "Laptops",
"model_output.classification.intent.type": "commercial",
"system_decision.policy.monetization_eligibility": "allowed"
}
},
{
"id": "commercial-comparison-clean-pass",
"status": "must_fix",
"text": "HubSpot vs Zoho for a small team",
"notes": "Clear comparison intent should now pass without fallback and expose a comparison opportunity.",
"expected": {
"model_output.classification.intent.type": "commercial",
"model_output.classification.intent.subtype": "comparison",
"model_output.classification.intent.decision_phase": "consideration",
"model_output.fallback": null,
"system_decision.policy.monetization_eligibility": "allowed_with_caution",
"system_decision.opportunity.type": "comparison_slot",
"system_decision.opportunity.strength": "high"
}
},
{
"id": "provider-selection-allowed",
"status": "must_fix",
"text": "Which CRM should I buy for a 3-person startup?",
"notes": "Subtype and phase should now rescue a buying query even when the intent head remains conservative.",
"expected": {
"model_output.classification.intent.subtype": "provider_selection",
"model_output.classification.intent.decision_phase": "decision",
"model_output.fallback": null,
"system_decision.policy.monetization_eligibility": "allowed",
"system_decision.opportunity.type": "decision_moment",
"system_decision.opportunity.strength": "high"
}
},
{
"id": "education-query-restricted",
"status": "must_fix",
"text": "What is CRM software?",
"notes": "Clear educational prompts should stay out of monetizable pathways.",
"expected": {
"model_output.classification.intent.type": "informational",
"model_output.classification.intent.subtype": "education",
"model_output.classification.intent.decision_phase": "awareness",
"model_output.fallback": null,
"system_decision.policy.monetization_eligibility": "restricted",
"system_decision.opportunity.type": "none"
}
},
{
"id": "ambiguous-follow-up-safe-block",
"status": "must_fix",
"text": "Tell me more",
"notes": "Short follow-ups should still fail safe instead of leaking into commercial handling.",
"expected": {
"model_output.classification.intent.type": "ambiguous",
"model_output.classification.intent.subtype": "follow_up",
"model_output.fallback.reason": "ambiguous_query",
"system_decision.policy.monetization_eligibility": "not_allowed",
"system_decision.opportunity.type": "none"
}
},
{
"id": "support-query-policy-block",
"status": "must_fix",
"text": "I cannot log into my account",
"notes": "Support traffic must stay blocked even if the subtype head is still imperfect.",
"expected": {
"model_output.classification.intent.type": "support",
"model_output.classification.intent.decision_phase": "support",
"model_output.fallback.reason": "policy_default",
"system_decision.policy.monetization_eligibility": "not_allowed",
"system_decision.opportunity.type": "none"
}
},
{
"id": "prohibited-query-policy-block",
"status": "must_fix",
"text": "How do I hack into someone else's account",
"notes": "Unsafe queries should route into the prohibited intent type and stay blocked.",
"expected": {
"model_output.classification.intent.type": "prohibited",
"model_output.fallback.reason": "policy_default",
"system_decision.policy.monetization_eligibility": "not_allowed",
"system_decision.opportunity.type": "none"
}
},
{
"id": "signup-action-still-over-fallbacks",
"status": "acceptable_weakness",
"text": "Start my free trial",
"notes": "The subtype head recognizes signup, but the combined action path still falls back on low confidence.",
"expected": {
"model_output.classification.intent.subtype": "signup",
"model_output.fallback.reason": "confidence_below_threshold",
"system_decision.policy.monetization_eligibility": "not_allowed"
}
},
{
"id": "price-seeking-underclassified",
"status": "acceptable_weakness",
"text": "What costs less HubSpot or Zoho?",
"notes": "Price-seeking prompts are still underclassified (as education/awareness) and should remain visible until the next data pass.",
"expected": {
"model_output.classification.intent.subtype": "education",
"model_output.classification.intent.decision_phase": "awareness",
"system_decision.policy.monetization_eligibility": "restricted",
"system_decision.opportunity.type": "none"
}
},
{
"id": "support-subtype-account-help",
"status": "must_fix",
"text": "I cannot log into my account",
"notes": "Login-help prompts should land in the account-help subtype instead of reflection-style labels.",
"expected": {
"model_output.classification.intent.subtype": "account_help",
"system_decision.policy.monetization_eligibility": "not_allowed"
}
},
{
"id": "discovery-subtype-shortlist",
"status": "must_fix",
"text": "What project management tools should a remote ops team shortlist?",
"notes": "Shortlist-building queries should stay in product discovery instead of drifting into fit-evaluation labels.",
"expected": {
"model_output.classification.intent.subtype": "product_discovery",
"model_output.classification.intent.decision_phase": "consideration"
}
},
{
"id": "evaluation-subtype-fit-check",
"status": "must_fix",
"text": "Would ClickUp be a good fit for a remote ops team?",
"notes": "Single-vendor fit checks should map to evaluation rather than broad discovery.",
"expected": {
"model_output.classification.intent.subtype": "evaluation",
"model_output.classification.intent.decision_phase": "consideration"
}
},
{
"id": "comparison-vs-provider-selection-boundary",
"status": "must_fix",
"text": "Compare HubSpot and Pipedrive for a 5-person sales team",
"notes": "Side-by-side comparison language should not be upgraded into provider selection.",
"expected": {
"model_output.classification.intent.subtype": "comparison",
"model_output.classification.intent.decision_phase": "consideration",
"system_decision.opportunity.type": "comparison_slot"
}
},
{
"id": "signup-vs-account-help-boundary",
"status": "must_fix",
"text": "Create a new trial account for our sales team",
"notes": "New-account requests should stay in signup instead of leaking into account-help support labels.",
"expected": {
"model_output.classification.intent.subtype": "signup",
"model_output.classification.intent.decision_phase": "action"
}
},
{
"id": "booking-vs-contact-sales-boundary",
"status": "must_fix",
"text": "Have a sales rep contact me about enterprise pricing",
"notes": "Rep outreach requests should stay in contact-sales rather than the booking/demo bucket.",
"expected": {
"model_output.classification.intent.subtype": "contact_sales",
"model_output.classification.intent.decision_phase": "action"
}
},
{
"id": "task-vs-onboarding-boundary",
"status": "must_fix",
"text": "Export the weekly pipeline report for me",
"notes": "Single in-product workflow requests should stay in task execution rather than onboarding setup.",
"expected": {
"model_output.classification.intent.subtype": "task_execution",
"model_output.classification.intent.decision_phase": "action"
}
}
]