agentic-intent-classifier / artifacts /evaluation /intent_threshold_sweep.json
manikumargouni's picture
Upload folder using huggingface_hub
0584798 verified
{
"applied_threshold": 0.4,
"recommended_threshold": 0.4,
"results": [
{
"combined": {
"bad_allow_rate_on_safe_prompts": 0.0,
"benchmark_fallback_rate": 0.6,
"benchmark_intent_only_fallback_rate": 0.0,
"benchmark_phase_only_fallback_rate": 0.5333,
"fallback_responsibility": {
"both": 0,
"fallback_rate": 0.6,
"intent_only": 0,
"intent_share_of_threshold_fallbacks": 0.0,
"phase_only": 5,
"phase_share_of_threshold_fallbacks": 1.0,
"policy_safe": 1
},
"false_fallback_rate_on_obvious_prompts": 0.3333,
"obvious_prompt_count": 6,
"safe_prompt_count": 4,
"suite_outputs": [
{
"decision_phase": "awareness",
"expected_outcome": "pass",
"failed_components": [],
"fallback_applied": false,
"input": "What is CRM software?",
"intent_confidence": 0.8488,
"intent_type": "informational",
"phase_confidence": 0.4134
},
{
"decision_phase": "research",
"expected_outcome": "pass",
"failed_components": [],
"fallback_applied": false,
"input": "Help me understand CRM basics",
"intent_confidence": 0.4226,
"intent_type": "informational",
"phase_confidence": 0.2602
},
{
"decision_phase": "consideration",
"expected_outcome": "pass",
"failed_components": [],
"fallback_applied": false,
"input": "HubSpot vs Zoho for a small team",
"intent_confidence": 0.6819,
"intent_type": "commercial",
"phase_confidence": 0.3331
},
{
"decision_phase": "decision",
"expected_outcome": "pass",
"failed_components": [],
"fallback_applied": false,
"input": "Which CRM should I buy for a 3-person startup?",
"intent_confidence": 0.7691,
"intent_type": "informational",
"phase_confidence": 0.2879
},
{
"decision_phase": "action",
"expected_outcome": "pass",
"failed_components": [
"decision_phase"
],
"fallback_applied": true,
"input": "Start my free trial",
"intent_confidence": 0.5429,
"intent_type": "transactional",
"phase_confidence": 0.1875
},
{
"decision_phase": "action",
"expected_outcome": "pass",
"failed_components": [
"decision_phase"
],
"fallback_applied": true,
"input": "Book a table for 2 tonight",
"intent_confidence": 0.5284,
"intent_type": "transactional",
"phase_confidence": 0.1716
},
{
"decision_phase": "support",
"expected_outcome": "fallback",
"failed_components": [],
"fallback_applied": true,
"input": "I cannot log into my account",
"intent_confidence": 0.3314,
"intent_type": "personal_reflection",
"phase_confidence": 0.275
},
{
"decision_phase": "action",
"expected_outcome": "fallback",
"failed_components": [
"decision_phase"
],
"fallback_applied": true,
"input": "go deeper",
"intent_confidence": 0.6527,
"intent_type": "ambiguous",
"phase_confidence": 0.1638
},
{
"decision_phase": "research",
"expected_outcome": "fallback",
"failed_components": [
"decision_phase"
],
"fallback_applied": true,
"input": "say more about that",
"intent_confidence": 0.6538,
"intent_type": "ambiguous",
"phase_confidence": 0.1674
},
{
"decision_phase": "awareness",
"expected_outcome": "fallback",
"failed_components": [
"decision_phase"
],
"fallback_applied": true,
"input": "what do you mean by that",
"intent_confidence": 0.4595,
"intent_type": "ambiguous",
"phase_confidence": 0.1718
}
],
"suite_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/examples/intent_threshold_sweep_suite.json"
},
"head": {
"ambiguous_bad_allow_rate": 0.1842,
"ambiguous_prompt_count": 38,
"obvious_false_fallback_rate": 0.0104,
"obvious_prompt_count": 96,
"safe_predicate_rate": 0.2917
},
"threshold": 0.0
},
{
"combined": {
"bad_allow_rate_on_safe_prompts": 0.0,
"benchmark_fallback_rate": 0.6,
"benchmark_intent_only_fallback_rate": 0.0,
"benchmark_phase_only_fallback_rate": 0.5333,
"fallback_responsibility": {
"both": 0,
"fallback_rate": 0.6,
"intent_only": 0,
"intent_share_of_threshold_fallbacks": 0.0,
"phase_only": 5,
"phase_share_of_threshold_fallbacks": 1.0,
"policy_safe": 1
},
"false_fallback_rate_on_obvious_prompts": 0.3333,
"obvious_prompt_count": 6,
"safe_prompt_count": 4,
"suite_outputs": [
{
"decision_phase": "awareness",
"expected_outcome": "pass",
"failed_components": [],
"fallback_applied": false,
"input": "What is CRM software?",
"intent_confidence": 0.8488,
"intent_type": "informational",
"phase_confidence": 0.4134
},
{
"decision_phase": "research",
"expected_outcome": "pass",
"failed_components": [],
"fallback_applied": false,
"input": "Help me understand CRM basics",
"intent_confidence": 0.4226,
"intent_type": "informational",
"phase_confidence": 0.2602
},
{
"decision_phase": "consideration",
"expected_outcome": "pass",
"failed_components": [],
"fallback_applied": false,
"input": "HubSpot vs Zoho for a small team",
"intent_confidence": 0.6819,
"intent_type": "commercial",
"phase_confidence": 0.3331
},
{
"decision_phase": "decision",
"expected_outcome": "pass",
"failed_components": [],
"fallback_applied": false,
"input": "Which CRM should I buy for a 3-person startup?",
"intent_confidence": 0.7691,
"intent_type": "informational",
"phase_confidence": 0.2879
},
{
"decision_phase": "action",
"expected_outcome": "pass",
"failed_components": [
"decision_phase"
],
"fallback_applied": true,
"input": "Start my free trial",
"intent_confidence": 0.5429,
"intent_type": "transactional",
"phase_confidence": 0.1875
},
{
"decision_phase": "action",
"expected_outcome": "pass",
"failed_components": [
"decision_phase"
],
"fallback_applied": true,
"input": "Book a table for 2 tonight",
"intent_confidence": 0.5284,
"intent_type": "transactional",
"phase_confidence": 0.1716
},
{
"decision_phase": "support",
"expected_outcome": "fallback",
"failed_components": [],
"fallback_applied": true,
"input": "I cannot log into my account",
"intent_confidence": 0.3314,
"intent_type": "personal_reflection",
"phase_confidence": 0.275
},
{
"decision_phase": "action",
"expected_outcome": "fallback",
"failed_components": [
"decision_phase"
],
"fallback_applied": true,
"input": "go deeper",
"intent_confidence": 0.6527,
"intent_type": "ambiguous",
"phase_confidence": 0.1638
},
{
"decision_phase": "research",
"expected_outcome": "fallback",
"failed_components": [
"decision_phase"
],
"fallback_applied": true,
"input": "say more about that",
"intent_confidence": 0.6538,
"intent_type": "ambiguous",
"phase_confidence": 0.1674
},
{
"decision_phase": "awareness",
"expected_outcome": "fallback",
"failed_components": [
"decision_phase"
],
"fallback_applied": true,
"input": "what do you mean by that",
"intent_confidence": 0.4595,
"intent_type": "ambiguous",
"phase_confidence": 0.1718
}
],
"suite_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/examples/intent_threshold_sweep_suite.json"
},
"head": {
"ambiguous_bad_allow_rate": 0.1842,
"ambiguous_prompt_count": 38,
"obvious_false_fallback_rate": 0.0104,
"obvious_prompt_count": 96,
"safe_predicate_rate": 0.2917
},
"threshold": 0.1
},
{
"combined": {
"bad_allow_rate_on_safe_prompts": 0.0,
"benchmark_fallback_rate": 0.6,
"benchmark_intent_only_fallback_rate": 0.0,
"benchmark_phase_only_fallback_rate": 0.5333,
"fallback_responsibility": {
"both": 0,
"fallback_rate": 0.6,
"intent_only": 0,
"intent_share_of_threshold_fallbacks": 0.0,
"phase_only": 5,
"phase_share_of_threshold_fallbacks": 1.0,
"policy_safe": 1
},
"false_fallback_rate_on_obvious_prompts": 0.3333,
"obvious_prompt_count": 6,
"safe_prompt_count": 4,
"suite_outputs": [
{
"decision_phase": "awareness",
"expected_outcome": "pass",
"failed_components": [],
"fallback_applied": false,
"input": "What is CRM software?",
"intent_confidence": 0.8488,
"intent_type": "informational",
"phase_confidence": 0.4134
},
{
"decision_phase": "research",
"expected_outcome": "pass",
"failed_components": [],
"fallback_applied": false,
"input": "Help me understand CRM basics",
"intent_confidence": 0.4226,
"intent_type": "informational",
"phase_confidence": 0.2602
},
{
"decision_phase": "consideration",
"expected_outcome": "pass",
"failed_components": [],
"fallback_applied": false,
"input": "HubSpot vs Zoho for a small team",
"intent_confidence": 0.6819,
"intent_type": "commercial",
"phase_confidence": 0.3331
},
{
"decision_phase": "decision",
"expected_outcome": "pass",
"failed_components": [],
"fallback_applied": false,
"input": "Which CRM should I buy for a 3-person startup?",
"intent_confidence": 0.7691,
"intent_type": "informational",
"phase_confidence": 0.2879
},
{
"decision_phase": "action",
"expected_outcome": "pass",
"failed_components": [
"decision_phase"
],
"fallback_applied": true,
"input": "Start my free trial",
"intent_confidence": 0.5429,
"intent_type": "transactional",
"phase_confidence": 0.1875
},
{
"decision_phase": "action",
"expected_outcome": "pass",
"failed_components": [
"decision_phase"
],
"fallback_applied": true,
"input": "Book a table for 2 tonight",
"intent_confidence": 0.5284,
"intent_type": "transactional",
"phase_confidence": 0.1716
},
{
"decision_phase": "support",
"expected_outcome": "fallback",
"failed_components": [],
"fallback_applied": true,
"input": "I cannot log into my account",
"intent_confidence": 0.3314,
"intent_type": "personal_reflection",
"phase_confidence": 0.275
},
{
"decision_phase": "action",
"expected_outcome": "fallback",
"failed_components": [
"decision_phase"
],
"fallback_applied": true,
"input": "go deeper",
"intent_confidence": 0.6527,
"intent_type": "ambiguous",
"phase_confidence": 0.1638
},
{
"decision_phase": "research",
"expected_outcome": "fallback",
"failed_components": [
"decision_phase"
],
"fallback_applied": true,
"input": "say more about that",
"intent_confidence": 0.6538,
"intent_type": "ambiguous",
"phase_confidence": 0.1674
},
{
"decision_phase": "awareness",
"expected_outcome": "fallback",
"failed_components": [
"decision_phase"
],
"fallback_applied": true,
"input": "what do you mean by that",
"intent_confidence": 0.4595,
"intent_type": "ambiguous",
"phase_confidence": 0.1718
}
],
"suite_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/examples/intent_threshold_sweep_suite.json"
},
"head": {
"ambiguous_bad_allow_rate": 0.1842,
"ambiguous_prompt_count": 38,
"obvious_false_fallback_rate": 0.0104,
"obvious_prompt_count": 96,
"safe_predicate_rate": 0.2917
},
"threshold": 0.15
},
{
"combined": {
"bad_allow_rate_on_safe_prompts": 0.0,
"benchmark_fallback_rate": 0.6,
"benchmark_intent_only_fallback_rate": 0.0,
"benchmark_phase_only_fallback_rate": 0.5333,
"fallback_responsibility": {
"both": 0,
"fallback_rate": 0.6,
"intent_only": 0,
"intent_share_of_threshold_fallbacks": 0.0,
"phase_only": 5,
"phase_share_of_threshold_fallbacks": 1.0,
"policy_safe": 1
},
"false_fallback_rate_on_obvious_prompts": 0.3333,
"obvious_prompt_count": 6,
"safe_prompt_count": 4,
"suite_outputs": [
{
"decision_phase": "awareness",
"expected_outcome": "pass",
"failed_components": [],
"fallback_applied": false,
"input": "What is CRM software?",
"intent_confidence": 0.8488,
"intent_type": "informational",
"phase_confidence": 0.4134
},
{
"decision_phase": "research",
"expected_outcome": "pass",
"failed_components": [],
"fallback_applied": false,
"input": "Help me understand CRM basics",
"intent_confidence": 0.4226,
"intent_type": "informational",
"phase_confidence": 0.2602
},
{
"decision_phase": "consideration",
"expected_outcome": "pass",
"failed_components": [],
"fallback_applied": false,
"input": "HubSpot vs Zoho for a small team",
"intent_confidence": 0.6819,
"intent_type": "commercial",
"phase_confidence": 0.3331
},
{
"decision_phase": "decision",
"expected_outcome": "pass",
"failed_components": [],
"fallback_applied": false,
"input": "Which CRM should I buy for a 3-person startup?",
"intent_confidence": 0.7691,
"intent_type": "informational",
"phase_confidence": 0.2879
},
{
"decision_phase": "action",
"expected_outcome": "pass",
"failed_components": [
"decision_phase"
],
"fallback_applied": true,
"input": "Start my free trial",
"intent_confidence": 0.5429,
"intent_type": "transactional",
"phase_confidence": 0.1875
},
{
"decision_phase": "action",
"expected_outcome": "pass",
"failed_components": [
"decision_phase"
],
"fallback_applied": true,
"input": "Book a table for 2 tonight",
"intent_confidence": 0.5284,
"intent_type": "transactional",
"phase_confidence": 0.1716
},
{
"decision_phase": "support",
"expected_outcome": "fallback",
"failed_components": [],
"fallback_applied": true,
"input": "I cannot log into my account",
"intent_confidence": 0.3314,
"intent_type": "personal_reflection",
"phase_confidence": 0.275
},
{
"decision_phase": "action",
"expected_outcome": "fallback",
"failed_components": [
"decision_phase"
],
"fallback_applied": true,
"input": "go deeper",
"intent_confidence": 0.6527,
"intent_type": "ambiguous",
"phase_confidence": 0.1638
},
{
"decision_phase": "research",
"expected_outcome": "fallback",
"failed_components": [
"decision_phase"
],
"fallback_applied": true,
"input": "say more about that",
"intent_confidence": 0.6538,
"intent_type": "ambiguous",
"phase_confidence": 0.1674
},
{
"decision_phase": "awareness",
"expected_outcome": "fallback",
"failed_components": [
"decision_phase"
],
"fallback_applied": true,
"input": "what do you mean by that",
"intent_confidence": 0.4595,
"intent_type": "ambiguous",
"phase_confidence": 0.1718
}
],
"suite_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/examples/intent_threshold_sweep_suite.json"
},
"head": {
"ambiguous_bad_allow_rate": 0.1842,
"ambiguous_prompt_count": 38,
"obvious_false_fallback_rate": 0.0104,
"obvious_prompt_count": 96,
"safe_predicate_rate": 0.2917
},
"threshold": 0.2
},
{
"combined": {
"bad_allow_rate_on_safe_prompts": 0.0,
"benchmark_fallback_rate": 0.6,
"benchmark_intent_only_fallback_rate": 0.0,
"benchmark_phase_only_fallback_rate": 0.5333,
"fallback_responsibility": {
"both": 0,
"fallback_rate": 0.6,
"intent_only": 0,
"intent_share_of_threshold_fallbacks": 0.0,
"phase_only": 5,
"phase_share_of_threshold_fallbacks": 1.0,
"policy_safe": 1
},
"false_fallback_rate_on_obvious_prompts": 0.3333,
"obvious_prompt_count": 6,
"safe_prompt_count": 4,
"suite_outputs": [
{
"decision_phase": "awareness",
"expected_outcome": "pass",
"failed_components": [],
"fallback_applied": false,
"input": "What is CRM software?",
"intent_confidence": 0.8488,
"intent_type": "informational",
"phase_confidence": 0.4134
},
{
"decision_phase": "research",
"expected_outcome": "pass",
"failed_components": [],
"fallback_applied": false,
"input": "Help me understand CRM basics",
"intent_confidence": 0.4226,
"intent_type": "informational",
"phase_confidence": 0.2602
},
{
"decision_phase": "consideration",
"expected_outcome": "pass",
"failed_components": [],
"fallback_applied": false,
"input": "HubSpot vs Zoho for a small team",
"intent_confidence": 0.6819,
"intent_type": "commercial",
"phase_confidence": 0.3331
},
{
"decision_phase": "decision",
"expected_outcome": "pass",
"failed_components": [],
"fallback_applied": false,
"input": "Which CRM should I buy for a 3-person startup?",
"intent_confidence": 0.7691,
"intent_type": "informational",
"phase_confidence": 0.2879
},
{
"decision_phase": "action",
"expected_outcome": "pass",
"failed_components": [
"decision_phase"
],
"fallback_applied": true,
"input": "Start my free trial",
"intent_confidence": 0.5429,
"intent_type": "transactional",
"phase_confidence": 0.1875
},
{
"decision_phase": "action",
"expected_outcome": "pass",
"failed_components": [
"decision_phase"
],
"fallback_applied": true,
"input": "Book a table for 2 tonight",
"intent_confidence": 0.5284,
"intent_type": "transactional",
"phase_confidence": 0.1716
},
{
"decision_phase": "support",
"expected_outcome": "fallback",
"failed_components": [],
"fallback_applied": true,
"input": "I cannot log into my account",
"intent_confidence": 0.3314,
"intent_type": "personal_reflection",
"phase_confidence": 0.275
},
{
"decision_phase": "action",
"expected_outcome": "fallback",
"failed_components": [
"decision_phase"
],
"fallback_applied": true,
"input": "go deeper",
"intent_confidence": 0.6527,
"intent_type": "ambiguous",
"phase_confidence": 0.1638
},
{
"decision_phase": "research",
"expected_outcome": "fallback",
"failed_components": [
"decision_phase"
],
"fallback_applied": true,
"input": "say more about that",
"intent_confidence": 0.6538,
"intent_type": "ambiguous",
"phase_confidence": 0.1674
},
{
"decision_phase": "awareness",
"expected_outcome": "fallback",
"failed_components": [
"decision_phase"
],
"fallback_applied": true,
"input": "what do you mean by that",
"intent_confidence": 0.4595,
"intent_type": "ambiguous",
"phase_confidence": 0.1718
}
],
"suite_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/examples/intent_threshold_sweep_suite.json"
},
"head": {
"ambiguous_bad_allow_rate": 0.1842,
"ambiguous_prompt_count": 38,
"obvious_false_fallback_rate": 0.0104,
"obvious_prompt_count": 96,
"safe_predicate_rate": 0.2917
},
"threshold": 0.25
},
{
"combined": {
"bad_allow_rate_on_safe_prompts": 0.0,
"benchmark_fallback_rate": 0.6,
"benchmark_intent_only_fallback_rate": 0.0,
"benchmark_phase_only_fallback_rate": 0.5333,
"fallback_responsibility": {
"both": 0,
"fallback_rate": 0.6,
"intent_only": 0,
"intent_share_of_threshold_fallbacks": 0.0,
"phase_only": 5,
"phase_share_of_threshold_fallbacks": 1.0,
"policy_safe": 1
},
"false_fallback_rate_on_obvious_prompts": 0.3333,
"obvious_prompt_count": 6,
"safe_prompt_count": 4,
"suite_outputs": [
{
"decision_phase": "awareness",
"expected_outcome": "pass",
"failed_components": [],
"fallback_applied": false,
"input": "What is CRM software?",
"intent_confidence": 0.8488,
"intent_type": "informational",
"phase_confidence": 0.4134
},
{
"decision_phase": "research",
"expected_outcome": "pass",
"failed_components": [],
"fallback_applied": false,
"input": "Help me understand CRM basics",
"intent_confidence": 0.4226,
"intent_type": "informational",
"phase_confidence": 0.2602
},
{
"decision_phase": "consideration",
"expected_outcome": "pass",
"failed_components": [],
"fallback_applied": false,
"input": "HubSpot vs Zoho for a small team",
"intent_confidence": 0.6819,
"intent_type": "commercial",
"phase_confidence": 0.3331
},
{
"decision_phase": "decision",
"expected_outcome": "pass",
"failed_components": [],
"fallback_applied": false,
"input": "Which CRM should I buy for a 3-person startup?",
"intent_confidence": 0.7691,
"intent_type": "informational",
"phase_confidence": 0.2879
},
{
"decision_phase": "action",
"expected_outcome": "pass",
"failed_components": [
"decision_phase"
],
"fallback_applied": true,
"input": "Start my free trial",
"intent_confidence": 0.5429,
"intent_type": "transactional",
"phase_confidence": 0.1875
},
{
"decision_phase": "action",
"expected_outcome": "pass",
"failed_components": [
"decision_phase"
],
"fallback_applied": true,
"input": "Book a table for 2 tonight",
"intent_confidence": 0.5284,
"intent_type": "transactional",
"phase_confidence": 0.1716
},
{
"decision_phase": "support",
"expected_outcome": "fallback",
"failed_components": [],
"fallback_applied": true,
"input": "I cannot log into my account",
"intent_confidence": 0.3314,
"intent_type": "personal_reflection",
"phase_confidence": 0.275
},
{
"decision_phase": "action",
"expected_outcome": "fallback",
"failed_components": [
"decision_phase"
],
"fallback_applied": true,
"input": "go deeper",
"intent_confidence": 0.6527,
"intent_type": "ambiguous",
"phase_confidence": 0.1638
},
{
"decision_phase": "research",
"expected_outcome": "fallback",
"failed_components": [
"decision_phase"
],
"fallback_applied": true,
"input": "say more about that",
"intent_confidence": 0.6538,
"intent_type": "ambiguous",
"phase_confidence": 0.1674
},
{
"decision_phase": "awareness",
"expected_outcome": "fallback",
"failed_components": [
"decision_phase"
],
"fallback_applied": true,
"input": "what do you mean by that",
"intent_confidence": 0.4595,
"intent_type": "ambiguous",
"phase_confidence": 0.1718
}
],
"suite_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/examples/intent_threshold_sweep_suite.json"
},
"head": {
"ambiguous_bad_allow_rate": 0.1842,
"ambiguous_prompt_count": 38,
"obvious_false_fallback_rate": 0.0104,
"obvious_prompt_count": 96,
"safe_predicate_rate": 0.2917
},
"threshold": 0.3
},
{
"combined": {
"bad_allow_rate_on_safe_prompts": 0.0,
"benchmark_fallback_rate": 0.6,
"benchmark_intent_only_fallback_rate": 0.0667,
"benchmark_phase_only_fallback_rate": 0.5333,
"fallback_responsibility": {
"both": 0,
"fallback_rate": 0.6,
"intent_only": 1,
"intent_share_of_threshold_fallbacks": 0.1667,
"phase_only": 5,
"phase_share_of_threshold_fallbacks": 0.8333,
"policy_safe": 0
},
"false_fallback_rate_on_obvious_prompts": 0.3333,
"obvious_prompt_count": 6,
"safe_prompt_count": 4,
"suite_outputs": [
{
"decision_phase": "awareness",
"expected_outcome": "pass",
"failed_components": [],
"fallback_applied": false,
"input": "What is CRM software?",
"intent_confidence": 0.8488,
"intent_type": "informational",
"phase_confidence": 0.4134
},
{
"decision_phase": "research",
"expected_outcome": "pass",
"failed_components": [],
"fallback_applied": false,
"input": "Help me understand CRM basics",
"intent_confidence": 0.4226,
"intent_type": "informational",
"phase_confidence": 0.2602
},
{
"decision_phase": "consideration",
"expected_outcome": "pass",
"failed_components": [],
"fallback_applied": false,
"input": "HubSpot vs Zoho for a small team",
"intent_confidence": 0.6819,
"intent_type": "commercial",
"phase_confidence": 0.3331
},
{
"decision_phase": "decision",
"expected_outcome": "pass",
"failed_components": [],
"fallback_applied": false,
"input": "Which CRM should I buy for a 3-person startup?",
"intent_confidence": 0.7691,
"intent_type": "informational",
"phase_confidence": 0.2879
},
{
"decision_phase": "action",
"expected_outcome": "pass",
"failed_components": [
"decision_phase"
],
"fallback_applied": true,
"input": "Start my free trial",
"intent_confidence": 0.5429,
"intent_type": "transactional",
"phase_confidence": 0.1875
},
{
"decision_phase": "action",
"expected_outcome": "pass",
"failed_components": [
"decision_phase"
],
"fallback_applied": true,
"input": "Book a table for 2 tonight",
"intent_confidence": 0.5284,
"intent_type": "transactional",
"phase_confidence": 0.1716
},
{
"decision_phase": "support",
"expected_outcome": "fallback",
"failed_components": [
"intent_type"
],
"fallback_applied": true,
"input": "I cannot log into my account",
"intent_confidence": 0.3314,
"intent_type": "personal_reflection",
"phase_confidence": 0.275
},
{
"decision_phase": "action",
"expected_outcome": "fallback",
"failed_components": [
"decision_phase"
],
"fallback_applied": true,
"input": "go deeper",
"intent_confidence": 0.6527,
"intent_type": "ambiguous",
"phase_confidence": 0.1638
},
{
"decision_phase": "research",
"expected_outcome": "fallback",
"failed_components": [
"decision_phase"
],
"fallback_applied": true,
"input": "say more about that",
"intent_confidence": 0.6538,
"intent_type": "ambiguous",
"phase_confidence": 0.1674
},
{
"decision_phase": "awareness",
"expected_outcome": "fallback",
"failed_components": [
"decision_phase"
],
"fallback_applied": true,
"input": "what do you mean by that",
"intent_confidence": 0.4595,
"intent_type": "ambiguous",
"phase_confidence": 0.1718
}
],
"suite_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/examples/intent_threshold_sweep_suite.json"
},
"head": {
"ambiguous_bad_allow_rate": 0.1842,
"ambiguous_prompt_count": 38,
"obvious_false_fallback_rate": 0.0417,
"obvious_prompt_count": 96,
"safe_predicate_rate": 0.2917
},
"threshold": 0.35
},
{
"combined": {
"bad_allow_rate_on_safe_prompts": 0.0,
"benchmark_fallback_rate": 0.6,
"benchmark_intent_only_fallback_rate": 0.0667,
"benchmark_phase_only_fallback_rate": 0.4667,
"fallback_responsibility": {
"both": 0,
"fallback_rate": 0.6,
"intent_only": 1,
"intent_share_of_threshold_fallbacks": 0.1667,
"phase_only": 5,
"phase_share_of_threshold_fallbacks": 0.8333,
"policy_safe": 0
},
"false_fallback_rate_on_obvious_prompts": 0.3333,
"obvious_prompt_count": 6,
"safe_prompt_count": 4,
"suite_outputs": [
{
"decision_phase": "awareness",
"expected_outcome": "pass",
"failed_components": [],
"fallback_applied": false,
"input": "What is CRM software?",
"intent_confidence": 0.8488,
"intent_type": "informational",
"phase_confidence": 0.4134
},
{
"decision_phase": "research",
"expected_outcome": "pass",
"failed_components": [],
"fallback_applied": false,
"input": "Help me understand CRM basics",
"intent_confidence": 0.4226,
"intent_type": "informational",
"phase_confidence": 0.2602
},
{
"decision_phase": "consideration",
"expected_outcome": "pass",
"failed_components": [],
"fallback_applied": false,
"input": "HubSpot vs Zoho for a small team",
"intent_confidence": 0.6819,
"intent_type": "commercial",
"phase_confidence": 0.3331
},
{
"decision_phase": "decision",
"expected_outcome": "pass",
"failed_components": [],
"fallback_applied": false,
"input": "Which CRM should I buy for a 3-person startup?",
"intent_confidence": 0.7691,
"intent_type": "informational",
"phase_confidence": 0.2879
},
{
"decision_phase": "action",
"expected_outcome": "pass",
"failed_components": [
"decision_phase"
],
"fallback_applied": true,
"input": "Start my free trial",
"intent_confidence": 0.5429,
"intent_type": "transactional",
"phase_confidence": 0.1875
},
{
"decision_phase": "action",
"expected_outcome": "pass",
"failed_components": [
"decision_phase"
],
"fallback_applied": true,
"input": "Book a table for 2 tonight",
"intent_confidence": 0.5284,
"intent_type": "transactional",
"phase_confidence": 0.1716
},
{
"decision_phase": "support",
"expected_outcome": "fallback",
"failed_components": [
"intent_type"
],
"fallback_applied": true,
"input": "I cannot log into my account",
"intent_confidence": 0.3314,
"intent_type": "personal_reflection",
"phase_confidence": 0.275
},
{
"decision_phase": "action",
"expected_outcome": "fallback",
"failed_components": [
"decision_phase"
],
"fallback_applied": true,
"input": "go deeper",
"intent_confidence": 0.6527,
"intent_type": "ambiguous",
"phase_confidence": 0.1638
},
{
"decision_phase": "research",
"expected_outcome": "fallback",
"failed_components": [
"decision_phase"
],
"fallback_applied": true,
"input": "say more about that",
"intent_confidence": 0.6538,
"intent_type": "ambiguous",
"phase_confidence": 0.1674
},
{
"decision_phase": "awareness",
"expected_outcome": "fallback",
"failed_components": [
"decision_phase"
],
"fallback_applied": true,
"input": "what do you mean by that",
"intent_confidence": 0.4595,
"intent_type": "ambiguous",
"phase_confidence": 0.1718
}
],
"suite_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/examples/intent_threshold_sweep_suite.json"
},
"head": {
"ambiguous_bad_allow_rate": 0.1053,
"ambiguous_prompt_count": 38,
"obvious_false_fallback_rate": 0.0625,
"obvious_prompt_count": 96,
"safe_predicate_rate": 0.2917
},
"threshold": 0.4
},
{
"combined": {
"bad_allow_rate_on_safe_prompts": 0.0,
"benchmark_fallback_rate": 0.6667,
"benchmark_intent_only_fallback_rate": 0.1333,
"benchmark_phase_only_fallback_rate": 0.4667,
"fallback_responsibility": {
"both": 0,
"fallback_rate": 0.7,
"intent_only": 2,
"intent_share_of_threshold_fallbacks": 0.2857,
"phase_only": 5,
"phase_share_of_threshold_fallbacks": 0.7143,
"policy_safe": 0
},
"false_fallback_rate_on_obvious_prompts": 0.5,
"obvious_prompt_count": 6,
"safe_prompt_count": 4,
"suite_outputs": [
{
"decision_phase": "awareness",
"expected_outcome": "pass",
"failed_components": [],
"fallback_applied": false,
"input": "What is CRM software?",
"intent_confidence": 0.8488,
"intent_type": "informational",
"phase_confidence": 0.4134
},
{
"decision_phase": "research",
"expected_outcome": "pass",
"failed_components": [
"intent_type"
],
"fallback_applied": true,
"input": "Help me understand CRM basics",
"intent_confidence": 0.4226,
"intent_type": "informational",
"phase_confidence": 0.2602
},
{
"decision_phase": "consideration",
"expected_outcome": "pass",
"failed_components": [],
"fallback_applied": false,
"input": "HubSpot vs Zoho for a small team",
"intent_confidence": 0.6819,
"intent_type": "commercial",
"phase_confidence": 0.3331
},
{
"decision_phase": "decision",
"expected_outcome": "pass",
"failed_components": [],
"fallback_applied": false,
"input": "Which CRM should I buy for a 3-person startup?",
"intent_confidence": 0.7691,
"intent_type": "informational",
"phase_confidence": 0.2879
},
{
"decision_phase": "action",
"expected_outcome": "pass",
"failed_components": [
"decision_phase"
],
"fallback_applied": true,
"input": "Start my free trial",
"intent_confidence": 0.5429,
"intent_type": "transactional",
"phase_confidence": 0.1875
},
{
"decision_phase": "action",
"expected_outcome": "pass",
"failed_components": [
"decision_phase"
],
"fallback_applied": true,
"input": "Book a table for 2 tonight",
"intent_confidence": 0.5284,
"intent_type": "transactional",
"phase_confidence": 0.1716
},
{
"decision_phase": "support",
"expected_outcome": "fallback",
"failed_components": [
"intent_type"
],
"fallback_applied": true,
"input": "I cannot log into my account",
"intent_confidence": 0.3314,
"intent_type": "personal_reflection",
"phase_confidence": 0.275
},
{
"decision_phase": "action",
"expected_outcome": "fallback",
"failed_components": [
"decision_phase"
],
"fallback_applied": true,
"input": "go deeper",
"intent_confidence": 0.6527,
"intent_type": "ambiguous",
"phase_confidence": 0.1638
},
{
"decision_phase": "research",
"expected_outcome": "fallback",
"failed_components": [
"decision_phase"
],
"fallback_applied": true,
"input": "say more about that",
"intent_confidence": 0.6538,
"intent_type": "ambiguous",
"phase_confidence": 0.1674
},
{
"decision_phase": "awareness",
"expected_outcome": "fallback",
"failed_components": [
"decision_phase"
],
"fallback_applied": true,
"input": "what do you mean by that",
"intent_confidence": 0.4595,
"intent_type": "ambiguous",
"phase_confidence": 0.1718
}
],
"suite_path": "/Users/manikumargouni/Desktop/AdMesh/protocol/agentic-intent-classifier/examples/intent_threshold_sweep_suite.json"
},
"head": {
"ambiguous_bad_allow_rate": 0.1053,
"ambiguous_prompt_count": 38,
"obvious_false_fallback_rate": 0.1042,
"obvious_prompt_count": 96,
"safe_predicate_rate": 0.2917
},
"threshold": 0.45
}
],
"thresholds": [
0.0,
0.1,
0.15,
0.2,
0.25,
0.3,
0.35,
0.4,
0.45
]
}