diff --git "a/outputs/eval_post.json" "b/outputs/eval_post.json" new file mode 100644--- /dev/null +++ "b/outputs/eval_post.json" @@ -0,0 +1,15549 @@ +{ + "task": "all", + "tasks": [ + "task1", + "task2", + "task3" + ], + "episodes_per_policy": 30, + "adaptive": false, + "difficulty_controller": { + "adaptive": true, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "difficulty_controller_by_task_policy": { + "task1": { + "random": {}, + "heuristic": {}, + "oracle_lite": {}, + "trained": {} + }, + "task2": { + "random": {}, + "heuristic": {}, + "oracle_lite": {}, + "trained": {} + }, + "task3": { + "random": {}, + "heuristic": {}, + "oracle_lite": {}, + "trained": {} + } + }, + "summary": { + "random": { + "episodes": 90, + "avg_score": 0.6904, + "avg_completion_rate": 0.8131, + "avg_detection_rate": 0.7935, + "avg_trust_calibration": 0.4453, + "avg_steps": 26.2111 + }, + "heuristic": { + "episodes": 90, + "avg_score": 0.7817, + "avg_completion_rate": 0.8918, + "avg_detection_rate": 0.9178, + "avg_trust_calibration": 0.4373, + "avg_steps": 24.4 + }, + "oracle_lite": { + "episodes": 90, + "avg_score": 0.8405, + "avg_completion_rate": 0.8687, + "avg_detection_rate": 1.0, + "avg_trust_calibration": 0.5892, + "avg_steps": 29.4444 + }, + "trained": { + "episodes": 90, + "avg_score": 0.788, + "avg_completion_rate": 0.8979, + "avg_detection_rate": 0.9437, + "avg_trust_calibration": 0.4378, + "avg_steps": 24.5 + } + }, + "by_task": { + "task1": { + "random": { + "episodes": 30, + "avg_score": 0.7635, + "avg_completion_rate": 0.76, + "avg_detection_rate": 1.0, + "avg_trust_calibration": 0.0, + "avg_steps": 15.1333 + }, + "heuristic": { + "episodes": 30, + "avg_score": 0.8504, + "avg_completion_rate": 0.84, + "avg_detection_rate": 1.0, + "avg_trust_calibration": 0.0, + "avg_steps": 13.8333 + }, + "oracle_lite": { + "episodes": 30, + "avg_score": 0.9011, + "avg_completion_rate": 0.7167, + "avg_detection_rate": 1.0, + "avg_trust_calibration": 0.0, + "avg_steps": 16.0 + }, + "trained": { + "episodes": 30, + "avg_score": 0.8504, + "avg_completion_rate": 0.84, + "avg_detection_rate": 1.0, + "avg_trust_calibration": 0.0, + "avg_steps": 13.8333 + } + }, + "task2": { + "random": { + "episodes": 30, + "avg_score": 0.6472, + "avg_completion_rate": 0.8644, + "avg_detection_rate": 1.0, + "avg_trust_calibration": 0.5829, + "avg_steps": 26.7667 + }, + "heuristic": { + "episodes": 30, + "avg_score": 0.7497, + "avg_completion_rate": 0.9288, + "avg_detection_rate": 1.0, + "avg_trust_calibration": 0.5737, + "avg_steps": 23.2333 + }, + "oracle_lite": { + "episodes": 30, + "avg_score": 0.7638, + "avg_completion_rate": 0.9045, + "avg_detection_rate": 1.0, + "avg_trust_calibration": 0.8377, + "avg_steps": 30.0 + }, + "trained": { + "episodes": 30, + "avg_score": 0.7497, + "avg_completion_rate": 0.9288, + "avg_detection_rate": 1.0, + "avg_trust_calibration": 0.5737, + "avg_steps": 23.2333 + } + }, + "task3": { + "random": { + "episodes": 30, + "avg_score": 0.6606, + "avg_completion_rate": 0.815, + "avg_detection_rate": 0.3806, + "avg_trust_calibration": 0.7531, + "avg_steps": 36.7333 + }, + "heuristic": { + "episodes": 30, + "avg_score": 0.7449, + "avg_completion_rate": 0.9067, + "avg_detection_rate": 0.7534, + "avg_trust_calibration": 0.7383, + "avg_steps": 36.1333 + }, + "oracle_lite": { + "episodes": 30, + "avg_score": 0.8567, + "avg_completion_rate": 0.985, + "avg_detection_rate": 1.0, + "avg_trust_calibration": 0.9299, + "avg_steps": 42.3333 + }, + "trained": { + "episodes": 30, + "avg_score": 0.7637, + "avg_completion_rate": 0.925, + "avg_detection_rate": 0.8312, + "avg_trust_calibration": 0.7396, + "avg_steps": 36.4333 + } + } + }, + "episodes": [ + { + "policy": "random", + "task_type": "task1", + "seed": 0, + "steps": 15, + "score": 0.6569, + "total_reward": 7.8825, + "completion_rate": 0.7, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.914, + 0.962, + 0.962, + 0.02, + 0.962, + 0.962, + 0.962, + 0.3645, + 0.02, + 0.02, + 0.867 + ] + }, + { + "policy": "random", + "task_type": "task1", + "seed": 1, + "steps": 15, + "score": 0.7996, + "total_reward": 7.196, + "completion_rate": 0.7, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.982, + 0.914, + 0.914, + 0.962, + 0.867, + 0.962, + 0.3165 + ] + }, + { + "policy": "random", + "task_type": "task1", + "seed": 2, + "steps": 15, + "score": 0.8129, + "total_reward": 8.1294, + "completion_rate": 0.7, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.369, + 0.3024, + 0.962, + 0.962, + 0.914, + 0.962, + 0.867, + 0.867, + 0.962 + ] + }, + { + "policy": "random", + "task_type": "task1", + "seed": 3, + "steps": 14, + "score": 0.8084, + "total_reward": 10.5095, + "completion_rate": 0.9, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.982, + 0.982, + 0.962, + 0.3455, + 0.867, + 0.962, + 0.946, + 0.962, + 0.3455, + 0.3645, + 0.867, + 0.962 + ] + }, + { + "policy": "random", + "task_type": "task1", + "seed": 4, + "steps": 15, + "score": 0.7814, + "total_reward": 8.5956, + "completion_rate": 0.7, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.982, + 0.962, + 0.3645, + 0.914, + 0.962, + 0.3455, + 0.3136, + 0.962, + 0.914 + ] + }, + { + "policy": "random", + "task_type": "task1", + "seed": 5, + "steps": 15, + "score": 0.725, + "total_reward": 8.7, + "completion_rate": 0.7, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.914, + 0.962, + 0.3165, + 0.3455, + 0.3455, + 0.867, + 0.946, + 0.962, + 0.3455, + 0.867 + ] + }, + { + "policy": "random", + "task_type": "task1", + "seed": 6, + "steps": 15, + "score": 0.8118, + "total_reward": 8.1182, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.914, + 0.914, + 0.962, + 0.962, + 0.962, + 0.962, + 0.934, + 0.867, + 0.3206 + ] + }, + { + "policy": "random", + "task_type": "task1", + "seed": 7, + "steps": 15, + "score": 0.9334, + "total_reward": 9.334, + "completion_rate": 0.9, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.867, + 0.914, + 0.914, + 0.962, + 0.867, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "random", + "task_type": "task1", + "seed": 8, + "steps": 15, + "score": 0.8425, + "total_reward": 9.2675, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.369, + 0.962, + 0.914, + 0.962, + 0.962, + 0.3645, + 0.962, + 0.982, + 0.914 + ] + }, + { + "policy": "random", + "task_type": "task1", + "seed": 9, + "steps": 15, + "score": 0.7751, + "total_reward": 9.3011, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.982, + 0.962, + 0.962, + 0.867, + 0.3616, + 0.914, + 0.3645, + 0.02, + 0.982, + 0.962, + 0.962 + ] + }, + { + "policy": "random", + "task_type": "task1", + "seed": 10, + "steps": 15, + "score": 0.7653, + "total_reward": 8.418, + "completion_rate": 0.7, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.982, + 0.914, + 0.982, + 0.962, + 0.02, + 0.3455, + 0.3645, + 0.962 + ] + }, + { + "policy": "random", + "task_type": "task1", + "seed": 11, + "steps": 15, + "score": 0.8199, + "total_reward": 9.8394, + "completion_rate": 0.9, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.867, + 0.962, + 0.962, + 0.3504, + 0.914, + 0.982, + 0.962, + 0.982, + 0.02, + 0.914, + 0.962 + ] + }, + { + "policy": "random", + "task_type": "task1", + "seed": 12, + "steps": 15, + "score": 0.6163, + "total_reward": 7.3956, + "completion_rate": 0.6, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.982, + 0.914, + 0.3455, + 0.962, + 0.898, + 0.962, + 0.914, + 0.02, + 0.3616, + 0.3455, + 0.3455 + ] + }, + { + "policy": "random", + "task_type": "task1", + "seed": 13, + "steps": 15, + "score": 0.7283, + "total_reward": 6.555, + "completion_rate": 0.7, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.914, + 0.867, + 0.962, + 0.934, + 0.962, + 0.914, + 0.02 + ] + }, + { + "policy": "random", + "task_type": "task1", + "seed": 14, + "steps": 17, + "score": 0.8867, + "total_reward": 10.6405, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.867, + 0.914, + 0.914, + 0.3455, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.914 + ] + }, + { + "policy": "random", + "task_type": "task1", + "seed": 15, + "steps": 16, + "score": 0.6915, + "total_reward": 9.6809, + "completion_rate": 0.7, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3504, + 0.962, + 0.962, + 0.867, + 0.3645, + 0.3645, + 0.982, + 0.3645, + 0.867, + 0.982, + 0.3455, + 0.3455, + 0.962 + ] + }, + { + "policy": "random", + "task_type": "task1", + "seed": 16, + "steps": 15, + "score": 0.7164, + "total_reward": 9.313, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.2975, + 0.02, + 0.982, + 0.02, + 0.3455, + 0.914, + 0.962 + ] + }, + { + "policy": "random", + "task_type": "task1", + "seed": 17, + "steps": 15, + "score": 0.6495, + "total_reward": 8.4439, + "completion_rate": 0.7, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.02, + 0.3455, + 0.3136, + 0.867, + 0.982, + 0.962, + 0.3206, + 0.962, + 0.962, + 0.982, + 0.3826 + ] + }, + { + "policy": "random", + "task_type": "task1", + "seed": 18, + "steps": 15, + "score": 0.8235, + "total_reward": 9.8815, + "completion_rate": 0.9, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.02, + 0.962, + 0.982, + 0.3645, + 0.962, + 0.867, + 0.962, + 0.914, + 0.962 + ] + }, + { + "policy": "random", + "task_type": "task1", + "seed": 19, + "steps": 15, + "score": 0.7588, + "total_reward": 8.347, + "completion_rate": 0.7, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.2975, + 0.962, + 0.962, + 0.962, + 0.962, + 0.3455, + 0.02, + 0.93, + 0.982, + 0.962 + ] + }, + { + "policy": "random", + "task_type": "task1", + "seed": 20, + "steps": 15, + "score": 0.6444, + "total_reward": 7.7329, + "completion_rate": 0.6, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3525, + 0.867, + 0.867, + 0.962, + 0.962, + 0.3504, + 0.962, + 0.962, + 0.3504, + 0.3686, + 0.3645 + ] + }, + { + "policy": "random", + "task_type": "task1", + "seed": 21, + "steps": 15, + "score": 0.8756, + "total_reward": 9.6315, + "completion_rate": 0.9, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.914, + 0.914, + 0.962, + 0.3455, + 0.962, + 0.962, + 0.914, + 0.867 + ] + }, + { + "policy": "random", + "task_type": "task1", + "seed": 22, + "steps": 15, + "score": 0.6437, + "total_reward": 7.0809, + "completion_rate": 0.5, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3504, + 0.02, + 0.93, + 0.914, + 0.3645, + 0.3645, + 0.962, + 0.2975, + 0.914, + 0.982 + ] + }, + { + "policy": "random", + "task_type": "task1", + "seed": 23, + "steps": 15, + "score": 0.8985, + "total_reward": 10.7824, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.867, + 0.962, + 0.3504, + 0.982, + 0.962, + 0.962, + 0.867, + 0.962, + 0.962, + 0.982, + 0.962 + ] + }, + { + "policy": "random", + "task_type": "task1", + "seed": 24, + "steps": 16, + "score": 0.6933, + "total_reward": 7.6267, + "completion_rate": 0.6, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.2975, + 0.982, + 0.982, + 0.3066, + 0.934, + 0.914, + 0.962, + 0.3686, + 0.02, + 0.93 + ] + }, + { + "policy": "random", + "task_type": "task1", + "seed": 25, + "steps": 15, + "score": 0.8266, + "total_reward": 9.0928, + "completion_rate": 0.9, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.914, + 0.982, + 0.962, + 0.867, + 0.962, + 0.962, + 0.867, + 0.914, + 0.962, + 0.3504 + ] + }, + { + "policy": "random", + "task_type": "task1", + "seed": 26, + "steps": 15, + "score": 0.7833, + "total_reward": 7.8326, + "completion_rate": 0.7, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.914, + 0.962, + 0.982, + 0.962, + 0.962, + 0.02, + 0.3206, + 0.914, + 0.898 + ] + }, + { + "policy": "random", + "task_type": "task1", + "seed": 27, + "steps": 16, + "score": 0.8311, + "total_reward": 9.1421, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.934, + 0.3455, + 0.3546, + 0.962, + 0.93, + 0.982, + 0.962, + 0.93, + 0.914, + 0.914 + ] + }, + { + "policy": "random", + "task_type": "task1", + "seed": 28, + "steps": 15, + "score": 0.7196, + "total_reward": 8.6356, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.914, + 0.962, + 0.962, + 0.914, + 0.962, + 0.962, + 0.962, + 0.962, + 0.3826, + 0.02, + 0.3165 + ] + }, + { + "policy": "random", + "task_type": "task1", + "seed": 29, + "steps": 15, + "score": 0.5851, + "total_reward": 7.021, + "completion_rate": 0.6, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.914, + 0.3455, + 0.962, + 0.02, + 0.914, + 0.3165, + 0.02, + 0.867, + 0.962, + 0.369 + ] + }, + { + "policy": "heuristic", + "task_type": "task1", + "seed": 0, + "steps": 13, + "score": 0.753, + "total_reward": 10.5415, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.982, + 0.982, + 0.982, + 0.3645, + 0.3645, + 0.982, + 0.3645, + 0.3645, + 0.962, + 0.962, + 0.3455, + 0.962, + 0.962 + ] + }, + { + "policy": "heuristic", + "task_type": "task1", + "seed": 1, + "steps": 12, + "score": 0.7843, + "total_reward": 10.196, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.982, + 0.982, + 0.982, + 0.3645, + 0.3645, + 0.982, + 0.3645, + 0.3645, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "heuristic", + "task_type": "task1", + "seed": 2, + "steps": 11, + "score": 0.8612, + "total_reward": 10.3345, + "completion_rate": 0.9, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.369, + 0.3455, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "heuristic", + "task_type": "task1", + "seed": 3, + "steps": 16, + "score": 0.962, + "total_reward": 8.658, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "heuristic", + "task_type": "task1", + "seed": 4, + "steps": 11, + "score": 0.911, + "total_reward": 10.9324, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.3504, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "heuristic", + "task_type": "task1", + "seed": 5, + "steps": 16, + "score": 0.8266, + "total_reward": 7.439, + "completion_rate": 0.6, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.962, + 0.3525, + 0.962, + 0.3525, + 0.962, + 0.962 + ] + }, + { + "policy": "heuristic", + "task_type": "task1", + "seed": 6, + "steps": 10, + "score": 0.962, + "total_reward": 10.582, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "heuristic", + "task_type": "task1", + "seed": 7, + "steps": 16, + "score": 0.8166, + "total_reward": 9.7988, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.3686, + 0.962, + 0.3826, + 0.3896, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "heuristic", + "task_type": "task1", + "seed": 8, + "steps": 16, + "score": 0.8399, + "total_reward": 8.3989, + "completion_rate": 0.7, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.3504, + 0.962, + 0.962, + 0.962, + 0.962, + 0.3525, + 0.962, + 0.962 + ] + }, + { + "policy": "heuristic", + "task_type": "task1", + "seed": 9, + "steps": 16, + "score": 0.785, + "total_reward": 10.2052, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.962, + 0.3756, + 0.962, + 0.3896, + 0.391, + 0.391, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "heuristic", + "task_type": "task1", + "seed": 10, + "steps": 10, + "score": 0.962, + "total_reward": 10.582, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "heuristic", + "task_type": "task1", + "seed": 11, + "steps": 16, + "score": 0.7843, + "total_reward": 10.196, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.982, + 0.982, + 0.982, + 0.3645, + 0.3645, + 0.982, + 0.3645, + 0.3645, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "heuristic", + "task_type": "task1", + "seed": 12, + "steps": 16, + "score": 0.962, + "total_reward": 8.658, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "heuristic", + "task_type": "task1", + "seed": 13, + "steps": 16, + "score": 0.9003, + "total_reward": 9.0035, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.3455, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "heuristic", + "task_type": "task1", + "seed": 14, + "steps": 13, + "score": 0.7534, + "total_reward": 10.5473, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.3686, + 0.3756, + 0.962, + 0.3896, + 0.391, + 0.3645, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "heuristic", + "task_type": "task1", + "seed": 15, + "steps": 13, + "score": 0.8312, + "total_reward": 11.6374, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3546, + 0.962, + 0.962, + 0.962, + 0.3504, + 0.962, + 0.962, + 0.962, + 0.962, + 0.3504, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "heuristic", + "task_type": "task1", + "seed": 16, + "steps": 16, + "score": 0.962, + "total_reward": 8.658, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "heuristic", + "task_type": "task1", + "seed": 17, + "steps": 16, + "score": 0.8943, + "total_reward": 8.0485, + "completion_rate": 0.7, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.3525, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "heuristic", + "task_type": "task1", + "seed": 18, + "steps": 10, + "score": 0.962, + "total_reward": 10.582, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "heuristic", + "task_type": "task1", + "seed": 19, + "steps": 12, + "score": 0.8675, + "total_reward": 11.2779, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3455, + 0.962, + 0.962, + 0.962, + 0.962, + 0.3504, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "heuristic", + "task_type": "task1", + "seed": 20, + "steps": 16, + "score": 0.7993, + "total_reward": 8.7927, + "completion_rate": 0.7, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.3686, + 0.3756, + 0.962, + 0.962, + 0.3525, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "heuristic", + "task_type": "task1", + "seed": 21, + "steps": 16, + "score": 0.962, + "total_reward": 8.658, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "heuristic", + "task_type": "task1", + "seed": 22, + "steps": 15, + "score": 0.772, + "total_reward": 12.3526, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.369, + 0.962, + 0.962, + 0.3504, + 0.962, + 0.962, + 0.962, + 0.962, + 0.3504, + 0.962, + 0.3504, + 0.962, + 0.962, + 0.3504, + 0.962 + ] + }, + { + "policy": "heuristic", + "task_type": "task1", + "seed": 23, + "steps": 11, + "score": 0.8606, + "total_reward": 10.3271, + "completion_rate": 0.9, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.3616, + 0.3455, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "heuristic", + "task_type": "task1", + "seed": 24, + "steps": 16, + "score": 0.8161, + "total_reward": 9.7931, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.3616, + 0.982, + 0.369, + 0.982, + 0.3645, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "heuristic", + "task_type": "task1", + "seed": 25, + "steps": 14, + "score": 0.6506, + "total_reward": 9.7585, + "completion_rate": 0.7, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.982, + 0.982, + 0.982, + 0.3645, + 0.3645, + 0.982, + 0.946, + 0.369, + 0.982, + 0.3645, + 0.3645, + 0.982, + 0.3645, + 0.3645 + ] + }, + { + "policy": "heuristic", + "task_type": "task1", + "seed": 26, + "steps": 16, + "score": 0.7054, + "total_reward": 11.2865, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.982, + 0.982, + 0.982, + 0.3645, + 0.946, + 0.982, + 0.3645, + 0.946, + 0.982, + 0.3645, + 0.3645, + 0.369, + 0.369, + 0.3645, + 0.962 + ] + }, + { + "policy": "heuristic", + "task_type": "task1", + "seed": 27, + "steps": 15, + "score": 0.6937, + "total_reward": 9.0177, + "completion_rate": 0.6, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.3686, + 0.962, + 0.962, + 0.3896, + 0.391, + 0.391, + 0.391, + 0.962, + 0.3525, + 0.962 + ] + }, + { + "policy": "heuristic", + "task_type": "task1", + "seed": 28, + "steps": 10, + "score": 0.962, + "total_reward": 10.582, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "heuristic", + "task_type": "task1", + "seed": 29, + "steps": 11, + "score": 0.911, + "total_reward": 10.9324, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.3504, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task1", + "seed": 0, + "steps": 16, + "score": 0.962, + "total_reward": 8.658, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task1", + "seed": 1, + "steps": 16, + "score": 0.962, + "total_reward": 8.658, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task1", + "seed": 2, + "steps": 16, + "score": 0.8266, + "total_reward": 7.439, + "completion_rate": 0.6, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3525, + 0.3525, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task1", + "seed": 3, + "steps": 16, + "score": 0.962, + "total_reward": 8.658, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task1", + "seed": 4, + "steps": 16, + "score": 0.8943, + "total_reward": 8.0485, + "completion_rate": 0.7, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.3525, + 0.962 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task1", + "seed": 5, + "steps": 16, + "score": 0.8266, + "total_reward": 7.439, + "completion_rate": 0.6, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.962, + 0.3525, + 0.962, + 0.3525, + 0.962, + 0.962 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task1", + "seed": 6, + "steps": 16, + "score": 0.962, + "total_reward": 8.658, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task1", + "seed": 7, + "steps": 16, + "score": 0.962, + "total_reward": 8.658, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task1", + "seed": 8, + "steps": 16, + "score": 0.8266, + "total_reward": 7.439, + "completion_rate": 0.6, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.3525, + 0.962, + 0.962, + 0.962, + 0.962, + 0.3525, + 0.962 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task1", + "seed": 9, + "steps": 16, + "score": 0.962, + "total_reward": 8.658, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task1", + "seed": 10, + "steps": 16, + "score": 0.962, + "total_reward": 8.658, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task1", + "seed": 11, + "steps": 16, + "score": 0.8943, + "total_reward": 8.0485, + "completion_rate": 0.7, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.3525, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task1", + "seed": 12, + "steps": 16, + "score": 0.962, + "total_reward": 8.658, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task1", + "seed": 13, + "steps": 16, + "score": 0.962, + "total_reward": 8.658, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task1", + "seed": 14, + "steps": 16, + "score": 0.8943, + "total_reward": 8.0485, + "completion_rate": 0.7, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.962, + 0.3525, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task1", + "seed": 15, + "steps": 16, + "score": 0.8266, + "total_reward": 7.439, + "completion_rate": 0.6, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3525, + 0.962, + 0.962, + 0.962, + 0.3525, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task1", + "seed": 16, + "steps": 16, + "score": 0.962, + "total_reward": 8.658, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task1", + "seed": 17, + "steps": 16, + "score": 0.8943, + "total_reward": 8.0485, + "completion_rate": 0.7, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.3525, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task1", + "seed": 18, + "steps": 16, + "score": 0.962, + "total_reward": 8.658, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task1", + "seed": 19, + "steps": 16, + "score": 0.8943, + "total_reward": 8.0485, + "completion_rate": 0.7, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.3525, + 0.962, + 0.962 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task1", + "seed": 20, + "steps": 16, + "score": 0.7588, + "total_reward": 6.8295, + "completion_rate": 0.5, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3525, + 0.962, + 0.962, + 0.3525, + 0.962, + 0.962, + 0.3525, + 0.962 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task1", + "seed": 21, + "steps": 16, + "score": 0.962, + "total_reward": 8.658, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task1", + "seed": 22, + "steps": 16, + "score": 0.8266, + "total_reward": 7.439, + "completion_rate": 0.6, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3525, + 0.962, + 0.962, + 0.3525, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task1", + "seed": 23, + "steps": 16, + "score": 0.8266, + "total_reward": 7.439, + "completion_rate": 0.6, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3525, + 0.3525, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task1", + "seed": 24, + "steps": 16, + "score": 0.8943, + "total_reward": 8.0485, + "completion_rate": 0.7, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.962, + 0.3525, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task1", + "seed": 25, + "steps": 16, + "score": 0.7588, + "total_reward": 6.8295, + "completion_rate": 0.6, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.3525, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.3525 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task1", + "seed": 26, + "steps": 16, + "score": 0.962, + "total_reward": 8.658, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task1", + "seed": 27, + "steps": 16, + "score": 0.8943, + "total_reward": 8.0485, + "completion_rate": 0.7, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.3525, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task1", + "seed": 28, + "steps": 16, + "score": 0.962, + "total_reward": 8.658, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task1", + "seed": 29, + "steps": 16, + "score": 0.8266, + "total_reward": 7.439, + "completion_rate": 0.7, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.3525 + ] + }, + { + "policy": "trained", + "task_type": "task1", + "seed": 0, + "steps": 13, + "score": 0.753, + "total_reward": 10.5415, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.982, + 0.982, + 0.982, + 0.3645, + 0.3645, + 0.982, + 0.3645, + 0.3645, + 0.962, + 0.962, + 0.3455, + 0.962, + 0.962 + ] + }, + { + "policy": "trained", + "task_type": "task1", + "seed": 1, + "steps": 12, + "score": 0.7843, + "total_reward": 10.196, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.982, + 0.982, + 0.982, + 0.3645, + 0.3645, + 0.982, + 0.3645, + 0.3645, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "trained", + "task_type": "task1", + "seed": 2, + "steps": 11, + "score": 0.8612, + "total_reward": 10.3345, + "completion_rate": 0.9, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.369, + 0.3455, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "trained", + "task_type": "task1", + "seed": 3, + "steps": 16, + "score": 0.962, + "total_reward": 8.658, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "trained", + "task_type": "task1", + "seed": 4, + "steps": 11, + "score": 0.911, + "total_reward": 10.9324, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.3504, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "trained", + "task_type": "task1", + "seed": 5, + "steps": 16, + "score": 0.8266, + "total_reward": 7.439, + "completion_rate": 0.6, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.962, + 0.3525, + 0.962, + 0.3525, + 0.962, + 0.962 + ] + }, + { + "policy": "trained", + "task_type": "task1", + "seed": 6, + "steps": 10, + "score": 0.962, + "total_reward": 10.582, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "trained", + "task_type": "task1", + "seed": 7, + "steps": 16, + "score": 0.8166, + "total_reward": 9.7988, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.3686, + 0.962, + 0.3826, + 0.3896, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "trained", + "task_type": "task1", + "seed": 8, + "steps": 16, + "score": 0.8399, + "total_reward": 8.3989, + "completion_rate": 0.7, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.3504, + 0.962, + 0.962, + 0.962, + 0.962, + 0.3525, + 0.962, + 0.962 + ] + }, + { + "policy": "trained", + "task_type": "task1", + "seed": 9, + "steps": 16, + "score": 0.785, + "total_reward": 10.2052, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.962, + 0.3756, + 0.962, + 0.3896, + 0.391, + 0.391, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "trained", + "task_type": "task1", + "seed": 10, + "steps": 10, + "score": 0.962, + "total_reward": 10.582, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "trained", + "task_type": "task1", + "seed": 11, + "steps": 16, + "score": 0.7843, + "total_reward": 10.196, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.982, + 0.982, + 0.982, + 0.3645, + 0.3645, + 0.982, + 0.3645, + 0.3645, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "trained", + "task_type": "task1", + "seed": 12, + "steps": 16, + "score": 0.962, + "total_reward": 8.658, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "trained", + "task_type": "task1", + "seed": 13, + "steps": 16, + "score": 0.9003, + "total_reward": 9.0035, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.3455, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "trained", + "task_type": "task1", + "seed": 14, + "steps": 13, + "score": 0.7534, + "total_reward": 10.5473, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.3686, + 0.3756, + 0.962, + 0.3896, + 0.391, + 0.3645, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "trained", + "task_type": "task1", + "seed": 15, + "steps": 13, + "score": 0.8312, + "total_reward": 11.6374, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3546, + 0.962, + 0.962, + 0.962, + 0.3504, + 0.962, + 0.962, + 0.962, + 0.962, + 0.3504, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "trained", + "task_type": "task1", + "seed": 16, + "steps": 16, + "score": 0.962, + "total_reward": 8.658, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "trained", + "task_type": "task1", + "seed": 17, + "steps": 16, + "score": 0.8943, + "total_reward": 8.0485, + "completion_rate": 0.7, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.3525, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "trained", + "task_type": "task1", + "seed": 18, + "steps": 10, + "score": 0.962, + "total_reward": 10.582, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "trained", + "task_type": "task1", + "seed": 19, + "steps": 12, + "score": 0.8675, + "total_reward": 11.2779, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3455, + 0.962, + 0.962, + 0.962, + 0.962, + 0.3504, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "trained", + "task_type": "task1", + "seed": 20, + "steps": 16, + "score": 0.7993, + "total_reward": 8.7927, + "completion_rate": 0.7, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.3686, + 0.3756, + 0.962, + 0.962, + 0.3525, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "trained", + "task_type": "task1", + "seed": 21, + "steps": 16, + "score": 0.962, + "total_reward": 8.658, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "trained", + "task_type": "task1", + "seed": 22, + "steps": 15, + "score": 0.772, + "total_reward": 12.3526, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.369, + 0.962, + 0.962, + 0.3504, + 0.962, + 0.962, + 0.962, + 0.962, + 0.3504, + 0.962, + 0.3504, + 0.962, + 0.962, + 0.3504, + 0.962 + ] + }, + { + "policy": "trained", + "task_type": "task1", + "seed": 23, + "steps": 11, + "score": 0.8606, + "total_reward": 10.3271, + "completion_rate": 0.9, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.3616, + 0.3455, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "trained", + "task_type": "task1", + "seed": 24, + "steps": 16, + "score": 0.8161, + "total_reward": 9.7931, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.3616, + 0.982, + 0.369, + 0.982, + 0.3645, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "trained", + "task_type": "task1", + "seed": 25, + "steps": 14, + "score": 0.6506, + "total_reward": 9.7585, + "completion_rate": 0.7, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.982, + 0.982, + 0.982, + 0.3645, + 0.3645, + 0.982, + 0.946, + 0.369, + 0.982, + 0.3645, + 0.3645, + 0.982, + 0.3645, + 0.3645 + ] + }, + { + "policy": "trained", + "task_type": "task1", + "seed": 26, + "steps": 16, + "score": 0.7054, + "total_reward": 11.2865, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.982, + 0.982, + 0.982, + 0.3645, + 0.946, + 0.982, + 0.3645, + 0.946, + 0.982, + 0.3645, + 0.3645, + 0.369, + 0.369, + 0.3645, + 0.962 + ] + }, + { + "policy": "trained", + "task_type": "task1", + "seed": 27, + "steps": 15, + "score": 0.6937, + "total_reward": 9.0177, + "completion_rate": 0.6, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.3686, + 0.962, + 0.962, + 0.3896, + 0.391, + 0.391, + 0.391, + 0.962, + 0.3525, + 0.962 + ] + }, + { + "policy": "trained", + "task_type": "task1", + "seed": 28, + "steps": 10, + "score": 0.962, + "total_reward": 10.582, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "trained", + "task_type": "task1", + "seed": 29, + "steps": 11, + "score": 0.911, + "total_reward": 10.9324, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.0, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.962, + 0.3504, + 0.962, + 0.962, + 0.962 + ] + }, + { + "policy": "random", + "task_type": "task2", + "seed": 0, + "steps": 25, + "score": 0.5298, + "total_reward": 9.5365, + "completion_rate": 0.667, + "detection_rate": 1.0, + "trust_calibration": 0.461, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9083, + 0.925, + 0.9167, + 0.02, + 0.8917, + 0.8833, + 0.8667, + 0.2233, + 0.02, + 0.02, + 0.725, + 0.8083, + 0.6917, + 0.775, + 0.1317, + 0.115, + 0.5949 + ] + }, + { + "policy": "random", + "task_type": "task2", + "seed": 1, + "steps": 31, + "score": 0.7252, + "total_reward": 13.054, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.569, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9417, + 0.9833, + 0.8833, + 0.8583, + 0.875, + 0.7583, + 0.8417, + 0.165, + 0.7833, + 0.8, + 0.6833, + 0.735, + 0.1217, + 0.7167, + 0.7, + 0.7083, + 0.849 + ] + }, + { + "policy": "random", + "task_type": "task2", + "seed": 2, + "steps": 27, + "score": 0.6551, + "total_reward": 11.7913, + "completion_rate": 0.867, + "detection_rate": 1.0, + "trust_calibration": 0.579, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3517, + 0.262, + 0.9167, + 0.9083, + 0.8583, + 0.875, + 0.7583, + 0.7417, + 0.825, + 0.7083, + 0.8, + 0.6833, + 0.6667, + 0.75, + 0.0967, + 0.0983, + 0.766 + ] + }, + { + "policy": "random", + "task_type": "task2", + "seed": 3, + "steps": 28, + "score": 0.6154, + "total_reward": 12.9233, + "completion_rate": 0.867, + "detection_rate": 1.0, + "trust_calibration": 0.586, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.99, + 0.9833, + 0.925, + 0.2717, + 0.8, + 0.8917, + 0.9333, + 0.875, + 0.2217, + 0.2233, + 0.7417, + 0.8333, + 0.1483, + 0.7917, + 0.1383, + 0.7333, + 0.75, + 0.7083, + 0.123, + 0.7686 + ] + }, + { + "policy": "random", + "task_type": "task2", + "seed": 4, + "steps": 28, + "score": 0.6575, + "total_reward": 12.4928, + "completion_rate": 0.867, + "detection_rate": 1.0, + "trust_calibration": 0.458, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9417, + 0.9833, + 0.925, + 0.2817, + 0.8667, + 0.8833, + 0.23, + 0.2113, + 0.8417, + 0.8, + 0.775, + 0.7917, + 0.775, + 0.7583, + 0.75, + 0.1067, + 0.1313, + 0.7235 + ] + }, + { + "policy": "random", + "task_type": "task2", + "seed": 5, + "steps": 26, + "score": 0.6206, + "total_reward": 12.4128, + "completion_rate": 0.867, + "detection_rate": 1.0, + "trust_calibration": 0.408, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9417, + 0.9, + 0.9167, + 0.24, + 0.2467, + 0.2383, + 0.7667, + 0.9083, + 0.85, + 0.1967, + 0.725, + 0.8667, + 0.165, + 0.7917, + 0.7833, + 0.7667, + 0.02, + 0.65, + 0.7061 + ] + }, + { + "policy": "random", + "task_type": "task2", + "seed": 6, + "steps": 31, + "score": 0.7065, + "total_reward": 12.7163, + "completion_rate": 0.933, + "detection_rate": 1.0, + "trust_calibration": 0.576, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9083, + 0.8833, + 0.9, + 0.8917, + 0.8833, + 0.875, + 0.8833, + 0.7417, + 0.188, + 0.8667, + 0.13, + 0.7583, + 0.7667, + 0.7583, + 0.7083, + 0.7, + 0.8083 + ] + }, + { + "policy": "random", + "task_type": "task2", + "seed": 7, + "steps": 25, + "score": 0.7366, + "total_reward": 12.5218, + "completion_rate": 0.933, + "detection_rate": 1.0, + "trust_calibration": 0.776, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.8333, + 0.8917, + 0.875, + 0.8917, + 0.775, + 0.8583, + 0.85, + 0.8333, + 0.825, + 0.1817, + 0.8, + 0.6833, + 0.825, + 0.02, + 0.7583, + 0.8784 + ] + }, + { + "policy": "random", + "task_type": "task2", + "seed": 8, + "steps": 25, + "score": 0.7329, + "total_reward": 13.9253, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.425, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9333, + 0.335, + 0.9167, + 0.8667, + 0.8833, + 0.875, + 0.2767, + 0.8583, + 0.9, + 0.8, + 0.8167, + 0.7, + 0.1233, + 0.775, + 0.7667, + 0.8083, + 0.75, + 0.7987 + ] + }, + { + "policy": "random", + "task_type": "task2", + "seed": 9, + "steps": 27, + "score": 0.7062, + "total_reward": 12.712, + "completion_rate": 0.933, + "detection_rate": 1.0, + "trust_calibration": 0.597, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.99, + 0.9333, + 0.925, + 0.8083, + 0.278, + 0.8583, + 0.925, + 0.02, + 0.2233, + 0.8417, + 0.825, + 0.8167, + 0.735, + 0.6833, + 0.7333, + 0.65, + 0.8157 + ] + }, + { + "policy": "random", + "task_type": "task2", + "seed": 10, + "steps": 26, + "score": 0.5723, + "total_reward": 12.0174, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.855, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9417, + 0.925, + 0.9667, + 0.8667, + 0.2483, + 0.8667, + 0.02, + 0.205, + 0.8917, + 0.825, + 0.8167, + 0.775, + 0.1567, + 0.02, + 0.153, + 0.7667, + 0.8083, + 0.105, + 0.1067, + 0.8194 + ] + }, + { + "policy": "random", + "task_type": "task2", + "seed": 11, + "steps": 23, + "score": 0.7197, + "total_reward": 12.955, + "completion_rate": 0.933, + "detection_rate": 1.0, + "trust_calibration": 0.57, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.8333, + 0.925, + 0.9167, + 0.2703, + 0.8667, + 0.2483, + 0.8667, + 0.9083, + 0.02, + 0.8083, + 0.825, + 0.8167, + 0.7683, + 0.7917, + 0.7833, + 0.7417, + 0.8063 + ] + }, + { + "policy": "random", + "task_type": "task2", + "seed": 12, + "steps": 30, + "score": 0.6047, + "total_reward": 12.0935, + "completion_rate": 0.867, + "detection_rate": 1.0, + "trust_calibration": 0.344, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.99, + 0.9, + 0.2717, + 0.9083, + 0.8267, + 0.8833, + 0.8333, + 0.02, + 0.2197, + 0.1883, + 0.18, + 0.7833, + 0.7917, + 0.7417, + 0.7583, + 0.65, + 0.7, + 0.0633, + 0.6839 + ] + }, + { + "policy": "random", + "task_type": "task2", + "seed": 13, + "steps": 30, + "score": 0.6649, + "total_reward": 11.9681, + "completion_rate": 0.933, + "detection_rate": 1.0, + "trust_calibration": 0.271, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9417, + 0.8917, + 0.8, + 0.8833, + 0.2067, + 0.8583, + 0.8083, + 0.02, + 0.7833, + 0.6917, + 0.75, + 0.6583, + 0.75, + 0.7333, + 0.09, + 0.7, + 0.7014 + ] + }, + { + "policy": "random", + "task_type": "task2", + "seed": 14, + "steps": 26, + "score": 0.7146, + "total_reward": 13.5771, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.416, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9417, + 0.825, + 0.8833, + 0.8667, + 0.2383, + 0.875, + 0.8667, + 0.8583, + 0.85, + 0.8333, + 0.7833, + 0.8, + 0.1797, + 0.7833, + 0.775, + 0.1217, + 0.65, + 0.7957 + ] + }, + { + "policy": "random", + "task_type": "task2", + "seed": 15, + "steps": 27, + "score": 0.5573, + "total_reward": 12.2603, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.606, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3037, + 0.9333, + 0.925, + 0.8083, + 0.265, + 0.2567, + 0.8433, + 0.925, + 0.7583, + 0.215, + 0.1967, + 0.1883, + 0.8167, + 0.8, + 0.1567, + 0.675, + 0.7583, + 0.75, + 0.7417, + 0.0883, + 0.732 + ] + }, + { + "policy": "random", + "task_type": "task2", + "seed": 16, + "steps": 23, + "score": 0.5543, + "total_reward": 11.0864, + "completion_rate": 0.733, + "detection_rate": 1.0, + "trust_calibration": 0.437, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9417, + 0.9333, + 0.925, + 0.9167, + 0.9083, + 0.2217, + 0.02, + 0.925, + 0.02, + 0.2133, + 0.8167, + 0.825, + 0.1817, + 0.02, + 0.8, + 0.7583, + 0.775, + 0.1317, + 0.6298 + ] + }, + { + "policy": "random", + "task_type": "task2", + "seed": 17, + "steps": 27, + "score": 0.5694, + "total_reward": 11.9565, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.688, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9417, + 0.02, + 0.28, + 0.2613, + 0.7917, + 0.2483, + 0.875, + 0.2213, + 0.85, + 0.8417, + 0.7933, + 0.233, + 0.8667, + 0.7, + 0.7833, + 0.7667, + 0.7583, + 0.168, + 0.0703, + 0.7609 + ] + }, + { + "policy": "random", + "task_type": "task2", + "seed": 18, + "steps": 23, + "score": 0.6662, + "total_reward": 11.3256, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.602, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9417, + 0.925, + 0.02, + 0.9, + 0.2567, + 0.2483, + 0.875, + 0.7583, + 0.85, + 0.8083, + 0.825, + 0.8083, + 0.85, + 0.02, + 0.75, + 0.7306 + ] + }, + { + "policy": "random", + "task_type": "task2", + "seed": 19, + "steps": 27, + "score": 0.7003, + "total_reward": 12.6055, + "completion_rate": 0.933, + "detection_rate": 1.0, + "trust_calibration": 0.625, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.2633, + 0.9167, + 0.9083, + 0.8917, + 0.8833, + 0.23, + 0.02, + 0.825, + 0.8917, + 0.825, + 0.7083, + 0.8, + 0.6833, + 0.7417, + 0.7583, + 0.7083, + 0.8255 + ] + }, + { + "policy": "random", + "task_type": "task2", + "seed": 20, + "steps": 32, + "score": 0.5557, + "total_reward": 12.7811, + "completion_rate": 0.867, + "detection_rate": 1.0, + "trust_calibration": 0.636, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.2983, + 0.8167, + 0.8, + 0.8917, + 0.8833, + 0.237, + 0.8667, + 0.85, + 0.2037, + 0.2213, + 0.19, + 0.8083, + 0.8, + 0.1997, + 0.7833, + 0.775, + 0.6583, + 0.0787, + 0.725, + 0.1347, + 0.7083, + 0.7861 + ] + }, + { + "policy": "random", + "task_type": "task2", + "seed": 21, + "steps": 23, + "score": 0.7485, + "total_reward": 12.7247, + "completion_rate": 0.933, + "detection_rate": 1.0, + "trust_calibration": 0.523, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9417, + 0.9333, + 0.8917, + 0.875, + 0.8917, + 0.2383, + 0.8667, + 0.8583, + 0.8167, + 0.725, + 0.8083, + 0.8, + 0.02, + 0.7833, + 0.7667, + 0.7897 + ] + }, + { + "policy": "random", + "task_type": "task2", + "seed": 22, + "steps": 31, + "score": 0.4891, + "total_reward": 10.2706, + "completion_rate": 0.667, + "detection_rate": 1.0, + "trust_calibration": 0.544, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3037, + 0.02, + 0.8917, + 0.8667, + 0.2483, + 0.24, + 0.8667, + 0.18, + 0.8083, + 0.875, + 0.1817, + 0.165, + 0.7583, + 0.13, + 0.1217, + 0.75, + 0.0983, + 0.725, + 0.7167, + 0.6236 + ] + }, + { + "policy": "random", + "task_type": "task2", + "seed": 23, + "steps": 20, + "score": 0.7708, + "total_reward": 13.1031, + "completion_rate": 0.933, + "detection_rate": 1.0, + "trust_calibration": 0.755, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.8333, + 0.925, + 0.2787, + 0.9583, + 0.9, + 0.8917, + 0.775, + 0.8667, + 0.85, + 0.8917, + 0.825, + 0.8167, + 0.8083, + 0.8, + 0.7917, + 0.8711 + ] + }, + { + "policy": "random", + "task_type": "task2", + "seed": 24, + "steps": 31, + "score": 0.5498, + "total_reward": 9.8972, + "completion_rate": 0.667, + "detection_rate": 1.0, + "trust_calibration": 0.809, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.2633, + 0.975, + 0.9667, + 0.243, + 0.9083, + 0.8333, + 0.85, + 0.2297, + 0.02, + 0.7917, + 0.02, + 0.1647, + 0.7417, + 0.09, + 0.7333, + 0.7, + 0.7165 + ] + }, + { + "policy": "random", + "task_type": "task2", + "seed": 25, + "steps": 23, + "score": 0.791, + "total_reward": 13.4466, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.623, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9, + 0.9667, + 0.9083, + 0.7917, + 0.8833, + 0.875, + 0.7583, + 0.8167, + 0.8333, + 0.187, + 0.8167, + 0.8083, + 0.6917, + 0.8333, + 0.775, + 0.8679 + ] + }, + { + "policy": "random", + "task_type": "task2", + "seed": 26, + "steps": 29, + "score": 0.5823, + "total_reward": 11.646, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.895, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9, + 0.9167, + 0.9583, + 0.9, + 0.8917, + 0.02, + 0.2297, + 0.8167, + 0.76, + 0.8083, + 0.7917, + 0.1813, + 0.13, + 0.0953, + 0.75, + 0.7917, + 0.1413, + 0.08, + 0.8333 + ] + }, + { + "policy": "random", + "task_type": "task2", + "seed": 27, + "steps": 29, + "score": 0.6402, + "total_reward": 12.1644, + "completion_rate": 0.867, + "detection_rate": 1.0, + "trust_calibration": 0.703, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9583, + 0.28, + 0.2847, + 0.9083, + 0.8667, + 0.2483, + 0.8667, + 0.825, + 0.8083, + 0.7917, + 0.825, + 0.02, + 0.75, + 0.8167, + 0.65, + 0.0703, + 0.725, + 0.8094 + ] + }, + { + "policy": "random", + "task_type": "task2", + "seed": 28, + "steps": 23, + "score": 0.7219, + "total_reward": 12.9944, + "completion_rate": 0.933, + "detection_rate": 1.0, + "trust_calibration": 0.671, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9083, + 0.9167, + 0.9083, + 0.8667, + 0.8833, + 0.875, + 0.8667, + 0.8583, + 0.258, + 0.02, + 0.165, + 0.8167, + 0.8, + 0.8417, + 0.7833, + 0.6667, + 0.8414 + ] + }, + { + "policy": "random", + "task_type": "task2", + "seed": 29, + "steps": 27, + "score": 0.5586, + "total_reward": 10.614, + "completion_rate": 0.733, + "detection_rate": 1.0, + "trust_calibration": 0.478, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9417, + 0.9, + 0.2717, + 0.9083, + 0.02, + 0.8583, + 0.8917, + 0.02, + 0.7417, + 0.8333, + 0.19, + 0.1817, + 0.7667, + 0.7833, + 0.7417, + 0.1133, + 0.0817, + 0.644 + ] + }, + { + "policy": "heuristic", + "task_type": "task2", + "seed": 0, + "steps": 31, + "score": 0.6145, + "total_reward": 12.2902, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.72, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.99, + 0.9833, + 0.975, + 0.2817, + 0.2733, + 0.265, + 0.2467, + 0.875, + 0.8583, + 0.8417, + 0.19, + 0.8083, + 0.7917, + 0.775, + 0.7583, + 0.7417, + 0.09, + 0.0733, + 0.7719 + ] + }, + { + "policy": "heuristic", + "task_type": "task2", + "seed": 1, + "steps": 17, + "score": 0.768, + "total_reward": 13.8236, + "completion_rate": 0.933, + "detection_rate": 1.0, + "trust_calibration": 0.282, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.99, + 0.9833, + 0.975, + 0.2817, + 0.2733, + 0.265, + 0.8917, + 0.8833, + 0.875, + 0.8667, + 0.8583, + 0.85, + 0.8417, + 0.8333, + 0.825, + 0.8167, + 0.7053 + ] + }, + { + "policy": "heuristic", + "task_type": "task2", + "seed": 2, + "steps": 17, + "score": 0.7237, + "total_reward": 13.0266, + "completion_rate": 0.867, + "detection_rate": 1.0, + "trust_calibration": 0.284, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3517, + 0.2883, + 0.925, + 0.9167, + 0.9083, + 0.9, + 0.8917, + 0.8833, + 0.875, + 0.8667, + 0.8583, + 0.85, + 0.8417, + 0.8333, + 0.825, + 0.1787, + 0.6626 + ] + }, + { + "policy": "heuristic", + "task_type": "task2", + "seed": 3, + "steps": 30, + "score": 0.7823, + "total_reward": 12.5171, + "completion_rate": 0.933, + "detection_rate": 1.0, + "trust_calibration": 0.844, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9333, + 0.9167, + 0.9, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.1317, + 0.75, + 0.7333, + 0.7167, + 0.9021 + ] + }, + { + "policy": "heuristic", + "task_type": "task2", + "seed": 4, + "steps": 17, + "score": 0.7999, + "total_reward": 14.3981, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.426, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9417, + 0.9333, + 0.925, + 0.9167, + 0.9083, + 0.9, + 0.2537, + 0.8833, + 0.875, + 0.8667, + 0.8583, + 0.85, + 0.8417, + 0.8333, + 0.825, + 0.1787, + 0.7991 + ] + }, + { + "policy": "heuristic", + "task_type": "task2", + "seed": 5, + "steps": 30, + "score": 0.6545, + "total_reward": 10.4723, + "completion_rate": 0.733, + "detection_rate": 1.0, + "trust_calibration": 0.816, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9333, + 0.9167, + 0.9, + 0.2483, + 0.8667, + 0.215, + 0.8333, + 0.8167, + 0.165, + 0.7833, + 0.1317, + 0.75, + 0.7333, + 0.7167, + 0.7623 + ] + }, + { + "policy": "heuristic", + "task_type": "task2", + "seed": 6, + "steps": 15, + "score": 0.8749, + "total_reward": 13.9981, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.28, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9417, + 0.9333, + 0.925, + 0.9167, + 0.9083, + 0.9, + 0.8917, + 0.8833, + 0.875, + 0.8667, + 0.8583, + 0.85, + 0.8417, + 0.8333, + 0.7481 + ] + }, + { + "policy": "heuristic", + "task_type": "task2", + "seed": 7, + "steps": 28, + "score": 0.7465, + "total_reward": 13.4373, + "completion_rate": 0.933, + "detection_rate": 1.0, + "trust_calibration": 0.833, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9417, + 0.9333, + 0.313, + 0.9167, + 0.3163, + 0.318, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.8983 + ] + }, + { + "policy": "heuristic", + "task_type": "task2", + "seed": 8, + "steps": 17, + "score": 0.7992, + "total_reward": 14.3856, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.39, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9417, + 0.2953, + 0.925, + 0.9167, + 0.9083, + 0.9, + 0.2537, + 0.8833, + 0.875, + 0.8667, + 0.8583, + 0.85, + 0.8417, + 0.8333, + 0.825, + 0.8167, + 0.7866 + ] + }, + { + "policy": "heuristic", + "task_type": "task2", + "seed": 9, + "steps": 28, + "score": 0.7248, + "total_reward": 13.7712, + "completion_rate": 0.933, + "detection_rate": 1.0, + "trust_calibration": 0.834, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9417, + 0.9333, + 0.925, + 0.3147, + 0.9083, + 0.318, + 0.3117, + 0.3033, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.8986 + ] + }, + { + "policy": "heuristic", + "task_type": "task2", + "seed": 10, + "steps": 16, + "score": 0.797, + "total_reward": 13.5485, + "completion_rate": 0.933, + "detection_rate": 1.0, + "trust_calibration": 0.432, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9417, + 0.9333, + 0.925, + 0.9167, + 0.9083, + 0.9, + 0.8917, + 0.8833, + 0.875, + 0.8667, + 0.8583, + 0.212, + 0.2037, + 0.8333, + 0.825, + 0.7578 + ] + }, + { + "policy": "heuristic", + "task_type": "task2", + "seed": 11, + "steps": 30, + "score": 0.6763, + "total_reward": 12.8505, + "completion_rate": 0.867, + "detection_rate": 1.0, + "trust_calibration": 0.825, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.99, + 0.9833, + 0.975, + 0.2817, + 0.2733, + 0.265, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.0817, + 0.8522 + ] + }, + { + "policy": "heuristic", + "task_type": "task2", + "seed": 12, + "steps": 30, + "score": 0.8246, + "total_reward": 13.1941, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.84, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9333, + 0.9167, + 0.9, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.9441 + ] + }, + { + "policy": "heuristic", + "task_type": "task2", + "seed": 13, + "steps": 30, + "score": 0.7935, + "total_reward": 13.4903, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.839, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9417, + 0.2883, + 0.9167, + 0.9, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.9436 + ] + }, + { + "policy": "heuristic", + "task_type": "task2", + "seed": 14, + "steps": 18, + "score": 0.7309, + "total_reward": 13.8869, + "completion_rate": 0.933, + "detection_rate": 1.0, + "trust_calibration": 0.264, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9417, + 0.9333, + 0.313, + 0.3147, + 0.2733, + 0.9, + 0.8917, + 0.8833, + 0.875, + 0.8667, + 0.8583, + 0.85, + 0.8417, + 0.8333, + 0.187, + 0.8167, + 0.8083, + 0.6989 + ] + }, + { + "policy": "heuristic", + "task_type": "task2", + "seed": 15, + "steps": 18, + "score": 0.7649, + "total_reward": 14.5326, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.33, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3097, + 0.9333, + 0.925, + 0.9167, + 0.2703, + 0.9, + 0.8917, + 0.8833, + 0.875, + 0.2287, + 0.8583, + 0.85, + 0.8417, + 0.8333, + 0.825, + 0.8167, + 0.8083, + 0.7656 + ] + }, + { + "policy": "heuristic", + "task_type": "task2", + "seed": 16, + "steps": 30, + "score": 0.8246, + "total_reward": 13.1941, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.84, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9333, + 0.9167, + 0.9, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.9441 + ] + }, + { + "policy": "heuristic", + "task_type": "task2", + "seed": 17, + "steps": 30, + "score": 0.7823, + "total_reward": 12.517, + "completion_rate": 0.933, + "detection_rate": 1.0, + "trust_calibration": 0.844, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9333, + 0.9167, + 0.265, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.902 + ] + }, + { + "policy": "heuristic", + "task_type": "task2", + "seed": 18, + "steps": 15, + "score": 0.8749, + "total_reward": 13.998, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.28, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9417, + 0.9333, + 0.925, + 0.9167, + 0.9083, + 0.9, + 0.8917, + 0.8833, + 0.875, + 0.8667, + 0.8583, + 0.85, + 0.8417, + 0.8333, + 0.748 + ] + }, + { + "policy": "heuristic", + "task_type": "task2", + "seed": 19, + "steps": 17, + "score": 0.7966, + "total_reward": 14.3395, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.279, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.2967, + 0.9333, + 0.925, + 0.9167, + 0.9083, + 0.262, + 0.8917, + 0.8833, + 0.875, + 0.8667, + 0.8583, + 0.85, + 0.8417, + 0.8333, + 0.825, + 0.8167, + 0.7475 + ] + }, + { + "policy": "heuristic", + "task_type": "task2", + "seed": 20, + "steps": 30, + "score": 0.6558, + "total_reward": 11.8048, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.82, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9417, + 0.9333, + 0.313, + 0.3147, + 0.9, + 0.8833, + 0.2317, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.1483, + 0.7667, + 0.75, + 0.0983, + 0.7167, + 0.8071 + ] + }, + { + "policy": "heuristic", + "task_type": "task2", + "seed": 21, + "steps": 30, + "score": 0.8246, + "total_reward": 13.1941, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.84, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9333, + 0.9167, + 0.9, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.9441 + ] + }, + { + "policy": "heuristic", + "task_type": "task2", + "seed": 22, + "steps": 22, + "score": 0.6604, + "total_reward": 15.1886, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.471, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3517, + 0.9333, + 0.925, + 0.2787, + 0.9083, + 0.9, + 0.8917, + 0.8833, + 0.237, + 0.8667, + 0.2203, + 0.85, + 0.8417, + 0.1953, + 0.825, + 0.1787, + 0.8083, + 0.8, + 0.1537, + 0.7833, + 0.775, + 0.8149 + ] + }, + { + "policy": "heuristic", + "task_type": "task2", + "seed": 23, + "steps": 16, + "score": 0.793, + "total_reward": 13.4804, + "completion_rate": 0.933, + "detection_rate": 1.0, + "trust_calibration": 0.212, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9417, + 0.3113, + 0.28, + 0.9167, + 0.9083, + 0.9, + 0.8917, + 0.8833, + 0.875, + 0.8667, + 0.8583, + 0.85, + 0.8417, + 0.8333, + 0.825, + 0.6808 + ] + }, + { + "policy": "heuristic", + "task_type": "task2", + "seed": 24, + "steps": 30, + "score": 0.6768, + "total_reward": 12.8598, + "completion_rate": 0.867, + "detection_rate": 1.0, + "trust_calibration": 0.824, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9417, + 0.3113, + 0.975, + 0.3267, + 0.9583, + 0.265, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.115, + 0.7333, + 0.7167, + 0.8518 + ] + }, + { + "policy": "heuristic", + "task_type": "task2", + "seed": 25, + "steps": 21, + "score": 0.5985, + "total_reward": 13.1666, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.709, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.99, + 0.9833, + 0.975, + 0.2817, + 0.2733, + 0.265, + 0.8917, + 0.2383, + 0.875, + 0.8667, + 0.8583, + 0.85, + 0.8417, + 0.1883, + 0.18, + 0.8167, + 0.1633, + 0.155, + 0.1467, + 0.7833, + 0.7683 + ] + }, + { + "policy": "heuristic", + "task_type": "task2", + "seed": 26, + "steps": 22, + "score": 0.5962, + "total_reward": 13.1159, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.324, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.99, + 0.9833, + 0.975, + 0.2817, + 0.8683, + 0.265, + 0.2567, + 0.9333, + 0.925, + 0.2317, + 0.2233, + 0.26, + 0.1983, + 0.825, + 0.8167, + 0.8083, + 0.162, + 0.7917, + 0.1453, + 0.775, + 0.6336 + ] + }, + { + "policy": "heuristic", + "task_type": "task2", + "seed": 27, + "steps": 30, + "score": 0.6573, + "total_reward": 13.146, + "completion_rate": 0.867, + "detection_rate": 1.0, + "trust_calibration": 0.829, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9417, + 0.9333, + 0.313, + 0.9167, + 0.9083, + 0.318, + 0.3117, + 0.3033, + 0.8667, + 0.85, + 0.1983, + 0.8167, + 0.8, + 0.1483, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.8533 + ] + }, + { + "policy": "heuristic", + "task_type": "task2", + "seed": 28, + "steps": 15, + "score": 0.8749, + "total_reward": 13.9979, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.28, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9417, + 0.9333, + 0.925, + 0.9167, + 0.9083, + 0.9, + 0.8917, + 0.8833, + 0.875, + 0.8667, + 0.8583, + 0.85, + 0.8417, + 0.8333, + 0.7479 + ] + }, + { + "policy": "heuristic", + "task_type": "task2", + "seed": 29, + "steps": 17, + "score": 0.7998, + "total_reward": 14.3965, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.421, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9417, + 0.9333, + 0.925, + 0.9167, + 0.9083, + 0.9, + 0.8917, + 0.2453, + 0.875, + 0.8667, + 0.8583, + 0.85, + 0.2037, + 0.8333, + 0.825, + 0.8167, + 0.7975 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task2", + "seed": 0, + "steps": 30, + "score": 0.7823, + "total_reward": 12.5171, + "completion_rate": 0.933, + "detection_rate": 1.0, + "trust_calibration": 0.844, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9333, + 0.9167, + 0.9, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.1317, + 0.75, + 0.7333, + 0.7167, + 0.9021 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task2", + "seed": 1, + "steps": 30, + "score": 0.8246, + "total_reward": 13.1941, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.84, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9333, + 0.9167, + 0.9, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.9441 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task2", + "seed": 2, + "steps": 30, + "score": 0.7399, + "total_reward": 11.8385, + "completion_rate": 0.867, + "detection_rate": 1.0, + "trust_calibration": 0.843, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.2983, + 0.2817, + 0.9, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.8585 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task2", + "seed": 3, + "steps": 30, + "score": 0.7823, + "total_reward": 12.5171, + "completion_rate": 0.933, + "detection_rate": 1.0, + "trust_calibration": 0.844, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9333, + 0.9167, + 0.9, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.1317, + 0.75, + 0.7333, + 0.7167, + 0.9021 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task2", + "seed": 4, + "steps": 30, + "score": 0.7823, + "total_reward": 12.5171, + "completion_rate": 0.933, + "detection_rate": 1.0, + "trust_calibration": 0.844, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9333, + 0.9167, + 0.9, + 0.8833, + 0.8667, + 0.85, + 0.1983, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.9021 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task2", + "seed": 5, + "steps": 30, + "score": 0.6545, + "total_reward": 10.4723, + "completion_rate": 0.733, + "detection_rate": 1.0, + "trust_calibration": 0.816, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9333, + 0.9167, + 0.9, + 0.2483, + 0.8667, + 0.215, + 0.8333, + 0.8167, + 0.165, + 0.7833, + 0.1317, + 0.75, + 0.7333, + 0.7167, + 0.7623 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task2", + "seed": 6, + "steps": 30, + "score": 0.8246, + "total_reward": 13.1941, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.84, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9333, + 0.9167, + 0.9, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.9441 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task2", + "seed": 7, + "steps": 30, + "score": 0.8246, + "total_reward": 13.1942, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.84, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9333, + 0.9167, + 0.9, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.9442 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task2", + "seed": 8, + "steps": 30, + "score": 0.7399, + "total_reward": 11.8383, + "completion_rate": 0.867, + "detection_rate": 1.0, + "trust_calibration": 0.843, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9333, + 0.2817, + 0.9, + 0.8833, + 0.8667, + 0.85, + 0.1983, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.8583 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task2", + "seed": 9, + "steps": 30, + "score": 0.8246, + "total_reward": 13.1941, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.84, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9333, + 0.9167, + 0.9, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.9441 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task2", + "seed": 10, + "steps": 30, + "score": 0.7398, + "total_reward": 11.8373, + "completion_rate": 0.867, + "detection_rate": 1.0, + "trust_calibration": 0.84, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9333, + 0.9167, + 0.9, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.115, + 0.0983, + 0.7167, + 0.8573 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task2", + "seed": 11, + "steps": 30, + "score": 0.7823, + "total_reward": 12.517, + "completion_rate": 0.933, + "detection_rate": 1.0, + "trust_calibration": 0.844, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9333, + 0.9167, + 0.265, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.902 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task2", + "seed": 12, + "steps": 30, + "score": 0.8246, + "total_reward": 13.1941, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.84, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9333, + 0.9167, + 0.9, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.9441 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task2", + "seed": 13, + "steps": 30, + "score": 0.8246, + "total_reward": 13.1941, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.84, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9333, + 0.9167, + 0.9, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.9441 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task2", + "seed": 14, + "steps": 30, + "score": 0.7398, + "total_reward": 11.8376, + "completion_rate": 0.867, + "detection_rate": 1.0, + "trust_calibration": 0.841, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9333, + 0.9167, + 0.9, + 0.2483, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.8576 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task2", + "seed": 15, + "steps": 30, + "score": 0.6973, + "total_reward": 11.1569, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.834, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.2983, + 0.9167, + 0.9, + 0.8833, + 0.2317, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.1483, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.8119 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task2", + "seed": 16, + "steps": 30, + "score": 0.8246, + "total_reward": 13.1941, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.84, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9333, + 0.9167, + 0.9, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.9441 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task2", + "seed": 17, + "steps": 30, + "score": 0.7823, + "total_reward": 12.517, + "completion_rate": 0.933, + "detection_rate": 1.0, + "trust_calibration": 0.844, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9333, + 0.9167, + 0.265, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.902 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task2", + "seed": 18, + "steps": 30, + "score": 0.8246, + "total_reward": 13.1941, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.84, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9333, + 0.9167, + 0.9, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.9441 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task2", + "seed": 19, + "steps": 30, + "score": 0.7823, + "total_reward": 12.517, + "completion_rate": 0.933, + "detection_rate": 1.0, + "trust_calibration": 0.844, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9333, + 0.9167, + 0.9, + 0.8833, + 0.8667, + 0.215, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.902 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task2", + "seed": 20, + "steps": 30, + "score": 0.6117, + "total_reward": 9.7864, + "completion_rate": 0.667, + "detection_rate": 1.0, + "trust_calibration": 0.795, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.2983, + 0.9167, + 0.9, + 0.2483, + 0.8667, + 0.85, + 0.1983, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.115, + 0.7333, + 0.7167, + 0.7114 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task2", + "seed": 21, + "steps": 30, + "score": 0.8246, + "total_reward": 13.1941, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.84, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9333, + 0.9167, + 0.9, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.9441 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task2", + "seed": 22, + "steps": 30, + "score": 0.6545, + "total_reward": 10.4728, + "completion_rate": 0.733, + "detection_rate": 1.0, + "trust_calibration": 0.817, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.2983, + 0.9167, + 0.9, + 0.2483, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.1317, + 0.75, + 0.7333, + 0.0817, + 0.7628 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task2", + "seed": 23, + "steps": 30, + "score": 0.7399, + "total_reward": 11.8385, + "completion_rate": 0.867, + "detection_rate": 1.0, + "trust_calibration": 0.843, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.2983, + 0.2817, + 0.9, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.8585 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task2", + "seed": 24, + "steps": 30, + "score": 0.7398, + "total_reward": 11.8376, + "completion_rate": 0.867, + "detection_rate": 1.0, + "trust_calibration": 0.841, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9333, + 0.9167, + 0.9, + 0.2483, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.8576 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task2", + "seed": 25, + "steps": 30, + "score": 0.7399, + "total_reward": 11.838, + "completion_rate": 0.867, + "detection_rate": 1.0, + "trust_calibration": 0.842, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9333, + 0.2817, + 0.9, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.1817, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.858 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task2", + "seed": 26, + "steps": 30, + "score": 0.7398, + "total_reward": 11.8373, + "completion_rate": 0.867, + "detection_rate": 1.0, + "trust_calibration": 0.84, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9333, + 0.9167, + 0.9, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.115, + 0.0983, + 0.7167, + 0.8573 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task2", + "seed": 27, + "steps": 30, + "score": 0.6972, + "total_reward": 11.156, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.831, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9333, + 0.9167, + 0.265, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.1317, + 0.75, + 0.7333, + 0.0817, + 0.811 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task2", + "seed": 28, + "steps": 30, + "score": 0.8246, + "total_reward": 13.1941, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.84, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9333, + 0.9167, + 0.9, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.9441 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task2", + "seed": 29, + "steps": 30, + "score": 0.7398, + "total_reward": 11.8373, + "completion_rate": 0.867, + "detection_rate": 1.0, + "trust_calibration": 0.84, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9333, + 0.9167, + 0.9, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.1817, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.0983, + 0.7167, + 0.8573 + ] + }, + { + "policy": "trained", + "task_type": "task2", + "seed": 0, + "steps": 31, + "score": 0.6145, + "total_reward": 12.2902, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.72, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.99, + 0.9833, + 0.975, + 0.2817, + 0.2733, + 0.265, + 0.2467, + 0.875, + 0.8583, + 0.8417, + 0.19, + 0.8083, + 0.7917, + 0.775, + 0.7583, + 0.7417, + 0.09, + 0.0733, + 0.7719 + ] + }, + { + "policy": "trained", + "task_type": "task2", + "seed": 1, + "steps": 17, + "score": 0.768, + "total_reward": 13.8236, + "completion_rate": 0.933, + "detection_rate": 1.0, + "trust_calibration": 0.282, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.99, + 0.9833, + 0.975, + 0.2817, + 0.2733, + 0.265, + 0.8917, + 0.8833, + 0.875, + 0.8667, + 0.8583, + 0.85, + 0.8417, + 0.8333, + 0.825, + 0.8167, + 0.7053 + ] + }, + { + "policy": "trained", + "task_type": "task2", + "seed": 2, + "steps": 17, + "score": 0.7237, + "total_reward": 13.0266, + "completion_rate": 0.867, + "detection_rate": 1.0, + "trust_calibration": 0.284, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3517, + 0.2883, + 0.925, + 0.9167, + 0.9083, + 0.9, + 0.8917, + 0.8833, + 0.875, + 0.8667, + 0.8583, + 0.85, + 0.8417, + 0.8333, + 0.825, + 0.1787, + 0.6626 + ] + }, + { + "policy": "trained", + "task_type": "task2", + "seed": 3, + "steps": 30, + "score": 0.7823, + "total_reward": 12.5171, + "completion_rate": 0.933, + "detection_rate": 1.0, + "trust_calibration": 0.844, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9333, + 0.9167, + 0.9, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.1317, + 0.75, + 0.7333, + 0.7167, + 0.9021 + ] + }, + { + "policy": "trained", + "task_type": "task2", + "seed": 4, + "steps": 17, + "score": 0.7999, + "total_reward": 14.3981, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.426, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9417, + 0.9333, + 0.925, + 0.9167, + 0.9083, + 0.9, + 0.2537, + 0.8833, + 0.875, + 0.8667, + 0.8583, + 0.85, + 0.8417, + 0.8333, + 0.825, + 0.1787, + 0.7991 + ] + }, + { + "policy": "trained", + "task_type": "task2", + "seed": 5, + "steps": 30, + "score": 0.6545, + "total_reward": 10.4723, + "completion_rate": 0.733, + "detection_rate": 1.0, + "trust_calibration": 0.816, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9333, + 0.9167, + 0.9, + 0.2483, + 0.8667, + 0.215, + 0.8333, + 0.8167, + 0.165, + 0.7833, + 0.1317, + 0.75, + 0.7333, + 0.7167, + 0.7623 + ] + }, + { + "policy": "trained", + "task_type": "task2", + "seed": 6, + "steps": 15, + "score": 0.8749, + "total_reward": 13.9981, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.28, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9417, + 0.9333, + 0.925, + 0.9167, + 0.9083, + 0.9, + 0.8917, + 0.8833, + 0.875, + 0.8667, + 0.8583, + 0.85, + 0.8417, + 0.8333, + 0.7481 + ] + }, + { + "policy": "trained", + "task_type": "task2", + "seed": 7, + "steps": 28, + "score": 0.7465, + "total_reward": 13.4373, + "completion_rate": 0.933, + "detection_rate": 1.0, + "trust_calibration": 0.833, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9417, + 0.9333, + 0.313, + 0.9167, + 0.3163, + 0.318, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.8983 + ] + }, + { + "policy": "trained", + "task_type": "task2", + "seed": 8, + "steps": 17, + "score": 0.7992, + "total_reward": 14.3856, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.39, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9417, + 0.2953, + 0.925, + 0.9167, + 0.9083, + 0.9, + 0.2537, + 0.8833, + 0.875, + 0.8667, + 0.8583, + 0.85, + 0.8417, + 0.8333, + 0.825, + 0.8167, + 0.7866 + ] + }, + { + "policy": "trained", + "task_type": "task2", + "seed": 9, + "steps": 28, + "score": 0.7248, + "total_reward": 13.7712, + "completion_rate": 0.933, + "detection_rate": 1.0, + "trust_calibration": 0.834, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9417, + 0.9333, + 0.925, + 0.3147, + 0.9083, + 0.318, + 0.3117, + 0.3033, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.8986 + ] + }, + { + "policy": "trained", + "task_type": "task2", + "seed": 10, + "steps": 16, + "score": 0.797, + "total_reward": 13.5485, + "completion_rate": 0.933, + "detection_rate": 1.0, + "trust_calibration": 0.432, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9417, + 0.9333, + 0.925, + 0.9167, + 0.9083, + 0.9, + 0.8917, + 0.8833, + 0.875, + 0.8667, + 0.8583, + 0.212, + 0.2037, + 0.8333, + 0.825, + 0.7578 + ] + }, + { + "policy": "trained", + "task_type": "task2", + "seed": 11, + "steps": 30, + "score": 0.6763, + "total_reward": 12.8505, + "completion_rate": 0.867, + "detection_rate": 1.0, + "trust_calibration": 0.825, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.99, + 0.9833, + 0.975, + 0.2817, + 0.2733, + 0.265, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.0817, + 0.8522 + ] + }, + { + "policy": "trained", + "task_type": "task2", + "seed": 12, + "steps": 30, + "score": 0.8246, + "total_reward": 13.1941, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.84, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9333, + 0.9167, + 0.9, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.9441 + ] + }, + { + "policy": "trained", + "task_type": "task2", + "seed": 13, + "steps": 30, + "score": 0.7935, + "total_reward": 13.4903, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.839, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9417, + 0.2883, + 0.9167, + 0.9, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.9436 + ] + }, + { + "policy": "trained", + "task_type": "task2", + "seed": 14, + "steps": 18, + "score": 0.7309, + "total_reward": 13.8869, + "completion_rate": 0.933, + "detection_rate": 1.0, + "trust_calibration": 0.264, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9417, + 0.9333, + 0.313, + 0.3147, + 0.2733, + 0.9, + 0.8917, + 0.8833, + 0.875, + 0.8667, + 0.8583, + 0.85, + 0.8417, + 0.8333, + 0.187, + 0.8167, + 0.8083, + 0.6989 + ] + }, + { + "policy": "trained", + "task_type": "task2", + "seed": 15, + "steps": 18, + "score": 0.7649, + "total_reward": 14.5326, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.33, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3097, + 0.9333, + 0.925, + 0.9167, + 0.2703, + 0.9, + 0.8917, + 0.8833, + 0.875, + 0.2287, + 0.8583, + 0.85, + 0.8417, + 0.8333, + 0.825, + 0.8167, + 0.8083, + 0.7656 + ] + }, + { + "policy": "trained", + "task_type": "task2", + "seed": 16, + "steps": 30, + "score": 0.8246, + "total_reward": 13.1941, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.84, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9333, + 0.9167, + 0.9, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.9441 + ] + }, + { + "policy": "trained", + "task_type": "task2", + "seed": 17, + "steps": 30, + "score": 0.7823, + "total_reward": 12.517, + "completion_rate": 0.933, + "detection_rate": 1.0, + "trust_calibration": 0.844, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9333, + 0.9167, + 0.265, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.902 + ] + }, + { + "policy": "trained", + "task_type": "task2", + "seed": 18, + "steps": 15, + "score": 0.8749, + "total_reward": 13.998, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.28, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9417, + 0.9333, + 0.925, + 0.9167, + 0.9083, + 0.9, + 0.8917, + 0.8833, + 0.875, + 0.8667, + 0.8583, + 0.85, + 0.8417, + 0.8333, + 0.748 + ] + }, + { + "policy": "trained", + "task_type": "task2", + "seed": 19, + "steps": 17, + "score": 0.7966, + "total_reward": 14.3395, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.279, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.2967, + 0.9333, + 0.925, + 0.9167, + 0.9083, + 0.262, + 0.8917, + 0.8833, + 0.875, + 0.8667, + 0.8583, + 0.85, + 0.8417, + 0.8333, + 0.825, + 0.8167, + 0.7475 + ] + }, + { + "policy": "trained", + "task_type": "task2", + "seed": 20, + "steps": 30, + "score": 0.6558, + "total_reward": 11.8048, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.82, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9417, + 0.9333, + 0.313, + 0.3147, + 0.9, + 0.8833, + 0.2317, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.1483, + 0.7667, + 0.75, + 0.0983, + 0.7167, + 0.8071 + ] + }, + { + "policy": "trained", + "task_type": "task2", + "seed": 21, + "steps": 30, + "score": 0.8246, + "total_reward": 13.1941, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.84, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9333, + 0.9167, + 0.9, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.9441 + ] + }, + { + "policy": "trained", + "task_type": "task2", + "seed": 22, + "steps": 22, + "score": 0.6604, + "total_reward": 15.1886, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.471, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3517, + 0.9333, + 0.925, + 0.2787, + 0.9083, + 0.9, + 0.8917, + 0.8833, + 0.237, + 0.8667, + 0.2203, + 0.85, + 0.8417, + 0.1953, + 0.825, + 0.1787, + 0.8083, + 0.8, + 0.1537, + 0.7833, + 0.775, + 0.8149 + ] + }, + { + "policy": "trained", + "task_type": "task2", + "seed": 23, + "steps": 16, + "score": 0.793, + "total_reward": 13.4804, + "completion_rate": 0.933, + "detection_rate": 1.0, + "trust_calibration": 0.212, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9417, + 0.3113, + 0.28, + 0.9167, + 0.9083, + 0.9, + 0.8917, + 0.8833, + 0.875, + 0.8667, + 0.8583, + 0.85, + 0.8417, + 0.8333, + 0.825, + 0.6808 + ] + }, + { + "policy": "trained", + "task_type": "task2", + "seed": 24, + "steps": 30, + "score": 0.6768, + "total_reward": 12.8598, + "completion_rate": 0.867, + "detection_rate": 1.0, + "trust_calibration": 0.824, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9417, + 0.3113, + 0.975, + 0.3267, + 0.9583, + 0.265, + 0.8833, + 0.8667, + 0.85, + 0.8333, + 0.8167, + 0.8, + 0.7833, + 0.7667, + 0.115, + 0.7333, + 0.7167, + 0.8518 + ] + }, + { + "policy": "trained", + "task_type": "task2", + "seed": 25, + "steps": 21, + "score": 0.5985, + "total_reward": 13.1666, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.709, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.99, + 0.9833, + 0.975, + 0.2817, + 0.2733, + 0.265, + 0.8917, + 0.2383, + 0.875, + 0.8667, + 0.8583, + 0.85, + 0.8417, + 0.1883, + 0.18, + 0.8167, + 0.1633, + 0.155, + 0.1467, + 0.7833, + 0.7683 + ] + }, + { + "policy": "trained", + "task_type": "task2", + "seed": 26, + "steps": 22, + "score": 0.5962, + "total_reward": 13.1159, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.324, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.99, + 0.9833, + 0.975, + 0.2817, + 0.8683, + 0.265, + 0.2567, + 0.9333, + 0.925, + 0.2317, + 0.2233, + 0.26, + 0.1983, + 0.825, + 0.8167, + 0.8083, + 0.162, + 0.7917, + 0.1453, + 0.775, + 0.6336 + ] + }, + { + "policy": "trained", + "task_type": "task2", + "seed": 27, + "steps": 30, + "score": 0.6573, + "total_reward": 13.146, + "completion_rate": 0.867, + "detection_rate": 1.0, + "trust_calibration": 0.829, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9417, + 0.9333, + 0.313, + 0.9167, + 0.9083, + 0.318, + 0.3117, + 0.3033, + 0.8667, + 0.85, + 0.1983, + 0.8167, + 0.8, + 0.1483, + 0.7667, + 0.75, + 0.7333, + 0.7167, + 0.8533 + ] + }, + { + "policy": "trained", + "task_type": "task2", + "seed": 28, + "steps": 15, + "score": 0.8749, + "total_reward": 13.9979, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.28, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9417, + 0.9333, + 0.925, + 0.9167, + 0.9083, + 0.9, + 0.8917, + 0.8833, + 0.875, + 0.8667, + 0.8583, + 0.85, + 0.8417, + 0.8333, + 0.7479 + ] + }, + { + "policy": "trained", + "task_type": "task2", + "seed": 29, + "steps": 17, + "score": 0.7998, + "total_reward": 14.3965, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.421, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9417, + 0.9333, + 0.925, + 0.9167, + 0.9083, + 0.9, + 0.8917, + 0.2453, + 0.875, + 0.8667, + 0.8583, + 0.85, + 0.2037, + 0.8333, + 0.825, + 0.8167, + 0.7975 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 0, + "steps": 36, + "score": 0.6105, + "total_reward": 15.2622, + "completion_rate": 0.75, + "detection_rate": 0.0, + "trust_calibration": 0.884, + "adversarial_detections": 0, + "adversarial_poisonings": 1, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.8956, + 0.9233, + 0.9211, + 0.02, + 0.9144, + 0.9122, + 0.9078, + 0.3556, + 0.02, + 0.02, + 0.8467, + 0.8922, + 0.8378, + 0.8833, + 0.9061, + 0.3067, + 0.02, + 0.8722, + 0.073, + 0.8306, + 0.9061, + 0.3397, + 0.3044, + 0.5035 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 1, + "steps": 40, + "score": 0.7205, + "total_reward": 18.0135, + "completion_rate": 0.9, + "detection_rate": 1.0, + "trust_calibration": 0.662, + "adversarial_detections": 1, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9506, + 0.8889, + 0.8822, + 0.91, + 0.8556, + 0.9011, + 0.3167, + 0.8622, + 0.89, + 0.8356, + 0.8633, + 0.3011, + 0.8444, + 0.8378, + 0.8656, + 0.8261, + 0.8589, + 0.8961, + 0.02, + 0.3, + 0.8106, + 0.2933, + 0.7916 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 2, + "steps": 37, + "score": 0.7627, + "total_reward": 17.5411, + "completion_rate": 0.9, + "detection_rate": 1.0, + "trust_calibration": 0.752, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3778, + 0.3203, + 0.9211, + 0.9189, + 0.8822, + 0.91, + 0.8556, + 0.8511, + 0.8967, + 0.8422, + 0.89, + 0.8356, + 0.8311, + 0.8767, + 0.2944, + 0.8972, + 0.87, + 0.8656, + 0.8789, + 0.02, + 0.8722, + 0.8207 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 3, + "steps": 36, + "score": 0.6303, + "total_reward": 16.3887, + "completion_rate": 0.75, + "detection_rate": 0.0, + "trust_calibration": 0.798, + "adversarial_detections": 0, + "adversarial_poisonings": 1, + "status": "failed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9528, + 0.9506, + 0.9233, + 0.3411, + 0.8667, + 0.9144, + 0.9372, + 0.91, + 0.3278, + 0.3556, + 0.8511, + 0.8989, + 0.2922, + 0.8878, + 0.3056, + 0.8489, + 0.8767, + 0.8922, + 0.333, + 0.2878, + 0.8283, + 0.8589, + 0.3297, + 0.3552, + 0.01 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 4, + "steps": 41, + "score": 0.6894, + "total_reward": 18.6138, + "completion_rate": 0.85, + "detection_rate": 1.0, + "trust_calibration": 0.405, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9506, + 0.9233, + 0.3711, + 0.8844, + 0.9122, + 0.33, + 0.3186, + 0.9011, + 0.8667, + 0.86, + 0.8878, + 0.8833, + 0.8789, + 0.8767, + 0.8994, + 0.3352, + 0.8678, + 0.3033, + 0.8239, + 0.8744, + 0.8678, + 0.2656, + 0.2933, + 0.2911, + 0.7076 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 5, + "steps": 31, + "score": 0.6062, + "total_reward": 15.1538, + "completion_rate": 0.75, + "detection_rate": 0.0, + "trust_calibration": 0.816, + "adversarial_detections": 0, + "adversarial_poisonings": 2, + "status": "failed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.8933, + 0.9211, + 0.3367, + 0.3344, + 0.3322, + 0.8578, + 0.9306, + 0.9033, + 0.3211, + 0.8467, + 0.9194, + 0.32, + 0.8878, + 0.8856, + 0.8811, + 0.02, + 0.8394, + 0.0752, + 0.87, + 0.8678, + 0.02, + 0.8883, + 0.01 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 6, + "steps": 39, + "score": 0.6337, + "total_reward": 15.8429, + "completion_rate": 0.8, + "detection_rate": 0.0, + "trust_calibration": 0.872, + "adversarial_detections": 0, + "adversarial_poisonings": 2, + "status": "failed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.8956, + 0.8889, + 0.9167, + 0.9144, + 0.9122, + 0.91, + 0.9006, + 0.8511, + 0.3197, + 0.9194, + 0.28, + 0.8556, + 0.8811, + 0.8789, + 0.8422, + 0.8856, + 0.3311, + 0.8589, + 0.0597, + 0.3222, + 0.27, + 0.8728, + 0.02, + 0.01 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 7, + "steps": 32, + "score": 0.7179, + "total_reward": 15.793, + "completion_rate": 0.85, + "detection_rate": 0.0, + "trust_calibration": 0.869, + "adversarial_detections": 0, + "adversarial_poisonings": 1, + "status": "failed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.8756, + 0.8911, + 0.8867, + 0.9144, + 0.86, + 0.9056, + 0.9033, + 0.8989, + 0.8967, + 0.3444, + 0.89, + 0.8356, + 0.9083, + 0.02, + 0.8789, + 0.8744, + 0.87, + 0.8928, + 0.8633, + 0.3111, + 0.01 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 8, + "steps": 31, + "score": 0.7087, + "total_reward": 16.3004, + "completion_rate": 0.85, + "detection_rate": 0.0, + "trust_calibration": 0.701, + "adversarial_detections": 0, + "adversarial_poisonings": 2, + "status": "failed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.3733, + 0.9211, + 0.8844, + 0.9122, + 0.91, + 0.3578, + 0.9056, + 0.9283, + 0.8667, + 0.8944, + 0.84, + 0.3456, + 0.8833, + 0.8811, + 0.8589, + 0.8767, + 0.0774, + 0.835, + 0.8856, + 0.8633, + 0.01 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 9, + "steps": 37, + "score": 0.7151, + "total_reward": 17.877, + "completion_rate": 0.9, + "detection_rate": 1.0, + "trust_calibration": 0.558, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9528, + 0.9256, + 0.9233, + 0.8689, + 0.3597, + 0.8822, + 0.935, + 0.02, + 0.3556, + 0.9011, + 0.8967, + 0.8944, + 0.84, + 0.8356, + 0.8489, + 0.8244, + 0.835, + 0.3178, + 0.8656, + 0.8261, + 0.8217, + 0.3044, + 0.85, + 0.7724 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 10, + "steps": 38, + "score": 0.6037, + "total_reward": 17.5072, + "completion_rate": 0.75, + "detection_rate": 1.0, + "trust_calibration": 0.772, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9233, + 0.9461, + 0.8844, + 0.3622, + 0.9078, + 0.02, + 0.3233, + 0.9261, + 0.8967, + 0.8944, + 0.86, + 0.3378, + 0.02, + 0.3263, + 0.8811, + 0.3289, + 0.2967, + 0.8994, + 0.8722, + 0.8678, + 0.3386, + 0.3463, + 0.02, + 0.3089, + 0.8544, + 0.355, + 0.7709 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 11, + "steps": 32, + "score": 0.5881, + "total_reward": 14.703, + "completion_rate": 0.7, + "detection_rate": 0.3333, + "trust_calibration": 0.743, + "adversarial_detections": 1, + "adversarial_poisonings": 2, + "status": "failed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.8756, + 0.9233, + 0.9211, + 0.3459, + 0.8844, + 0.3622, + 0.9078, + 0.9306, + 0.02, + 0.8689, + 0.8967, + 0.8944, + 0.8722, + 0.8878, + 0.8856, + 0.9228, + 0.0819, + 0.2967, + 0.3244, + 0.8722, + 0.02, + 0.3356, + 0.3011, + 0.01 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 12, + "steps": 36, + "score": 0.6249, + "total_reward": 14.9974, + "completion_rate": 0.75, + "detection_rate": 0.3333, + "trust_calibration": 0.716, + "adversarial_detections": 1, + "adversarial_poisonings": 2, + "status": "failed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9528, + 0.8933, + 0.3411, + 0.9189, + 0.8644, + 0.9122, + 0.8756, + 0.02, + 0.3441, + 0.3189, + 0.3167, + 0.8622, + 0.8878, + 0.8511, + 0.8789, + 0.8244, + 0.8878, + 0.2856, + 0.0663, + 0.8589, + 0.8961, + 0.8772, + 0.01 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 13, + "steps": 38, + "score": 0.7872, + "total_reward": 18.1053, + "completion_rate": 0.95, + "detection_rate": 1.0, + "trust_calibration": 0.64, + "adversarial_detections": 1, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.8911, + 0.8667, + 0.9122, + 0.3278, + 0.9056, + 0.8689, + 0.02, + 0.8622, + 0.8378, + 0.8533, + 0.8289, + 0.8767, + 0.8722, + 0.895, + 0.8856, + 0.8633, + 0.8611, + 0.9017, + 0.8939, + 0.3, + 0.8081 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 14, + "steps": 35, + "score": 0.6988, + "total_reward": 18.1679, + "completion_rate": 0.95, + "detection_rate": 0.0, + "trust_calibration": 0.661, + "adversarial_detections": 0, + "adversarial_poisonings": 2, + "status": "failed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.8733, + 0.8889, + 0.8844, + 0.3322, + 0.91, + 0.9078, + 0.9056, + 0.9033, + 0.8989, + 0.8622, + 0.89, + 0.3408, + 0.8856, + 0.8833, + 0.3011, + 0.8267, + 0.8372, + 0.87, + 0.0708, + 0.8833, + 0.2811, + 0.8839, + 0.8544, + 0.01 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 15, + "steps": 38, + "score": 0.68, + "total_reward": 19.0388, + "completion_rate": 0.85, + "detection_rate": 1.0, + "trust_calibration": 0.774, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3548, + 0.9256, + 0.9233, + 0.8689, + 0.3667, + 0.3644, + 0.8922, + 0.935, + 0.8556, + 0.3533, + 0.3211, + 0.3189, + 0.8944, + 0.89, + 0.3378, + 0.8333, + 0.8789, + 0.8767, + 0.8744, + 0.2922, + 0.333, + 0.8656, + 0.8261, + 0.8567, + 0.3, + 0.8478, + 0.8066 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 16, + "steps": 33, + "score": 0.5966, + "total_reward": 15.511, + "completion_rate": 0.75, + "detection_rate": 0.0, + "trust_calibration": 0.691, + "adversarial_detections": 0, + "adversarial_poisonings": 1, + "status": "failed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9256, + 0.9233, + 0.9211, + 0.9189, + 0.3044, + 0.02, + 0.935, + 0.02, + 0.3256, + 0.8711, + 0.8967, + 0.3444, + 0.02, + 0.89, + 0.8556, + 0.8833, + 0.3311, + 0.3289, + 0.8744, + 0.8878, + 0.8633, + 0.3141, + 0.8589, + 0.01 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 17, + "steps": 35, + "score": 0.5891, + "total_reward": 16.4939, + "completion_rate": 0.75, + "detection_rate": 0.0, + "trust_calibration": 0.795, + "adversarial_detections": 0, + "adversarial_poisonings": 1, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.02, + 0.3433, + 0.3319, + 0.8644, + 0.3622, + 0.91, + 0.3286, + 0.9033, + 0.9011, + 0.8789, + 0.3697, + 0.9194, + 0.84, + 0.8856, + 0.8811, + 0.8789, + 0.3597, + 0.2692, + 0.87, + 0.2878, + 0.8656, + 0.0663, + 0.8239, + 0.8817, + 0.02, + 0.4835 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 18, + "steps": 35, + "score": 0.6548, + "total_reward": 16.3705, + "completion_rate": 0.75, + "detection_rate": 1.0, + "trust_calibration": 0.573, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9233, + 0.02, + 0.9167, + 0.3644, + 0.3622, + 0.91, + 0.8556, + 0.9033, + 0.8689, + 0.8967, + 0.8922, + 0.915, + 0.02, + 0.8533, + 0.8789, + 0.2967, + 0.3422, + 0.3078, + 0.8656, + 0.8611, + 0.2789, + 0.3297, + 0.7281 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 19, + "steps": 38, + "score": 0.6912, + "total_reward": 17.2799, + "completion_rate": 0.9, + "detection_rate": 0.0, + "trust_calibration": 0.834, + "adversarial_detections": 0, + "adversarial_poisonings": 1, + "status": "failed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3156, + 0.9211, + 0.9189, + 0.9144, + 0.9122, + 0.33, + 0.02, + 0.9133, + 0.9261, + 0.8967, + 0.8422, + 0.89, + 0.8356, + 0.8511, + 0.8789, + 0.8922, + 0.87, + 0.3178, + 0.8811, + 0.8589, + 0.8544, + 0.87, + 0.3108, + 0.01 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 20, + "steps": 44, + "score": 0.6149, + "total_reward": 19.0606, + "completion_rate": 0.85, + "detection_rate": 0.0, + "trust_calibration": 0.859, + "adversarial_detections": 0, + "adversarial_poisonings": 2, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3556, + 0.8711, + 0.8667, + 0.9144, + 0.9122, + 0.337, + 0.9078, + 0.9033, + 0.3281, + 0.3519, + 0.3467, + 0.8922, + 0.89, + 0.3608, + 0.8856, + 0.8833, + 0.8289, + 0.2714, + 0.87, + 0.3508, + 0.8656, + 0.3089, + 0.0597, + 0.8172, + 0.053, + 0.8728, + 0.8083, + 0.3439, + 0.2567, + 0.5146 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 21, + "steps": 30, + "score": 0.7401, + "total_reward": 15.543, + "completion_rate": 0.85, + "detection_rate": 0.0, + "trust_calibration": 0.709, + "adversarial_detections": 0, + "adversarial_poisonings": 1, + "status": "failed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9256, + 0.8911, + 0.8867, + 0.9144, + 0.3322, + 0.9078, + 0.9056, + 0.8711, + 0.8467, + 0.8922, + 0.89, + 0.02, + 0.8856, + 0.8811, + 0.8589, + 0.8744, + 0.87, + 0.8856, + 0.01 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 22, + "steps": 45, + "score": 0.5529, + "total_reward": 16.5871, + "completion_rate": 0.7, + "detection_rate": 0.0, + "trust_calibration": 0.709, + "adversarial_detections": 0, + "adversarial_poisonings": 2, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3548, + 0.02, + 0.9311, + 0.8844, + 0.3622, + 0.36, + 0.9078, + 0.2933, + 0.8689, + 0.9217, + 0.3444, + 0.32, + 0.8556, + 0.3033, + 0.3011, + 0.8767, + 0.3022, + 0.87, + 0.8678, + 0.8811, + 0.0619, + 0.8544, + 0.3022, + 0.8478, + 0.8083, + 0.3119, + 0.0397, + 0.8322, + 0.4222 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 23, + "steps": 31, + "score": 0.7921, + "total_reward": 18.2193, + "completion_rate": 0.95, + "detection_rate": 0.5, + "trust_calibration": 0.847, + "adversarial_detections": 1, + "adversarial_poisonings": 1, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.8756, + 0.9233, + 0.3481, + 0.9439, + 0.9167, + 0.9144, + 0.86, + 0.9078, + 0.9033, + 0.9261, + 0.8967, + 0.8944, + 0.8922, + 0.89, + 0.8878, + 0.02, + 0.9228, + 0.0819, + 0.8744, + 0.87, + 0.8833, + 0.7254 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 24, + "steps": 45, + "score": 0.6293, + "total_reward": 16.3622, + "completion_rate": 0.75, + "detection_rate": 1.0, + "trust_calibration": 0.813, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3156, + 0.9483, + 0.9461, + 0.3197, + 0.9072, + 0.8756, + 0.9033, + 0.3541, + 0.02, + 0.9044, + 0.02, + 0.3608, + 0.8511, + 0.2967, + 0.8722, + 0.8356, + 0.8111, + 0.2867, + 0.8544, + 0.845, + 0.3156, + 0.8061, + 0.8367, + 0.7972, + 0.7658 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 25, + "steps": 34, + "score": 0.6745, + "total_reward": 16.8613, + "completion_rate": 0.85, + "detection_rate": 0.0, + "trust_calibration": 0.809, + "adversarial_detections": 0, + "adversarial_poisonings": 2, + "status": "failed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.8933, + 0.9461, + 0.9189, + 0.8644, + 0.9122, + 0.91, + 0.8556, + 0.8711, + 0.8989, + 0.3237, + 0.8944, + 0.8922, + 0.8378, + 0.3356, + 0.8833, + 0.8489, + 0.0797, + 0.3244, + 0.3222, + 0.8878, + 0.8906, + 0.9061, + 0.2967, + 0.01 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 26, + "steps": 43, + "score": 0.5813, + "total_reward": 17.4397, + "completion_rate": 0.75, + "detection_rate": 0.0, + "trust_calibration": 0.815, + "adversarial_detections": 0, + "adversarial_poisonings": 1, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.8933, + 0.9211, + 0.9439, + 0.9167, + 0.9144, + 0.02, + 0.3308, + 0.8711, + 0.8467, + 0.8922, + 0.8878, + 0.3486, + 0.3033, + 0.2759, + 0.8767, + 0.3244, + 0.3452, + 0.29, + 0.8156, + 0.8633, + 0.2889, + 0.0597, + 0.8544, + 0.3372, + 0.8478, + 0.2956, + 0.2811, + 0.2889, + 0.4707 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 27, + "steps": 40, + "score": 0.5674, + "total_reward": 15.3205, + "completion_rate": 0.7, + "detection_rate": 0.25, + "trust_calibration": 0.816, + "adversarial_detections": 1, + "adversarial_poisonings": 3, + "status": "failed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9206, + 0.3433, + 0.3541, + 0.9189, + 0.9244, + 0.3622, + 0.9078, + 0.9133, + 0.8689, + 0.8644, + 0.885, + 0.02, + 0.8533, + 0.9061, + 0.8267, + 0.9139, + 0.073, + 0.3356, + 0.3441, + 0.0619, + 0.8994, + 0.2722, + 0.323, + 0.3308, + 0.8433, + 0.01 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 28, + "steps": 32, + "score": 0.6685, + "total_reward": 16.0443, + "completion_rate": 0.8, + "detection_rate": 0.0, + "trust_calibration": 0.793, + "adversarial_detections": 0, + "adversarial_poisonings": 1, + "status": "failed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.8956, + 0.9211, + 0.9189, + 0.8844, + 0.9122, + 0.91, + 0.9078, + 0.9056, + 0.3763, + 0.02, + 0.3167, + 0.8944, + 0.89, + 0.9128, + 0.8856, + 0.8461, + 0.8589, + 0.3267, + 0.8372, + 0.9128, + 0.2933, + 0.3461, + 0.01 + ] + }, + { + "policy": "random", + "task_type": "task3", + "seed": 29, + "steps": 40, + "score": 0.6868, + "total_reward": 17.8577, + "completion_rate": 0.85, + "detection_rate": 1.0, + "trust_calibration": 0.907, + "adversarial_detections": 1, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.8933, + 0.3411, + 0.9189, + 0.02, + 0.8822, + 0.9028, + 0.02, + 0.8511, + 0.8989, + 0.3467, + 0.3444, + 0.8578, + 0.8856, + 0.8511, + 0.2989, + 0.9094, + 0.87, + 0.8856, + 0.3641, + 0.8567, + 0.3572, + 0.8895, + 0.8083, + 0.8353 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 0, + "steps": 43, + "score": 0.7114, + "total_reward": 18.4969, + "completion_rate": 0.85, + "detection_rate": 1.0, + "trust_calibration": 0.729, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9528, + 0.9506, + 0.9483, + 0.3711, + 0.3689, + 0.3667, + 0.3344, + 0.91, + 0.9056, + 0.9011, + 0.3267, + 0.8922, + 0.8878, + 0.8833, + 0.8789, + 0.8744, + 0.3, + 0.2956, + 0.8611, + 0.2867, + 0.8522, + 0.8478, + 0.8433, + 0.8389, + 0.7841 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 1, + "steps": 29, + "score": 0.7083, + "total_reward": 17.707, + "completion_rate": 0.85, + "detection_rate": 0.5, + "trust_calibration": 0.721, + "adversarial_detections": 4, + "adversarial_poisonings": 4, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9528, + 0.9506, + 0.9483, + 0.3711, + 0.3689, + 0.3667, + 0.9144, + 0.9122, + 0.91, + 0.9078, + 0.9056, + 0.9033, + 0.9011, + 0.8989, + 0.8967, + 0.0974, + 0.0952, + 0.093, + 0.0908, + 0.925, + 0.9206, + 0.9161, + 0.9117, + 0.6632 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 2, + "steps": 29, + "score": 0.6919, + "total_reward": 17.2983, + "completion_rate": 0.85, + "detection_rate": 0.4444, + "trust_calibration": 0.561, + "adversarial_detections": 4, + "adversarial_poisonings": 5, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3778, + 0.3456, + 0.9233, + 0.9211, + 0.9189, + 0.9167, + 0.9144, + 0.9122, + 0.91, + 0.9078, + 0.9056, + 0.9033, + 0.9011, + 0.8989, + 0.0997, + 0.0974, + 0.0952, + 0.093, + 0.0908, + 0.925, + 0.9206, + 0.9161, + 0.9117, + 0.6065 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 3, + "steps": 42, + "score": 0.8546, + "total_reward": 18.8008, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.843, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.3111, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.8589, + 0.8544, + 0.85, + 0.8456, + 0.8411, + 0.8675 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 4, + "steps": 29, + "score": 0.7165, + "total_reward": 17.9128, + "completion_rate": 0.9, + "detection_rate": 0.4444, + "trust_calibration": 0.721, + "adversarial_detections": 4, + "adversarial_poisonings": 5, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9256, + 0.9233, + 0.9211, + 0.9189, + 0.9167, + 0.3414, + 0.9122, + 0.91, + 0.9078, + 0.9056, + 0.9033, + 0.9011, + 0.8989, + 0.0997, + 0.0974, + 0.0952, + 0.093, + 0.0908, + 0.925, + 0.9206, + 0.9161, + 0.9117, + 0.664 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 5, + "steps": 46, + "score": 0.7558, + "total_reward": 18.1385, + "completion_rate": 0.9, + "detection_rate": 1.0, + "trust_calibration": 0.832, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.3422, + 0.9078, + 0.3333, + 0.8989, + 0.8944, + 0.32, + 0.8856, + 0.3111, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.8589, + 0.8544, + 0.85, + 0.8456, + 0.2711, + 0.8367, + 0.8322, + 0.8229 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 6, + "steps": 27, + "score": 0.6991, + "total_reward": 16.778, + "completion_rate": 0.85, + "detection_rate": 0.4, + "trust_calibration": 0.725, + "adversarial_detections": 4, + "adversarial_poisonings": 6, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9256, + 0.9233, + 0.9211, + 0.9189, + 0.9167, + 0.9144, + 0.9122, + 0.91, + 0.9078, + 0.9056, + 0.9033, + 0.9011, + 0.1019, + 0.0997, + 0.0974, + 0.0952, + 0.093, + 0.0908, + 0.925, + 0.9206, + 0.9161, + 0.6387 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 7, + "steps": 42, + "score": 0.7756, + "total_reward": 19.3902, + "completion_rate": 0.95, + "detection_rate": 1.0, + "trust_calibration": 0.835, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9256, + 0.3763, + 0.9211, + 0.3919, + 0.3997, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.2933, + 0.8589, + 0.8544, + 0.28, + 0.8456, + 0.8411, + 0.8478 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 8, + "steps": 44, + "score": 0.809, + "total_reward": 19.4157, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.853, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.3526, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.3333, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.8589, + 0.8544, + 0.85, + 0.2756, + 0.8411, + 0.8367, + 0.8654 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 9, + "steps": 40, + "score": 0.782, + "total_reward": 19.5499, + "completion_rate": 0.95, + "detection_rate": 1.0, + "trust_calibration": 0.837, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9256, + 0.9233, + 0.3841, + 0.9189, + 0.3997, + 0.3994, + 0.3972, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.2889, + 0.8544, + 0.85, + 0.8456, + 0.8528 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 10, + "steps": 31, + "score": 0.712, + "total_reward": 17.8008, + "completion_rate": 0.85, + "detection_rate": 0.625, + "trust_calibration": 0.448, + "adversarial_detections": 5, + "adversarial_poisonings": 3, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9256, + 0.9233, + 0.9211, + 0.9189, + 0.9167, + 0.9144, + 0.9122, + 0.91, + 0.9078, + 0.9056, + 0.3303, + 0.3281, + 0.8989, + 0.0997, + 0.0974, + 0.0952, + 0.9295, + 0.925, + 0.9206, + 0.9161, + 0.9117, + 0.3356, + 0.6281 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 11, + "steps": 40, + "score": 0.7732, + "total_reward": 18.5566, + "completion_rate": 0.9, + "detection_rate": 1.0, + "trust_calibration": 0.835, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9528, + 0.9506, + 0.9483, + 0.3711, + 0.3689, + 0.3667, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.2978, + 0.2933, + 0.8589, + 0.8544, + 0.85, + 0.8456, + 0.8349 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 12, + "steps": 42, + "score": 0.8546, + "total_reward": 18.8009, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.844, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.2889, + 0.8544, + 0.85, + 0.8456, + 0.8411, + 0.8676 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 13, + "steps": 39, + "score": 0.833, + "total_reward": 18.3252, + "completion_rate": 0.95, + "detection_rate": 1.0, + "trust_calibration": 0.811, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.3456, + 0.3433, + 0.9189, + 0.9144, + 0.91, + 0.9056, + 0.9011, + 0.8967, + 0.8922, + 0.8878, + 0.8833, + 0.8789, + 0.8744, + 0.87, + 0.8656, + 0.8611, + 0.8567, + 0.8522, + 0.8478, + 0.8485 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 14, + "steps": 29, + "score": 0.6889, + "total_reward": 17.9127, + "completion_rate": 0.85, + "detection_rate": 0.5, + "trust_calibration": 0.609, + "adversarial_detections": 4, + "adversarial_poisonings": 4, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9256, + 0.3763, + 0.3841, + 0.3689, + 0.9167, + 0.9144, + 0.9122, + 0.91, + 0.9078, + 0.9056, + 0.9033, + 0.9011, + 0.8989, + 0.3237, + 0.8944, + 0.8922, + 0.093, + 0.0908, + 0.0886, + 0.0863, + 0.9206, + 0.9161, + 0.9117, + 0.6353 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 15, + "steps": 30, + "score": 0.6847, + "total_reward": 18.4869, + "completion_rate": 0.9, + "detection_rate": 0.4444, + "trust_calibration": 0.635, + "adversarial_detections": 4, + "adversarial_poisonings": 5, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3608, + 0.9256, + 0.9233, + 0.9211, + 0.3459, + 0.9167, + 0.9144, + 0.9122, + 0.91, + 0.3348, + 0.9056, + 0.9033, + 0.9011, + 0.8989, + 0.8967, + 0.8944, + 0.8922, + 0.093, + 0.0908, + 0.0886, + 0.0863, + 0.0841, + 0.9184, + 0.9139, + 0.9095, + 0.6404 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 16, + "steps": 42, + "score": 0.8546, + "total_reward": 18.8009, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.844, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.2889, + 0.8544, + 0.85, + 0.8456, + 0.8411, + 0.8676 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 17, + "steps": 46, + "score": 0.8048, + "total_reward": 19.316, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.842, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.3467, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.2889, + 0.8544, + 0.28, + 0.8456, + 0.8411, + 0.8367, + 0.8322, + 0.8605 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 18, + "steps": 26, + "score": 0.6967, + "total_reward": 16.7213, + "completion_rate": 0.85, + "detection_rate": 0.3333, + "trust_calibration": 0.701, + "adversarial_detections": 3, + "adversarial_poisonings": 6, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9256, + 0.9233, + 0.9211, + 0.9189, + 0.9167, + 0.9144, + 0.9122, + 0.91, + 0.9078, + 0.9056, + 0.9033, + 0.9011, + 0.8989, + 0.0997, + 0.0974, + 0.0952, + 0.093, + 0.0908, + 0.0886, + 0.9228, + 0.9184, + 0.6149 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 19, + "steps": 20, + "score": 0.6193, + "total_reward": 13.0053, + "completion_rate": 0.65, + "detection_rate": 0.0, + "trust_calibration": 0.576, + "adversarial_detections": 0, + "adversarial_poisonings": 5, + "status": "failed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3478, + 0.9256, + 0.9233, + 0.9211, + 0.9189, + 0.3437, + 0.9144, + 0.9122, + 0.91, + 0.9078, + 0.9056, + 0.9033, + 0.9011, + 0.8989, + 0.8967, + 0.0974, + 0.0952, + 0.093, + 0.0908, + 0.01 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 20, + "steps": 46, + "score": 0.7498, + "total_reward": 19.4938, + "completion_rate": 0.95, + "detection_rate": 1.0, + "trust_calibration": 0.835, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9256, + 0.3763, + 0.3841, + 0.9167, + 0.9122, + 0.3378, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.3156, + 0.8811, + 0.8767, + 0.3022, + 0.8678, + 0.8633, + 0.8589, + 0.2844, + 0.85, + 0.8456, + 0.8411, + 0.8367, + 0.8322, + 0.8412 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 21, + "steps": 42, + "score": 0.8546, + "total_reward": 18.8009, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.844, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.8589, + 0.8544, + 0.85, + 0.2756, + 0.8411, + 0.8676 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 22, + "steps": 36, + "score": 0.7334, + "total_reward": 21.2675, + "completion_rate": 1.0, + "detection_rate": 0.8, + "trust_calibration": 0.747, + "adversarial_detections": 4, + "adversarial_poisonings": 1, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3778, + 0.9256, + 0.9233, + 0.3481, + 0.9189, + 0.9167, + 0.9144, + 0.9122, + 0.337, + 0.9078, + 0.3326, + 0.9033, + 0.9011, + 0.3259, + 0.8967, + 0.3214, + 0.8922, + 0.89, + 0.3148, + 0.8856, + 0.0863, + 0.9206, + 0.9161, + 0.9117, + 0.9073, + 0.8789, + 0.8544, + 0.7968 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 23, + "steps": 28, + "score": 0.6922, + "total_reward": 17.3057, + "completion_rate": 0.85, + "detection_rate": 0.4444, + "trust_calibration": 0.645, + "adversarial_detections": 4, + "adversarial_poisonings": 5, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.3686, + 0.3433, + 0.9211, + 0.9189, + 0.9167, + 0.9144, + 0.9122, + 0.91, + 0.9078, + 0.9056, + 0.9033, + 0.9011, + 0.8989, + 0.8967, + 0.0974, + 0.0952, + 0.093, + 0.0908, + 0.0886, + 0.9228, + 0.9184, + 0.9139, + 0.6298 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 24, + "steps": 46, + "score": 0.7725, + "total_reward": 20.0838, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.836, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.3686, + 0.9483, + 0.3711, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.3022, + 0.8678, + 0.8633, + 0.2889, + 0.8544, + 0.85, + 0.8456, + 0.2711, + 0.8367, + 0.8322, + 0.8591 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 25, + "steps": 34, + "score": 0.6755, + "total_reward": 18.9148, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.71, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9528, + 0.9506, + 0.9483, + 0.3711, + 0.3689, + 0.3667, + 0.9144, + 0.3322, + 0.91, + 0.9078, + 0.9056, + 0.9033, + 0.9011, + 0.3189, + 0.3167, + 0.8944, + 0.3122, + 0.31, + 0.3078, + 0.8856, + 0.9011, + 0.8967, + 0.8922, + 0.3078, + 0.3033, + 0.8789, + 0.782 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 26, + "steps": 34, + "score": 0.6561, + "total_reward": 19.0282, + "completion_rate": 0.8, + "detection_rate": 0.6667, + "trust_calibration": 0.467, + "adversarial_detections": 2, + "adversarial_poisonings": 1, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9528, + 0.9506, + 0.9483, + 0.3711, + 0.8989, + 0.3667, + 0.3644, + 0.9372, + 0.935, + 0.3578, + 0.3556, + 0.3533, + 0.3289, + 0.8967, + 0.8944, + 0.8922, + 0.317, + 0.8878, + 0.3126, + 0.8833, + 0.0841, + 0.9184, + 0.9139, + 0.8878, + 0.8656, + 0.3163, + 0.3419, + 0.6213 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 27, + "steps": 46, + "score": 0.7256, + "total_reward": 20.3155, + "completion_rate": 0.95, + "detection_rate": 1.0, + "trust_calibration": 0.828, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9256, + 0.3763, + 0.9211, + 0.9189, + 0.3997, + 0.3994, + 0.3972, + 0.9078, + 0.9033, + 0.3289, + 0.8944, + 0.89, + 0.3156, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.8589, + 0.2844, + 0.85, + 0.2756, + 0.8411, + 0.8367, + 0.8322, + 0.8395 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 28, + "steps": 28, + "score": 0.7355, + "total_reward": 17.6509, + "completion_rate": 0.9, + "detection_rate": 0.5, + "trust_calibration": 0.725, + "adversarial_detections": 5, + "adversarial_poisonings": 5, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9256, + 0.9233, + 0.9211, + 0.9189, + 0.9167, + 0.9144, + 0.9122, + 0.91, + 0.9078, + 0.9056, + 0.9033, + 0.9011, + 0.1019, + 0.0997, + 0.0974, + 0.0952, + 0.093, + 0.9273, + 0.9228, + 0.9184, + 0.9139, + 0.6841 + ] + }, + { + "policy": "heuristic", + "task_type": "task3", + "seed": 29, + "steps": 28, + "score": 0.7258, + "total_reward": 18.144, + "completion_rate": 0.9, + "detection_rate": 0.5, + "trust_calibration": 0.709, + "adversarial_detections": 4, + "adversarial_poisonings": 4, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9256, + 0.9233, + 0.9211, + 0.9189, + 0.9167, + 0.9144, + 0.3392, + 0.91, + 0.9078, + 0.9056, + 0.9033, + 0.3281, + 0.8989, + 0.8967, + 0.8944, + 0.0952, + 0.093, + 0.0908, + 0.0886, + 0.9228, + 0.9184, + 0.9139, + 0.6801 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 0, + "steps": 42, + "score": 0.867, + "total_reward": 19.0739, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.935, + "adversarial_detections": 6, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.3111, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8904 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 1, + "steps": 40, + "score": 0.8977, + "total_reward": 18.8524, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.931, + "adversarial_detections": 7, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.9095, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8938 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 2, + "steps": 42, + "score": 0.8422, + "total_reward": 18.5276, + "completion_rate": 0.95, + "detection_rate": 1.0, + "trust_calibration": 0.933, + "adversarial_detections": 7, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3556, + 0.3511, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8724 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 3, + "steps": 42, + "score": 0.8689, + "total_reward": 19.1153, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.934, + "adversarial_detections": 7, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.3111, + 0.8767, + 0.8722, + 0.8678, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8901 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 4, + "steps": 42, + "score": 0.8689, + "total_reward": 19.1154, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.934, + "adversarial_detections": 7, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.3289, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8901 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 5, + "steps": 46, + "score": 0.7916, + "total_reward": 18.9976, + "completion_rate": 0.95, + "detection_rate": 1.0, + "trust_calibration": 0.917, + "adversarial_detections": 6, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.3422, + 0.9078, + 0.3333, + 0.8989, + 0.8944, + 0.32, + 0.8856, + 0.3111, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.8589, + 0.8544, + 0.8917, + 0.8873, + 0.8828, + 0.8784, + 0.8739, + 0.8618 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 6, + "steps": 40, + "score": 0.8977, + "total_reward": 18.8523, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.931, + "adversarial_detections": 7, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.9095, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8938 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 7, + "steps": 40, + "score": 0.8958, + "total_reward": 18.8108, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.931, + "adversarial_detections": 6, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.894 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 8, + "steps": 44, + "score": 0.8405, + "total_reward": 19.3315, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.934, + "adversarial_detections": 6, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.3511, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.3289, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.8589, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8784, + 0.8857 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 9, + "steps": 40, + "score": 0.8958, + "total_reward": 18.8108, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.931, + "adversarial_detections": 6, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.894 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 10, + "steps": 42, + "score": 0.8421, + "total_reward": 18.5263, + "completion_rate": 0.95, + "detection_rate": 1.0, + "trust_calibration": 0.928, + "adversarial_detections": 7, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.3067, + 0.3022, + 0.8678, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.871 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 11, + "steps": 42, + "score": 0.8689, + "total_reward": 19.1153, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.934, + "adversarial_detections": 7, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.3467, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8901 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 12, + "steps": 40, + "score": 0.8977, + "total_reward": 18.8524, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.931, + "adversarial_detections": 7, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.9095, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8938 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 13, + "steps": 40, + "score": 0.8958, + "total_reward": 18.8108, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.931, + "adversarial_detections": 6, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.894 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 14, + "steps": 44, + "score": 0.8405, + "total_reward": 19.3311, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.932, + "adversarial_detections": 6, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.3422, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.2933, + 0.8589, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8784, + 0.8853 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 15, + "steps": 46, + "score": 0.8162, + "total_reward": 19.5883, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.93, + "adversarial_detections": 6, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3556, + 0.9211, + 0.9167, + 0.9122, + 0.3378, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.3156, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.8589, + 0.8544, + 0.8917, + 0.8873, + 0.8828, + 0.8784, + 0.8739, + 0.8825 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 16, + "steps": 40, + "score": 0.8977, + "total_reward": 18.8523, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.931, + "adversarial_detections": 7, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.9095, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8938 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 17, + "steps": 42, + "score": 0.867, + "total_reward": 19.0739, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.935, + "adversarial_detections": 6, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.3467, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8903 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 18, + "steps": 40, + "score": 0.8958, + "total_reward": 18.8108, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.931, + "adversarial_detections": 6, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.894 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 19, + "steps": 42, + "score": 0.8689, + "total_reward": 19.1153, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.934, + "adversarial_detections": 7, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.3333, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8901 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 20, + "steps": 46, + "score": 0.7653, + "total_reward": 18.3663, + "completion_rate": 0.9, + "detection_rate": 1.0, + "trust_calibration": 0.909, + "adversarial_detections": 5, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3556, + 0.9211, + 0.9167, + 0.3422, + 0.9078, + 0.9033, + 0.3289, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.3067, + 0.8722, + 0.8678, + 0.2933, + 0.8589, + 0.8544, + 0.85, + 0.8873, + 0.8828, + 0.8784, + 0.8739, + 0.8423 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 21, + "steps": 40, + "score": 0.8958, + "total_reward": 18.8109, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.932, + "adversarial_detections": 6, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.894 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 22, + "steps": 46, + "score": 0.7652, + "total_reward": 18.3659, + "completion_rate": 0.9, + "detection_rate": 1.0, + "trust_calibration": 0.908, + "adversarial_detections": 5, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3556, + 0.9211, + 0.9167, + 0.3422, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.3111, + 0.8767, + 0.8722, + 0.2978, + 0.8633, + 0.2889, + 0.8544, + 0.85, + 0.8873, + 0.8828, + 0.8784, + 0.8739, + 0.8419 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 23, + "steps": 42, + "score": 0.8403, + "total_reward": 18.4862, + "completion_rate": 0.95, + "detection_rate": 1.0, + "trust_calibration": 0.934, + "adversarial_detections": 6, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3556, + 0.3511, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8727 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 24, + "steps": 44, + "score": 0.8405, + "total_reward": 19.3311, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.933, + "adversarial_detections": 6, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.3422, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.2933, + 0.8589, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8784, + 0.8854 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 25, + "steps": 44, + "score": 0.8405, + "total_reward": 19.3314, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.934, + "adversarial_detections": 6, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.3511, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.3244, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.8589, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8784, + 0.8857 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 26, + "steps": 42, + "score": 0.8403, + "total_reward": 18.4855, + "completion_rate": 0.95, + "detection_rate": 1.0, + "trust_calibration": 0.931, + "adversarial_detections": 6, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.3067, + 0.3022, + 0.8678, + 0.8633, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.872 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 27, + "steps": 46, + "score": 0.8179, + "total_reward": 19.6285, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.924, + "adversarial_detections": 7, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.3467, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.3111, + 0.8767, + 0.8722, + 0.2978, + 0.8633, + 0.8589, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8784, + 0.8739, + 0.881 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 28, + "steps": 40, + "score": 0.8977, + "total_reward": 18.8524, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.931, + "adversarial_detections": 7, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.9095, + 0.905, + 0.9006, + 0.8961, + 0.8917, + 0.8873, + 0.8938 + ] + }, + { + "policy": "oracle_lite", + "task_type": "task3", + "seed": 29, + "steps": 44, + "score": 0.8405, + "total_reward": 19.3311, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.933, + "adversarial_detections": 6, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.3244, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.3022, + 0.8678, + 0.8633, + 0.8589, + 0.8961, + 0.8917, + 0.8873, + 0.8828, + 0.8784, + 0.8854 + ] + }, + { + "policy": "trained", + "task_type": "task3", + "seed": 0, + "steps": 46, + "score": 0.7123, + "total_reward": 18.5191, + "completion_rate": 0.85, + "detection_rate": 1.0, + "trust_calibration": 0.729, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9528, + 0.9506, + 0.9483, + 0.3711, + 0.3689, + 0.3667, + 0.3344, + 0.91, + 0.9056, + 0.9011, + 0.3267, + 0.8922, + 0.8878, + 0.8833, + 0.8789, + 0.8744, + 0.3, + 0.2956, + 0.8789, + 0.2844, + 0.8678, + 0.8611, + 0.8367, + 0.8322, + 0.7797 + ] + }, + { + "policy": "trained", + "task_type": "task3", + "seed": 1, + "steps": 30, + "score": 0.7434, + "total_reward": 18.5852, + "completion_rate": 0.9, + "detection_rate": 0.625, + "trust_calibration": 0.721, + "adversarial_detections": 5, + "adversarial_poisonings": 3, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9528, + 0.9506, + 0.9483, + 0.3711, + 0.3689, + 0.3667, + 0.9144, + 0.9122, + 0.91, + 0.9078, + 0.9056, + 0.9033, + 0.9011, + 0.8989, + 0.8967, + 0.0974, + 0.0952, + 0.093, + 0.9273, + 0.9228, + 0.9184, + 0.9139, + 0.9095, + 0.716 + ] + }, + { + "policy": "trained", + "task_type": "task3", + "seed": 2, + "steps": 28, + "score": 0.7198, + "total_reward": 17.2756, + "completion_rate": 0.85, + "detection_rate": 0.5556, + "trust_calibration": 0.559, + "adversarial_detections": 5, + "adversarial_poisonings": 4, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3778, + 0.3456, + 0.9233, + 0.9211, + 0.9189, + 0.9167, + 0.9144, + 0.9122, + 0.91, + 0.9078, + 0.9056, + 0.9033, + 0.9011, + 0.8989, + 0.0997, + 0.0974, + 0.0952, + 0.093, + 0.9273, + 0.9228, + 0.9184, + 0.9139, + 0.6418 + ] + }, + { + "policy": "trained", + "task_type": "task3", + "seed": 3, + "steps": 42, + "score": 0.8546, + "total_reward": 18.8008, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.843, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.3111, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.8589, + 0.8544, + 0.85, + 0.8456, + 0.8411, + 0.8675 + ] + }, + { + "policy": "trained", + "task_type": "task3", + "seed": 4, + "steps": 28, + "score": 0.7455, + "total_reward": 17.8908, + "completion_rate": 0.9, + "detection_rate": 0.5556, + "trust_calibration": 0.722, + "adversarial_detections": 5, + "adversarial_poisonings": 4, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9256, + 0.9233, + 0.9211, + 0.9189, + 0.9167, + 0.3414, + 0.9122, + 0.91, + 0.9078, + 0.9056, + 0.9033, + 0.9011, + 0.8989, + 0.0997, + 0.0974, + 0.0952, + 0.093, + 0.9273, + 0.9228, + 0.9184, + 0.9139, + 0.7 + ] + }, + { + "policy": "trained", + "task_type": "task3", + "seed": 5, + "steps": 46, + "score": 0.7558, + "total_reward": 18.1385, + "completion_rate": 0.9, + "detection_rate": 1.0, + "trust_calibration": 0.832, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.3422, + 0.9078, + 0.3333, + 0.8989, + 0.8944, + 0.32, + 0.8856, + 0.3111, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.8589, + 0.8544, + 0.85, + 0.8456, + 0.2711, + 0.8367, + 0.8322, + 0.8229 + ] + }, + { + "policy": "trained", + "task_type": "task3", + "seed": 6, + "steps": 27, + "score": 0.7645, + "total_reward": 17.5844, + "completion_rate": 0.9, + "detection_rate": 0.5556, + "trust_calibration": 0.711, + "adversarial_detections": 5, + "adversarial_poisonings": 4, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9256, + 0.9233, + 0.9211, + 0.9189, + 0.9167, + 0.9144, + 0.9122, + 0.91, + 0.9078, + 0.9056, + 0.9033, + 0.9011, + 0.1019, + 0.0997, + 0.0974, + 0.0952, + 0.9295, + 0.925, + 0.9206, + 0.9161, + 0.6995 + ] + }, + { + "policy": "trained", + "task_type": "task3", + "seed": 7, + "steps": 42, + "score": 0.7756, + "total_reward": 19.3902, + "completion_rate": 0.95, + "detection_rate": 1.0, + "trust_calibration": 0.835, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9256, + 0.3763, + 0.9211, + 0.3919, + 0.3997, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.2933, + 0.8589, + 0.8544, + 0.28, + 0.8456, + 0.8411, + 0.8478 + ] + }, + { + "policy": "trained", + "task_type": "task3", + "seed": 8, + "steps": 44, + "score": 0.809, + "total_reward": 19.4157, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.853, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.3526, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.3333, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.8589, + 0.8544, + 0.85, + 0.2756, + 0.8411, + 0.8367, + 0.8654 + ] + }, + { + "policy": "trained", + "task_type": "task3", + "seed": 9, + "steps": 40, + "score": 0.782, + "total_reward": 19.5499, + "completion_rate": 0.95, + "detection_rate": 1.0, + "trust_calibration": 0.837, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9256, + 0.9233, + 0.3841, + 0.9189, + 0.3997, + 0.3994, + 0.3972, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.2889, + 0.8544, + 0.85, + 0.8456, + 0.8528 + ] + }, + { + "policy": "trained", + "task_type": "task3", + "seed": 10, + "steps": 30, + "score": 0.7394, + "total_reward": 17.7466, + "completion_rate": 0.85, + "detection_rate": 0.7143, + "trust_calibration": 0.434, + "adversarial_detections": 5, + "adversarial_poisonings": 2, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9256, + 0.9233, + 0.9211, + 0.9189, + 0.9167, + 0.9144, + 0.9122, + 0.91, + 0.9078, + 0.9056, + 0.3303, + 0.3281, + 0.8989, + 0.0997, + 0.0974, + 0.9317, + 0.9273, + 0.9228, + 0.9184, + 0.9139, + 0.3378, + 0.6536 + ] + }, + { + "policy": "trained", + "task_type": "task3", + "seed": 11, + "steps": 40, + "score": 0.7732, + "total_reward": 18.5566, + "completion_rate": 0.9, + "detection_rate": 1.0, + "trust_calibration": 0.835, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9528, + 0.9506, + 0.9483, + 0.3711, + 0.3689, + 0.3667, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.2978, + 0.2933, + 0.8589, + 0.8544, + 0.85, + 0.8456, + 0.8349 + ] + }, + { + "policy": "trained", + "task_type": "task3", + "seed": 12, + "steps": 42, + "score": 0.8546, + "total_reward": 18.8009, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.844, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.2889, + 0.8544, + 0.85, + 0.8456, + 0.8411, + 0.8676 + ] + }, + { + "policy": "trained", + "task_type": "task3", + "seed": 13, + "steps": 39, + "score": 0.833, + "total_reward": 18.3252, + "completion_rate": 0.95, + "detection_rate": 1.0, + "trust_calibration": 0.811, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.3456, + 0.3433, + 0.9189, + 0.9144, + 0.91, + 0.9056, + 0.9011, + 0.8967, + 0.8922, + 0.8878, + 0.8833, + 0.8789, + 0.8744, + 0.87, + 0.8656, + 0.8611, + 0.8567, + 0.8522, + 0.8478, + 0.8485 + ] + }, + { + "policy": "trained", + "task_type": "task3", + "seed": 14, + "steps": 30, + "score": 0.7228, + "total_reward": 18.7931, + "completion_rate": 0.9, + "detection_rate": 0.625, + "trust_calibration": 0.609, + "adversarial_detections": 5, + "adversarial_poisonings": 3, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9256, + 0.3763, + 0.3841, + 0.3689, + 0.9167, + 0.9144, + 0.9122, + 0.91, + 0.9078, + 0.9056, + 0.9033, + 0.9011, + 0.8989, + 0.3237, + 0.8944, + 0.8922, + 0.093, + 0.0908, + 0.0886, + 0.9228, + 0.9184, + 0.9139, + 0.9095, + 0.6881 + ] + }, + { + "policy": "trained", + "task_type": "task3", + "seed": 15, + "steps": 30, + "score": 0.7426, + "total_reward": 19.3077, + "completion_rate": 0.95, + "detection_rate": 0.625, + "trust_calibration": 0.622, + "adversarial_detections": 5, + "adversarial_poisonings": 3, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3608, + 0.9256, + 0.9233, + 0.9211, + 0.3459, + 0.9167, + 0.9144, + 0.9122, + 0.91, + 0.3348, + 0.9056, + 0.9033, + 0.9011, + 0.8989, + 0.8967, + 0.8944, + 0.8922, + 0.093, + 0.0908, + 0.0886, + 0.9228, + 0.9184, + 0.9139, + 0.9095, + 0.7087 + ] + }, + { + "policy": "trained", + "task_type": "task3", + "seed": 16, + "steps": 42, + "score": 0.8546, + "total_reward": 18.8009, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.844, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.2889, + 0.8544, + 0.85, + 0.8456, + 0.8411, + 0.8676 + ] + }, + { + "policy": "trained", + "task_type": "task3", + "seed": 17, + "steps": 46, + "score": 0.8048, + "total_reward": 19.316, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.842, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.3467, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.2889, + 0.8544, + 0.28, + 0.8456, + 0.8411, + 0.8367, + 0.8322, + 0.8605 + ] + }, + { + "policy": "trained", + "task_type": "task3", + "seed": 18, + "steps": 27, + "score": 0.7333, + "total_reward": 17.5998, + "completion_rate": 0.9, + "detection_rate": 0.4444, + "trust_calibration": 0.701, + "adversarial_detections": 4, + "adversarial_poisonings": 5, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9256, + 0.9233, + 0.9211, + 0.9189, + 0.9167, + 0.9144, + 0.9122, + 0.91, + 0.9078, + 0.9056, + 0.9033, + 0.9011, + 0.8989, + 0.0997, + 0.0974, + 0.0952, + 0.093, + 0.0908, + 0.925, + 0.9206, + 0.9161, + 0.6635 + ] + }, + { + "policy": "trained", + "task_type": "task3", + "seed": 19, + "steps": 29, + "score": 0.7268, + "total_reward": 18.1697, + "completion_rate": 0.9, + "detection_rate": 0.5556, + "trust_calibration": 0.66, + "adversarial_detections": 5, + "adversarial_poisonings": 4, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3478, + 0.9256, + 0.9233, + 0.9211, + 0.9189, + 0.3437, + 0.9144, + 0.9122, + 0.91, + 0.9078, + 0.9056, + 0.9033, + 0.9011, + 0.8989, + 0.8967, + 0.0974, + 0.0952, + 0.093, + 0.0908, + 0.925, + 0.9206, + 0.9161, + 0.9117, + 0.6822 + ] + }, + { + "policy": "trained", + "task_type": "task3", + "seed": 20, + "steps": 46, + "score": 0.7498, + "total_reward": 19.4938, + "completion_rate": 0.95, + "detection_rate": 1.0, + "trust_calibration": 0.835, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9256, + 0.3763, + 0.3841, + 0.9167, + 0.9122, + 0.3378, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.3156, + 0.8811, + 0.8767, + 0.3022, + 0.8678, + 0.8633, + 0.8589, + 0.2844, + 0.85, + 0.8456, + 0.8411, + 0.8367, + 0.8322, + 0.8412 + ] + }, + { + "policy": "trained", + "task_type": "task3", + "seed": 21, + "steps": 42, + "score": 0.8546, + "total_reward": 18.8009, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.844, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9256, + 0.9211, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.8589, + 0.8544, + 0.85, + 0.2756, + 0.8411, + 0.8676 + ] + }, + { + "policy": "trained", + "task_type": "task3", + "seed": 22, + "steps": 35, + "score": 0.7608, + "total_reward": 21.3031, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.748, + "adversarial_detections": 5, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.3778, + 0.9256, + 0.9233, + 0.3481, + 0.9189, + 0.9167, + 0.9144, + 0.9122, + 0.337, + 0.9078, + 0.3326, + 0.9033, + 0.9011, + 0.3259, + 0.8967, + 0.3214, + 0.8922, + 0.89, + 0.3148, + 0.8856, + 0.9228, + 0.9184, + 0.9139, + 0.9095, + 0.905, + 0.8767, + 0.8592 + ] + }, + { + "policy": "trained", + "task_type": "task3", + "seed": 23, + "steps": 27, + "score": 0.7184, + "total_reward": 17.2411, + "completion_rate": 0.85, + "detection_rate": 0.5, + "trust_calibration": 0.629, + "adversarial_detections": 4, + "adversarial_poisonings": 4, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.3686, + 0.3433, + 0.9211, + 0.9189, + 0.9167, + 0.9144, + 0.9122, + 0.91, + 0.9078, + 0.9056, + 0.9033, + 0.9011, + 0.8989, + 0.8967, + 0.0974, + 0.0952, + 0.093, + 0.0908, + 0.925, + 0.9206, + 0.9161, + 0.6449 + ] + }, + { + "policy": "trained", + "task_type": "task3", + "seed": 24, + "steps": 46, + "score": 0.7725, + "total_reward": 20.0838, + "completion_rate": 1.0, + "detection_rate": 1.0, + "trust_calibration": 0.836, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.3686, + 0.9483, + 0.3711, + 0.9167, + 0.9122, + 0.9078, + 0.9033, + 0.8989, + 0.8944, + 0.89, + 0.8856, + 0.8811, + 0.8767, + 0.3022, + 0.8678, + 0.8633, + 0.2889, + 0.8544, + 0.85, + 0.8456, + 0.2711, + 0.8367, + 0.8322, + 0.8591 + ] + }, + { + "policy": "trained", + "task_type": "task3", + "seed": 25, + "steps": 34, + "score": 0.6755, + "total_reward": 18.9148, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.71, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9528, + 0.9506, + 0.9483, + 0.3711, + 0.3689, + 0.3667, + 0.9144, + 0.3322, + 0.91, + 0.9078, + 0.9056, + 0.9033, + 0.9011, + 0.3189, + 0.3167, + 0.8944, + 0.3122, + 0.31, + 0.3078, + 0.8856, + 0.9011, + 0.8967, + 0.8922, + 0.3078, + 0.3033, + 0.8789, + 0.782 + ] + }, + { + "policy": "trained", + "task_type": "task3", + "seed": 26, + "steps": 33, + "score": 0.6816, + "total_reward": 19.0854, + "completion_rate": 0.8, + "detection_rate": 1.0, + "trust_calibration": 0.492, + "adversarial_detections": 3, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9528, + 0.9506, + 0.9483, + 0.3711, + 0.8989, + 0.3667, + 0.3644, + 0.9372, + 0.935, + 0.3578, + 0.3556, + 0.3533, + 0.3289, + 0.8967, + 0.8944, + 0.8922, + 0.317, + 0.8878, + 0.3126, + 0.8833, + 0.9206, + 0.9161, + 0.9117, + 0.8856, + 0.8633, + 0.3141, + 0.7297 + ] + }, + { + "policy": "trained", + "task_type": "task3", + "seed": 27, + "steps": 46, + "score": 0.7256, + "total_reward": 20.3155, + "completion_rate": 0.95, + "detection_rate": 1.0, + "trust_calibration": 0.828, + "adversarial_detections": 0, + "adversarial_poisonings": 0, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9256, + 0.3763, + 0.9211, + 0.9189, + 0.3997, + 0.3994, + 0.3972, + 0.9078, + 0.9033, + 0.3289, + 0.8944, + 0.89, + 0.3156, + 0.8811, + 0.8767, + 0.8722, + 0.8678, + 0.8633, + 0.8589, + 0.2844, + 0.85, + 0.2756, + 0.8411, + 0.8367, + 0.8322, + 0.8395 + ] + }, + { + "policy": "trained", + "task_type": "task3", + "seed": 28, + "steps": 27, + "score": 0.7645, + "total_reward": 17.5845, + "completion_rate": 0.9, + "detection_rate": 0.5556, + "trust_calibration": 0.712, + "adversarial_detections": 5, + "adversarial_poisonings": 4, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9256, + 0.9233, + 0.9211, + 0.9189, + 0.9167, + 0.9144, + 0.9122, + 0.91, + 0.9078, + 0.9056, + 0.9033, + 0.9011, + 0.1019, + 0.0997, + 0.0974, + 0.0952, + 0.9295, + 0.925, + 0.9206, + 0.9161, + 0.6996 + ] + }, + { + "policy": "trained", + "task_type": "task3", + "seed": 29, + "steps": 29, + "score": 0.761, + "total_reward": 19.0244, + "completion_rate": 0.95, + "detection_rate": 0.625, + "trust_calibration": 0.709, + "adversarial_detections": 5, + "adversarial_poisonings": 3, + "status": "completed", + "difficulty_profile": { + "adaptive": false, + "episodes_seen": 0, + "rolling_detection_rate": 0.0, + "adversarial_threshold": 0.7, + "high_stakes_ratio": 0.35, + "verify_budget_penalty": 0, + "adversary_benign_confidence": 0.88, + "adversary_poison_confidence": 0.92 + }, + "rewards": [ + 0.9278, + 0.9256, + 0.9233, + 0.9211, + 0.9189, + 0.9167, + 0.9144, + 0.3392, + 0.91, + 0.9078, + 0.9056, + 0.9033, + 0.3281, + 0.8989, + 0.8967, + 0.8944, + 0.0952, + 0.093, + 0.0908, + 0.925, + 0.9206, + 0.9161, + 0.9117, + 0.7329 + ] + } + ] +}