sentinel-env / outputs /eval_post.json
XcodeAddy's picture
Add GPU trust environment and GRPO replay pipeline
a36db1b
{
"task": "all",
"tasks": [
"task1",
"task2",
"task3"
],
"episodes_per_policy": 30,
"adaptive": false,
"difficulty_controller": {
"adaptive": true,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"difficulty_controller_by_task_policy": {
"task1": {
"random": {},
"heuristic": {},
"oracle_lite": {},
"trained": {}
},
"task2": {
"random": {},
"heuristic": {},
"oracle_lite": {},
"trained": {}
},
"task3": {
"random": {},
"heuristic": {},
"oracle_lite": {},
"trained": {}
}
},
"summary": {
"random": {
"episodes": 90,
"avg_score": 0.6904,
"avg_completion_rate": 0.8131,
"avg_detection_rate": 0.7935,
"avg_trust_calibration": 0.4453,
"avg_steps": 26.2111
},
"heuristic": {
"episodes": 90,
"avg_score": 0.7817,
"avg_completion_rate": 0.8918,
"avg_detection_rate": 0.9178,
"avg_trust_calibration": 0.4373,
"avg_steps": 24.4
},
"oracle_lite": {
"episodes": 90,
"avg_score": 0.8405,
"avg_completion_rate": 0.8687,
"avg_detection_rate": 1.0,
"avg_trust_calibration": 0.5892,
"avg_steps": 29.4444
},
"trained": {
"episodes": 90,
"avg_score": 0.788,
"avg_completion_rate": 0.8979,
"avg_detection_rate": 0.9437,
"avg_trust_calibration": 0.4378,
"avg_steps": 24.5
}
},
"by_task": {
"task1": {
"random": {
"episodes": 30,
"avg_score": 0.7635,
"avg_completion_rate": 0.76,
"avg_detection_rate": 1.0,
"avg_trust_calibration": 0.0,
"avg_steps": 15.1333
},
"heuristic": {
"episodes": 30,
"avg_score": 0.8504,
"avg_completion_rate": 0.84,
"avg_detection_rate": 1.0,
"avg_trust_calibration": 0.0,
"avg_steps": 13.8333
},
"oracle_lite": {
"episodes": 30,
"avg_score": 0.9011,
"avg_completion_rate": 0.7167,
"avg_detection_rate": 1.0,
"avg_trust_calibration": 0.0,
"avg_steps": 16.0
},
"trained": {
"episodes": 30,
"avg_score": 0.8504,
"avg_completion_rate": 0.84,
"avg_detection_rate": 1.0,
"avg_trust_calibration": 0.0,
"avg_steps": 13.8333
}
},
"task2": {
"random": {
"episodes": 30,
"avg_score": 0.6472,
"avg_completion_rate": 0.8644,
"avg_detection_rate": 1.0,
"avg_trust_calibration": 0.5829,
"avg_steps": 26.7667
},
"heuristic": {
"episodes": 30,
"avg_score": 0.7497,
"avg_completion_rate": 0.9288,
"avg_detection_rate": 1.0,
"avg_trust_calibration": 0.5737,
"avg_steps": 23.2333
},
"oracle_lite": {
"episodes": 30,
"avg_score": 0.7638,
"avg_completion_rate": 0.9045,
"avg_detection_rate": 1.0,
"avg_trust_calibration": 0.8377,
"avg_steps": 30.0
},
"trained": {
"episodes": 30,
"avg_score": 0.7497,
"avg_completion_rate": 0.9288,
"avg_detection_rate": 1.0,
"avg_trust_calibration": 0.5737,
"avg_steps": 23.2333
}
},
"task3": {
"random": {
"episodes": 30,
"avg_score": 0.6606,
"avg_completion_rate": 0.815,
"avg_detection_rate": 0.3806,
"avg_trust_calibration": 0.7531,
"avg_steps": 36.7333
},
"heuristic": {
"episodes": 30,
"avg_score": 0.7449,
"avg_completion_rate": 0.9067,
"avg_detection_rate": 0.7534,
"avg_trust_calibration": 0.7383,
"avg_steps": 36.1333
},
"oracle_lite": {
"episodes": 30,
"avg_score": 0.8567,
"avg_completion_rate": 0.985,
"avg_detection_rate": 1.0,
"avg_trust_calibration": 0.9299,
"avg_steps": 42.3333
},
"trained": {
"episodes": 30,
"avg_score": 0.7637,
"avg_completion_rate": 0.925,
"avg_detection_rate": 0.8312,
"avg_trust_calibration": 0.7396,
"avg_steps": 36.4333
}
}
},
"episodes": [
{
"policy": "random",
"task_type": "task1",
"seed": 0,
"steps": 15,
"score": 0.6569,
"total_reward": 7.8825,
"completion_rate": 0.7,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.914,
0.962,
0.962,
0.02,
0.962,
0.962,
0.962,
0.3645,
0.02,
0.02,
0.867
]
},
{
"policy": "random",
"task_type": "task1",
"seed": 1,
"steps": 15,
"score": 0.7996,
"total_reward": 7.196,
"completion_rate": 0.7,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.982,
0.914,
0.914,
0.962,
0.867,
0.962,
0.3165
]
},
{
"policy": "random",
"task_type": "task1",
"seed": 2,
"steps": 15,
"score": 0.8129,
"total_reward": 8.1294,
"completion_rate": 0.7,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.369,
0.3024,
0.962,
0.962,
0.914,
0.962,
0.867,
0.867,
0.962
]
},
{
"policy": "random",
"task_type": "task1",
"seed": 3,
"steps": 14,
"score": 0.8084,
"total_reward": 10.5095,
"completion_rate": 0.9,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.982,
0.982,
0.962,
0.3455,
0.867,
0.962,
0.946,
0.962,
0.3455,
0.3645,
0.867,
0.962
]
},
{
"policy": "random",
"task_type": "task1",
"seed": 4,
"steps": 15,
"score": 0.7814,
"total_reward": 8.5956,
"completion_rate": 0.7,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.982,
0.962,
0.3645,
0.914,
0.962,
0.3455,
0.3136,
0.962,
0.914
]
},
{
"policy": "random",
"task_type": "task1",
"seed": 5,
"steps": 15,
"score": 0.725,
"total_reward": 8.7,
"completion_rate": 0.7,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.914,
0.962,
0.3165,
0.3455,
0.3455,
0.867,
0.946,
0.962,
0.3455,
0.867
]
},
{
"policy": "random",
"task_type": "task1",
"seed": 6,
"steps": 15,
"score": 0.8118,
"total_reward": 8.1182,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.914,
0.914,
0.962,
0.962,
0.962,
0.962,
0.934,
0.867,
0.3206
]
},
{
"policy": "random",
"task_type": "task1",
"seed": 7,
"steps": 15,
"score": 0.9334,
"total_reward": 9.334,
"completion_rate": 0.9,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.867,
0.914,
0.914,
0.962,
0.867,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "random",
"task_type": "task1",
"seed": 8,
"steps": 15,
"score": 0.8425,
"total_reward": 9.2675,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.369,
0.962,
0.914,
0.962,
0.962,
0.3645,
0.962,
0.982,
0.914
]
},
{
"policy": "random",
"task_type": "task1",
"seed": 9,
"steps": 15,
"score": 0.7751,
"total_reward": 9.3011,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.982,
0.962,
0.962,
0.867,
0.3616,
0.914,
0.3645,
0.02,
0.982,
0.962,
0.962
]
},
{
"policy": "random",
"task_type": "task1",
"seed": 10,
"steps": 15,
"score": 0.7653,
"total_reward": 8.418,
"completion_rate": 0.7,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.982,
0.914,
0.982,
0.962,
0.02,
0.3455,
0.3645,
0.962
]
},
{
"policy": "random",
"task_type": "task1",
"seed": 11,
"steps": 15,
"score": 0.8199,
"total_reward": 9.8394,
"completion_rate": 0.9,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.867,
0.962,
0.962,
0.3504,
0.914,
0.982,
0.962,
0.982,
0.02,
0.914,
0.962
]
},
{
"policy": "random",
"task_type": "task1",
"seed": 12,
"steps": 15,
"score": 0.6163,
"total_reward": 7.3956,
"completion_rate": 0.6,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.982,
0.914,
0.3455,
0.962,
0.898,
0.962,
0.914,
0.02,
0.3616,
0.3455,
0.3455
]
},
{
"policy": "random",
"task_type": "task1",
"seed": 13,
"steps": 15,
"score": 0.7283,
"total_reward": 6.555,
"completion_rate": 0.7,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.914,
0.867,
0.962,
0.934,
0.962,
0.914,
0.02
]
},
{
"policy": "random",
"task_type": "task1",
"seed": 14,
"steps": 17,
"score": 0.8867,
"total_reward": 10.6405,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.867,
0.914,
0.914,
0.3455,
0.962,
0.962,
0.962,
0.962,
0.962,
0.914
]
},
{
"policy": "random",
"task_type": "task1",
"seed": 15,
"steps": 16,
"score": 0.6915,
"total_reward": 9.6809,
"completion_rate": 0.7,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3504,
0.962,
0.962,
0.867,
0.3645,
0.3645,
0.982,
0.3645,
0.867,
0.982,
0.3455,
0.3455,
0.962
]
},
{
"policy": "random",
"task_type": "task1",
"seed": 16,
"steps": 15,
"score": 0.7164,
"total_reward": 9.313,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.962,
0.962,
0.962,
0.2975,
0.02,
0.982,
0.02,
0.3455,
0.914,
0.962
]
},
{
"policy": "random",
"task_type": "task1",
"seed": 17,
"steps": 15,
"score": 0.6495,
"total_reward": 8.4439,
"completion_rate": 0.7,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.02,
0.3455,
0.3136,
0.867,
0.982,
0.962,
0.3206,
0.962,
0.962,
0.982,
0.3826
]
},
{
"policy": "random",
"task_type": "task1",
"seed": 18,
"steps": 15,
"score": 0.8235,
"total_reward": 9.8815,
"completion_rate": 0.9,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.02,
0.962,
0.982,
0.3645,
0.962,
0.867,
0.962,
0.914,
0.962
]
},
{
"policy": "random",
"task_type": "task1",
"seed": 19,
"steps": 15,
"score": 0.7588,
"total_reward": 8.347,
"completion_rate": 0.7,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.2975,
0.962,
0.962,
0.962,
0.962,
0.3455,
0.02,
0.93,
0.982,
0.962
]
},
{
"policy": "random",
"task_type": "task1",
"seed": 20,
"steps": 15,
"score": 0.6444,
"total_reward": 7.7329,
"completion_rate": 0.6,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3525,
0.867,
0.867,
0.962,
0.962,
0.3504,
0.962,
0.962,
0.3504,
0.3686,
0.3645
]
},
{
"policy": "random",
"task_type": "task1",
"seed": 21,
"steps": 15,
"score": 0.8756,
"total_reward": 9.6315,
"completion_rate": 0.9,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.914,
0.914,
0.962,
0.3455,
0.962,
0.962,
0.914,
0.867
]
},
{
"policy": "random",
"task_type": "task1",
"seed": 22,
"steps": 15,
"score": 0.6437,
"total_reward": 7.0809,
"completion_rate": 0.5,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3504,
0.02,
0.93,
0.914,
0.3645,
0.3645,
0.962,
0.2975,
0.914,
0.982
]
},
{
"policy": "random",
"task_type": "task1",
"seed": 23,
"steps": 15,
"score": 0.8985,
"total_reward": 10.7824,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.867,
0.962,
0.3504,
0.982,
0.962,
0.962,
0.867,
0.962,
0.962,
0.982,
0.962
]
},
{
"policy": "random",
"task_type": "task1",
"seed": 24,
"steps": 16,
"score": 0.6933,
"total_reward": 7.6267,
"completion_rate": 0.6,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.2975,
0.982,
0.982,
0.3066,
0.934,
0.914,
0.962,
0.3686,
0.02,
0.93
]
},
{
"policy": "random",
"task_type": "task1",
"seed": 25,
"steps": 15,
"score": 0.8266,
"total_reward": 9.0928,
"completion_rate": 0.9,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.914,
0.982,
0.962,
0.867,
0.962,
0.962,
0.867,
0.914,
0.962,
0.3504
]
},
{
"policy": "random",
"task_type": "task1",
"seed": 26,
"steps": 15,
"score": 0.7833,
"total_reward": 7.8326,
"completion_rate": 0.7,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.914,
0.962,
0.982,
0.962,
0.962,
0.02,
0.3206,
0.914,
0.898
]
},
{
"policy": "random",
"task_type": "task1",
"seed": 27,
"steps": 16,
"score": 0.8311,
"total_reward": 9.1421,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.934,
0.3455,
0.3546,
0.962,
0.93,
0.982,
0.962,
0.93,
0.914,
0.914
]
},
{
"policy": "random",
"task_type": "task1",
"seed": 28,
"steps": 15,
"score": 0.7196,
"total_reward": 8.6356,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.914,
0.962,
0.962,
0.914,
0.962,
0.962,
0.962,
0.962,
0.3826,
0.02,
0.3165
]
},
{
"policy": "random",
"task_type": "task1",
"seed": 29,
"steps": 15,
"score": 0.5851,
"total_reward": 7.021,
"completion_rate": 0.6,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.914,
0.3455,
0.962,
0.02,
0.914,
0.3165,
0.02,
0.867,
0.962,
0.369
]
},
{
"policy": "heuristic",
"task_type": "task1",
"seed": 0,
"steps": 13,
"score": 0.753,
"total_reward": 10.5415,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.982,
0.982,
0.982,
0.3645,
0.3645,
0.982,
0.3645,
0.3645,
0.962,
0.962,
0.3455,
0.962,
0.962
]
},
{
"policy": "heuristic",
"task_type": "task1",
"seed": 1,
"steps": 12,
"score": 0.7843,
"total_reward": 10.196,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.982,
0.982,
0.982,
0.3645,
0.3645,
0.982,
0.3645,
0.3645,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "heuristic",
"task_type": "task1",
"seed": 2,
"steps": 11,
"score": 0.8612,
"total_reward": 10.3345,
"completion_rate": 0.9,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.369,
0.3455,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "heuristic",
"task_type": "task1",
"seed": 3,
"steps": 16,
"score": 0.962,
"total_reward": 8.658,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "heuristic",
"task_type": "task1",
"seed": 4,
"steps": 11,
"score": 0.911,
"total_reward": 10.9324,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.3504,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "heuristic",
"task_type": "task1",
"seed": 5,
"steps": 16,
"score": 0.8266,
"total_reward": 7.439,
"completion_rate": 0.6,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.962,
0.3525,
0.962,
0.3525,
0.962,
0.962
]
},
{
"policy": "heuristic",
"task_type": "task1",
"seed": 6,
"steps": 10,
"score": 0.962,
"total_reward": 10.582,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "heuristic",
"task_type": "task1",
"seed": 7,
"steps": 16,
"score": 0.8166,
"total_reward": 9.7988,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.3686,
0.962,
0.3826,
0.3896,
0.962,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "heuristic",
"task_type": "task1",
"seed": 8,
"steps": 16,
"score": 0.8399,
"total_reward": 8.3989,
"completion_rate": 0.7,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.3504,
0.962,
0.962,
0.962,
0.962,
0.3525,
0.962,
0.962
]
},
{
"policy": "heuristic",
"task_type": "task1",
"seed": 9,
"steps": 16,
"score": 0.785,
"total_reward": 10.2052,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.962,
0.3756,
0.962,
0.3896,
0.391,
0.391,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "heuristic",
"task_type": "task1",
"seed": 10,
"steps": 10,
"score": 0.962,
"total_reward": 10.582,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "heuristic",
"task_type": "task1",
"seed": 11,
"steps": 16,
"score": 0.7843,
"total_reward": 10.196,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.982,
0.982,
0.982,
0.3645,
0.3645,
0.982,
0.3645,
0.3645,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "heuristic",
"task_type": "task1",
"seed": 12,
"steps": 16,
"score": 0.962,
"total_reward": 8.658,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "heuristic",
"task_type": "task1",
"seed": 13,
"steps": 16,
"score": 0.9003,
"total_reward": 9.0035,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.3455,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "heuristic",
"task_type": "task1",
"seed": 14,
"steps": 13,
"score": 0.7534,
"total_reward": 10.5473,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.3686,
0.3756,
0.962,
0.3896,
0.391,
0.3645,
0.962,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "heuristic",
"task_type": "task1",
"seed": 15,
"steps": 13,
"score": 0.8312,
"total_reward": 11.6374,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3546,
0.962,
0.962,
0.962,
0.3504,
0.962,
0.962,
0.962,
0.962,
0.3504,
0.962,
0.962,
0.962
]
},
{
"policy": "heuristic",
"task_type": "task1",
"seed": 16,
"steps": 16,
"score": 0.962,
"total_reward": 8.658,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "heuristic",
"task_type": "task1",
"seed": 17,
"steps": 16,
"score": 0.8943,
"total_reward": 8.0485,
"completion_rate": 0.7,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.3525,
0.962,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "heuristic",
"task_type": "task1",
"seed": 18,
"steps": 10,
"score": 0.962,
"total_reward": 10.582,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "heuristic",
"task_type": "task1",
"seed": 19,
"steps": 12,
"score": 0.8675,
"total_reward": 11.2779,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3455,
0.962,
0.962,
0.962,
0.962,
0.3504,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "heuristic",
"task_type": "task1",
"seed": 20,
"steps": 16,
"score": 0.7993,
"total_reward": 8.7927,
"completion_rate": 0.7,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.3686,
0.3756,
0.962,
0.962,
0.3525,
0.962,
0.962,
0.962
]
},
{
"policy": "heuristic",
"task_type": "task1",
"seed": 21,
"steps": 16,
"score": 0.962,
"total_reward": 8.658,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "heuristic",
"task_type": "task1",
"seed": 22,
"steps": 15,
"score": 0.772,
"total_reward": 12.3526,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.369,
0.962,
0.962,
0.3504,
0.962,
0.962,
0.962,
0.962,
0.3504,
0.962,
0.3504,
0.962,
0.962,
0.3504,
0.962
]
},
{
"policy": "heuristic",
"task_type": "task1",
"seed": 23,
"steps": 11,
"score": 0.8606,
"total_reward": 10.3271,
"completion_rate": 0.9,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.3616,
0.3455,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "heuristic",
"task_type": "task1",
"seed": 24,
"steps": 16,
"score": 0.8161,
"total_reward": 9.7931,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.3616,
0.982,
0.369,
0.982,
0.3645,
0.962,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "heuristic",
"task_type": "task1",
"seed": 25,
"steps": 14,
"score": 0.6506,
"total_reward": 9.7585,
"completion_rate": 0.7,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.982,
0.982,
0.982,
0.3645,
0.3645,
0.982,
0.946,
0.369,
0.982,
0.3645,
0.3645,
0.982,
0.3645,
0.3645
]
},
{
"policy": "heuristic",
"task_type": "task1",
"seed": 26,
"steps": 16,
"score": 0.7054,
"total_reward": 11.2865,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.982,
0.982,
0.982,
0.3645,
0.946,
0.982,
0.3645,
0.946,
0.982,
0.3645,
0.3645,
0.369,
0.369,
0.3645,
0.962
]
},
{
"policy": "heuristic",
"task_type": "task1",
"seed": 27,
"steps": 15,
"score": 0.6937,
"total_reward": 9.0177,
"completion_rate": 0.6,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.3686,
0.962,
0.962,
0.3896,
0.391,
0.391,
0.391,
0.962,
0.3525,
0.962
]
},
{
"policy": "heuristic",
"task_type": "task1",
"seed": 28,
"steps": 10,
"score": 0.962,
"total_reward": 10.582,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "heuristic",
"task_type": "task1",
"seed": 29,
"steps": 11,
"score": 0.911,
"total_reward": 10.9324,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.3504,
0.962,
0.962,
0.962
]
},
{
"policy": "oracle_lite",
"task_type": "task1",
"seed": 0,
"steps": 16,
"score": 0.962,
"total_reward": 8.658,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "oracle_lite",
"task_type": "task1",
"seed": 1,
"steps": 16,
"score": 0.962,
"total_reward": 8.658,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "oracle_lite",
"task_type": "task1",
"seed": 2,
"steps": 16,
"score": 0.8266,
"total_reward": 7.439,
"completion_rate": 0.6,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3525,
0.3525,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "oracle_lite",
"task_type": "task1",
"seed": 3,
"steps": 16,
"score": 0.962,
"total_reward": 8.658,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "oracle_lite",
"task_type": "task1",
"seed": 4,
"steps": 16,
"score": 0.8943,
"total_reward": 8.0485,
"completion_rate": 0.7,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.3525,
0.962
]
},
{
"policy": "oracle_lite",
"task_type": "task1",
"seed": 5,
"steps": 16,
"score": 0.8266,
"total_reward": 7.439,
"completion_rate": 0.6,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.962,
0.3525,
0.962,
0.3525,
0.962,
0.962
]
},
{
"policy": "oracle_lite",
"task_type": "task1",
"seed": 6,
"steps": 16,
"score": 0.962,
"total_reward": 8.658,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "oracle_lite",
"task_type": "task1",
"seed": 7,
"steps": 16,
"score": 0.962,
"total_reward": 8.658,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "oracle_lite",
"task_type": "task1",
"seed": 8,
"steps": 16,
"score": 0.8266,
"total_reward": 7.439,
"completion_rate": 0.6,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.3525,
0.962,
0.962,
0.962,
0.962,
0.3525,
0.962
]
},
{
"policy": "oracle_lite",
"task_type": "task1",
"seed": 9,
"steps": 16,
"score": 0.962,
"total_reward": 8.658,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "oracle_lite",
"task_type": "task1",
"seed": 10,
"steps": 16,
"score": 0.962,
"total_reward": 8.658,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "oracle_lite",
"task_type": "task1",
"seed": 11,
"steps": 16,
"score": 0.8943,
"total_reward": 8.0485,
"completion_rate": 0.7,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.3525,
0.962,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "oracle_lite",
"task_type": "task1",
"seed": 12,
"steps": 16,
"score": 0.962,
"total_reward": 8.658,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "oracle_lite",
"task_type": "task1",
"seed": 13,
"steps": 16,
"score": 0.962,
"total_reward": 8.658,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "oracle_lite",
"task_type": "task1",
"seed": 14,
"steps": 16,
"score": 0.8943,
"total_reward": 8.0485,
"completion_rate": 0.7,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.962,
0.3525,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "oracle_lite",
"task_type": "task1",
"seed": 15,
"steps": 16,
"score": 0.8266,
"total_reward": 7.439,
"completion_rate": 0.6,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3525,
0.962,
0.962,
0.962,
0.3525,
0.962,
0.962,
0.962
]
},
{
"policy": "oracle_lite",
"task_type": "task1",
"seed": 16,
"steps": 16,
"score": 0.962,
"total_reward": 8.658,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "oracle_lite",
"task_type": "task1",
"seed": 17,
"steps": 16,
"score": 0.8943,
"total_reward": 8.0485,
"completion_rate": 0.7,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.3525,
0.962,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "oracle_lite",
"task_type": "task1",
"seed": 18,
"steps": 16,
"score": 0.962,
"total_reward": 8.658,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "oracle_lite",
"task_type": "task1",
"seed": 19,
"steps": 16,
"score": 0.8943,
"total_reward": 8.0485,
"completion_rate": 0.7,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.962,
0.962,
0.962,
0.3525,
0.962,
0.962
]
},
{
"policy": "oracle_lite",
"task_type": "task1",
"seed": 20,
"steps": 16,
"score": 0.7588,
"total_reward": 6.8295,
"completion_rate": 0.5,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3525,
0.962,
0.962,
0.3525,
0.962,
0.962,
0.3525,
0.962
]
},
{
"policy": "oracle_lite",
"task_type": "task1",
"seed": 21,
"steps": 16,
"score": 0.962,
"total_reward": 8.658,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "oracle_lite",
"task_type": "task1",
"seed": 22,
"steps": 16,
"score": 0.8266,
"total_reward": 7.439,
"completion_rate": 0.6,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3525,
0.962,
0.962,
0.3525,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "oracle_lite",
"task_type": "task1",
"seed": 23,
"steps": 16,
"score": 0.8266,
"total_reward": 7.439,
"completion_rate": 0.6,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3525,
0.3525,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "oracle_lite",
"task_type": "task1",
"seed": 24,
"steps": 16,
"score": 0.8943,
"total_reward": 8.0485,
"completion_rate": 0.7,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.962,
0.3525,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "oracle_lite",
"task_type": "task1",
"seed": 25,
"steps": 16,
"score": 0.7588,
"total_reward": 6.8295,
"completion_rate": 0.6,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.3525,
0.962,
0.962,
0.962,
0.962,
0.962,
0.3525
]
},
{
"policy": "oracle_lite",
"task_type": "task1",
"seed": 26,
"steps": 16,
"score": 0.962,
"total_reward": 8.658,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "oracle_lite",
"task_type": "task1",
"seed": 27,
"steps": 16,
"score": 0.8943,
"total_reward": 8.0485,
"completion_rate": 0.7,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.3525,
0.962,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "oracle_lite",
"task_type": "task1",
"seed": 28,
"steps": 16,
"score": 0.962,
"total_reward": 8.658,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "oracle_lite",
"task_type": "task1",
"seed": 29,
"steps": 16,
"score": 0.8266,
"total_reward": 7.439,
"completion_rate": 0.7,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.3525
]
},
{
"policy": "trained",
"task_type": "task1",
"seed": 0,
"steps": 13,
"score": 0.753,
"total_reward": 10.5415,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.982,
0.982,
0.982,
0.3645,
0.3645,
0.982,
0.3645,
0.3645,
0.962,
0.962,
0.3455,
0.962,
0.962
]
},
{
"policy": "trained",
"task_type": "task1",
"seed": 1,
"steps": 12,
"score": 0.7843,
"total_reward": 10.196,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.982,
0.982,
0.982,
0.3645,
0.3645,
0.982,
0.3645,
0.3645,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "trained",
"task_type": "task1",
"seed": 2,
"steps": 11,
"score": 0.8612,
"total_reward": 10.3345,
"completion_rate": 0.9,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.369,
0.3455,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "trained",
"task_type": "task1",
"seed": 3,
"steps": 16,
"score": 0.962,
"total_reward": 8.658,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "trained",
"task_type": "task1",
"seed": 4,
"steps": 11,
"score": 0.911,
"total_reward": 10.9324,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.3504,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "trained",
"task_type": "task1",
"seed": 5,
"steps": 16,
"score": 0.8266,
"total_reward": 7.439,
"completion_rate": 0.6,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.962,
0.3525,
0.962,
0.3525,
0.962,
0.962
]
},
{
"policy": "trained",
"task_type": "task1",
"seed": 6,
"steps": 10,
"score": 0.962,
"total_reward": 10.582,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "trained",
"task_type": "task1",
"seed": 7,
"steps": 16,
"score": 0.8166,
"total_reward": 9.7988,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.3686,
0.962,
0.3826,
0.3896,
0.962,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "trained",
"task_type": "task1",
"seed": 8,
"steps": 16,
"score": 0.8399,
"total_reward": 8.3989,
"completion_rate": 0.7,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.3504,
0.962,
0.962,
0.962,
0.962,
0.3525,
0.962,
0.962
]
},
{
"policy": "trained",
"task_type": "task1",
"seed": 9,
"steps": 16,
"score": 0.785,
"total_reward": 10.2052,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.962,
0.3756,
0.962,
0.3896,
0.391,
0.391,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "trained",
"task_type": "task1",
"seed": 10,
"steps": 10,
"score": 0.962,
"total_reward": 10.582,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "trained",
"task_type": "task1",
"seed": 11,
"steps": 16,
"score": 0.7843,
"total_reward": 10.196,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.982,
0.982,
0.982,
0.3645,
0.3645,
0.982,
0.3645,
0.3645,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "trained",
"task_type": "task1",
"seed": 12,
"steps": 16,
"score": 0.962,
"total_reward": 8.658,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "trained",
"task_type": "task1",
"seed": 13,
"steps": 16,
"score": 0.9003,
"total_reward": 9.0035,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.3455,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "trained",
"task_type": "task1",
"seed": 14,
"steps": 13,
"score": 0.7534,
"total_reward": 10.5473,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.3686,
0.3756,
0.962,
0.3896,
0.391,
0.3645,
0.962,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "trained",
"task_type": "task1",
"seed": 15,
"steps": 13,
"score": 0.8312,
"total_reward": 11.6374,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3546,
0.962,
0.962,
0.962,
0.3504,
0.962,
0.962,
0.962,
0.962,
0.3504,
0.962,
0.962,
0.962
]
},
{
"policy": "trained",
"task_type": "task1",
"seed": 16,
"steps": 16,
"score": 0.962,
"total_reward": 8.658,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "trained",
"task_type": "task1",
"seed": 17,
"steps": 16,
"score": 0.8943,
"total_reward": 8.0485,
"completion_rate": 0.7,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.3525,
0.962,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "trained",
"task_type": "task1",
"seed": 18,
"steps": 10,
"score": 0.962,
"total_reward": 10.582,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "trained",
"task_type": "task1",
"seed": 19,
"steps": 12,
"score": 0.8675,
"total_reward": 11.2779,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3455,
0.962,
0.962,
0.962,
0.962,
0.3504,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "trained",
"task_type": "task1",
"seed": 20,
"steps": 16,
"score": 0.7993,
"total_reward": 8.7927,
"completion_rate": 0.7,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.3686,
0.3756,
0.962,
0.962,
0.3525,
0.962,
0.962,
0.962
]
},
{
"policy": "trained",
"task_type": "task1",
"seed": 21,
"steps": 16,
"score": 0.962,
"total_reward": 8.658,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "trained",
"task_type": "task1",
"seed": 22,
"steps": 15,
"score": 0.772,
"total_reward": 12.3526,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.369,
0.962,
0.962,
0.3504,
0.962,
0.962,
0.962,
0.962,
0.3504,
0.962,
0.3504,
0.962,
0.962,
0.3504,
0.962
]
},
{
"policy": "trained",
"task_type": "task1",
"seed": 23,
"steps": 11,
"score": 0.8606,
"total_reward": 10.3271,
"completion_rate": 0.9,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.3616,
0.3455,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "trained",
"task_type": "task1",
"seed": 24,
"steps": 16,
"score": 0.8161,
"total_reward": 9.7931,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.3616,
0.982,
0.369,
0.982,
0.3645,
0.962,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "trained",
"task_type": "task1",
"seed": 25,
"steps": 14,
"score": 0.6506,
"total_reward": 9.7585,
"completion_rate": 0.7,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.982,
0.982,
0.982,
0.3645,
0.3645,
0.982,
0.946,
0.369,
0.982,
0.3645,
0.3645,
0.982,
0.3645,
0.3645
]
},
{
"policy": "trained",
"task_type": "task1",
"seed": 26,
"steps": 16,
"score": 0.7054,
"total_reward": 11.2865,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.982,
0.982,
0.982,
0.3645,
0.946,
0.982,
0.3645,
0.946,
0.982,
0.3645,
0.3645,
0.369,
0.369,
0.3645,
0.962
]
},
{
"policy": "trained",
"task_type": "task1",
"seed": 27,
"steps": 15,
"score": 0.6937,
"total_reward": 9.0177,
"completion_rate": 0.6,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.3686,
0.962,
0.962,
0.3896,
0.391,
0.391,
0.391,
0.962,
0.3525,
0.962
]
},
{
"policy": "trained",
"task_type": "task1",
"seed": 28,
"steps": 10,
"score": 0.962,
"total_reward": 10.582,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962
]
},
{
"policy": "trained",
"task_type": "task1",
"seed": 29,
"steps": 11,
"score": 0.911,
"total_reward": 10.9324,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.0,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.962,
0.3504,
0.962,
0.962,
0.962
]
},
{
"policy": "random",
"task_type": "task2",
"seed": 0,
"steps": 25,
"score": 0.5298,
"total_reward": 9.5365,
"completion_rate": 0.667,
"detection_rate": 1.0,
"trust_calibration": 0.461,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9083,
0.925,
0.9167,
0.02,
0.8917,
0.8833,
0.8667,
0.2233,
0.02,
0.02,
0.725,
0.8083,
0.6917,
0.775,
0.1317,
0.115,
0.5949
]
},
{
"policy": "random",
"task_type": "task2",
"seed": 1,
"steps": 31,
"score": 0.7252,
"total_reward": 13.054,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.569,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9417,
0.9833,
0.8833,
0.8583,
0.875,
0.7583,
0.8417,
0.165,
0.7833,
0.8,
0.6833,
0.735,
0.1217,
0.7167,
0.7,
0.7083,
0.849
]
},
{
"policy": "random",
"task_type": "task2",
"seed": 2,
"steps": 27,
"score": 0.6551,
"total_reward": 11.7913,
"completion_rate": 0.867,
"detection_rate": 1.0,
"trust_calibration": 0.579,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3517,
0.262,
0.9167,
0.9083,
0.8583,
0.875,
0.7583,
0.7417,
0.825,
0.7083,
0.8,
0.6833,
0.6667,
0.75,
0.0967,
0.0983,
0.766
]
},
{
"policy": "random",
"task_type": "task2",
"seed": 3,
"steps": 28,
"score": 0.6154,
"total_reward": 12.9233,
"completion_rate": 0.867,
"detection_rate": 1.0,
"trust_calibration": 0.586,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.99,
0.9833,
0.925,
0.2717,
0.8,
0.8917,
0.9333,
0.875,
0.2217,
0.2233,
0.7417,
0.8333,
0.1483,
0.7917,
0.1383,
0.7333,
0.75,
0.7083,
0.123,
0.7686
]
},
{
"policy": "random",
"task_type": "task2",
"seed": 4,
"steps": 28,
"score": 0.6575,
"total_reward": 12.4928,
"completion_rate": 0.867,
"detection_rate": 1.0,
"trust_calibration": 0.458,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9417,
0.9833,
0.925,
0.2817,
0.8667,
0.8833,
0.23,
0.2113,
0.8417,
0.8,
0.775,
0.7917,
0.775,
0.7583,
0.75,
0.1067,
0.1313,
0.7235
]
},
{
"policy": "random",
"task_type": "task2",
"seed": 5,
"steps": 26,
"score": 0.6206,
"total_reward": 12.4128,
"completion_rate": 0.867,
"detection_rate": 1.0,
"trust_calibration": 0.408,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9417,
0.9,
0.9167,
0.24,
0.2467,
0.2383,
0.7667,
0.9083,
0.85,
0.1967,
0.725,
0.8667,
0.165,
0.7917,
0.7833,
0.7667,
0.02,
0.65,
0.7061
]
},
{
"policy": "random",
"task_type": "task2",
"seed": 6,
"steps": 31,
"score": 0.7065,
"total_reward": 12.7163,
"completion_rate": 0.933,
"detection_rate": 1.0,
"trust_calibration": 0.576,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9083,
0.8833,
0.9,
0.8917,
0.8833,
0.875,
0.8833,
0.7417,
0.188,
0.8667,
0.13,
0.7583,
0.7667,
0.7583,
0.7083,
0.7,
0.8083
]
},
{
"policy": "random",
"task_type": "task2",
"seed": 7,
"steps": 25,
"score": 0.7366,
"total_reward": 12.5218,
"completion_rate": 0.933,
"detection_rate": 1.0,
"trust_calibration": 0.776,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.8333,
0.8917,
0.875,
0.8917,
0.775,
0.8583,
0.85,
0.8333,
0.825,
0.1817,
0.8,
0.6833,
0.825,
0.02,
0.7583,
0.8784
]
},
{
"policy": "random",
"task_type": "task2",
"seed": 8,
"steps": 25,
"score": 0.7329,
"total_reward": 13.9253,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.425,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9333,
0.335,
0.9167,
0.8667,
0.8833,
0.875,
0.2767,
0.8583,
0.9,
0.8,
0.8167,
0.7,
0.1233,
0.775,
0.7667,
0.8083,
0.75,
0.7987
]
},
{
"policy": "random",
"task_type": "task2",
"seed": 9,
"steps": 27,
"score": 0.7062,
"total_reward": 12.712,
"completion_rate": 0.933,
"detection_rate": 1.0,
"trust_calibration": 0.597,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.99,
0.9333,
0.925,
0.8083,
0.278,
0.8583,
0.925,
0.02,
0.2233,
0.8417,
0.825,
0.8167,
0.735,
0.6833,
0.7333,
0.65,
0.8157
]
},
{
"policy": "random",
"task_type": "task2",
"seed": 10,
"steps": 26,
"score": 0.5723,
"total_reward": 12.0174,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.855,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9417,
0.925,
0.9667,
0.8667,
0.2483,
0.8667,
0.02,
0.205,
0.8917,
0.825,
0.8167,
0.775,
0.1567,
0.02,
0.153,
0.7667,
0.8083,
0.105,
0.1067,
0.8194
]
},
{
"policy": "random",
"task_type": "task2",
"seed": 11,
"steps": 23,
"score": 0.7197,
"total_reward": 12.955,
"completion_rate": 0.933,
"detection_rate": 1.0,
"trust_calibration": 0.57,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.8333,
0.925,
0.9167,
0.2703,
0.8667,
0.2483,
0.8667,
0.9083,
0.02,
0.8083,
0.825,
0.8167,
0.7683,
0.7917,
0.7833,
0.7417,
0.8063
]
},
{
"policy": "random",
"task_type": "task2",
"seed": 12,
"steps": 30,
"score": 0.6047,
"total_reward": 12.0935,
"completion_rate": 0.867,
"detection_rate": 1.0,
"trust_calibration": 0.344,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.99,
0.9,
0.2717,
0.9083,
0.8267,
0.8833,
0.8333,
0.02,
0.2197,
0.1883,
0.18,
0.7833,
0.7917,
0.7417,
0.7583,
0.65,
0.7,
0.0633,
0.6839
]
},
{
"policy": "random",
"task_type": "task2",
"seed": 13,
"steps": 30,
"score": 0.6649,
"total_reward": 11.9681,
"completion_rate": 0.933,
"detection_rate": 1.0,
"trust_calibration": 0.271,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9417,
0.8917,
0.8,
0.8833,
0.2067,
0.8583,
0.8083,
0.02,
0.7833,
0.6917,
0.75,
0.6583,
0.75,
0.7333,
0.09,
0.7,
0.7014
]
},
{
"policy": "random",
"task_type": "task2",
"seed": 14,
"steps": 26,
"score": 0.7146,
"total_reward": 13.5771,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.416,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9417,
0.825,
0.8833,
0.8667,
0.2383,
0.875,
0.8667,
0.8583,
0.85,
0.8333,
0.7833,
0.8,
0.1797,
0.7833,
0.775,
0.1217,
0.65,
0.7957
]
},
{
"policy": "random",
"task_type": "task2",
"seed": 15,
"steps": 27,
"score": 0.5573,
"total_reward": 12.2603,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.606,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3037,
0.9333,
0.925,
0.8083,
0.265,
0.2567,
0.8433,
0.925,
0.7583,
0.215,
0.1967,
0.1883,
0.8167,
0.8,
0.1567,
0.675,
0.7583,
0.75,
0.7417,
0.0883,
0.732
]
},
{
"policy": "random",
"task_type": "task2",
"seed": 16,
"steps": 23,
"score": 0.5543,
"total_reward": 11.0864,
"completion_rate": 0.733,
"detection_rate": 1.0,
"trust_calibration": 0.437,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9417,
0.9333,
0.925,
0.9167,
0.9083,
0.2217,
0.02,
0.925,
0.02,
0.2133,
0.8167,
0.825,
0.1817,
0.02,
0.8,
0.7583,
0.775,
0.1317,
0.6298
]
},
{
"policy": "random",
"task_type": "task2",
"seed": 17,
"steps": 27,
"score": 0.5694,
"total_reward": 11.9565,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.688,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9417,
0.02,
0.28,
0.2613,
0.7917,
0.2483,
0.875,
0.2213,
0.85,
0.8417,
0.7933,
0.233,
0.8667,
0.7,
0.7833,
0.7667,
0.7583,
0.168,
0.0703,
0.7609
]
},
{
"policy": "random",
"task_type": "task2",
"seed": 18,
"steps": 23,
"score": 0.6662,
"total_reward": 11.3256,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.602,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9417,
0.925,
0.02,
0.9,
0.2567,
0.2483,
0.875,
0.7583,
0.85,
0.8083,
0.825,
0.8083,
0.85,
0.02,
0.75,
0.7306
]
},
{
"policy": "random",
"task_type": "task2",
"seed": 19,
"steps": 27,
"score": 0.7003,
"total_reward": 12.6055,
"completion_rate": 0.933,
"detection_rate": 1.0,
"trust_calibration": 0.625,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.2633,
0.9167,
0.9083,
0.8917,
0.8833,
0.23,
0.02,
0.825,
0.8917,
0.825,
0.7083,
0.8,
0.6833,
0.7417,
0.7583,
0.7083,
0.8255
]
},
{
"policy": "random",
"task_type": "task2",
"seed": 20,
"steps": 32,
"score": 0.5557,
"total_reward": 12.7811,
"completion_rate": 0.867,
"detection_rate": 1.0,
"trust_calibration": 0.636,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.2983,
0.8167,
0.8,
0.8917,
0.8833,
0.237,
0.8667,
0.85,
0.2037,
0.2213,
0.19,
0.8083,
0.8,
0.1997,
0.7833,
0.775,
0.6583,
0.0787,
0.725,
0.1347,
0.7083,
0.7861
]
},
{
"policy": "random",
"task_type": "task2",
"seed": 21,
"steps": 23,
"score": 0.7485,
"total_reward": 12.7247,
"completion_rate": 0.933,
"detection_rate": 1.0,
"trust_calibration": 0.523,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9417,
0.9333,
0.8917,
0.875,
0.8917,
0.2383,
0.8667,
0.8583,
0.8167,
0.725,
0.8083,
0.8,
0.02,
0.7833,
0.7667,
0.7897
]
},
{
"policy": "random",
"task_type": "task2",
"seed": 22,
"steps": 31,
"score": 0.4891,
"total_reward": 10.2706,
"completion_rate": 0.667,
"detection_rate": 1.0,
"trust_calibration": 0.544,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3037,
0.02,
0.8917,
0.8667,
0.2483,
0.24,
0.8667,
0.18,
0.8083,
0.875,
0.1817,
0.165,
0.7583,
0.13,
0.1217,
0.75,
0.0983,
0.725,
0.7167,
0.6236
]
},
{
"policy": "random",
"task_type": "task2",
"seed": 23,
"steps": 20,
"score": 0.7708,
"total_reward": 13.1031,
"completion_rate": 0.933,
"detection_rate": 1.0,
"trust_calibration": 0.755,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.8333,
0.925,
0.2787,
0.9583,
0.9,
0.8917,
0.775,
0.8667,
0.85,
0.8917,
0.825,
0.8167,
0.8083,
0.8,
0.7917,
0.8711
]
},
{
"policy": "random",
"task_type": "task2",
"seed": 24,
"steps": 31,
"score": 0.5498,
"total_reward": 9.8972,
"completion_rate": 0.667,
"detection_rate": 1.0,
"trust_calibration": 0.809,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.2633,
0.975,
0.9667,
0.243,
0.9083,
0.8333,
0.85,
0.2297,
0.02,
0.7917,
0.02,
0.1647,
0.7417,
0.09,
0.7333,
0.7,
0.7165
]
},
{
"policy": "random",
"task_type": "task2",
"seed": 25,
"steps": 23,
"score": 0.791,
"total_reward": 13.4466,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.623,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9,
0.9667,
0.9083,
0.7917,
0.8833,
0.875,
0.7583,
0.8167,
0.8333,
0.187,
0.8167,
0.8083,
0.6917,
0.8333,
0.775,
0.8679
]
},
{
"policy": "random",
"task_type": "task2",
"seed": 26,
"steps": 29,
"score": 0.5823,
"total_reward": 11.646,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.895,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9,
0.9167,
0.9583,
0.9,
0.8917,
0.02,
0.2297,
0.8167,
0.76,
0.8083,
0.7917,
0.1813,
0.13,
0.0953,
0.75,
0.7917,
0.1413,
0.08,
0.8333
]
},
{
"policy": "random",
"task_type": "task2",
"seed": 27,
"steps": 29,
"score": 0.6402,
"total_reward": 12.1644,
"completion_rate": 0.867,
"detection_rate": 1.0,
"trust_calibration": 0.703,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9583,
0.28,
0.2847,
0.9083,
0.8667,
0.2483,
0.8667,
0.825,
0.8083,
0.7917,
0.825,
0.02,
0.75,
0.8167,
0.65,
0.0703,
0.725,
0.8094
]
},
{
"policy": "random",
"task_type": "task2",
"seed": 28,
"steps": 23,
"score": 0.7219,
"total_reward": 12.9944,
"completion_rate": 0.933,
"detection_rate": 1.0,
"trust_calibration": 0.671,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9083,
0.9167,
0.9083,
0.8667,
0.8833,
0.875,
0.8667,
0.8583,
0.258,
0.02,
0.165,
0.8167,
0.8,
0.8417,
0.7833,
0.6667,
0.8414
]
},
{
"policy": "random",
"task_type": "task2",
"seed": 29,
"steps": 27,
"score": 0.5586,
"total_reward": 10.614,
"completion_rate": 0.733,
"detection_rate": 1.0,
"trust_calibration": 0.478,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9417,
0.9,
0.2717,
0.9083,
0.02,
0.8583,
0.8917,
0.02,
0.7417,
0.8333,
0.19,
0.1817,
0.7667,
0.7833,
0.7417,
0.1133,
0.0817,
0.644
]
},
{
"policy": "heuristic",
"task_type": "task2",
"seed": 0,
"steps": 31,
"score": 0.6145,
"total_reward": 12.2902,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.72,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.99,
0.9833,
0.975,
0.2817,
0.2733,
0.265,
0.2467,
0.875,
0.8583,
0.8417,
0.19,
0.8083,
0.7917,
0.775,
0.7583,
0.7417,
0.09,
0.0733,
0.7719
]
},
{
"policy": "heuristic",
"task_type": "task2",
"seed": 1,
"steps": 17,
"score": 0.768,
"total_reward": 13.8236,
"completion_rate": 0.933,
"detection_rate": 1.0,
"trust_calibration": 0.282,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.99,
0.9833,
0.975,
0.2817,
0.2733,
0.265,
0.8917,
0.8833,
0.875,
0.8667,
0.8583,
0.85,
0.8417,
0.8333,
0.825,
0.8167,
0.7053
]
},
{
"policy": "heuristic",
"task_type": "task2",
"seed": 2,
"steps": 17,
"score": 0.7237,
"total_reward": 13.0266,
"completion_rate": 0.867,
"detection_rate": 1.0,
"trust_calibration": 0.284,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3517,
0.2883,
0.925,
0.9167,
0.9083,
0.9,
0.8917,
0.8833,
0.875,
0.8667,
0.8583,
0.85,
0.8417,
0.8333,
0.825,
0.1787,
0.6626
]
},
{
"policy": "heuristic",
"task_type": "task2",
"seed": 3,
"steps": 30,
"score": 0.7823,
"total_reward": 12.5171,
"completion_rate": 0.933,
"detection_rate": 1.0,
"trust_calibration": 0.844,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9333,
0.9167,
0.9,
0.8833,
0.8667,
0.85,
0.8333,
0.8167,
0.8,
0.7833,
0.1317,
0.75,
0.7333,
0.7167,
0.9021
]
},
{
"policy": "heuristic",
"task_type": "task2",
"seed": 4,
"steps": 17,
"score": 0.7999,
"total_reward": 14.3981,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.426,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9417,
0.9333,
0.925,
0.9167,
0.9083,
0.9,
0.2537,
0.8833,
0.875,
0.8667,
0.8583,
0.85,
0.8417,
0.8333,
0.825,
0.1787,
0.7991
]
},
{
"policy": "heuristic",
"task_type": "task2",
"seed": 5,
"steps": 30,
"score": 0.6545,
"total_reward": 10.4723,
"completion_rate": 0.733,
"detection_rate": 1.0,
"trust_calibration": 0.816,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9333,
0.9167,
0.9,
0.2483,
0.8667,
0.215,
0.8333,
0.8167,
0.165,
0.7833,
0.1317,
0.75,
0.7333,
0.7167,
0.7623
]
},
{
"policy": "heuristic",
"task_type": "task2",
"seed": 6,
"steps": 15,
"score": 0.8749,
"total_reward": 13.9981,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.28,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9417,
0.9333,
0.925,
0.9167,
0.9083,
0.9,
0.8917,
0.8833,
0.875,
0.8667,
0.8583,
0.85,
0.8417,
0.8333,
0.7481
]
},
{
"policy": "heuristic",
"task_type": "task2",
"seed": 7,
"steps": 28,
"score": 0.7465,
"total_reward": 13.4373,
"completion_rate": 0.933,
"detection_rate": 1.0,
"trust_calibration": 0.833,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9417,
0.9333,
0.313,
0.9167,
0.3163,
0.318,
0.8833,
0.8667,
0.85,
0.8333,
0.8167,
0.8,
0.7833,
0.7667,
0.75,
0.7333,
0.8983
]
},
{
"policy": "heuristic",
"task_type": "task2",
"seed": 8,
"steps": 17,
"score": 0.7992,
"total_reward": 14.3856,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.39,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9417,
0.2953,
0.925,
0.9167,
0.9083,
0.9,
0.2537,
0.8833,
0.875,
0.8667,
0.8583,
0.85,
0.8417,
0.8333,
0.825,
0.8167,
0.7866
]
},
{
"policy": "heuristic",
"task_type": "task2",
"seed": 9,
"steps": 28,
"score": 0.7248,
"total_reward": 13.7712,
"completion_rate": 0.933,
"detection_rate": 1.0,
"trust_calibration": 0.834,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9417,
0.9333,
0.925,
0.3147,
0.9083,
0.318,
0.3117,
0.3033,
0.8667,
0.85,
0.8333,
0.8167,
0.8,
0.7833,
0.7667,
0.75,
0.7333,
0.8986
]
},
{
"policy": "heuristic",
"task_type": "task2",
"seed": 10,
"steps": 16,
"score": 0.797,
"total_reward": 13.5485,
"completion_rate": 0.933,
"detection_rate": 1.0,
"trust_calibration": 0.432,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9417,
0.9333,
0.925,
0.9167,
0.9083,
0.9,
0.8917,
0.8833,
0.875,
0.8667,
0.8583,
0.212,
0.2037,
0.8333,
0.825,
0.7578
]
},
{
"policy": "heuristic",
"task_type": "task2",
"seed": 11,
"steps": 30,
"score": 0.6763,
"total_reward": 12.8505,
"completion_rate": 0.867,
"detection_rate": 1.0,
"trust_calibration": 0.825,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.99,
0.9833,
0.975,
0.2817,
0.2733,
0.265,
0.8833,
0.8667,
0.85,
0.8333,
0.8167,
0.8,
0.7833,
0.7667,
0.75,
0.7333,
0.0817,
0.8522
]
},
{
"policy": "heuristic",
"task_type": "task2",
"seed": 12,
"steps": 30,
"score": 0.8246,
"total_reward": 13.1941,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.84,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9333,
0.9167,
0.9,
0.8833,
0.8667,
0.85,
0.8333,
0.8167,
0.8,
0.7833,
0.7667,
0.75,
0.7333,
0.7167,
0.9441
]
},
{
"policy": "heuristic",
"task_type": "task2",
"seed": 13,
"steps": 30,
"score": 0.7935,
"total_reward": 13.4903,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.839,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9417,
0.2883,
0.9167,
0.9,
0.8833,
0.8667,
0.85,
0.8333,
0.8167,
0.8,
0.7833,
0.7667,
0.75,
0.7333,
0.7167,
0.9436
]
},
{
"policy": "heuristic",
"task_type": "task2",
"seed": 14,
"steps": 18,
"score": 0.7309,
"total_reward": 13.8869,
"completion_rate": 0.933,
"detection_rate": 1.0,
"trust_calibration": 0.264,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9417,
0.9333,
0.313,
0.3147,
0.2733,
0.9,
0.8917,
0.8833,
0.875,
0.8667,
0.8583,
0.85,
0.8417,
0.8333,
0.187,
0.8167,
0.8083,
0.6989
]
},
{
"policy": "heuristic",
"task_type": "task2",
"seed": 15,
"steps": 18,
"score": 0.7649,
"total_reward": 14.5326,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.33,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3097,
0.9333,
0.925,
0.9167,
0.2703,
0.9,
0.8917,
0.8833,
0.875,
0.2287,
0.8583,
0.85,
0.8417,
0.8333,
0.825,
0.8167,
0.8083,
0.7656
]
},
{
"policy": "heuristic",
"task_type": "task2",
"seed": 16,
"steps": 30,
"score": 0.8246,
"total_reward": 13.1941,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.84,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9333,
0.9167,
0.9,
0.8833,
0.8667,
0.85,
0.8333,
0.8167,
0.8,
0.7833,
0.7667,
0.75,
0.7333,
0.7167,
0.9441
]
},
{
"policy": "heuristic",
"task_type": "task2",
"seed": 17,
"steps": 30,
"score": 0.7823,
"total_reward": 12.517,
"completion_rate": 0.933,
"detection_rate": 1.0,
"trust_calibration": 0.844,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9333,
0.9167,
0.265,
0.8833,
0.8667,
0.85,
0.8333,
0.8167,
0.8,
0.7833,
0.7667,
0.75,
0.7333,
0.7167,
0.902
]
},
{
"policy": "heuristic",
"task_type": "task2",
"seed": 18,
"steps": 15,
"score": 0.8749,
"total_reward": 13.998,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.28,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9417,
0.9333,
0.925,
0.9167,
0.9083,
0.9,
0.8917,
0.8833,
0.875,
0.8667,
0.8583,
0.85,
0.8417,
0.8333,
0.748
]
},
{
"policy": "heuristic",
"task_type": "task2",
"seed": 19,
"steps": 17,
"score": 0.7966,
"total_reward": 14.3395,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.279,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.2967,
0.9333,
0.925,
0.9167,
0.9083,
0.262,
0.8917,
0.8833,
0.875,
0.8667,
0.8583,
0.85,
0.8417,
0.8333,
0.825,
0.8167,
0.7475
]
},
{
"policy": "heuristic",
"task_type": "task2",
"seed": 20,
"steps": 30,
"score": 0.6558,
"total_reward": 11.8048,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.82,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9417,
0.9333,
0.313,
0.3147,
0.9,
0.8833,
0.2317,
0.85,
0.8333,
0.8167,
0.8,
0.1483,
0.7667,
0.75,
0.0983,
0.7167,
0.8071
]
},
{
"policy": "heuristic",
"task_type": "task2",
"seed": 21,
"steps": 30,
"score": 0.8246,
"total_reward": 13.1941,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.84,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9333,
0.9167,
0.9,
0.8833,
0.8667,
0.85,
0.8333,
0.8167,
0.8,
0.7833,
0.7667,
0.75,
0.7333,
0.7167,
0.9441
]
},
{
"policy": "heuristic",
"task_type": "task2",
"seed": 22,
"steps": 22,
"score": 0.6604,
"total_reward": 15.1886,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.471,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3517,
0.9333,
0.925,
0.2787,
0.9083,
0.9,
0.8917,
0.8833,
0.237,
0.8667,
0.2203,
0.85,
0.8417,
0.1953,
0.825,
0.1787,
0.8083,
0.8,
0.1537,
0.7833,
0.775,
0.8149
]
},
{
"policy": "heuristic",
"task_type": "task2",
"seed": 23,
"steps": 16,
"score": 0.793,
"total_reward": 13.4804,
"completion_rate": 0.933,
"detection_rate": 1.0,
"trust_calibration": 0.212,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9417,
0.3113,
0.28,
0.9167,
0.9083,
0.9,
0.8917,
0.8833,
0.875,
0.8667,
0.8583,
0.85,
0.8417,
0.8333,
0.825,
0.6808
]
},
{
"policy": "heuristic",
"task_type": "task2",
"seed": 24,
"steps": 30,
"score": 0.6768,
"total_reward": 12.8598,
"completion_rate": 0.867,
"detection_rate": 1.0,
"trust_calibration": 0.824,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9417,
0.3113,
0.975,
0.3267,
0.9583,
0.265,
0.8833,
0.8667,
0.85,
0.8333,
0.8167,
0.8,
0.7833,
0.7667,
0.115,
0.7333,
0.7167,
0.8518
]
},
{
"policy": "heuristic",
"task_type": "task2",
"seed": 25,
"steps": 21,
"score": 0.5985,
"total_reward": 13.1666,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.709,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.99,
0.9833,
0.975,
0.2817,
0.2733,
0.265,
0.8917,
0.2383,
0.875,
0.8667,
0.8583,
0.85,
0.8417,
0.1883,
0.18,
0.8167,
0.1633,
0.155,
0.1467,
0.7833,
0.7683
]
},
{
"policy": "heuristic",
"task_type": "task2",
"seed": 26,
"steps": 22,
"score": 0.5962,
"total_reward": 13.1159,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.324,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.99,
0.9833,
0.975,
0.2817,
0.8683,
0.265,
0.2567,
0.9333,
0.925,
0.2317,
0.2233,
0.26,
0.1983,
0.825,
0.8167,
0.8083,
0.162,
0.7917,
0.1453,
0.775,
0.6336
]
},
{
"policy": "heuristic",
"task_type": "task2",
"seed": 27,
"steps": 30,
"score": 0.6573,
"total_reward": 13.146,
"completion_rate": 0.867,
"detection_rate": 1.0,
"trust_calibration": 0.829,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9417,
0.9333,
0.313,
0.9167,
0.9083,
0.318,
0.3117,
0.3033,
0.8667,
0.85,
0.1983,
0.8167,
0.8,
0.1483,
0.7667,
0.75,
0.7333,
0.7167,
0.8533
]
},
{
"policy": "heuristic",
"task_type": "task2",
"seed": 28,
"steps": 15,
"score": 0.8749,
"total_reward": 13.9979,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.28,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9417,
0.9333,
0.925,
0.9167,
0.9083,
0.9,
0.8917,
0.8833,
0.875,
0.8667,
0.8583,
0.85,
0.8417,
0.8333,
0.7479
]
},
{
"policy": "heuristic",
"task_type": "task2",
"seed": 29,
"steps": 17,
"score": 0.7998,
"total_reward": 14.3965,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.421,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9417,
0.9333,
0.925,
0.9167,
0.9083,
0.9,
0.8917,
0.2453,
0.875,
0.8667,
0.8583,
0.85,
0.2037,
0.8333,
0.825,
0.8167,
0.7975
]
},
{
"policy": "oracle_lite",
"task_type": "task2",
"seed": 0,
"steps": 30,
"score": 0.7823,
"total_reward": 12.5171,
"completion_rate": 0.933,
"detection_rate": 1.0,
"trust_calibration": 0.844,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9333,
0.9167,
0.9,
0.8833,
0.8667,
0.85,
0.8333,
0.8167,
0.8,
0.7833,
0.1317,
0.75,
0.7333,
0.7167,
0.9021
]
},
{
"policy": "oracle_lite",
"task_type": "task2",
"seed": 1,
"steps": 30,
"score": 0.8246,
"total_reward": 13.1941,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.84,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9333,
0.9167,
0.9,
0.8833,
0.8667,
0.85,
0.8333,
0.8167,
0.8,
0.7833,
0.7667,
0.75,
0.7333,
0.7167,
0.9441
]
},
{
"policy": "oracle_lite",
"task_type": "task2",
"seed": 2,
"steps": 30,
"score": 0.7399,
"total_reward": 11.8385,
"completion_rate": 0.867,
"detection_rate": 1.0,
"trust_calibration": 0.843,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.2983,
0.2817,
0.9,
0.8833,
0.8667,
0.85,
0.8333,
0.8167,
0.8,
0.7833,
0.7667,
0.75,
0.7333,
0.7167,
0.8585
]
},
{
"policy": "oracle_lite",
"task_type": "task2",
"seed": 3,
"steps": 30,
"score": 0.7823,
"total_reward": 12.5171,
"completion_rate": 0.933,
"detection_rate": 1.0,
"trust_calibration": 0.844,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9333,
0.9167,
0.9,
0.8833,
0.8667,
0.85,
0.8333,
0.8167,
0.8,
0.7833,
0.1317,
0.75,
0.7333,
0.7167,
0.9021
]
},
{
"policy": "oracle_lite",
"task_type": "task2",
"seed": 4,
"steps": 30,
"score": 0.7823,
"total_reward": 12.5171,
"completion_rate": 0.933,
"detection_rate": 1.0,
"trust_calibration": 0.844,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9333,
0.9167,
0.9,
0.8833,
0.8667,
0.85,
0.1983,
0.8167,
0.8,
0.7833,
0.7667,
0.75,
0.7333,
0.7167,
0.9021
]
},
{
"policy": "oracle_lite",
"task_type": "task2",
"seed": 5,
"steps": 30,
"score": 0.6545,
"total_reward": 10.4723,
"completion_rate": 0.733,
"detection_rate": 1.0,
"trust_calibration": 0.816,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9333,
0.9167,
0.9,
0.2483,
0.8667,
0.215,
0.8333,
0.8167,
0.165,
0.7833,
0.1317,
0.75,
0.7333,
0.7167,
0.7623
]
},
{
"policy": "oracle_lite",
"task_type": "task2",
"seed": 6,
"steps": 30,
"score": 0.8246,
"total_reward": 13.1941,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.84,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9333,
0.9167,
0.9,
0.8833,
0.8667,
0.85,
0.8333,
0.8167,
0.8,
0.7833,
0.7667,
0.75,
0.7333,
0.7167,
0.9441
]
},
{
"policy": "oracle_lite",
"task_type": "task2",
"seed": 7,
"steps": 30,
"score": 0.8246,
"total_reward": 13.1942,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.84,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9333,
0.9167,
0.9,
0.8833,
0.8667,
0.85,
0.8333,
0.8167,
0.8,
0.7833,
0.7667,
0.75,
0.7333,
0.7167,
0.9442
]
},
{
"policy": "oracle_lite",
"task_type": "task2",
"seed": 8,
"steps": 30,
"score": 0.7399,
"total_reward": 11.8383,
"completion_rate": 0.867,
"detection_rate": 1.0,
"trust_calibration": 0.843,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9333,
0.2817,
0.9,
0.8833,
0.8667,
0.85,
0.1983,
0.8167,
0.8,
0.7833,
0.7667,
0.75,
0.7333,
0.7167,
0.8583
]
},
{
"policy": "oracle_lite",
"task_type": "task2",
"seed": 9,
"steps": 30,
"score": 0.8246,
"total_reward": 13.1941,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.84,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9333,
0.9167,
0.9,
0.8833,
0.8667,
0.85,
0.8333,
0.8167,
0.8,
0.7833,
0.7667,
0.75,
0.7333,
0.7167,
0.9441
]
},
{
"policy": "oracle_lite",
"task_type": "task2",
"seed": 10,
"steps": 30,
"score": 0.7398,
"total_reward": 11.8373,
"completion_rate": 0.867,
"detection_rate": 1.0,
"trust_calibration": 0.84,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9333,
0.9167,
0.9,
0.8833,
0.8667,
0.85,
0.8333,
0.8167,
0.8,
0.7833,
0.7667,
0.115,
0.0983,
0.7167,
0.8573
]
},
{
"policy": "oracle_lite",
"task_type": "task2",
"seed": 11,
"steps": 30,
"score": 0.7823,
"total_reward": 12.517,
"completion_rate": 0.933,
"detection_rate": 1.0,
"trust_calibration": 0.844,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9333,
0.9167,
0.265,
0.8833,
0.8667,
0.85,
0.8333,
0.8167,
0.8,
0.7833,
0.7667,
0.75,
0.7333,
0.7167,
0.902
]
},
{
"policy": "oracle_lite",
"task_type": "task2",
"seed": 12,
"steps": 30,
"score": 0.8246,
"total_reward": 13.1941,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.84,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9333,
0.9167,
0.9,
0.8833,
0.8667,
0.85,
0.8333,
0.8167,
0.8,
0.7833,
0.7667,
0.75,
0.7333,
0.7167,
0.9441
]
},
{
"policy": "oracle_lite",
"task_type": "task2",
"seed": 13,
"steps": 30,
"score": 0.8246,
"total_reward": 13.1941,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.84,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9333,
0.9167,
0.9,
0.8833,
0.8667,
0.85,
0.8333,
0.8167,
0.8,
0.7833,
0.7667,
0.75,
0.7333,
0.7167,
0.9441
]
},
{
"policy": "oracle_lite",
"task_type": "task2",
"seed": 14,
"steps": 30,
"score": 0.7398,
"total_reward": 11.8376,
"completion_rate": 0.867,
"detection_rate": 1.0,
"trust_calibration": 0.841,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9333,
0.9167,
0.9,
0.2483,
0.8667,
0.85,
0.8333,
0.8167,
0.8,
0.7833,
0.7667,
0.75,
0.7333,
0.7167,
0.8576
]
},
{
"policy": "oracle_lite",
"task_type": "task2",
"seed": 15,
"steps": 30,
"score": 0.6973,
"total_reward": 11.1569,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.834,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.2983,
0.9167,
0.9,
0.8833,
0.2317,
0.85,
0.8333,
0.8167,
0.8,
0.1483,
0.7667,
0.75,
0.7333,
0.7167,
0.8119
]
},
{
"policy": "oracle_lite",
"task_type": "task2",
"seed": 16,
"steps": 30,
"score": 0.8246,
"total_reward": 13.1941,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.84,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9333,
0.9167,
0.9,
0.8833,
0.8667,
0.85,
0.8333,
0.8167,
0.8,
0.7833,
0.7667,
0.75,
0.7333,
0.7167,
0.9441
]
},
{
"policy": "oracle_lite",
"task_type": "task2",
"seed": 17,
"steps": 30,
"score": 0.7823,
"total_reward": 12.517,
"completion_rate": 0.933,
"detection_rate": 1.0,
"trust_calibration": 0.844,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9333,
0.9167,
0.265,
0.8833,
0.8667,
0.85,
0.8333,
0.8167,
0.8,
0.7833,
0.7667,
0.75,
0.7333,
0.7167,
0.902
]
},
{
"policy": "oracle_lite",
"task_type": "task2",
"seed": 18,
"steps": 30,
"score": 0.8246,
"total_reward": 13.1941,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.84,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9333,
0.9167,
0.9,
0.8833,
0.8667,
0.85,
0.8333,
0.8167,
0.8,
0.7833,
0.7667,
0.75,
0.7333,
0.7167,
0.9441
]
},
{
"policy": "oracle_lite",
"task_type": "task2",
"seed": 19,
"steps": 30,
"score": 0.7823,
"total_reward": 12.517,
"completion_rate": 0.933,
"detection_rate": 1.0,
"trust_calibration": 0.844,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9333,
0.9167,
0.9,
0.8833,
0.8667,
0.215,
0.8333,
0.8167,
0.8,
0.7833,
0.7667,
0.75,
0.7333,
0.7167,
0.902
]
},
{
"policy": "oracle_lite",
"task_type": "task2",
"seed": 20,
"steps": 30,
"score": 0.6117,
"total_reward": 9.7864,
"completion_rate": 0.667,
"detection_rate": 1.0,
"trust_calibration": 0.795,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.2983,
0.9167,
0.9,
0.2483,
0.8667,
0.85,
0.1983,
0.8167,
0.8,
0.7833,
0.7667,
0.115,
0.7333,
0.7167,
0.7114
]
},
{
"policy": "oracle_lite",
"task_type": "task2",
"seed": 21,
"steps": 30,
"score": 0.8246,
"total_reward": 13.1941,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.84,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9333,
0.9167,
0.9,
0.8833,
0.8667,
0.85,
0.8333,
0.8167,
0.8,
0.7833,
0.7667,
0.75,
0.7333,
0.7167,
0.9441
]
},
{
"policy": "oracle_lite",
"task_type": "task2",
"seed": 22,
"steps": 30,
"score": 0.6545,
"total_reward": 10.4728,
"completion_rate": 0.733,
"detection_rate": 1.0,
"trust_calibration": 0.817,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.2983,
0.9167,
0.9,
0.2483,
0.8667,
0.85,
0.8333,
0.8167,
0.8,
0.7833,
0.1317,
0.75,
0.7333,
0.0817,
0.7628
]
},
{
"policy": "oracle_lite",
"task_type": "task2",
"seed": 23,
"steps": 30,
"score": 0.7399,
"total_reward": 11.8385,
"completion_rate": 0.867,
"detection_rate": 1.0,
"trust_calibration": 0.843,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.2983,
0.2817,
0.9,
0.8833,
0.8667,
0.85,
0.8333,
0.8167,
0.8,
0.7833,
0.7667,
0.75,
0.7333,
0.7167,
0.8585
]
},
{
"policy": "oracle_lite",
"task_type": "task2",
"seed": 24,
"steps": 30,
"score": 0.7398,
"total_reward": 11.8376,
"completion_rate": 0.867,
"detection_rate": 1.0,
"trust_calibration": 0.841,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9333,
0.9167,
0.9,
0.2483,
0.8667,
0.85,
0.8333,
0.8167,
0.8,
0.7833,
0.7667,
0.75,
0.7333,
0.7167,
0.8576
]
},
{
"policy": "oracle_lite",
"task_type": "task2",
"seed": 25,
"steps": 30,
"score": 0.7399,
"total_reward": 11.838,
"completion_rate": 0.867,
"detection_rate": 1.0,
"trust_calibration": 0.842,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9333,
0.2817,
0.9,
0.8833,
0.8667,
0.85,
0.8333,
0.1817,
0.8,
0.7833,
0.7667,
0.75,
0.7333,
0.7167,
0.858
]
},
{
"policy": "oracle_lite",
"task_type": "task2",
"seed": 26,
"steps": 30,
"score": 0.7398,
"total_reward": 11.8373,
"completion_rate": 0.867,
"detection_rate": 1.0,
"trust_calibration": 0.84,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9333,
0.9167,
0.9,
0.8833,
0.8667,
0.85,
0.8333,
0.8167,
0.8,
0.7833,
0.7667,
0.115,
0.0983,
0.7167,
0.8573
]
},
{
"policy": "oracle_lite",
"task_type": "task2",
"seed": 27,
"steps": 30,
"score": 0.6972,
"total_reward": 11.156,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.831,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9333,
0.9167,
0.265,
0.8833,
0.8667,
0.85,
0.8333,
0.8167,
0.8,
0.7833,
0.1317,
0.75,
0.7333,
0.0817,
0.811
]
},
{
"policy": "oracle_lite",
"task_type": "task2",
"seed": 28,
"steps": 30,
"score": 0.8246,
"total_reward": 13.1941,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.84,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9333,
0.9167,
0.9,
0.8833,
0.8667,
0.85,
0.8333,
0.8167,
0.8,
0.7833,
0.7667,
0.75,
0.7333,
0.7167,
0.9441
]
},
{
"policy": "oracle_lite",
"task_type": "task2",
"seed": 29,
"steps": 30,
"score": 0.7398,
"total_reward": 11.8373,
"completion_rate": 0.867,
"detection_rate": 1.0,
"trust_calibration": 0.84,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9333,
0.9167,
0.9,
0.8833,
0.8667,
0.85,
0.8333,
0.1817,
0.8,
0.7833,
0.7667,
0.75,
0.0983,
0.7167,
0.8573
]
},
{
"policy": "trained",
"task_type": "task2",
"seed": 0,
"steps": 31,
"score": 0.6145,
"total_reward": 12.2902,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.72,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.99,
0.9833,
0.975,
0.2817,
0.2733,
0.265,
0.2467,
0.875,
0.8583,
0.8417,
0.19,
0.8083,
0.7917,
0.775,
0.7583,
0.7417,
0.09,
0.0733,
0.7719
]
},
{
"policy": "trained",
"task_type": "task2",
"seed": 1,
"steps": 17,
"score": 0.768,
"total_reward": 13.8236,
"completion_rate": 0.933,
"detection_rate": 1.0,
"trust_calibration": 0.282,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.99,
0.9833,
0.975,
0.2817,
0.2733,
0.265,
0.8917,
0.8833,
0.875,
0.8667,
0.8583,
0.85,
0.8417,
0.8333,
0.825,
0.8167,
0.7053
]
},
{
"policy": "trained",
"task_type": "task2",
"seed": 2,
"steps": 17,
"score": 0.7237,
"total_reward": 13.0266,
"completion_rate": 0.867,
"detection_rate": 1.0,
"trust_calibration": 0.284,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3517,
0.2883,
0.925,
0.9167,
0.9083,
0.9,
0.8917,
0.8833,
0.875,
0.8667,
0.8583,
0.85,
0.8417,
0.8333,
0.825,
0.1787,
0.6626
]
},
{
"policy": "trained",
"task_type": "task2",
"seed": 3,
"steps": 30,
"score": 0.7823,
"total_reward": 12.5171,
"completion_rate": 0.933,
"detection_rate": 1.0,
"trust_calibration": 0.844,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9333,
0.9167,
0.9,
0.8833,
0.8667,
0.85,
0.8333,
0.8167,
0.8,
0.7833,
0.1317,
0.75,
0.7333,
0.7167,
0.9021
]
},
{
"policy": "trained",
"task_type": "task2",
"seed": 4,
"steps": 17,
"score": 0.7999,
"total_reward": 14.3981,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.426,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9417,
0.9333,
0.925,
0.9167,
0.9083,
0.9,
0.2537,
0.8833,
0.875,
0.8667,
0.8583,
0.85,
0.8417,
0.8333,
0.825,
0.1787,
0.7991
]
},
{
"policy": "trained",
"task_type": "task2",
"seed": 5,
"steps": 30,
"score": 0.6545,
"total_reward": 10.4723,
"completion_rate": 0.733,
"detection_rate": 1.0,
"trust_calibration": 0.816,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9333,
0.9167,
0.9,
0.2483,
0.8667,
0.215,
0.8333,
0.8167,
0.165,
0.7833,
0.1317,
0.75,
0.7333,
0.7167,
0.7623
]
},
{
"policy": "trained",
"task_type": "task2",
"seed": 6,
"steps": 15,
"score": 0.8749,
"total_reward": 13.9981,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.28,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9417,
0.9333,
0.925,
0.9167,
0.9083,
0.9,
0.8917,
0.8833,
0.875,
0.8667,
0.8583,
0.85,
0.8417,
0.8333,
0.7481
]
},
{
"policy": "trained",
"task_type": "task2",
"seed": 7,
"steps": 28,
"score": 0.7465,
"total_reward": 13.4373,
"completion_rate": 0.933,
"detection_rate": 1.0,
"trust_calibration": 0.833,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9417,
0.9333,
0.313,
0.9167,
0.3163,
0.318,
0.8833,
0.8667,
0.85,
0.8333,
0.8167,
0.8,
0.7833,
0.7667,
0.75,
0.7333,
0.8983
]
},
{
"policy": "trained",
"task_type": "task2",
"seed": 8,
"steps": 17,
"score": 0.7992,
"total_reward": 14.3856,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.39,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9417,
0.2953,
0.925,
0.9167,
0.9083,
0.9,
0.2537,
0.8833,
0.875,
0.8667,
0.8583,
0.85,
0.8417,
0.8333,
0.825,
0.8167,
0.7866
]
},
{
"policy": "trained",
"task_type": "task2",
"seed": 9,
"steps": 28,
"score": 0.7248,
"total_reward": 13.7712,
"completion_rate": 0.933,
"detection_rate": 1.0,
"trust_calibration": 0.834,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9417,
0.9333,
0.925,
0.3147,
0.9083,
0.318,
0.3117,
0.3033,
0.8667,
0.85,
0.8333,
0.8167,
0.8,
0.7833,
0.7667,
0.75,
0.7333,
0.8986
]
},
{
"policy": "trained",
"task_type": "task2",
"seed": 10,
"steps": 16,
"score": 0.797,
"total_reward": 13.5485,
"completion_rate": 0.933,
"detection_rate": 1.0,
"trust_calibration": 0.432,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9417,
0.9333,
0.925,
0.9167,
0.9083,
0.9,
0.8917,
0.8833,
0.875,
0.8667,
0.8583,
0.212,
0.2037,
0.8333,
0.825,
0.7578
]
},
{
"policy": "trained",
"task_type": "task2",
"seed": 11,
"steps": 30,
"score": 0.6763,
"total_reward": 12.8505,
"completion_rate": 0.867,
"detection_rate": 1.0,
"trust_calibration": 0.825,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.99,
0.9833,
0.975,
0.2817,
0.2733,
0.265,
0.8833,
0.8667,
0.85,
0.8333,
0.8167,
0.8,
0.7833,
0.7667,
0.75,
0.7333,
0.0817,
0.8522
]
},
{
"policy": "trained",
"task_type": "task2",
"seed": 12,
"steps": 30,
"score": 0.8246,
"total_reward": 13.1941,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.84,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9333,
0.9167,
0.9,
0.8833,
0.8667,
0.85,
0.8333,
0.8167,
0.8,
0.7833,
0.7667,
0.75,
0.7333,
0.7167,
0.9441
]
},
{
"policy": "trained",
"task_type": "task2",
"seed": 13,
"steps": 30,
"score": 0.7935,
"total_reward": 13.4903,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.839,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9417,
0.2883,
0.9167,
0.9,
0.8833,
0.8667,
0.85,
0.8333,
0.8167,
0.8,
0.7833,
0.7667,
0.75,
0.7333,
0.7167,
0.9436
]
},
{
"policy": "trained",
"task_type": "task2",
"seed": 14,
"steps": 18,
"score": 0.7309,
"total_reward": 13.8869,
"completion_rate": 0.933,
"detection_rate": 1.0,
"trust_calibration": 0.264,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9417,
0.9333,
0.313,
0.3147,
0.2733,
0.9,
0.8917,
0.8833,
0.875,
0.8667,
0.8583,
0.85,
0.8417,
0.8333,
0.187,
0.8167,
0.8083,
0.6989
]
},
{
"policy": "trained",
"task_type": "task2",
"seed": 15,
"steps": 18,
"score": 0.7649,
"total_reward": 14.5326,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.33,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3097,
0.9333,
0.925,
0.9167,
0.2703,
0.9,
0.8917,
0.8833,
0.875,
0.2287,
0.8583,
0.85,
0.8417,
0.8333,
0.825,
0.8167,
0.8083,
0.7656
]
},
{
"policy": "trained",
"task_type": "task2",
"seed": 16,
"steps": 30,
"score": 0.8246,
"total_reward": 13.1941,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.84,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9333,
0.9167,
0.9,
0.8833,
0.8667,
0.85,
0.8333,
0.8167,
0.8,
0.7833,
0.7667,
0.75,
0.7333,
0.7167,
0.9441
]
},
{
"policy": "trained",
"task_type": "task2",
"seed": 17,
"steps": 30,
"score": 0.7823,
"total_reward": 12.517,
"completion_rate": 0.933,
"detection_rate": 1.0,
"trust_calibration": 0.844,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9333,
0.9167,
0.265,
0.8833,
0.8667,
0.85,
0.8333,
0.8167,
0.8,
0.7833,
0.7667,
0.75,
0.7333,
0.7167,
0.902
]
},
{
"policy": "trained",
"task_type": "task2",
"seed": 18,
"steps": 15,
"score": 0.8749,
"total_reward": 13.998,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.28,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9417,
0.9333,
0.925,
0.9167,
0.9083,
0.9,
0.8917,
0.8833,
0.875,
0.8667,
0.8583,
0.85,
0.8417,
0.8333,
0.748
]
},
{
"policy": "trained",
"task_type": "task2",
"seed": 19,
"steps": 17,
"score": 0.7966,
"total_reward": 14.3395,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.279,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.2967,
0.9333,
0.925,
0.9167,
0.9083,
0.262,
0.8917,
0.8833,
0.875,
0.8667,
0.8583,
0.85,
0.8417,
0.8333,
0.825,
0.8167,
0.7475
]
},
{
"policy": "trained",
"task_type": "task2",
"seed": 20,
"steps": 30,
"score": 0.6558,
"total_reward": 11.8048,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.82,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9417,
0.9333,
0.313,
0.3147,
0.9,
0.8833,
0.2317,
0.85,
0.8333,
0.8167,
0.8,
0.1483,
0.7667,
0.75,
0.0983,
0.7167,
0.8071
]
},
{
"policy": "trained",
"task_type": "task2",
"seed": 21,
"steps": 30,
"score": 0.8246,
"total_reward": 13.1941,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.84,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9333,
0.9167,
0.9,
0.8833,
0.8667,
0.85,
0.8333,
0.8167,
0.8,
0.7833,
0.7667,
0.75,
0.7333,
0.7167,
0.9441
]
},
{
"policy": "trained",
"task_type": "task2",
"seed": 22,
"steps": 22,
"score": 0.6604,
"total_reward": 15.1886,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.471,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3517,
0.9333,
0.925,
0.2787,
0.9083,
0.9,
0.8917,
0.8833,
0.237,
0.8667,
0.2203,
0.85,
0.8417,
0.1953,
0.825,
0.1787,
0.8083,
0.8,
0.1537,
0.7833,
0.775,
0.8149
]
},
{
"policy": "trained",
"task_type": "task2",
"seed": 23,
"steps": 16,
"score": 0.793,
"total_reward": 13.4804,
"completion_rate": 0.933,
"detection_rate": 1.0,
"trust_calibration": 0.212,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9417,
0.3113,
0.28,
0.9167,
0.9083,
0.9,
0.8917,
0.8833,
0.875,
0.8667,
0.8583,
0.85,
0.8417,
0.8333,
0.825,
0.6808
]
},
{
"policy": "trained",
"task_type": "task2",
"seed": 24,
"steps": 30,
"score": 0.6768,
"total_reward": 12.8598,
"completion_rate": 0.867,
"detection_rate": 1.0,
"trust_calibration": 0.824,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9417,
0.3113,
0.975,
0.3267,
0.9583,
0.265,
0.8833,
0.8667,
0.85,
0.8333,
0.8167,
0.8,
0.7833,
0.7667,
0.115,
0.7333,
0.7167,
0.8518
]
},
{
"policy": "trained",
"task_type": "task2",
"seed": 25,
"steps": 21,
"score": 0.5985,
"total_reward": 13.1666,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.709,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.99,
0.9833,
0.975,
0.2817,
0.2733,
0.265,
0.8917,
0.2383,
0.875,
0.8667,
0.8583,
0.85,
0.8417,
0.1883,
0.18,
0.8167,
0.1633,
0.155,
0.1467,
0.7833,
0.7683
]
},
{
"policy": "trained",
"task_type": "task2",
"seed": 26,
"steps": 22,
"score": 0.5962,
"total_reward": 13.1159,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.324,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.99,
0.9833,
0.975,
0.2817,
0.8683,
0.265,
0.2567,
0.9333,
0.925,
0.2317,
0.2233,
0.26,
0.1983,
0.825,
0.8167,
0.8083,
0.162,
0.7917,
0.1453,
0.775,
0.6336
]
},
{
"policy": "trained",
"task_type": "task2",
"seed": 27,
"steps": 30,
"score": 0.6573,
"total_reward": 13.146,
"completion_rate": 0.867,
"detection_rate": 1.0,
"trust_calibration": 0.829,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9417,
0.9333,
0.313,
0.9167,
0.9083,
0.318,
0.3117,
0.3033,
0.8667,
0.85,
0.1983,
0.8167,
0.8,
0.1483,
0.7667,
0.75,
0.7333,
0.7167,
0.8533
]
},
{
"policy": "trained",
"task_type": "task2",
"seed": 28,
"steps": 15,
"score": 0.8749,
"total_reward": 13.9979,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.28,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9417,
0.9333,
0.925,
0.9167,
0.9083,
0.9,
0.8917,
0.8833,
0.875,
0.8667,
0.8583,
0.85,
0.8417,
0.8333,
0.7479
]
},
{
"policy": "trained",
"task_type": "task2",
"seed": 29,
"steps": 17,
"score": 0.7998,
"total_reward": 14.3965,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.421,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9417,
0.9333,
0.925,
0.9167,
0.9083,
0.9,
0.8917,
0.2453,
0.875,
0.8667,
0.8583,
0.85,
0.2037,
0.8333,
0.825,
0.8167,
0.7975
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 0,
"steps": 36,
"score": 0.6105,
"total_reward": 15.2622,
"completion_rate": 0.75,
"detection_rate": 0.0,
"trust_calibration": 0.884,
"adversarial_detections": 0,
"adversarial_poisonings": 1,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.8956,
0.9233,
0.9211,
0.02,
0.9144,
0.9122,
0.9078,
0.3556,
0.02,
0.02,
0.8467,
0.8922,
0.8378,
0.8833,
0.9061,
0.3067,
0.02,
0.8722,
0.073,
0.8306,
0.9061,
0.3397,
0.3044,
0.5035
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 1,
"steps": 40,
"score": 0.7205,
"total_reward": 18.0135,
"completion_rate": 0.9,
"detection_rate": 1.0,
"trust_calibration": 0.662,
"adversarial_detections": 1,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9506,
0.8889,
0.8822,
0.91,
0.8556,
0.9011,
0.3167,
0.8622,
0.89,
0.8356,
0.8633,
0.3011,
0.8444,
0.8378,
0.8656,
0.8261,
0.8589,
0.8961,
0.02,
0.3,
0.8106,
0.2933,
0.7916
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 2,
"steps": 37,
"score": 0.7627,
"total_reward": 17.5411,
"completion_rate": 0.9,
"detection_rate": 1.0,
"trust_calibration": 0.752,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3778,
0.3203,
0.9211,
0.9189,
0.8822,
0.91,
0.8556,
0.8511,
0.8967,
0.8422,
0.89,
0.8356,
0.8311,
0.8767,
0.2944,
0.8972,
0.87,
0.8656,
0.8789,
0.02,
0.8722,
0.8207
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 3,
"steps": 36,
"score": 0.6303,
"total_reward": 16.3887,
"completion_rate": 0.75,
"detection_rate": 0.0,
"trust_calibration": 0.798,
"adversarial_detections": 0,
"adversarial_poisonings": 1,
"status": "failed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9528,
0.9506,
0.9233,
0.3411,
0.8667,
0.9144,
0.9372,
0.91,
0.3278,
0.3556,
0.8511,
0.8989,
0.2922,
0.8878,
0.3056,
0.8489,
0.8767,
0.8922,
0.333,
0.2878,
0.8283,
0.8589,
0.3297,
0.3552,
0.01
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 4,
"steps": 41,
"score": 0.6894,
"total_reward": 18.6138,
"completion_rate": 0.85,
"detection_rate": 1.0,
"trust_calibration": 0.405,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9506,
0.9233,
0.3711,
0.8844,
0.9122,
0.33,
0.3186,
0.9011,
0.8667,
0.86,
0.8878,
0.8833,
0.8789,
0.8767,
0.8994,
0.3352,
0.8678,
0.3033,
0.8239,
0.8744,
0.8678,
0.2656,
0.2933,
0.2911,
0.7076
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 5,
"steps": 31,
"score": 0.6062,
"total_reward": 15.1538,
"completion_rate": 0.75,
"detection_rate": 0.0,
"trust_calibration": 0.816,
"adversarial_detections": 0,
"adversarial_poisonings": 2,
"status": "failed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.8933,
0.9211,
0.3367,
0.3344,
0.3322,
0.8578,
0.9306,
0.9033,
0.3211,
0.8467,
0.9194,
0.32,
0.8878,
0.8856,
0.8811,
0.02,
0.8394,
0.0752,
0.87,
0.8678,
0.02,
0.8883,
0.01
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 6,
"steps": 39,
"score": 0.6337,
"total_reward": 15.8429,
"completion_rate": 0.8,
"detection_rate": 0.0,
"trust_calibration": 0.872,
"adversarial_detections": 0,
"adversarial_poisonings": 2,
"status": "failed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.8956,
0.8889,
0.9167,
0.9144,
0.9122,
0.91,
0.9006,
0.8511,
0.3197,
0.9194,
0.28,
0.8556,
0.8811,
0.8789,
0.8422,
0.8856,
0.3311,
0.8589,
0.0597,
0.3222,
0.27,
0.8728,
0.02,
0.01
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 7,
"steps": 32,
"score": 0.7179,
"total_reward": 15.793,
"completion_rate": 0.85,
"detection_rate": 0.0,
"trust_calibration": 0.869,
"adversarial_detections": 0,
"adversarial_poisonings": 1,
"status": "failed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.8756,
0.8911,
0.8867,
0.9144,
0.86,
0.9056,
0.9033,
0.8989,
0.8967,
0.3444,
0.89,
0.8356,
0.9083,
0.02,
0.8789,
0.8744,
0.87,
0.8928,
0.8633,
0.3111,
0.01
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 8,
"steps": 31,
"score": 0.7087,
"total_reward": 16.3004,
"completion_rate": 0.85,
"detection_rate": 0.0,
"trust_calibration": 0.701,
"adversarial_detections": 0,
"adversarial_poisonings": 2,
"status": "failed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.3733,
0.9211,
0.8844,
0.9122,
0.91,
0.3578,
0.9056,
0.9283,
0.8667,
0.8944,
0.84,
0.3456,
0.8833,
0.8811,
0.8589,
0.8767,
0.0774,
0.835,
0.8856,
0.8633,
0.01
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 9,
"steps": 37,
"score": 0.7151,
"total_reward": 17.877,
"completion_rate": 0.9,
"detection_rate": 1.0,
"trust_calibration": 0.558,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9528,
0.9256,
0.9233,
0.8689,
0.3597,
0.8822,
0.935,
0.02,
0.3556,
0.9011,
0.8967,
0.8944,
0.84,
0.8356,
0.8489,
0.8244,
0.835,
0.3178,
0.8656,
0.8261,
0.8217,
0.3044,
0.85,
0.7724
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 10,
"steps": 38,
"score": 0.6037,
"total_reward": 17.5072,
"completion_rate": 0.75,
"detection_rate": 1.0,
"trust_calibration": 0.772,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9233,
0.9461,
0.8844,
0.3622,
0.9078,
0.02,
0.3233,
0.9261,
0.8967,
0.8944,
0.86,
0.3378,
0.02,
0.3263,
0.8811,
0.3289,
0.2967,
0.8994,
0.8722,
0.8678,
0.3386,
0.3463,
0.02,
0.3089,
0.8544,
0.355,
0.7709
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 11,
"steps": 32,
"score": 0.5881,
"total_reward": 14.703,
"completion_rate": 0.7,
"detection_rate": 0.3333,
"trust_calibration": 0.743,
"adversarial_detections": 1,
"adversarial_poisonings": 2,
"status": "failed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.8756,
0.9233,
0.9211,
0.3459,
0.8844,
0.3622,
0.9078,
0.9306,
0.02,
0.8689,
0.8967,
0.8944,
0.8722,
0.8878,
0.8856,
0.9228,
0.0819,
0.2967,
0.3244,
0.8722,
0.02,
0.3356,
0.3011,
0.01
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 12,
"steps": 36,
"score": 0.6249,
"total_reward": 14.9974,
"completion_rate": 0.75,
"detection_rate": 0.3333,
"trust_calibration": 0.716,
"adversarial_detections": 1,
"adversarial_poisonings": 2,
"status": "failed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9528,
0.8933,
0.3411,
0.9189,
0.8644,
0.9122,
0.8756,
0.02,
0.3441,
0.3189,
0.3167,
0.8622,
0.8878,
0.8511,
0.8789,
0.8244,
0.8878,
0.2856,
0.0663,
0.8589,
0.8961,
0.8772,
0.01
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 13,
"steps": 38,
"score": 0.7872,
"total_reward": 18.1053,
"completion_rate": 0.95,
"detection_rate": 1.0,
"trust_calibration": 0.64,
"adversarial_detections": 1,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.8911,
0.8667,
0.9122,
0.3278,
0.9056,
0.8689,
0.02,
0.8622,
0.8378,
0.8533,
0.8289,
0.8767,
0.8722,
0.895,
0.8856,
0.8633,
0.8611,
0.9017,
0.8939,
0.3,
0.8081
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 14,
"steps": 35,
"score": 0.6988,
"total_reward": 18.1679,
"completion_rate": 0.95,
"detection_rate": 0.0,
"trust_calibration": 0.661,
"adversarial_detections": 0,
"adversarial_poisonings": 2,
"status": "failed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.8733,
0.8889,
0.8844,
0.3322,
0.91,
0.9078,
0.9056,
0.9033,
0.8989,
0.8622,
0.89,
0.3408,
0.8856,
0.8833,
0.3011,
0.8267,
0.8372,
0.87,
0.0708,
0.8833,
0.2811,
0.8839,
0.8544,
0.01
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 15,
"steps": 38,
"score": 0.68,
"total_reward": 19.0388,
"completion_rate": 0.85,
"detection_rate": 1.0,
"trust_calibration": 0.774,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3548,
0.9256,
0.9233,
0.8689,
0.3667,
0.3644,
0.8922,
0.935,
0.8556,
0.3533,
0.3211,
0.3189,
0.8944,
0.89,
0.3378,
0.8333,
0.8789,
0.8767,
0.8744,
0.2922,
0.333,
0.8656,
0.8261,
0.8567,
0.3,
0.8478,
0.8066
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 16,
"steps": 33,
"score": 0.5966,
"total_reward": 15.511,
"completion_rate": 0.75,
"detection_rate": 0.0,
"trust_calibration": 0.691,
"adversarial_detections": 0,
"adversarial_poisonings": 1,
"status": "failed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.9233,
0.9211,
0.9189,
0.3044,
0.02,
0.935,
0.02,
0.3256,
0.8711,
0.8967,
0.3444,
0.02,
0.89,
0.8556,
0.8833,
0.3311,
0.3289,
0.8744,
0.8878,
0.8633,
0.3141,
0.8589,
0.01
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 17,
"steps": 35,
"score": 0.5891,
"total_reward": 16.4939,
"completion_rate": 0.75,
"detection_rate": 0.0,
"trust_calibration": 0.795,
"adversarial_detections": 0,
"adversarial_poisonings": 1,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.02,
0.3433,
0.3319,
0.8644,
0.3622,
0.91,
0.3286,
0.9033,
0.9011,
0.8789,
0.3697,
0.9194,
0.84,
0.8856,
0.8811,
0.8789,
0.3597,
0.2692,
0.87,
0.2878,
0.8656,
0.0663,
0.8239,
0.8817,
0.02,
0.4835
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 18,
"steps": 35,
"score": 0.6548,
"total_reward": 16.3705,
"completion_rate": 0.75,
"detection_rate": 1.0,
"trust_calibration": 0.573,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9233,
0.02,
0.9167,
0.3644,
0.3622,
0.91,
0.8556,
0.9033,
0.8689,
0.8967,
0.8922,
0.915,
0.02,
0.8533,
0.8789,
0.2967,
0.3422,
0.3078,
0.8656,
0.8611,
0.2789,
0.3297,
0.7281
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 19,
"steps": 38,
"score": 0.6912,
"total_reward": 17.2799,
"completion_rate": 0.9,
"detection_rate": 0.0,
"trust_calibration": 0.834,
"adversarial_detections": 0,
"adversarial_poisonings": 1,
"status": "failed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3156,
0.9211,
0.9189,
0.9144,
0.9122,
0.33,
0.02,
0.9133,
0.9261,
0.8967,
0.8422,
0.89,
0.8356,
0.8511,
0.8789,
0.8922,
0.87,
0.3178,
0.8811,
0.8589,
0.8544,
0.87,
0.3108,
0.01
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 20,
"steps": 44,
"score": 0.6149,
"total_reward": 19.0606,
"completion_rate": 0.85,
"detection_rate": 0.0,
"trust_calibration": 0.859,
"adversarial_detections": 0,
"adversarial_poisonings": 2,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3556,
0.8711,
0.8667,
0.9144,
0.9122,
0.337,
0.9078,
0.9033,
0.3281,
0.3519,
0.3467,
0.8922,
0.89,
0.3608,
0.8856,
0.8833,
0.8289,
0.2714,
0.87,
0.3508,
0.8656,
0.3089,
0.0597,
0.8172,
0.053,
0.8728,
0.8083,
0.3439,
0.2567,
0.5146
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 21,
"steps": 30,
"score": 0.7401,
"total_reward": 15.543,
"completion_rate": 0.85,
"detection_rate": 0.0,
"trust_calibration": 0.709,
"adversarial_detections": 0,
"adversarial_poisonings": 1,
"status": "failed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.8911,
0.8867,
0.9144,
0.3322,
0.9078,
0.9056,
0.8711,
0.8467,
0.8922,
0.89,
0.02,
0.8856,
0.8811,
0.8589,
0.8744,
0.87,
0.8856,
0.01
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 22,
"steps": 45,
"score": 0.5529,
"total_reward": 16.5871,
"completion_rate": 0.7,
"detection_rate": 0.0,
"trust_calibration": 0.709,
"adversarial_detections": 0,
"adversarial_poisonings": 2,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3548,
0.02,
0.9311,
0.8844,
0.3622,
0.36,
0.9078,
0.2933,
0.8689,
0.9217,
0.3444,
0.32,
0.8556,
0.3033,
0.3011,
0.8767,
0.3022,
0.87,
0.8678,
0.8811,
0.0619,
0.8544,
0.3022,
0.8478,
0.8083,
0.3119,
0.0397,
0.8322,
0.4222
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 23,
"steps": 31,
"score": 0.7921,
"total_reward": 18.2193,
"completion_rate": 0.95,
"detection_rate": 0.5,
"trust_calibration": 0.847,
"adversarial_detections": 1,
"adversarial_poisonings": 1,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.8756,
0.9233,
0.3481,
0.9439,
0.9167,
0.9144,
0.86,
0.9078,
0.9033,
0.9261,
0.8967,
0.8944,
0.8922,
0.89,
0.8878,
0.02,
0.9228,
0.0819,
0.8744,
0.87,
0.8833,
0.7254
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 24,
"steps": 45,
"score": 0.6293,
"total_reward": 16.3622,
"completion_rate": 0.75,
"detection_rate": 1.0,
"trust_calibration": 0.813,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3156,
0.9483,
0.9461,
0.3197,
0.9072,
0.8756,
0.9033,
0.3541,
0.02,
0.9044,
0.02,
0.3608,
0.8511,
0.2967,
0.8722,
0.8356,
0.8111,
0.2867,
0.8544,
0.845,
0.3156,
0.8061,
0.8367,
0.7972,
0.7658
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 25,
"steps": 34,
"score": 0.6745,
"total_reward": 16.8613,
"completion_rate": 0.85,
"detection_rate": 0.0,
"trust_calibration": 0.809,
"adversarial_detections": 0,
"adversarial_poisonings": 2,
"status": "failed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.8933,
0.9461,
0.9189,
0.8644,
0.9122,
0.91,
0.8556,
0.8711,
0.8989,
0.3237,
0.8944,
0.8922,
0.8378,
0.3356,
0.8833,
0.8489,
0.0797,
0.3244,
0.3222,
0.8878,
0.8906,
0.9061,
0.2967,
0.01
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 26,
"steps": 43,
"score": 0.5813,
"total_reward": 17.4397,
"completion_rate": 0.75,
"detection_rate": 0.0,
"trust_calibration": 0.815,
"adversarial_detections": 0,
"adversarial_poisonings": 1,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.8933,
0.9211,
0.9439,
0.9167,
0.9144,
0.02,
0.3308,
0.8711,
0.8467,
0.8922,
0.8878,
0.3486,
0.3033,
0.2759,
0.8767,
0.3244,
0.3452,
0.29,
0.8156,
0.8633,
0.2889,
0.0597,
0.8544,
0.3372,
0.8478,
0.2956,
0.2811,
0.2889,
0.4707
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 27,
"steps": 40,
"score": 0.5674,
"total_reward": 15.3205,
"completion_rate": 0.7,
"detection_rate": 0.25,
"trust_calibration": 0.816,
"adversarial_detections": 1,
"adversarial_poisonings": 3,
"status": "failed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9206,
0.3433,
0.3541,
0.9189,
0.9244,
0.3622,
0.9078,
0.9133,
0.8689,
0.8644,
0.885,
0.02,
0.8533,
0.9061,
0.8267,
0.9139,
0.073,
0.3356,
0.3441,
0.0619,
0.8994,
0.2722,
0.323,
0.3308,
0.8433,
0.01
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 28,
"steps": 32,
"score": 0.6685,
"total_reward": 16.0443,
"completion_rate": 0.8,
"detection_rate": 0.0,
"trust_calibration": 0.793,
"adversarial_detections": 0,
"adversarial_poisonings": 1,
"status": "failed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.8956,
0.9211,
0.9189,
0.8844,
0.9122,
0.91,
0.9078,
0.9056,
0.3763,
0.02,
0.3167,
0.8944,
0.89,
0.9128,
0.8856,
0.8461,
0.8589,
0.3267,
0.8372,
0.9128,
0.2933,
0.3461,
0.01
]
},
{
"policy": "random",
"task_type": "task3",
"seed": 29,
"steps": 40,
"score": 0.6868,
"total_reward": 17.8577,
"completion_rate": 0.85,
"detection_rate": 1.0,
"trust_calibration": 0.907,
"adversarial_detections": 1,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.8933,
0.3411,
0.9189,
0.02,
0.8822,
0.9028,
0.02,
0.8511,
0.8989,
0.3467,
0.3444,
0.8578,
0.8856,
0.8511,
0.2989,
0.9094,
0.87,
0.8856,
0.3641,
0.8567,
0.3572,
0.8895,
0.8083,
0.8353
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 0,
"steps": 43,
"score": 0.7114,
"total_reward": 18.4969,
"completion_rate": 0.85,
"detection_rate": 1.0,
"trust_calibration": 0.729,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9528,
0.9506,
0.9483,
0.3711,
0.3689,
0.3667,
0.3344,
0.91,
0.9056,
0.9011,
0.3267,
0.8922,
0.8878,
0.8833,
0.8789,
0.8744,
0.3,
0.2956,
0.8611,
0.2867,
0.8522,
0.8478,
0.8433,
0.8389,
0.7841
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 1,
"steps": 29,
"score": 0.7083,
"total_reward": 17.707,
"completion_rate": 0.85,
"detection_rate": 0.5,
"trust_calibration": 0.721,
"adversarial_detections": 4,
"adversarial_poisonings": 4,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9528,
0.9506,
0.9483,
0.3711,
0.3689,
0.3667,
0.9144,
0.9122,
0.91,
0.9078,
0.9056,
0.9033,
0.9011,
0.8989,
0.8967,
0.0974,
0.0952,
0.093,
0.0908,
0.925,
0.9206,
0.9161,
0.9117,
0.6632
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 2,
"steps": 29,
"score": 0.6919,
"total_reward": 17.2983,
"completion_rate": 0.85,
"detection_rate": 0.4444,
"trust_calibration": 0.561,
"adversarial_detections": 4,
"adversarial_poisonings": 5,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3778,
0.3456,
0.9233,
0.9211,
0.9189,
0.9167,
0.9144,
0.9122,
0.91,
0.9078,
0.9056,
0.9033,
0.9011,
0.8989,
0.0997,
0.0974,
0.0952,
0.093,
0.0908,
0.925,
0.9206,
0.9161,
0.9117,
0.6065
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 3,
"steps": 42,
"score": 0.8546,
"total_reward": 18.8008,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.843,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.3111,
0.8767,
0.8722,
0.8678,
0.8633,
0.8589,
0.8544,
0.85,
0.8456,
0.8411,
0.8675
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 4,
"steps": 29,
"score": 0.7165,
"total_reward": 17.9128,
"completion_rate": 0.9,
"detection_rate": 0.4444,
"trust_calibration": 0.721,
"adversarial_detections": 4,
"adversarial_poisonings": 5,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.9233,
0.9211,
0.9189,
0.9167,
0.3414,
0.9122,
0.91,
0.9078,
0.9056,
0.9033,
0.9011,
0.8989,
0.0997,
0.0974,
0.0952,
0.093,
0.0908,
0.925,
0.9206,
0.9161,
0.9117,
0.664
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 5,
"steps": 46,
"score": 0.7558,
"total_reward": 18.1385,
"completion_rate": 0.9,
"detection_rate": 1.0,
"trust_calibration": 0.832,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.3422,
0.9078,
0.3333,
0.8989,
0.8944,
0.32,
0.8856,
0.3111,
0.8767,
0.8722,
0.8678,
0.8633,
0.8589,
0.8544,
0.85,
0.8456,
0.2711,
0.8367,
0.8322,
0.8229
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 6,
"steps": 27,
"score": 0.6991,
"total_reward": 16.778,
"completion_rate": 0.85,
"detection_rate": 0.4,
"trust_calibration": 0.725,
"adversarial_detections": 4,
"adversarial_poisonings": 6,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.9233,
0.9211,
0.9189,
0.9167,
0.9144,
0.9122,
0.91,
0.9078,
0.9056,
0.9033,
0.9011,
0.1019,
0.0997,
0.0974,
0.0952,
0.093,
0.0908,
0.925,
0.9206,
0.9161,
0.6387
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 7,
"steps": 42,
"score": 0.7756,
"total_reward": 19.3902,
"completion_rate": 0.95,
"detection_rate": 1.0,
"trust_calibration": 0.835,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.3763,
0.9211,
0.3919,
0.3997,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.2933,
0.8589,
0.8544,
0.28,
0.8456,
0.8411,
0.8478
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 8,
"steps": 44,
"score": 0.809,
"total_reward": 19.4157,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.853,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.3526,
0.9211,
0.9167,
0.9122,
0.9078,
0.3333,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.8589,
0.8544,
0.85,
0.2756,
0.8411,
0.8367,
0.8654
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 9,
"steps": 40,
"score": 0.782,
"total_reward": 19.5499,
"completion_rate": 0.95,
"detection_rate": 1.0,
"trust_calibration": 0.837,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.9233,
0.3841,
0.9189,
0.3997,
0.3994,
0.3972,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.2889,
0.8544,
0.85,
0.8456,
0.8528
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 10,
"steps": 31,
"score": 0.712,
"total_reward": 17.8008,
"completion_rate": 0.85,
"detection_rate": 0.625,
"trust_calibration": 0.448,
"adversarial_detections": 5,
"adversarial_poisonings": 3,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.9233,
0.9211,
0.9189,
0.9167,
0.9144,
0.9122,
0.91,
0.9078,
0.9056,
0.3303,
0.3281,
0.8989,
0.0997,
0.0974,
0.0952,
0.9295,
0.925,
0.9206,
0.9161,
0.9117,
0.3356,
0.6281
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 11,
"steps": 40,
"score": 0.7732,
"total_reward": 18.5566,
"completion_rate": 0.9,
"detection_rate": 1.0,
"trust_calibration": 0.835,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9528,
0.9506,
0.9483,
0.3711,
0.3689,
0.3667,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.2978,
0.2933,
0.8589,
0.8544,
0.85,
0.8456,
0.8349
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 12,
"steps": 42,
"score": 0.8546,
"total_reward": 18.8009,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.844,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.2889,
0.8544,
0.85,
0.8456,
0.8411,
0.8676
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 13,
"steps": 39,
"score": 0.833,
"total_reward": 18.3252,
"completion_rate": 0.95,
"detection_rate": 1.0,
"trust_calibration": 0.811,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.3456,
0.3433,
0.9189,
0.9144,
0.91,
0.9056,
0.9011,
0.8967,
0.8922,
0.8878,
0.8833,
0.8789,
0.8744,
0.87,
0.8656,
0.8611,
0.8567,
0.8522,
0.8478,
0.8485
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 14,
"steps": 29,
"score": 0.6889,
"total_reward": 17.9127,
"completion_rate": 0.85,
"detection_rate": 0.5,
"trust_calibration": 0.609,
"adversarial_detections": 4,
"adversarial_poisonings": 4,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.3763,
0.3841,
0.3689,
0.9167,
0.9144,
0.9122,
0.91,
0.9078,
0.9056,
0.9033,
0.9011,
0.8989,
0.3237,
0.8944,
0.8922,
0.093,
0.0908,
0.0886,
0.0863,
0.9206,
0.9161,
0.9117,
0.6353
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 15,
"steps": 30,
"score": 0.6847,
"total_reward": 18.4869,
"completion_rate": 0.9,
"detection_rate": 0.4444,
"trust_calibration": 0.635,
"adversarial_detections": 4,
"adversarial_poisonings": 5,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3608,
0.9256,
0.9233,
0.9211,
0.3459,
0.9167,
0.9144,
0.9122,
0.91,
0.3348,
0.9056,
0.9033,
0.9011,
0.8989,
0.8967,
0.8944,
0.8922,
0.093,
0.0908,
0.0886,
0.0863,
0.0841,
0.9184,
0.9139,
0.9095,
0.6404
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 16,
"steps": 42,
"score": 0.8546,
"total_reward": 18.8009,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.844,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.2889,
0.8544,
0.85,
0.8456,
0.8411,
0.8676
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 17,
"steps": 46,
"score": 0.8048,
"total_reward": 19.316,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.842,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.3467,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.2889,
0.8544,
0.28,
0.8456,
0.8411,
0.8367,
0.8322,
0.8605
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 18,
"steps": 26,
"score": 0.6967,
"total_reward": 16.7213,
"completion_rate": 0.85,
"detection_rate": 0.3333,
"trust_calibration": 0.701,
"adversarial_detections": 3,
"adversarial_poisonings": 6,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.9233,
0.9211,
0.9189,
0.9167,
0.9144,
0.9122,
0.91,
0.9078,
0.9056,
0.9033,
0.9011,
0.8989,
0.0997,
0.0974,
0.0952,
0.093,
0.0908,
0.0886,
0.9228,
0.9184,
0.6149
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 19,
"steps": 20,
"score": 0.6193,
"total_reward": 13.0053,
"completion_rate": 0.65,
"detection_rate": 0.0,
"trust_calibration": 0.576,
"adversarial_detections": 0,
"adversarial_poisonings": 5,
"status": "failed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3478,
0.9256,
0.9233,
0.9211,
0.9189,
0.3437,
0.9144,
0.9122,
0.91,
0.9078,
0.9056,
0.9033,
0.9011,
0.8989,
0.8967,
0.0974,
0.0952,
0.093,
0.0908,
0.01
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 20,
"steps": 46,
"score": 0.7498,
"total_reward": 19.4938,
"completion_rate": 0.95,
"detection_rate": 1.0,
"trust_calibration": 0.835,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.3763,
0.3841,
0.9167,
0.9122,
0.3378,
0.9033,
0.8989,
0.8944,
0.89,
0.3156,
0.8811,
0.8767,
0.3022,
0.8678,
0.8633,
0.8589,
0.2844,
0.85,
0.8456,
0.8411,
0.8367,
0.8322,
0.8412
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 21,
"steps": 42,
"score": 0.8546,
"total_reward": 18.8009,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.844,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.8589,
0.8544,
0.85,
0.2756,
0.8411,
0.8676
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 22,
"steps": 36,
"score": 0.7334,
"total_reward": 21.2675,
"completion_rate": 1.0,
"detection_rate": 0.8,
"trust_calibration": 0.747,
"adversarial_detections": 4,
"adversarial_poisonings": 1,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3778,
0.9256,
0.9233,
0.3481,
0.9189,
0.9167,
0.9144,
0.9122,
0.337,
0.9078,
0.3326,
0.9033,
0.9011,
0.3259,
0.8967,
0.3214,
0.8922,
0.89,
0.3148,
0.8856,
0.0863,
0.9206,
0.9161,
0.9117,
0.9073,
0.8789,
0.8544,
0.7968
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 23,
"steps": 28,
"score": 0.6922,
"total_reward": 17.3057,
"completion_rate": 0.85,
"detection_rate": 0.4444,
"trust_calibration": 0.645,
"adversarial_detections": 4,
"adversarial_poisonings": 5,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.3686,
0.3433,
0.9211,
0.9189,
0.9167,
0.9144,
0.9122,
0.91,
0.9078,
0.9056,
0.9033,
0.9011,
0.8989,
0.8967,
0.0974,
0.0952,
0.093,
0.0908,
0.0886,
0.9228,
0.9184,
0.9139,
0.6298
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 24,
"steps": 46,
"score": 0.7725,
"total_reward": 20.0838,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.836,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.3686,
0.9483,
0.3711,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.3022,
0.8678,
0.8633,
0.2889,
0.8544,
0.85,
0.8456,
0.2711,
0.8367,
0.8322,
0.8591
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 25,
"steps": 34,
"score": 0.6755,
"total_reward": 18.9148,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.71,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9528,
0.9506,
0.9483,
0.3711,
0.3689,
0.3667,
0.9144,
0.3322,
0.91,
0.9078,
0.9056,
0.9033,
0.9011,
0.3189,
0.3167,
0.8944,
0.3122,
0.31,
0.3078,
0.8856,
0.9011,
0.8967,
0.8922,
0.3078,
0.3033,
0.8789,
0.782
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 26,
"steps": 34,
"score": 0.6561,
"total_reward": 19.0282,
"completion_rate": 0.8,
"detection_rate": 0.6667,
"trust_calibration": 0.467,
"adversarial_detections": 2,
"adversarial_poisonings": 1,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9528,
0.9506,
0.9483,
0.3711,
0.8989,
0.3667,
0.3644,
0.9372,
0.935,
0.3578,
0.3556,
0.3533,
0.3289,
0.8967,
0.8944,
0.8922,
0.317,
0.8878,
0.3126,
0.8833,
0.0841,
0.9184,
0.9139,
0.8878,
0.8656,
0.3163,
0.3419,
0.6213
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 27,
"steps": 46,
"score": 0.7256,
"total_reward": 20.3155,
"completion_rate": 0.95,
"detection_rate": 1.0,
"trust_calibration": 0.828,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.3763,
0.9211,
0.9189,
0.3997,
0.3994,
0.3972,
0.9078,
0.9033,
0.3289,
0.8944,
0.89,
0.3156,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.8589,
0.2844,
0.85,
0.2756,
0.8411,
0.8367,
0.8322,
0.8395
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 28,
"steps": 28,
"score": 0.7355,
"total_reward": 17.6509,
"completion_rate": 0.9,
"detection_rate": 0.5,
"trust_calibration": 0.725,
"adversarial_detections": 5,
"adversarial_poisonings": 5,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.9233,
0.9211,
0.9189,
0.9167,
0.9144,
0.9122,
0.91,
0.9078,
0.9056,
0.9033,
0.9011,
0.1019,
0.0997,
0.0974,
0.0952,
0.093,
0.9273,
0.9228,
0.9184,
0.9139,
0.6841
]
},
{
"policy": "heuristic",
"task_type": "task3",
"seed": 29,
"steps": 28,
"score": 0.7258,
"total_reward": 18.144,
"completion_rate": 0.9,
"detection_rate": 0.5,
"trust_calibration": 0.709,
"adversarial_detections": 4,
"adversarial_poisonings": 4,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.9233,
0.9211,
0.9189,
0.9167,
0.9144,
0.3392,
0.91,
0.9078,
0.9056,
0.9033,
0.3281,
0.8989,
0.8967,
0.8944,
0.0952,
0.093,
0.0908,
0.0886,
0.9228,
0.9184,
0.9139,
0.6801
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 0,
"steps": 42,
"score": 0.867,
"total_reward": 19.0739,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.935,
"adversarial_detections": 6,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.3111,
0.8767,
0.8722,
0.8678,
0.8633,
0.9006,
0.8961,
0.8917,
0.8873,
0.8828,
0.8904
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 1,
"steps": 40,
"score": 0.8977,
"total_reward": 18.8524,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.931,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.9095,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.8938
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 2,
"steps": 42,
"score": 0.8422,
"total_reward": 18.5276,
"completion_rate": 0.95,
"detection_rate": 1.0,
"trust_calibration": 0.933,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3556,
0.3511,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.8828,
0.8724
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 3,
"steps": 42,
"score": 0.8689,
"total_reward": 19.1153,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.934,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.3111,
0.8767,
0.8722,
0.8678,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.8828,
0.8901
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 4,
"steps": 42,
"score": 0.8689,
"total_reward": 19.1154,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.934,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.3289,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.8828,
0.8901
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 5,
"steps": 46,
"score": 0.7916,
"total_reward": 18.9976,
"completion_rate": 0.95,
"detection_rate": 1.0,
"trust_calibration": 0.917,
"adversarial_detections": 6,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.3422,
0.9078,
0.3333,
0.8989,
0.8944,
0.32,
0.8856,
0.3111,
0.8767,
0.8722,
0.8678,
0.8633,
0.8589,
0.8544,
0.8917,
0.8873,
0.8828,
0.8784,
0.8739,
0.8618
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 6,
"steps": 40,
"score": 0.8977,
"total_reward": 18.8523,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.931,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.9095,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.8938
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 7,
"steps": 40,
"score": 0.8958,
"total_reward": 18.8108,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.931,
"adversarial_detections": 6,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.894
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 8,
"steps": 44,
"score": 0.8405,
"total_reward": 19.3315,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.934,
"adversarial_detections": 6,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.3511,
0.9167,
0.9122,
0.9078,
0.9033,
0.3289,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.8589,
0.8961,
0.8917,
0.8873,
0.8828,
0.8784,
0.8857
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 9,
"steps": 40,
"score": 0.8958,
"total_reward": 18.8108,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.931,
"adversarial_detections": 6,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.894
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 10,
"steps": 42,
"score": 0.8421,
"total_reward": 18.5263,
"completion_rate": 0.95,
"detection_rate": 1.0,
"trust_calibration": 0.928,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.3067,
0.3022,
0.8678,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.8828,
0.871
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 11,
"steps": 42,
"score": 0.8689,
"total_reward": 19.1153,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.934,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.3467,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.8828,
0.8901
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 12,
"steps": 40,
"score": 0.8977,
"total_reward": 18.8524,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.931,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.9095,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.8938
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 13,
"steps": 40,
"score": 0.8958,
"total_reward": 18.8108,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.931,
"adversarial_detections": 6,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.894
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 14,
"steps": 44,
"score": 0.8405,
"total_reward": 19.3311,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.932,
"adversarial_detections": 6,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.3422,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.2933,
0.8589,
0.8961,
0.8917,
0.8873,
0.8828,
0.8784,
0.8853
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 15,
"steps": 46,
"score": 0.8162,
"total_reward": 19.5883,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.93,
"adversarial_detections": 6,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3556,
0.9211,
0.9167,
0.9122,
0.3378,
0.9033,
0.8989,
0.8944,
0.89,
0.3156,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.8589,
0.8544,
0.8917,
0.8873,
0.8828,
0.8784,
0.8739,
0.8825
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 16,
"steps": 40,
"score": 0.8977,
"total_reward": 18.8523,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.931,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.9095,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.8938
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 17,
"steps": 42,
"score": 0.867,
"total_reward": 19.0739,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.935,
"adversarial_detections": 6,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.3467,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.9006,
0.8961,
0.8917,
0.8873,
0.8828,
0.8903
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 18,
"steps": 40,
"score": 0.8958,
"total_reward": 18.8108,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.931,
"adversarial_detections": 6,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.894
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 19,
"steps": 42,
"score": 0.8689,
"total_reward": 19.1153,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.934,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.3333,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.8828,
0.8901
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 20,
"steps": 46,
"score": 0.7653,
"total_reward": 18.3663,
"completion_rate": 0.9,
"detection_rate": 1.0,
"trust_calibration": 0.909,
"adversarial_detections": 5,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3556,
0.9211,
0.9167,
0.3422,
0.9078,
0.9033,
0.3289,
0.8944,
0.89,
0.8856,
0.8811,
0.3067,
0.8722,
0.8678,
0.2933,
0.8589,
0.8544,
0.85,
0.8873,
0.8828,
0.8784,
0.8739,
0.8423
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 21,
"steps": 40,
"score": 0.8958,
"total_reward": 18.8109,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.932,
"adversarial_detections": 6,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.894
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 22,
"steps": 46,
"score": 0.7652,
"total_reward": 18.3659,
"completion_rate": 0.9,
"detection_rate": 1.0,
"trust_calibration": 0.908,
"adversarial_detections": 5,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3556,
0.9211,
0.9167,
0.3422,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.3111,
0.8767,
0.8722,
0.2978,
0.8633,
0.2889,
0.8544,
0.85,
0.8873,
0.8828,
0.8784,
0.8739,
0.8419
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 23,
"steps": 42,
"score": 0.8403,
"total_reward": 18.4862,
"completion_rate": 0.95,
"detection_rate": 1.0,
"trust_calibration": 0.934,
"adversarial_detections": 6,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3556,
0.3511,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.9006,
0.8961,
0.8917,
0.8873,
0.8828,
0.8727
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 24,
"steps": 44,
"score": 0.8405,
"total_reward": 19.3311,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.933,
"adversarial_detections": 6,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.3422,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.2933,
0.8589,
0.8961,
0.8917,
0.8873,
0.8828,
0.8784,
0.8854
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 25,
"steps": 44,
"score": 0.8405,
"total_reward": 19.3314,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.934,
"adversarial_detections": 6,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.3511,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.3244,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.8589,
0.8961,
0.8917,
0.8873,
0.8828,
0.8784,
0.8857
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 26,
"steps": 42,
"score": 0.8403,
"total_reward": 18.4855,
"completion_rate": 0.95,
"detection_rate": 1.0,
"trust_calibration": 0.931,
"adversarial_detections": 6,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.3067,
0.3022,
0.8678,
0.8633,
0.9006,
0.8961,
0.8917,
0.8873,
0.8828,
0.872
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 27,
"steps": 46,
"score": 0.8179,
"total_reward": 19.6285,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.924,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.3467,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.3111,
0.8767,
0.8722,
0.2978,
0.8633,
0.8589,
0.8961,
0.8917,
0.8873,
0.8828,
0.8784,
0.8739,
0.881
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 28,
"steps": 40,
"score": 0.8977,
"total_reward": 18.8524,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.931,
"adversarial_detections": 7,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.9095,
0.905,
0.9006,
0.8961,
0.8917,
0.8873,
0.8938
]
},
{
"policy": "oracle_lite",
"task_type": "task3",
"seed": 29,
"steps": 44,
"score": 0.8405,
"total_reward": 19.3311,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.933,
"adversarial_detections": 6,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.3244,
0.89,
0.8856,
0.8811,
0.8767,
0.3022,
0.8678,
0.8633,
0.8589,
0.8961,
0.8917,
0.8873,
0.8828,
0.8784,
0.8854
]
},
{
"policy": "trained",
"task_type": "task3",
"seed": 0,
"steps": 46,
"score": 0.7123,
"total_reward": 18.5191,
"completion_rate": 0.85,
"detection_rate": 1.0,
"trust_calibration": 0.729,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9528,
0.9506,
0.9483,
0.3711,
0.3689,
0.3667,
0.3344,
0.91,
0.9056,
0.9011,
0.3267,
0.8922,
0.8878,
0.8833,
0.8789,
0.8744,
0.3,
0.2956,
0.8789,
0.2844,
0.8678,
0.8611,
0.8367,
0.8322,
0.7797
]
},
{
"policy": "trained",
"task_type": "task3",
"seed": 1,
"steps": 30,
"score": 0.7434,
"total_reward": 18.5852,
"completion_rate": 0.9,
"detection_rate": 0.625,
"trust_calibration": 0.721,
"adversarial_detections": 5,
"adversarial_poisonings": 3,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9528,
0.9506,
0.9483,
0.3711,
0.3689,
0.3667,
0.9144,
0.9122,
0.91,
0.9078,
0.9056,
0.9033,
0.9011,
0.8989,
0.8967,
0.0974,
0.0952,
0.093,
0.9273,
0.9228,
0.9184,
0.9139,
0.9095,
0.716
]
},
{
"policy": "trained",
"task_type": "task3",
"seed": 2,
"steps": 28,
"score": 0.7198,
"total_reward": 17.2756,
"completion_rate": 0.85,
"detection_rate": 0.5556,
"trust_calibration": 0.559,
"adversarial_detections": 5,
"adversarial_poisonings": 4,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3778,
0.3456,
0.9233,
0.9211,
0.9189,
0.9167,
0.9144,
0.9122,
0.91,
0.9078,
0.9056,
0.9033,
0.9011,
0.8989,
0.0997,
0.0974,
0.0952,
0.093,
0.9273,
0.9228,
0.9184,
0.9139,
0.6418
]
},
{
"policy": "trained",
"task_type": "task3",
"seed": 3,
"steps": 42,
"score": 0.8546,
"total_reward": 18.8008,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.843,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.3111,
0.8767,
0.8722,
0.8678,
0.8633,
0.8589,
0.8544,
0.85,
0.8456,
0.8411,
0.8675
]
},
{
"policy": "trained",
"task_type": "task3",
"seed": 4,
"steps": 28,
"score": 0.7455,
"total_reward": 17.8908,
"completion_rate": 0.9,
"detection_rate": 0.5556,
"trust_calibration": 0.722,
"adversarial_detections": 5,
"adversarial_poisonings": 4,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.9233,
0.9211,
0.9189,
0.9167,
0.3414,
0.9122,
0.91,
0.9078,
0.9056,
0.9033,
0.9011,
0.8989,
0.0997,
0.0974,
0.0952,
0.093,
0.9273,
0.9228,
0.9184,
0.9139,
0.7
]
},
{
"policy": "trained",
"task_type": "task3",
"seed": 5,
"steps": 46,
"score": 0.7558,
"total_reward": 18.1385,
"completion_rate": 0.9,
"detection_rate": 1.0,
"trust_calibration": 0.832,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.3422,
0.9078,
0.3333,
0.8989,
0.8944,
0.32,
0.8856,
0.3111,
0.8767,
0.8722,
0.8678,
0.8633,
0.8589,
0.8544,
0.85,
0.8456,
0.2711,
0.8367,
0.8322,
0.8229
]
},
{
"policy": "trained",
"task_type": "task3",
"seed": 6,
"steps": 27,
"score": 0.7645,
"total_reward": 17.5844,
"completion_rate": 0.9,
"detection_rate": 0.5556,
"trust_calibration": 0.711,
"adversarial_detections": 5,
"adversarial_poisonings": 4,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.9233,
0.9211,
0.9189,
0.9167,
0.9144,
0.9122,
0.91,
0.9078,
0.9056,
0.9033,
0.9011,
0.1019,
0.0997,
0.0974,
0.0952,
0.9295,
0.925,
0.9206,
0.9161,
0.6995
]
},
{
"policy": "trained",
"task_type": "task3",
"seed": 7,
"steps": 42,
"score": 0.7756,
"total_reward": 19.3902,
"completion_rate": 0.95,
"detection_rate": 1.0,
"trust_calibration": 0.835,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.3763,
0.9211,
0.3919,
0.3997,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.2933,
0.8589,
0.8544,
0.28,
0.8456,
0.8411,
0.8478
]
},
{
"policy": "trained",
"task_type": "task3",
"seed": 8,
"steps": 44,
"score": 0.809,
"total_reward": 19.4157,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.853,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.3526,
0.9211,
0.9167,
0.9122,
0.9078,
0.3333,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.8589,
0.8544,
0.85,
0.2756,
0.8411,
0.8367,
0.8654
]
},
{
"policy": "trained",
"task_type": "task3",
"seed": 9,
"steps": 40,
"score": 0.782,
"total_reward": 19.5499,
"completion_rate": 0.95,
"detection_rate": 1.0,
"trust_calibration": 0.837,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.9233,
0.3841,
0.9189,
0.3997,
0.3994,
0.3972,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.2889,
0.8544,
0.85,
0.8456,
0.8528
]
},
{
"policy": "trained",
"task_type": "task3",
"seed": 10,
"steps": 30,
"score": 0.7394,
"total_reward": 17.7466,
"completion_rate": 0.85,
"detection_rate": 0.7143,
"trust_calibration": 0.434,
"adversarial_detections": 5,
"adversarial_poisonings": 2,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.9233,
0.9211,
0.9189,
0.9167,
0.9144,
0.9122,
0.91,
0.9078,
0.9056,
0.3303,
0.3281,
0.8989,
0.0997,
0.0974,
0.9317,
0.9273,
0.9228,
0.9184,
0.9139,
0.3378,
0.6536
]
},
{
"policy": "trained",
"task_type": "task3",
"seed": 11,
"steps": 40,
"score": 0.7732,
"total_reward": 18.5566,
"completion_rate": 0.9,
"detection_rate": 1.0,
"trust_calibration": 0.835,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9528,
0.9506,
0.9483,
0.3711,
0.3689,
0.3667,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.2978,
0.2933,
0.8589,
0.8544,
0.85,
0.8456,
0.8349
]
},
{
"policy": "trained",
"task_type": "task3",
"seed": 12,
"steps": 42,
"score": 0.8546,
"total_reward": 18.8009,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.844,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.2889,
0.8544,
0.85,
0.8456,
0.8411,
0.8676
]
},
{
"policy": "trained",
"task_type": "task3",
"seed": 13,
"steps": 39,
"score": 0.833,
"total_reward": 18.3252,
"completion_rate": 0.95,
"detection_rate": 1.0,
"trust_calibration": 0.811,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.3456,
0.3433,
0.9189,
0.9144,
0.91,
0.9056,
0.9011,
0.8967,
0.8922,
0.8878,
0.8833,
0.8789,
0.8744,
0.87,
0.8656,
0.8611,
0.8567,
0.8522,
0.8478,
0.8485
]
},
{
"policy": "trained",
"task_type": "task3",
"seed": 14,
"steps": 30,
"score": 0.7228,
"total_reward": 18.7931,
"completion_rate": 0.9,
"detection_rate": 0.625,
"trust_calibration": 0.609,
"adversarial_detections": 5,
"adversarial_poisonings": 3,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.3763,
0.3841,
0.3689,
0.9167,
0.9144,
0.9122,
0.91,
0.9078,
0.9056,
0.9033,
0.9011,
0.8989,
0.3237,
0.8944,
0.8922,
0.093,
0.0908,
0.0886,
0.9228,
0.9184,
0.9139,
0.9095,
0.6881
]
},
{
"policy": "trained",
"task_type": "task3",
"seed": 15,
"steps": 30,
"score": 0.7426,
"total_reward": 19.3077,
"completion_rate": 0.95,
"detection_rate": 0.625,
"trust_calibration": 0.622,
"adversarial_detections": 5,
"adversarial_poisonings": 3,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3608,
0.9256,
0.9233,
0.9211,
0.3459,
0.9167,
0.9144,
0.9122,
0.91,
0.3348,
0.9056,
0.9033,
0.9011,
0.8989,
0.8967,
0.8944,
0.8922,
0.093,
0.0908,
0.0886,
0.9228,
0.9184,
0.9139,
0.9095,
0.7087
]
},
{
"policy": "trained",
"task_type": "task3",
"seed": 16,
"steps": 42,
"score": 0.8546,
"total_reward": 18.8009,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.844,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.2889,
0.8544,
0.85,
0.8456,
0.8411,
0.8676
]
},
{
"policy": "trained",
"task_type": "task3",
"seed": 17,
"steps": 46,
"score": 0.8048,
"total_reward": 19.316,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.842,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.3467,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.2889,
0.8544,
0.28,
0.8456,
0.8411,
0.8367,
0.8322,
0.8605
]
},
{
"policy": "trained",
"task_type": "task3",
"seed": 18,
"steps": 27,
"score": 0.7333,
"total_reward": 17.5998,
"completion_rate": 0.9,
"detection_rate": 0.4444,
"trust_calibration": 0.701,
"adversarial_detections": 4,
"adversarial_poisonings": 5,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.9233,
0.9211,
0.9189,
0.9167,
0.9144,
0.9122,
0.91,
0.9078,
0.9056,
0.9033,
0.9011,
0.8989,
0.0997,
0.0974,
0.0952,
0.093,
0.0908,
0.925,
0.9206,
0.9161,
0.6635
]
},
{
"policy": "trained",
"task_type": "task3",
"seed": 19,
"steps": 29,
"score": 0.7268,
"total_reward": 18.1697,
"completion_rate": 0.9,
"detection_rate": 0.5556,
"trust_calibration": 0.66,
"adversarial_detections": 5,
"adversarial_poisonings": 4,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3478,
0.9256,
0.9233,
0.9211,
0.9189,
0.3437,
0.9144,
0.9122,
0.91,
0.9078,
0.9056,
0.9033,
0.9011,
0.8989,
0.8967,
0.0974,
0.0952,
0.093,
0.0908,
0.925,
0.9206,
0.9161,
0.9117,
0.6822
]
},
{
"policy": "trained",
"task_type": "task3",
"seed": 20,
"steps": 46,
"score": 0.7498,
"total_reward": 19.4938,
"completion_rate": 0.95,
"detection_rate": 1.0,
"trust_calibration": 0.835,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.3763,
0.3841,
0.9167,
0.9122,
0.3378,
0.9033,
0.8989,
0.8944,
0.89,
0.3156,
0.8811,
0.8767,
0.3022,
0.8678,
0.8633,
0.8589,
0.2844,
0.85,
0.8456,
0.8411,
0.8367,
0.8322,
0.8412
]
},
{
"policy": "trained",
"task_type": "task3",
"seed": 21,
"steps": 42,
"score": 0.8546,
"total_reward": 18.8009,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.844,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9256,
0.9211,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.8589,
0.8544,
0.85,
0.2756,
0.8411,
0.8676
]
},
{
"policy": "trained",
"task_type": "task3",
"seed": 22,
"steps": 35,
"score": 0.7608,
"total_reward": 21.3031,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.748,
"adversarial_detections": 5,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.3778,
0.9256,
0.9233,
0.3481,
0.9189,
0.9167,
0.9144,
0.9122,
0.337,
0.9078,
0.3326,
0.9033,
0.9011,
0.3259,
0.8967,
0.3214,
0.8922,
0.89,
0.3148,
0.8856,
0.9228,
0.9184,
0.9139,
0.9095,
0.905,
0.8767,
0.8592
]
},
{
"policy": "trained",
"task_type": "task3",
"seed": 23,
"steps": 27,
"score": 0.7184,
"total_reward": 17.2411,
"completion_rate": 0.85,
"detection_rate": 0.5,
"trust_calibration": 0.629,
"adversarial_detections": 4,
"adversarial_poisonings": 4,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.3686,
0.3433,
0.9211,
0.9189,
0.9167,
0.9144,
0.9122,
0.91,
0.9078,
0.9056,
0.9033,
0.9011,
0.8989,
0.8967,
0.0974,
0.0952,
0.093,
0.0908,
0.925,
0.9206,
0.9161,
0.6449
]
},
{
"policy": "trained",
"task_type": "task3",
"seed": 24,
"steps": 46,
"score": 0.7725,
"total_reward": 20.0838,
"completion_rate": 1.0,
"detection_rate": 1.0,
"trust_calibration": 0.836,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.3686,
0.9483,
0.3711,
0.9167,
0.9122,
0.9078,
0.9033,
0.8989,
0.8944,
0.89,
0.8856,
0.8811,
0.8767,
0.3022,
0.8678,
0.8633,
0.2889,
0.8544,
0.85,
0.8456,
0.2711,
0.8367,
0.8322,
0.8591
]
},
{
"policy": "trained",
"task_type": "task3",
"seed": 25,
"steps": 34,
"score": 0.6755,
"total_reward": 18.9148,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.71,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9528,
0.9506,
0.9483,
0.3711,
0.3689,
0.3667,
0.9144,
0.3322,
0.91,
0.9078,
0.9056,
0.9033,
0.9011,
0.3189,
0.3167,
0.8944,
0.3122,
0.31,
0.3078,
0.8856,
0.9011,
0.8967,
0.8922,
0.3078,
0.3033,
0.8789,
0.782
]
},
{
"policy": "trained",
"task_type": "task3",
"seed": 26,
"steps": 33,
"score": 0.6816,
"total_reward": 19.0854,
"completion_rate": 0.8,
"detection_rate": 1.0,
"trust_calibration": 0.492,
"adversarial_detections": 3,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9528,
0.9506,
0.9483,
0.3711,
0.8989,
0.3667,
0.3644,
0.9372,
0.935,
0.3578,
0.3556,
0.3533,
0.3289,
0.8967,
0.8944,
0.8922,
0.317,
0.8878,
0.3126,
0.8833,
0.9206,
0.9161,
0.9117,
0.8856,
0.8633,
0.3141,
0.7297
]
},
{
"policy": "trained",
"task_type": "task3",
"seed": 27,
"steps": 46,
"score": 0.7256,
"total_reward": 20.3155,
"completion_rate": 0.95,
"detection_rate": 1.0,
"trust_calibration": 0.828,
"adversarial_detections": 0,
"adversarial_poisonings": 0,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.3763,
0.9211,
0.9189,
0.3997,
0.3994,
0.3972,
0.9078,
0.9033,
0.3289,
0.8944,
0.89,
0.3156,
0.8811,
0.8767,
0.8722,
0.8678,
0.8633,
0.8589,
0.2844,
0.85,
0.2756,
0.8411,
0.8367,
0.8322,
0.8395
]
},
{
"policy": "trained",
"task_type": "task3",
"seed": 28,
"steps": 27,
"score": 0.7645,
"total_reward": 17.5845,
"completion_rate": 0.9,
"detection_rate": 0.5556,
"trust_calibration": 0.712,
"adversarial_detections": 5,
"adversarial_poisonings": 4,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.9233,
0.9211,
0.9189,
0.9167,
0.9144,
0.9122,
0.91,
0.9078,
0.9056,
0.9033,
0.9011,
0.1019,
0.0997,
0.0974,
0.0952,
0.9295,
0.925,
0.9206,
0.9161,
0.6996
]
},
{
"policy": "trained",
"task_type": "task3",
"seed": 29,
"steps": 29,
"score": 0.761,
"total_reward": 19.0244,
"completion_rate": 0.95,
"detection_rate": 0.625,
"trust_calibration": 0.709,
"adversarial_detections": 5,
"adversarial_poisonings": 3,
"status": "completed",
"difficulty_profile": {
"adaptive": false,
"episodes_seen": 0,
"rolling_detection_rate": 0.0,
"adversarial_threshold": 0.7,
"high_stakes_ratio": 0.35,
"verify_budget_penalty": 0,
"adversary_benign_confidence": 0.88,
"adversary_poison_confidence": 0.92
},
"rewards": [
0.9278,
0.9256,
0.9233,
0.9211,
0.9189,
0.9167,
0.9144,
0.3392,
0.91,
0.9078,
0.9056,
0.9033,
0.3281,
0.8989,
0.8967,
0.8944,
0.0952,
0.093,
0.0908,
0.925,
0.9206,
0.9161,
0.9117,
0.7329
]
}
]
}