Add files using upload-large-folder tool

Browse files

Files changed (50) hide show

.hydra/config.yaml +211 -0
seed_1000/iteration_001/agent:Alice_rewards.csv +64 -0
seed_1000/iteration_001/agent:Bob_rewards.csv +64 -0
seed_1000/iteration_001/agent_trainer_log/basic_training_metrics_2025-08-20___08-14-30.json +44 -0
seed_1000/iteration_001/mgid:10345015_rollout_tree.json +0 -0
seed_1000/iteration_001/mgid:10754920_rollout_tree.json +0 -0
seed_1000/iteration_001/mgid:11645879_rollout_tree.json +0 -0
seed_1000/iteration_001/mgid:11816286_rollout_tree.json +0 -0
seed_1000/iteration_001/mgid:12802451_rollout_tree.json +0 -0
seed_1000/iteration_001/mgid:13550175_rollout_tree.json +0 -0
seed_1000/iteration_001/mgid:13655342_rollout_tree.json +0 -0
seed_1000/iteration_001/mgid:14228271_rollout_tree.json +0 -0
seed_1000/iteration_001/mgid:15733057_rollout_tree.json +0 -0
seed_1000/iteration_001/mgid:15865660_rollout_tree.json +0 -0
seed_1000/iteration_001/mgid:16317016_rollout_tree.json +0 -0
seed_1000/iteration_001/mgid:16358604_rollout_tree.json +0 -0
seed_1000/iteration_001/mgid:16527427_rollout_tree.json +0 -0
seed_1000/iteration_001/mgid:16830985_rollout_tree.json +0 -0
seed_1000/iteration_001/mgid:17030622_rollout_tree.json +0 -0
seed_1000/iteration_001/mgid:17339176_rollout_tree.json +0 -0
seed_1000/iteration_001/mgid:18768830_rollout_tree.json +0 -0
seed_1000/iteration_001/mgid:18955940_rollout_tree.json +0 -0
seed_1000/iteration_001/mgid:21242036_rollout_tree.json +0 -0
seed_1000/iteration_001/mgid:21773434_rollout_tree.json +0 -0
seed_1000/iteration_001/mgid:22674133_rollout_tree.json +0 -0
seed_1000/iteration_001/mgid:22908920_rollout_tree.json +0 -0
seed_1000/iteration_001/mgid:23104248_rollout_tree.json +0 -0
seed_1000/iteration_001/mgid:24698011_rollout_tree.json +0 -0
seed_1000/iteration_001/mgid:26297032_rollout_tree.json +0 -0
seed_1000/iteration_001/mgid:27954762_rollout_tree.json +0 -0
seed_1000/iteration_001/mgid:28124274_rollout_tree.json +0 -0
seed_1000/iteration_001/mgid:28146091_rollout_tree.json +0 -0
seed_1000/iteration_001/mgid:28169614_rollout_tree.json +0 -0
seed_1000/iteration_001/mgid:29594838_rollout_tree.json +0 -0
seed_1000/iteration_001/mgid:31696950_rollout_tree.json +0 -0
seed_1000/iteration_001/mgid:33566731_rollout_tree.json +0 -0
seed_1000/iteration_001/mgid:33816209_rollout_tree.json +0 -0
seed_1000/iteration_001/mgid:33855620_rollout_tree.json +0 -0
seed_1000/iteration_001/mgid:40496757_rollout_tree.json +0 -0
seed_1000/iteration_001/mgid:57359612_rollout_tree.json +0 -0
seed_1000/iteration_001/mgid:61016791_rollout_tree.json +0 -0
seed_1000/iteration_001/mgid:80373395_rollout_tree.json +0 -0
seed_1000/iteration_001/mgid:87817039_rollout_tree.json +0 -0
seed_1000/iteration_001/mgid:92400515_rollout_tree.json +0 -0
seed_1000/iteration_002/mgid:11888093_rollout_tree.json +0 -0
seed_1000/iteration_002/mgid:12569295_rollout_tree.json +0 -0
seed_1000/iteration_002/mgid:21829048_rollout_tree.json +0 -0
seed_1000/iteration_002/mgid:22669760_rollout_tree.json +0 -0
seed_1000/iteration_002/mgid:29038958_rollout_tree.json +0 -0
seed_1000/iteration_002/mgid:29723555_rollout_tree.json +0 -0

.hydra/config.yaml ADDED Viewed

	@@ -0,0 +1,211 @@

+experiment:
+  name: ipd_prev_ad_align_qwen2.5_7b
+  method: generate_and_train
+  description: no description
+  nb_epochs: 1000
+  nb_matches_per_iteration: 64
+  reinit_matches_each_it: true
+  checkpoint_every_n_iterations: 50
+  start_epoch: 0
+  resume_experiment: true
+  base_seed: 1000
+common_agent_kwargs:
+  max_errors: 0
+  allow_reasoning: false
+  max_reasoning_chars: 0
+  cooperate_string: <A>
+  defect_string: <B>
+  strategy_prompt: ''
+  intro_prompt: "Welcome to an iterated game. You are playing against another agent.\n\
+    \nGame Rules:\n1. In each round, both agents simultaneously choose to either play\
+    \ A or B.\n2. The payoffs for each round are as follows:\n   - If both agents\
+    \ play A, both receive 3 points.\n   - If both agents play B, both receive 1 points.\n\
+    \   - If one agent plays A and the other plays B, the player who plays A receives\
+    \ 0 points and the player who plays B receives 5 points.\n3. The goal is to maximize\
+    \ your total points over all rounds.\n4. You will see the previous play of the\
+    \ other player after the completion of a round.\n\nIn order to play A, you must\
+    \ write \"<A>\" and nothing else.\nIn order to play B, you must write \"<B>\"\
+    \ and nothing else.\n\nDuring the entire game, you must only write \"<A>\" or\
+    \ \"<B>\". If you write anything else, your response will be considered as an\
+    \ error.\n\nThe game starts now:\n"
+  goal_prompt: 'Your goal is to maximize your utility.
+    '
+agent_0_id: Alice
+agent_1_id: Bob
+agent_ids:
+- Alice
+- Bob
+markov_games:
+  runner_method_name: AlternativeActionsRunner
+  runner_kwargs:
+    nb_alternative_actions: 1
+    max_depth: 10
+  simulation_class_name: IPD
+  simulation_init_args:
+    rounds_per_game: 10
+    cooperate_actions:
+    - <A>
+    defect_actions:
+    - <B>
+    agent_ids: ${agent_ids}
+    reward: 3
+    punishment: 1
+    temptation: 5
+    sucker: 0
+  agents:
+    0:
+      agent_id: ${agent_0_id}
+      agent_class_name: IPDAgent
+      policy_id: base_llm/agent_adapter
+      init_kwargs:
+        max_errors: 0
+        allow_reasoning: false
+        max_reasoning_chars: 0
+        cooperate_string: <A>
+        defect_string: <B>
+        strategy_prompt: ''
+        intro_prompt: "Welcome to an iterated game. You are playing against another\
+          \ agent.\n\nGame Rules:\n1. In each round, both agents simultaneously choose\
+          \ to either play A or B.\n2. The payoffs for each round are as follows:\n\
+          \   - If both agents play A, both receive 3 points.\n   - If both agents\
+          \ play B, both receive 1 points.\n   - If one agent plays A and the other\
+          \ plays B, the player who plays A receives 0 points and the player who plays\
+          \ B receives 5 points.\n3. The goal is to maximize your total points over\
+          \ all rounds.\n4. You will see the previous play of the other player after\
+          \ the completion of a round.\n\nIn order to play A, you must write \"<A>\"\
+          \ and nothing else.\nIn order to play B, you must write \"<B>\" and nothing\
+          \ else.\n\nDuring the entire game, you must only write \"<A>\" or \"<B>\"\
+          . If you write anything else, your response will be considered as an error.\n\
+          \nThe game starts now:\n"
+        goal_prompt: 'Your goal is to maximize your utility.
+          '
+    1:
+      agent_id: ${agent_1_id}
+      agent_class_name: IPDAgent
+      policy_id: base_llm/agent_adapter
+      init_kwargs:
+        max_errors: 0
+        allow_reasoning: false
+        max_reasoning_chars: 0
+        cooperate_string: <A>
+        defect_string: <B>
+        strategy_prompt: ''
+        intro_prompt: "Welcome to an iterated game. You are playing against another\
+          \ agent.\n\nGame Rules:\n1. In each round, both agents simultaneously choose\
+          \ to either play A or B.\n2. The payoffs for each round are as follows:\n\
+          \   - If both agents play A, both receive 3 points.\n   - If both agents\
+          \ play B, both receive 1 points.\n   - If one agent plays A and the other\
+          \ plays B, the player who plays A receives 0 points and the player who plays\
+          \ B receives 5 points.\n3. The goal is to maximize your total points over\
+          \ all rounds.\n4. You will see the previous play of the other player after\
+          \ the completion of a round.\n\nIn order to play A, you must write \"<A>\"\
+          \ and nothing else.\nIn order to play B, you must write \"<B>\" and nothing\
+          \ else.\n\nDuring the entire game, you must only write \"<A>\" or \"<B>\"\
+          . If you write anything else, your response will be considered as an error.\n\
+          \nThe game starts now:\n"
+        goal_prompt: 'Your goal is to maximize your utility.
+          '
+  log_func: log_ipd_match
+  run_batched_matches_args:
+    nb_parallel_matches: -1
+temperature: 1.0
+models:
+  base_llm:
+    class: LeanLocalLLM
+    init_args:
+      llm_id: base_llm
+      model_name: Qwen/Qwen2.5-7B-Instruct
+      inference_backend: vllm
+      hf_kwargs:
+        device_map: auto
+        torch_dtype: bfloat16
+        max_memory:
+          0: 15GiB
+        attn_implementation: flash_attention_2
+      inference_backend_init_kwargs:
+        enable_prefix_caching: true
+        max_model_len: 10000.0
+        gpu_memory_utilization: 0.5
+        dtype: bfloat16
+        trust_remote_code: true
+        max_lora_rank: 32
+      inference_backend_sampling_params:
+        temperature: 1.0
+        top_p: 1.0
+        max_tokens: 400
+        top_k: -1
+      adapter_configs:
+        agent_adapter:
+          task_type: CAUSAL_LM
+          r: 32
+          lora_alpha: 64
+          lora_dropout: 0.0
+          target_modules: all-linear
+        critic_adapter:
+          task_type: CAUSAL_LM
+          r: 32
+          lora_alpha: 64
+          lora_dropout: 0.0
+          target_modules: all-linear
+critics:
+  agent_critic:
+    module_pointer:
+    - base_llm
+    - critic_adapter
+optimizers:
+  agent_optimizer:
+    module_pointer:
+    - base_llm
+    - agent_adapter
+    optimizer_class_name: torch.optim.Adam
+    init_args:
+      lr: 1.0e-06
+      weight_decay: 0.0
+  critic_optimizer:
+    module_pointer: agent_critic
+    optimizer_class_name: torch.optim.Adam
+    init_args:
+      lr: 1.0e-06
+      weight_decay: 0.0
+trainers:
+  agent_trainer:
+    class: TrainerAdAlign
+    module_pointers:
+      policy:
+      - base_llm
+      - agent_adapter
+      policy_optimizer: agent_optimizer
+      critic: agent_critic
+      critic_optimizer: critic_optimizer
+    kwargs:
+      entropy_coeff: 0.0
+      kl_coeff: 0.0
+      gradient_clipping: 1.0
+      restrict_tokens: null
+      mini_batch_size: 4
+      use_gradient_checkpointing: true
+      temperature: ${temperature}
+      device: cuda:0
+      use_gae: false
+      skip_discounted_state_visitation: true
+      gae_lambda_for_credits: 0.92
+      gae_lambda_for_targets: 0.92
+      discount_factor: 0.9
+      use_rloo: true
+      enable_tokenwise_logging: false
+      pg_loss_normalization: batch
+      reward_normalizing_constant: 5.0
+      ad_align_force_coop_first_step: false
+      ad_align_clipping: null
+      ad_align_gamma: 0.9
+      ad_align_exclude_k_equals_t: false
+      ad_align_use_sign: false
+      ad_align_beta: 0.1
+      use_old_ad_align: true
+      use_time_regularization: false
+      rloo_branch: false
+train_on_which_data:
+  agent_trainer: ${agent_ids}

seed_1000/iteration_001/agent:Alice_rewards.csv ADDED Viewed

	@@ -0,0 +1,64 @@

+1,5,1,5,1,5,1,5,1,5
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,1,1,1,1,1,1,1,1,1
+1,3,1,3,1,3,1,3,1,3
+1,5,1,5,1,5,1,5,1,5
+1,5,0,1,5,0,1,5,0,1
+1,0,1,3,1,5,0,1,5,0
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,5,5,1,0,0,0,0,0,0
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,5,1,5,1,5,1,5,1,5
+1,5,0,1,5,0,1,5,0,1
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,0,5,1,0,5,1,0,5,1
+1,5,1,5,1,5,1,5,1,5
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,0,3,5,1,0,3,5,1,0
+1,0,1,0,1,0,1,0,1,0
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,5,0,1,5,0,1,5,0,1
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,0,5,1,0,5,1,0,5,1
+1,0,5,1,0,5,1,0,5,1
+1,3,1,3,1,3,1,3,1,3
+1,5,1,3,0,1,3,0,1,3
+1,5,1,5,1,5,1,5,1,5
+1,5,1,1,1,1,1,1,1,1
+1,3,1,3,1,3,1,3,1,3
+1,0,1,0,1,0,1,0,1,0
+1,3,1,5,0,1,5,0,1,5
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,5,1,1,1,1,1,1,1,1
+1,1,1,1,1,1,1,1,1,1
+1,3,1,3,1,3,1,3,1,3

seed_1000/iteration_001/agent:Bob_rewards.csv ADDED Viewed

	@@ -0,0 +1,64 @@

+1,0,1,0,1,0,1,0,1,0
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,1,1,1,1,1,1,1,1,1
+1,3,1,3,1,3,1,3,1,3
+1,0,1,0,1,0,1,0,1,0
+1,0,5,1,0,5,1,0,5,1
+1,5,1,3,1,0,5,1,0,5
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,0,0,1,5,5,5,5,5,5
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,0,1,0,1,0,1,0,1,0
+1,0,5,1,0,5,1,0,5,1
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,5,0,1,5,0,1,5,0,1
+1,0,1,0,1,0,1,0,1,0
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,5,3,0,1,5,3,0,1,5
+1,5,1,5,1,5,1,5,1,5
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,0,5,1,0,5,1,0,5,1
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,5,0,1,5,0,1,5,0,1
+1,5,0,1,5,0,1,5,0,1
+1,3,1,3,1,3,1,3,1,3
+1,0,1,3,5,1,3,5,1,3
+1,0,1,0,1,0,1,0,1,0
+1,0,1,1,1,1,1,1,1,1
+1,3,1,3,1,3,1,3,1,3
+1,5,1,5,1,5,1,5,1,5
+1,3,1,0,5,1,0,5,1,0
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,3,1,3,1,3,1,3,1,3
+1,0,1,1,1,1,1,1,1,1
+1,1,1,1,1,1,1,1,1,1
+1,3,1,3,1,3,1,3,1,3

seed_1000/iteration_001/agent_trainer_log/basic_training_metrics_2025-08-20___08-14-30.json ADDED Viewed

	@@ -0,0 +1,44 @@

+{
+    "nb_rollouts": [
+        128
+    ],
+    "loss_mb_total": {
+        "value_mb_total": [
+            0.39206385612487793,
+            -0.016617843881249428,
+            -0.1133829802274704,
+            0.0943688377737999,
+            -3.867032289505005,
+            -0.8449669480323792,
+            -0.37973618507385254,
+            -2.167485237121582,
+            0.25572967529296875,
+            -0.37973612546920776,
+            0.14474299550056458,
+            -0.37973615527153015,
+            -0.06369704753160477,
+            -3.867032289505005,
+            -0.5333160161972046,
+            -0.5891353487968445,
+            1.1343384981155396,
+            -0.12959033250808716,
+            0.42394113540649414,
+            0.663422167301178,
+            -0.15981315076351166,
+            -2.8817927837371826,
+            -2.994982957839966,
+            1.0230556726455688,
+            0.8202357888221741,
+            -2.994982957839966,
+            0.027223097160458565,
+            -2.994982957839966,
+            0.4894254505634308,
+            -0.15981315076351166,
+            0.7859982252120972,
+            -3.225872278213501
+        ]
+    },
+    "gradient_norm": [
+        27.744529724121094
+    ]
+}

seed_1000/iteration_001/mgid:10345015_rollout_tree.json ADDED Viewed