diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_128/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_128/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..08309e5f427026fc374718aa9c227c2c880f75a9 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_128/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ae6f2f953a2e0134afe27549123ddbea7e47be8e3fed951e1a1b18caeb97309 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_128/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_128/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..2fe479aacc33ba9f4bc05d8b3707894acdf61991 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_128/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64bd024d93301f49cca52194ec8c6edba3e14fbbdeb5091be6443686ccd7275b +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_128/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_128/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..3a5ce045d777a83dede1e5cbf120c6b714591263 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_128/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15c4b82048832400d48c27fc1e4a4b13c873a18f840f97b885d9762606cb166b +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_128/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_128/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..28196616f9639ceb75c6399f945e121e19965948 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_128/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aeb3efd8564ab67a6fbe332a9161406e64249a6abf96f6a9ef33057368ade6c2 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_192/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_192/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..08309e5f427026fc374718aa9c227c2c880f75a9 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_192/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ae6f2f953a2e0134afe27549123ddbea7e47be8e3fed951e1a1b18caeb97309 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_192/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_192/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..2fe479aacc33ba9f4bc05d8b3707894acdf61991 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_192/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64bd024d93301f49cca52194ec8c6edba3e14fbbdeb5091be6443686ccd7275b +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_192/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_192/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..3a5ce045d777a83dede1e5cbf120c6b714591263 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_192/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15c4b82048832400d48c27fc1e4a4b13c873a18f840f97b885d9762606cb166b +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_192/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_192/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..28196616f9639ceb75c6399f945e121e19965948 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_192/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aeb3efd8564ab67a6fbe332a9161406e64249a6abf96f6a9ef33057368ade6c2 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_256/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_256/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..08309e5f427026fc374718aa9c227c2c880f75a9 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_256/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ae6f2f953a2e0134afe27549123ddbea7e47be8e3fed951e1a1b18caeb97309 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_256/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_256/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..2fe479aacc33ba9f4bc05d8b3707894acdf61991 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_256/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64bd024d93301f49cca52194ec8c6edba3e14fbbdeb5091be6443686ccd7275b +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_256/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_256/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..3a5ce045d777a83dede1e5cbf120c6b714591263 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_256/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15c4b82048832400d48c27fc1e4a4b13c873a18f840f97b885d9762606cb166b +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_256/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_256/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..28196616f9639ceb75c6399f945e121e19965948 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_256/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aeb3efd8564ab67a6fbe332a9161406e64249a6abf96f6a9ef33057368ade6c2 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_320/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_320/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..08309e5f427026fc374718aa9c227c2c880f75a9 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_320/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ae6f2f953a2e0134afe27549123ddbea7e47be8e3fed951e1a1b18caeb97309 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_320/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_320/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..2fe479aacc33ba9f4bc05d8b3707894acdf61991 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_320/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64bd024d93301f49cca52194ec8c6edba3e14fbbdeb5091be6443686ccd7275b +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_320/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_320/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..3a5ce045d777a83dede1e5cbf120c6b714591263 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_320/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15c4b82048832400d48c27fc1e4a4b13c873a18f840f97b885d9762606cb166b +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_320/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_320/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..28196616f9639ceb75c6399f945e121e19965948 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_320/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aeb3efd8564ab67a6fbe332a9161406e64249a6abf96f6a9ef33057368ade6c2 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_384/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_384/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..08309e5f427026fc374718aa9c227c2c880f75a9 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_384/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ae6f2f953a2e0134afe27549123ddbea7e47be8e3fed951e1a1b18caeb97309 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_384/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_384/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..2fe479aacc33ba9f4bc05d8b3707894acdf61991 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_384/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64bd024d93301f49cca52194ec8c6edba3e14fbbdeb5091be6443686ccd7275b +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_384/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_384/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..3a5ce045d777a83dede1e5cbf120c6b714591263 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_384/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15c4b82048832400d48c27fc1e4a4b13c873a18f840f97b885d9762606cb166b +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_384/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_384/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..28196616f9639ceb75c6399f945e121e19965948 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_384/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aeb3efd8564ab67a6fbe332a9161406e64249a6abf96f6a9ef33057368ade6c2 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_448/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_448/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..08309e5f427026fc374718aa9c227c2c880f75a9 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_448/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ae6f2f953a2e0134afe27549123ddbea7e47be8e3fed951e1a1b18caeb97309 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_448/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_448/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..2fe479aacc33ba9f4bc05d8b3707894acdf61991 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_448/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64bd024d93301f49cca52194ec8c6edba3e14fbbdeb5091be6443686ccd7275b +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_448/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_448/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..3a5ce045d777a83dede1e5cbf120c6b714591263 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_448/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15c4b82048832400d48c27fc1e4a4b13c873a18f840f97b885d9762606cb166b +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_448/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_448/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..28196616f9639ceb75c6399f945e121e19965948 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_448/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aeb3efd8564ab67a6fbe332a9161406e64249a6abf96f6a9ef33057368ade6c2 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_512/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_512/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..08309e5f427026fc374718aa9c227c2c880f75a9 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_512/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ae6f2f953a2e0134afe27549123ddbea7e47be8e3fed951e1a1b18caeb97309 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_512/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_512/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..2fe479aacc33ba9f4bc05d8b3707894acdf61991 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_512/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64bd024d93301f49cca52194ec8c6edba3e14fbbdeb5091be6443686ccd7275b +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_512/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_512/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..3a5ce045d777a83dede1e5cbf120c6b714591263 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_512/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15c4b82048832400d48c27fc1e4a4b13c873a18f840f97b885d9762606cb166b +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_512/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_512/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..28196616f9639ceb75c6399f945e121e19965948 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_512/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aeb3efd8564ab67a6fbe332a9161406e64249a6abf96f6a9ef33057368ade6c2 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_64/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_64/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..08309e5f427026fc374718aa9c227c2c880f75a9 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_64/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ae6f2f953a2e0134afe27549123ddbea7e47be8e3fed951e1a1b18caeb97309 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_64/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_64/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..2fe479aacc33ba9f4bc05d8b3707894acdf61991 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_64/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64bd024d93301f49cca52194ec8c6edba3e14fbbdeb5091be6443686ccd7275b +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_64/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_64/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..3a5ce045d777a83dede1e5cbf120c6b714591263 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_64/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15c4b82048832400d48c27fc1e4a4b13c873a18f840f97b885d9762606cb166b +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_64/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_64/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..28196616f9639ceb75c6399f945e121e19965948 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-2/global_step_64/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aeb3efd8564ab67a6fbe332a9161406e64249a6abf96f6a9ef33057368ade6c2 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_128/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_128/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..2d83ef23c15da84cd0d6ef37ae0ae1d6a195196b --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_128/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cd6963468df4eb3950191ce3358d33a869d48c424dc07f8f3da843b1e547073 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_128/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_128/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..2ddf6f1fdbd4d70f47c72740c787aed9c1618542 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_128/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9031c7954e9f03c66619337bd6770ff6e86afc214bc7d501266ef8d32acf8ef7 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_128/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_128/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..1b270d52d43c03cc5d9ed760aaea404264af0a54 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_128/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9572c8e08b60b7db34074772c2584142e9797a6f6aa356102b4eb671df33a4ad +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_128/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_128/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..65948257c93cb5c61fef454877125f408441047c --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_128/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90d3b8b8d60ea1c4d3436c336914c04cc633181ea7a2113437b71332e160f3bb +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_192/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_192/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..2d83ef23c15da84cd0d6ef37ae0ae1d6a195196b --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_192/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cd6963468df4eb3950191ce3358d33a869d48c424dc07f8f3da843b1e547073 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_192/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_192/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..2ddf6f1fdbd4d70f47c72740c787aed9c1618542 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_192/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9031c7954e9f03c66619337bd6770ff6e86afc214bc7d501266ef8d32acf8ef7 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_192/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_192/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..1b270d52d43c03cc5d9ed760aaea404264af0a54 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_192/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9572c8e08b60b7db34074772c2584142e9797a6f6aa356102b4eb671df33a4ad +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_192/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_192/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..65948257c93cb5c61fef454877125f408441047c --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_192/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90d3b8b8d60ea1c4d3436c336914c04cc633181ea7a2113437b71332e160f3bb +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_256/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_256/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..2d83ef23c15da84cd0d6ef37ae0ae1d6a195196b --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_256/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cd6963468df4eb3950191ce3358d33a869d48c424dc07f8f3da843b1e547073 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_256/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_256/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..2ddf6f1fdbd4d70f47c72740c787aed9c1618542 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_256/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9031c7954e9f03c66619337bd6770ff6e86afc214bc7d501266ef8d32acf8ef7 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_256/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_256/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..1b270d52d43c03cc5d9ed760aaea404264af0a54 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_256/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9572c8e08b60b7db34074772c2584142e9797a6f6aa356102b4eb671df33a4ad +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_256/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_256/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..65948257c93cb5c61fef454877125f408441047c --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_256/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90d3b8b8d60ea1c4d3436c336914c04cc633181ea7a2113437b71332e160f3bb +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_320/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_320/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..2d83ef23c15da84cd0d6ef37ae0ae1d6a195196b --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_320/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cd6963468df4eb3950191ce3358d33a869d48c424dc07f8f3da843b1e547073 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_320/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_320/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..2ddf6f1fdbd4d70f47c72740c787aed9c1618542 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_320/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9031c7954e9f03c66619337bd6770ff6e86afc214bc7d501266ef8d32acf8ef7 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_320/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_320/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..1b270d52d43c03cc5d9ed760aaea404264af0a54 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_320/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9572c8e08b60b7db34074772c2584142e9797a6f6aa356102b4eb671df33a4ad +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_320/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_320/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..65948257c93cb5c61fef454877125f408441047c --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_320/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90d3b8b8d60ea1c4d3436c336914c04cc633181ea7a2113437b71332e160f3bb +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_384/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_384/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..2d83ef23c15da84cd0d6ef37ae0ae1d6a195196b --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_384/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cd6963468df4eb3950191ce3358d33a869d48c424dc07f8f3da843b1e547073 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_384/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_384/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..2ddf6f1fdbd4d70f47c72740c787aed9c1618542 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_384/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9031c7954e9f03c66619337bd6770ff6e86afc214bc7d501266ef8d32acf8ef7 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_384/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_384/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..1b270d52d43c03cc5d9ed760aaea404264af0a54 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_384/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9572c8e08b60b7db34074772c2584142e9797a6f6aa356102b4eb671df33a4ad +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_384/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_384/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..65948257c93cb5c61fef454877125f408441047c --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_384/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90d3b8b8d60ea1c4d3436c336914c04cc633181ea7a2113437b71332e160f3bb +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_448/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_448/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..2d83ef23c15da84cd0d6ef37ae0ae1d6a195196b --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_448/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cd6963468df4eb3950191ce3358d33a869d48c424dc07f8f3da843b1e547073 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_448/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_448/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..2ddf6f1fdbd4d70f47c72740c787aed9c1618542 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_448/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9031c7954e9f03c66619337bd6770ff6e86afc214bc7d501266ef8d32acf8ef7 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_448/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_448/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..1b270d52d43c03cc5d9ed760aaea404264af0a54 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_448/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9572c8e08b60b7db34074772c2584142e9797a6f6aa356102b4eb671df33a4ad +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_448/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_448/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..65948257c93cb5c61fef454877125f408441047c --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_448/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90d3b8b8d60ea1c4d3436c336914c04cc633181ea7a2113437b71332e160f3bb +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_512/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_512/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..2d83ef23c15da84cd0d6ef37ae0ae1d6a195196b --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_512/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cd6963468df4eb3950191ce3358d33a869d48c424dc07f8f3da843b1e547073 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_512/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_512/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..2ddf6f1fdbd4d70f47c72740c787aed9c1618542 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_512/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9031c7954e9f03c66619337bd6770ff6e86afc214bc7d501266ef8d32acf8ef7 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_512/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_512/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..1b270d52d43c03cc5d9ed760aaea404264af0a54 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_512/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9572c8e08b60b7db34074772c2584142e9797a6f6aa356102b4eb671df33a4ad +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_512/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_512/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..65948257c93cb5c61fef454877125f408441047c --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_512/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90d3b8b8d60ea1c4d3436c336914c04cc633181ea7a2113437b71332e160f3bb +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_64/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_64/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..2d83ef23c15da84cd0d6ef37ae0ae1d6a195196b --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_64/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cd6963468df4eb3950191ce3358d33a869d48c424dc07f8f3da843b1e547073 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_64/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_64/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..2ddf6f1fdbd4d70f47c72740c787aed9c1618542 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_64/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9031c7954e9f03c66619337bd6770ff6e86afc214bc7d501266ef8d32acf8ef7 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_64/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_64/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..1b270d52d43c03cc5d9ed760aaea404264af0a54 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_64/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9572c8e08b60b7db34074772c2584142e9797a6f6aa356102b4eb671df33a4ad +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_64/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_64/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..65948257c93cb5c61fef454877125f408441047c --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-3/global_step_64/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90d3b8b8d60ea1c4d3436c336914c04cc633181ea7a2113437b71332e160f3bb +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_128/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_128/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..3269e6f0c5a2135ee472e38de573fc65b93890b5 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_128/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aece7d6f231589a1ec4f5235a6f7759048a559c38b408b285b164bd013094a4e +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_128/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_128/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..0f9b9180bc8dee899885b451a808195f7bca7ddf --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_128/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba48ea290ffdb230717d1fe569906f069b47924b5e042c29734bc5a58f790ec0 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_128/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_128/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..d78ec2b84cca681d96c49a89ad366238bfd20e9f --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_128/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67c74caba96ac45a61856a0321149cca2cc80fc964fad97ffb33e37972e5978c +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_128/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_128/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..49957f289d2379de47ef1e49aff8a5921e230324 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_128/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d05edef2bdaa268135132c883f4459af025ae5d42ab7d23b18ded44faa74b0b +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_192/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_192/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..3269e6f0c5a2135ee472e38de573fc65b93890b5 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_192/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aece7d6f231589a1ec4f5235a6f7759048a559c38b408b285b164bd013094a4e +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_192/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_192/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..0f9b9180bc8dee899885b451a808195f7bca7ddf --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_192/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba48ea290ffdb230717d1fe569906f069b47924b5e042c29734bc5a58f790ec0 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_192/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_192/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..d78ec2b84cca681d96c49a89ad366238bfd20e9f --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_192/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67c74caba96ac45a61856a0321149cca2cc80fc964fad97ffb33e37972e5978c +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_192/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_192/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..49957f289d2379de47ef1e49aff8a5921e230324 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_192/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d05edef2bdaa268135132c883f4459af025ae5d42ab7d23b18ded44faa74b0b +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_256/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_256/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..3269e6f0c5a2135ee472e38de573fc65b93890b5 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_256/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aece7d6f231589a1ec4f5235a6f7759048a559c38b408b285b164bd013094a4e +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_256/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_256/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..0f9b9180bc8dee899885b451a808195f7bca7ddf --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_256/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba48ea290ffdb230717d1fe569906f069b47924b5e042c29734bc5a58f790ec0 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_256/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_256/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..d78ec2b84cca681d96c49a89ad366238bfd20e9f --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_256/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67c74caba96ac45a61856a0321149cca2cc80fc964fad97ffb33e37972e5978c +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_256/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_256/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..49957f289d2379de47ef1e49aff8a5921e230324 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_256/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d05edef2bdaa268135132c883f4459af025ae5d42ab7d23b18ded44faa74b0b +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_320/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_320/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..3269e6f0c5a2135ee472e38de573fc65b93890b5 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_320/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aece7d6f231589a1ec4f5235a6f7759048a559c38b408b285b164bd013094a4e +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_320/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_320/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..0f9b9180bc8dee899885b451a808195f7bca7ddf --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_320/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba48ea290ffdb230717d1fe569906f069b47924b5e042c29734bc5a58f790ec0 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_320/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_320/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..d78ec2b84cca681d96c49a89ad366238bfd20e9f --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_320/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67c74caba96ac45a61856a0321149cca2cc80fc964fad97ffb33e37972e5978c +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_320/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_320/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..49957f289d2379de47ef1e49aff8a5921e230324 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_320/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d05edef2bdaa268135132c883f4459af025ae5d42ab7d23b18ded44faa74b0b +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_384/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_384/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..3269e6f0c5a2135ee472e38de573fc65b93890b5 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_384/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aece7d6f231589a1ec4f5235a6f7759048a559c38b408b285b164bd013094a4e +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_384/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_384/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..0f9b9180bc8dee899885b451a808195f7bca7ddf --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_384/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba48ea290ffdb230717d1fe569906f069b47924b5e042c29734bc5a58f790ec0 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_384/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_384/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..d78ec2b84cca681d96c49a89ad366238bfd20e9f --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_384/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67c74caba96ac45a61856a0321149cca2cc80fc964fad97ffb33e37972e5978c +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_384/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_384/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..49957f289d2379de47ef1e49aff8a5921e230324 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_384/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d05edef2bdaa268135132c883f4459af025ae5d42ab7d23b18ded44faa74b0b +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_448/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_448/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..3269e6f0c5a2135ee472e38de573fc65b93890b5 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_448/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aece7d6f231589a1ec4f5235a6f7759048a559c38b408b285b164bd013094a4e +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_448/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_448/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..0f9b9180bc8dee899885b451a808195f7bca7ddf --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_448/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba48ea290ffdb230717d1fe569906f069b47924b5e042c29734bc5a58f790ec0 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_448/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_448/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..d78ec2b84cca681d96c49a89ad366238bfd20e9f --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_448/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67c74caba96ac45a61856a0321149cca2cc80fc964fad97ffb33e37972e5978c +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_448/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_448/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..49957f289d2379de47ef1e49aff8a5921e230324 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_448/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d05edef2bdaa268135132c883f4459af025ae5d42ab7d23b18ded44faa74b0b +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_512/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_512/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..3269e6f0c5a2135ee472e38de573fc65b93890b5 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_512/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aece7d6f231589a1ec4f5235a6f7759048a559c38b408b285b164bd013094a4e +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_512/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_512/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..0f9b9180bc8dee899885b451a808195f7bca7ddf --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_512/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba48ea290ffdb230717d1fe569906f069b47924b5e042c29734bc5a58f790ec0 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_512/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_512/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..d78ec2b84cca681d96c49a89ad366238bfd20e9f --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_512/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67c74caba96ac45a61856a0321149cca2cc80fc964fad97ffb33e37972e5978c +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_512/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_512/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..49957f289d2379de47ef1e49aff8a5921e230324 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_512/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d05edef2bdaa268135132c883f4459af025ae5d42ab7d23b18ded44faa74b0b +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_64/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_64/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..3269e6f0c5a2135ee472e38de573fc65b93890b5 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_64/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aece7d6f231589a1ec4f5235a6f7759048a559c38b408b285b164bd013094a4e +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_64/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_64/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..0f9b9180bc8dee899885b451a808195f7bca7ddf --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_64/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba48ea290ffdb230717d1fe569906f069b47924b5e042c29734bc5a58f790ec0 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_64/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_64/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..d78ec2b84cca681d96c49a89ad366238bfd20e9f --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_64/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67c74caba96ac45a61856a0321149cca2cc80fc964fad97ffb33e37972e5978c +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_64/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_64/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..49957f289d2379de47ef1e49aff8a5921e230324 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-1e-4/global_step_64/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d05edef2bdaa268135132c883f4459af025ae5d42ab7d23b18ded44faa74b0b +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_128/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_128/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..c82880e5b3ee9ff5afc8685a78c288fdf152d0d3 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_128/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21a4f2c1d2fe318f79337df36e788b212e0969e9ce6d7507af0d9de7144a7bdd +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_128/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_128/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..d37a3e66b2462517ced286cad74d7b55f8e93008 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_128/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74fbc77625712e3e59ade3aa4ae98acf935c7d333fc5e03d7e9b02ce6c153661 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_128/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_128/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..87e5cb1311c8d76c911e940f661b00c23b5c771d --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_128/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4254f19d405e2c571485764cb421f5d6c312eec3bfeaa1e102f1ffede110d99 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_128/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_128/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..b665c102dc0bc568f42c9d8aaf2423845b8a46ab --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_128/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:836ac7a5d5c56bd4ffe55692157cae0749e61e4b3b4b3fd2ccd39885f7a10100 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_192/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_192/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..c82880e5b3ee9ff5afc8685a78c288fdf152d0d3 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_192/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21a4f2c1d2fe318f79337df36e788b212e0969e9ce6d7507af0d9de7144a7bdd +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_192/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_192/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..d37a3e66b2462517ced286cad74d7b55f8e93008 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_192/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74fbc77625712e3e59ade3aa4ae98acf935c7d333fc5e03d7e9b02ce6c153661 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_192/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_192/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..87e5cb1311c8d76c911e940f661b00c23b5c771d --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_192/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4254f19d405e2c571485764cb421f5d6c312eec3bfeaa1e102f1ffede110d99 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_192/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_192/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..b665c102dc0bc568f42c9d8aaf2423845b8a46ab --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_192/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:836ac7a5d5c56bd4ffe55692157cae0749e61e4b3b4b3fd2ccd39885f7a10100 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_256/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_256/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..c82880e5b3ee9ff5afc8685a78c288fdf152d0d3 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_256/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21a4f2c1d2fe318f79337df36e788b212e0969e9ce6d7507af0d9de7144a7bdd +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_256/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_256/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..d37a3e66b2462517ced286cad74d7b55f8e93008 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_256/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74fbc77625712e3e59ade3aa4ae98acf935c7d333fc5e03d7e9b02ce6c153661 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_256/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_256/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..87e5cb1311c8d76c911e940f661b00c23b5c771d --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_256/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4254f19d405e2c571485764cb421f5d6c312eec3bfeaa1e102f1ffede110d99 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_256/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_256/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..b665c102dc0bc568f42c9d8aaf2423845b8a46ab --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_256/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:836ac7a5d5c56bd4ffe55692157cae0749e61e4b3b4b3fd2ccd39885f7a10100 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_320/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_320/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..c82880e5b3ee9ff5afc8685a78c288fdf152d0d3 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_320/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21a4f2c1d2fe318f79337df36e788b212e0969e9ce6d7507af0d9de7144a7bdd +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_320/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_320/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..d37a3e66b2462517ced286cad74d7b55f8e93008 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_320/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74fbc77625712e3e59ade3aa4ae98acf935c7d333fc5e03d7e9b02ce6c153661 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_320/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_320/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..87e5cb1311c8d76c911e940f661b00c23b5c771d --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_320/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4254f19d405e2c571485764cb421f5d6c312eec3bfeaa1e102f1ffede110d99 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_320/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_320/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..b665c102dc0bc568f42c9d8aaf2423845b8a46ab --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_320/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:836ac7a5d5c56bd4ffe55692157cae0749e61e4b3b4b3fd2ccd39885f7a10100 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_384/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_384/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..c82880e5b3ee9ff5afc8685a78c288fdf152d0d3 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_384/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21a4f2c1d2fe318f79337df36e788b212e0969e9ce6d7507af0d9de7144a7bdd +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_384/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_384/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..d37a3e66b2462517ced286cad74d7b55f8e93008 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_384/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74fbc77625712e3e59ade3aa4ae98acf935c7d333fc5e03d7e9b02ce6c153661 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_384/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_384/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..87e5cb1311c8d76c911e940f661b00c23b5c771d --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_384/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4254f19d405e2c571485764cb421f5d6c312eec3bfeaa1e102f1ffede110d99 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_384/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_384/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..b665c102dc0bc568f42c9d8aaf2423845b8a46ab --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_384/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:836ac7a5d5c56bd4ffe55692157cae0749e61e4b3b4b3fd2ccd39885f7a10100 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_448/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_448/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..c82880e5b3ee9ff5afc8685a78c288fdf152d0d3 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_448/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21a4f2c1d2fe318f79337df36e788b212e0969e9ce6d7507af0d9de7144a7bdd +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_448/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_448/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..d37a3e66b2462517ced286cad74d7b55f8e93008 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_448/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74fbc77625712e3e59ade3aa4ae98acf935c7d333fc5e03d7e9b02ce6c153661 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_448/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_448/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..87e5cb1311c8d76c911e940f661b00c23b5c771d --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_448/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4254f19d405e2c571485764cb421f5d6c312eec3bfeaa1e102f1ffede110d99 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_448/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_448/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..b665c102dc0bc568f42c9d8aaf2423845b8a46ab --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_448/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:836ac7a5d5c56bd4ffe55692157cae0749e61e4b3b4b3fd2ccd39885f7a10100 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_512/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_512/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..c82880e5b3ee9ff5afc8685a78c288fdf152d0d3 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_512/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21a4f2c1d2fe318f79337df36e788b212e0969e9ce6d7507af0d9de7144a7bdd +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_512/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_512/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..d37a3e66b2462517ced286cad74d7b55f8e93008 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_512/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74fbc77625712e3e59ade3aa4ae98acf935c7d333fc5e03d7e9b02ce6c153661 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_512/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_512/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..87e5cb1311c8d76c911e940f661b00c23b5c771d --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_512/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4254f19d405e2c571485764cb421f5d6c312eec3bfeaa1e102f1ffede110d99 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_512/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_512/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..b665c102dc0bc568f42c9d8aaf2423845b8a46ab --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_512/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:836ac7a5d5c56bd4ffe55692157cae0749e61e4b3b4b3fd2ccd39885f7a10100 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_64/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_64/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..c82880e5b3ee9ff5afc8685a78c288fdf152d0d3 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_64/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21a4f2c1d2fe318f79337df36e788b212e0969e9ce6d7507af0d9de7144a7bdd +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_64/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_64/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..d37a3e66b2462517ced286cad74d7b55f8e93008 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_64/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74fbc77625712e3e59ade3aa4ae98acf935c7d333fc5e03d7e9b02ce6c153661 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_64/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_64/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..87e5cb1311c8d76c911e940f661b00c23b5c771d --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_64/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4254f19d405e2c571485764cb421f5d6c312eec3bfeaa1e102f1ffede110d99 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_64/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_64/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..b665c102dc0bc568f42c9d8aaf2423845b8a46ab --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-math-GRPO-SGD-5e-3/global_step_64/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:836ac7a5d5c56bd4ffe55692157cae0749e61e4b3b4b3fd2ccd39885f7a10100 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-2/global_step_128/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-2/global_step_128/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..08309e5f427026fc374718aa9c227c2c880f75a9 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-2/global_step_128/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ae6f2f953a2e0134afe27549123ddbea7e47be8e3fed951e1a1b18caeb97309 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-2/global_step_128/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-2/global_step_128/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..2fe479aacc33ba9f4bc05d8b3707894acdf61991 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-2/global_step_128/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64bd024d93301f49cca52194ec8c6edba3e14fbbdeb5091be6443686ccd7275b +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-2/global_step_128/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-2/global_step_128/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..3a5ce045d777a83dede1e5cbf120c6b714591263 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-2/global_step_128/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15c4b82048832400d48c27fc1e4a4b13c873a18f840f97b885d9762606cb166b +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-2/global_step_128/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-2/global_step_128/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..28196616f9639ceb75c6399f945e121e19965948 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-2/global_step_128/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aeb3efd8564ab67a6fbe332a9161406e64249a6abf96f6a9ef33057368ade6c2 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-2/global_step_192/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-2/global_step_192/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..08309e5f427026fc374718aa9c227c2c880f75a9 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-2/global_step_192/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ae6f2f953a2e0134afe27549123ddbea7e47be8e3fed951e1a1b18caeb97309 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-2/global_step_192/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-2/global_step_192/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..2fe479aacc33ba9f4bc05d8b3707894acdf61991 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-2/global_step_192/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64bd024d93301f49cca52194ec8c6edba3e14fbbdeb5091be6443686ccd7275b +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-2/global_step_192/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-2/global_step_192/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..3a5ce045d777a83dede1e5cbf120c6b714591263 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-2/global_step_192/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15c4b82048832400d48c27fc1e4a4b13c873a18f840f97b885d9762606cb166b +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-2/global_step_192/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-2/global_step_192/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..28196616f9639ceb75c6399f945e121e19965948 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-2/global_step_192/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aeb3efd8564ab67a6fbe332a9161406e64249a6abf96f6a9ef33057368ade6c2 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-2/global_step_64/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-2/global_step_64/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..08309e5f427026fc374718aa9c227c2c880f75a9 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-2/global_step_64/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ae6f2f953a2e0134afe27549123ddbea7e47be8e3fed951e1a1b18caeb97309 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-2/global_step_64/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-2/global_step_64/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..2fe479aacc33ba9f4bc05d8b3707894acdf61991 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-2/global_step_64/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64bd024d93301f49cca52194ec8c6edba3e14fbbdeb5091be6443686ccd7275b +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-2/global_step_64/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-2/global_step_64/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..3a5ce045d777a83dede1e5cbf120c6b714591263 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-2/global_step_64/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15c4b82048832400d48c27fc1e4a4b13c873a18f840f97b885d9762606cb166b +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-2/global_step_64/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-2/global_step_64/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..28196616f9639ceb75c6399f945e121e19965948 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-2/global_step_64/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aeb3efd8564ab67a6fbe332a9161406e64249a6abf96f6a9ef33057368ade6c2 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_128/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_128/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..2d83ef23c15da84cd0d6ef37ae0ae1d6a195196b --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_128/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cd6963468df4eb3950191ce3358d33a869d48c424dc07f8f3da843b1e547073 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_128/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_128/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..2ddf6f1fdbd4d70f47c72740c787aed9c1618542 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_128/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9031c7954e9f03c66619337bd6770ff6e86afc214bc7d501266ef8d32acf8ef7 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_128/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_128/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..1b270d52d43c03cc5d9ed760aaea404264af0a54 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_128/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9572c8e08b60b7db34074772c2584142e9797a6f6aa356102b4eb671df33a4ad +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_128/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_128/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..65948257c93cb5c61fef454877125f408441047c --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_128/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90d3b8b8d60ea1c4d3436c336914c04cc633181ea7a2113437b71332e160f3bb +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_192/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_192/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..2d83ef23c15da84cd0d6ef37ae0ae1d6a195196b --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_192/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cd6963468df4eb3950191ce3358d33a869d48c424dc07f8f3da843b1e547073 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_192/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_192/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..2ddf6f1fdbd4d70f47c72740c787aed9c1618542 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_192/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9031c7954e9f03c66619337bd6770ff6e86afc214bc7d501266ef8d32acf8ef7 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_192/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_192/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..1b270d52d43c03cc5d9ed760aaea404264af0a54 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_192/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9572c8e08b60b7db34074772c2584142e9797a6f6aa356102b4eb671df33a4ad +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_192/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_192/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..65948257c93cb5c61fef454877125f408441047c --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_192/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90d3b8b8d60ea1c4d3436c336914c04cc633181ea7a2113437b71332e160f3bb +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_256/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_256/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..2d83ef23c15da84cd0d6ef37ae0ae1d6a195196b --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_256/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cd6963468df4eb3950191ce3358d33a869d48c424dc07f8f3da843b1e547073 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_256/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_256/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..2ddf6f1fdbd4d70f47c72740c787aed9c1618542 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_256/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9031c7954e9f03c66619337bd6770ff6e86afc214bc7d501266ef8d32acf8ef7 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_256/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_256/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..1b270d52d43c03cc5d9ed760aaea404264af0a54 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_256/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9572c8e08b60b7db34074772c2584142e9797a6f6aa356102b4eb671df33a4ad +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_256/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_256/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..65948257c93cb5c61fef454877125f408441047c --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_256/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90d3b8b8d60ea1c4d3436c336914c04cc633181ea7a2113437b71332e160f3bb +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_320/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_320/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..2d83ef23c15da84cd0d6ef37ae0ae1d6a195196b --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_320/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cd6963468df4eb3950191ce3358d33a869d48c424dc07f8f3da843b1e547073 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_320/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_320/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..2ddf6f1fdbd4d70f47c72740c787aed9c1618542 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_320/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9031c7954e9f03c66619337bd6770ff6e86afc214bc7d501266ef8d32acf8ef7 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_320/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_320/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..1b270d52d43c03cc5d9ed760aaea404264af0a54 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_320/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9572c8e08b60b7db34074772c2584142e9797a6f6aa356102b4eb671df33a4ad +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_320/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_320/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..65948257c93cb5c61fef454877125f408441047c --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_320/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90d3b8b8d60ea1c4d3436c336914c04cc633181ea7a2113437b71332e160f3bb +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_384/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_384/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..2d83ef23c15da84cd0d6ef37ae0ae1d6a195196b --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_384/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cd6963468df4eb3950191ce3358d33a869d48c424dc07f8f3da843b1e547073 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_384/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_384/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..2ddf6f1fdbd4d70f47c72740c787aed9c1618542 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_384/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9031c7954e9f03c66619337bd6770ff6e86afc214bc7d501266ef8d32acf8ef7 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_384/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_384/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..1b270d52d43c03cc5d9ed760aaea404264af0a54 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_384/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9572c8e08b60b7db34074772c2584142e9797a6f6aa356102b4eb671df33a4ad +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_384/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_384/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..65948257c93cb5c61fef454877125f408441047c --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_384/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90d3b8b8d60ea1c4d3436c336914c04cc633181ea7a2113437b71332e160f3bb +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_448/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_448/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..2d83ef23c15da84cd0d6ef37ae0ae1d6a195196b --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_448/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cd6963468df4eb3950191ce3358d33a869d48c424dc07f8f3da843b1e547073 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_448/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_448/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..2ddf6f1fdbd4d70f47c72740c787aed9c1618542 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_448/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9031c7954e9f03c66619337bd6770ff6e86afc214bc7d501266ef8d32acf8ef7 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_448/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_448/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..1b270d52d43c03cc5d9ed760aaea404264af0a54 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_448/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9572c8e08b60b7db34074772c2584142e9797a6f6aa356102b4eb671df33a4ad +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_448/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_448/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..65948257c93cb5c61fef454877125f408441047c --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_448/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90d3b8b8d60ea1c4d3436c336914c04cc633181ea7a2113437b71332e160f3bb +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_512/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_512/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..2d83ef23c15da84cd0d6ef37ae0ae1d6a195196b --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_512/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cd6963468df4eb3950191ce3358d33a869d48c424dc07f8f3da843b1e547073 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_512/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_512/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..2ddf6f1fdbd4d70f47c72740c787aed9c1618542 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_512/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9031c7954e9f03c66619337bd6770ff6e86afc214bc7d501266ef8d32acf8ef7 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_512/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_512/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..1b270d52d43c03cc5d9ed760aaea404264af0a54 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_512/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9572c8e08b60b7db34074772c2584142e9797a6f6aa356102b4eb671df33a4ad +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_512/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_512/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..65948257c93cb5c61fef454877125f408441047c --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_512/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90d3b8b8d60ea1c4d3436c336914c04cc633181ea7a2113437b71332e160f3bb +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_64/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_64/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..2d83ef23c15da84cd0d6ef37ae0ae1d6a195196b --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_64/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cd6963468df4eb3950191ce3358d33a869d48c424dc07f8f3da843b1e547073 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_64/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_64/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..2ddf6f1fdbd4d70f47c72740c787aed9c1618542 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_64/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9031c7954e9f03c66619337bd6770ff6e86afc214bc7d501266ef8d32acf8ef7 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_64/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_64/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..1b270d52d43c03cc5d9ed760aaea404264af0a54 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_64/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9572c8e08b60b7db34074772c2584142e9797a6f6aa356102b4eb671df33a4ad +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_64/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_64/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..65948257c93cb5c61fef454877125f408441047c --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-3/global_step_64/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90d3b8b8d60ea1c4d3436c336914c04cc633181ea7a2113437b71332e160f3bb +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_128/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_128/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..3269e6f0c5a2135ee472e38de573fc65b93890b5 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_128/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aece7d6f231589a1ec4f5235a6f7759048a559c38b408b285b164bd013094a4e +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_128/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_128/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..0f9b9180bc8dee899885b451a808195f7bca7ddf --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_128/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba48ea290ffdb230717d1fe569906f069b47924b5e042c29734bc5a58f790ec0 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_128/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_128/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..d78ec2b84cca681d96c49a89ad366238bfd20e9f --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_128/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67c74caba96ac45a61856a0321149cca2cc80fc964fad97ffb33e37972e5978c +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_128/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_128/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..49957f289d2379de47ef1e49aff8a5921e230324 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_128/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d05edef2bdaa268135132c883f4459af025ae5d42ab7d23b18ded44faa74b0b +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_192/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_192/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..3269e6f0c5a2135ee472e38de573fc65b93890b5 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_192/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aece7d6f231589a1ec4f5235a6f7759048a559c38b408b285b164bd013094a4e +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_192/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_192/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..0f9b9180bc8dee899885b451a808195f7bca7ddf --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_192/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba48ea290ffdb230717d1fe569906f069b47924b5e042c29734bc5a58f790ec0 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_192/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_192/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..d78ec2b84cca681d96c49a89ad366238bfd20e9f --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_192/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67c74caba96ac45a61856a0321149cca2cc80fc964fad97ffb33e37972e5978c +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_192/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_192/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..49957f289d2379de47ef1e49aff8a5921e230324 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_192/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d05edef2bdaa268135132c883f4459af025ae5d42ab7d23b18ded44faa74b0b +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_256/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_256/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..3269e6f0c5a2135ee472e38de573fc65b93890b5 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_256/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aece7d6f231589a1ec4f5235a6f7759048a559c38b408b285b164bd013094a4e +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_256/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_256/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..0f9b9180bc8dee899885b451a808195f7bca7ddf --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_256/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba48ea290ffdb230717d1fe569906f069b47924b5e042c29734bc5a58f790ec0 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_256/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_256/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..d78ec2b84cca681d96c49a89ad366238bfd20e9f --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_256/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67c74caba96ac45a61856a0321149cca2cc80fc964fad97ffb33e37972e5978c +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_256/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_256/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..49957f289d2379de47ef1e49aff8a5921e230324 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_256/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d05edef2bdaa268135132c883f4459af025ae5d42ab7d23b18ded44faa74b0b +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_320/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_320/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..3269e6f0c5a2135ee472e38de573fc65b93890b5 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_320/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aece7d6f231589a1ec4f5235a6f7759048a559c38b408b285b164bd013094a4e +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_320/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_320/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..0f9b9180bc8dee899885b451a808195f7bca7ddf --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_320/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba48ea290ffdb230717d1fe569906f069b47924b5e042c29734bc5a58f790ec0 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_320/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_320/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..d78ec2b84cca681d96c49a89ad366238bfd20e9f --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_320/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67c74caba96ac45a61856a0321149cca2cc80fc964fad97ffb33e37972e5978c +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_320/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_320/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..49957f289d2379de47ef1e49aff8a5921e230324 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_320/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d05edef2bdaa268135132c883f4459af025ae5d42ab7d23b18ded44faa74b0b +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_384/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_384/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..3269e6f0c5a2135ee472e38de573fc65b93890b5 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_384/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aece7d6f231589a1ec4f5235a6f7759048a559c38b408b285b164bd013094a4e +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_384/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_384/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..0f9b9180bc8dee899885b451a808195f7bca7ddf --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_384/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba48ea290ffdb230717d1fe569906f069b47924b5e042c29734bc5a58f790ec0 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_384/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_384/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..d78ec2b84cca681d96c49a89ad366238bfd20e9f --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_384/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67c74caba96ac45a61856a0321149cca2cc80fc964fad97ffb33e37972e5978c +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_384/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_384/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..49957f289d2379de47ef1e49aff8a5921e230324 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_384/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d05edef2bdaa268135132c883f4459af025ae5d42ab7d23b18ded44faa74b0b +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_448/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_448/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..3269e6f0c5a2135ee472e38de573fc65b93890b5 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_448/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aece7d6f231589a1ec4f5235a6f7759048a559c38b408b285b164bd013094a4e +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_448/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_448/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..0f9b9180bc8dee899885b451a808195f7bca7ddf --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_448/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba48ea290ffdb230717d1fe569906f069b47924b5e042c29734bc5a58f790ec0 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_448/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_448/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..d78ec2b84cca681d96c49a89ad366238bfd20e9f --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_448/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67c74caba96ac45a61856a0321149cca2cc80fc964fad97ffb33e37972e5978c +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_448/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_448/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..49957f289d2379de47ef1e49aff8a5921e230324 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_448/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d05edef2bdaa268135132c883f4459af025ae5d42ab7d23b18ded44faa74b0b +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_512/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_512/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..3269e6f0c5a2135ee472e38de573fc65b93890b5 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_512/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aece7d6f231589a1ec4f5235a6f7759048a559c38b408b285b164bd013094a4e +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_512/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_512/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..0f9b9180bc8dee899885b451a808195f7bca7ddf --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_512/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba48ea290ffdb230717d1fe569906f069b47924b5e042c29734bc5a58f790ec0 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_512/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_512/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..d78ec2b84cca681d96c49a89ad366238bfd20e9f --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_512/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67c74caba96ac45a61856a0321149cca2cc80fc964fad97ffb33e37972e5978c +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_512/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_512/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..49957f289d2379de47ef1e49aff8a5921e230324 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_512/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d05edef2bdaa268135132c883f4459af025ae5d42ab7d23b18ded44faa74b0b +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_64/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_64/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..3269e6f0c5a2135ee472e38de573fc65b93890b5 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_64/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aece7d6f231589a1ec4f5235a6f7759048a559c38b408b285b164bd013094a4e +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_64/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_64/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..0f9b9180bc8dee899885b451a808195f7bca7ddf --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_64/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba48ea290ffdb230717d1fe569906f069b47924b5e042c29734bc5a58f790ec0 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_64/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_64/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..d78ec2b84cca681d96c49a89ad366238bfd20e9f --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_64/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67c74caba96ac45a61856a0321149cca2cc80fc964fad97ffb33e37972e5978c +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_64/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_64/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..49957f289d2379de47ef1e49aff8a5921e230324 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-1e-4/global_step_64/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d05edef2bdaa268135132c883f4459af025ae5d42ab7d23b18ded44faa74b0b +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_128/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_128/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..c82880e5b3ee9ff5afc8685a78c288fdf152d0d3 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_128/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21a4f2c1d2fe318f79337df36e788b212e0969e9ce6d7507af0d9de7144a7bdd +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_128/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_128/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..d37a3e66b2462517ced286cad74d7b55f8e93008 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_128/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74fbc77625712e3e59ade3aa4ae98acf935c7d333fc5e03d7e9b02ce6c153661 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_128/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_128/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..87e5cb1311c8d76c911e940f661b00c23b5c771d --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_128/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4254f19d405e2c571485764cb421f5d6c312eec3bfeaa1e102f1ffede110d99 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_128/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_128/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..b665c102dc0bc568f42c9d8aaf2423845b8a46ab --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_128/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:836ac7a5d5c56bd4ffe55692157cae0749e61e4b3b4b3fd2ccd39885f7a10100 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_192/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_192/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..c82880e5b3ee9ff5afc8685a78c288fdf152d0d3 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_192/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21a4f2c1d2fe318f79337df36e788b212e0969e9ce6d7507af0d9de7144a7bdd +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_192/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_192/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..d37a3e66b2462517ced286cad74d7b55f8e93008 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_192/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74fbc77625712e3e59ade3aa4ae98acf935c7d333fc5e03d7e9b02ce6c153661 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_192/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_192/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..87e5cb1311c8d76c911e940f661b00c23b5c771d --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_192/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4254f19d405e2c571485764cb421f5d6c312eec3bfeaa1e102f1ffede110d99 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_192/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_192/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..b665c102dc0bc568f42c9d8aaf2423845b8a46ab --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_192/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:836ac7a5d5c56bd4ffe55692157cae0749e61e4b3b4b3fd2ccd39885f7a10100 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_256/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_256/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..c82880e5b3ee9ff5afc8685a78c288fdf152d0d3 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_256/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21a4f2c1d2fe318f79337df36e788b212e0969e9ce6d7507af0d9de7144a7bdd +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_256/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_256/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..d37a3e66b2462517ced286cad74d7b55f8e93008 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_256/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74fbc77625712e3e59ade3aa4ae98acf935c7d333fc5e03d7e9b02ce6c153661 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_256/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_256/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..87e5cb1311c8d76c911e940f661b00c23b5c771d --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_256/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4254f19d405e2c571485764cb421f5d6c312eec3bfeaa1e102f1ffede110d99 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_256/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_256/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..b665c102dc0bc568f42c9d8aaf2423845b8a46ab --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_256/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:836ac7a5d5c56bd4ffe55692157cae0749e61e4b3b4b3fd2ccd39885f7a10100 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_320/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_320/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..c82880e5b3ee9ff5afc8685a78c288fdf152d0d3 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_320/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21a4f2c1d2fe318f79337df36e788b212e0969e9ce6d7507af0d9de7144a7bdd +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_320/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_320/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..d37a3e66b2462517ced286cad74d7b55f8e93008 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_320/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74fbc77625712e3e59ade3aa4ae98acf935c7d333fc5e03d7e9b02ce6c153661 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_320/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_320/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..87e5cb1311c8d76c911e940f661b00c23b5c771d --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_320/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4254f19d405e2c571485764cb421f5d6c312eec3bfeaa1e102f1ffede110d99 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_320/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_320/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..b665c102dc0bc568f42c9d8aaf2423845b8a46ab --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_320/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:836ac7a5d5c56bd4ffe55692157cae0749e61e4b3b4b3fd2ccd39885f7a10100 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_384/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_384/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..c82880e5b3ee9ff5afc8685a78c288fdf152d0d3 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_384/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21a4f2c1d2fe318f79337df36e788b212e0969e9ce6d7507af0d9de7144a7bdd +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_384/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_384/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..d37a3e66b2462517ced286cad74d7b55f8e93008 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_384/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74fbc77625712e3e59ade3aa4ae98acf935c7d333fc5e03d7e9b02ce6c153661 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_384/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_384/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..87e5cb1311c8d76c911e940f661b00c23b5c771d --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_384/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4254f19d405e2c571485764cb421f5d6c312eec3bfeaa1e102f1ffede110d99 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_384/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_384/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..b665c102dc0bc568f42c9d8aaf2423845b8a46ab --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_384/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:836ac7a5d5c56bd4ffe55692157cae0749e61e4b3b4b3fd2ccd39885f7a10100 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_448/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_448/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..c82880e5b3ee9ff5afc8685a78c288fdf152d0d3 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_448/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21a4f2c1d2fe318f79337df36e788b212e0969e9ce6d7507af0d9de7144a7bdd +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_448/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_448/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..d37a3e66b2462517ced286cad74d7b55f8e93008 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_448/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74fbc77625712e3e59ade3aa4ae98acf935c7d333fc5e03d7e9b02ce6c153661 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_448/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_448/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..87e5cb1311c8d76c911e940f661b00c23b5c771d --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_448/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4254f19d405e2c571485764cb421f5d6c312eec3bfeaa1e102f1ffede110d99 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_448/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_448/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..b665c102dc0bc568f42c9d8aaf2423845b8a46ab --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_448/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:836ac7a5d5c56bd4ffe55692157cae0749e61e4b3b4b3fd2ccd39885f7a10100 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_512/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_512/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..c82880e5b3ee9ff5afc8685a78c288fdf152d0d3 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_512/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21a4f2c1d2fe318f79337df36e788b212e0969e9ce6d7507af0d9de7144a7bdd +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_512/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_512/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..d37a3e66b2462517ced286cad74d7b55f8e93008 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_512/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74fbc77625712e3e59ade3aa4ae98acf935c7d333fc5e03d7e9b02ce6c153661 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_512/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_512/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..87e5cb1311c8d76c911e940f661b00c23b5c771d --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_512/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4254f19d405e2c571485764cb421f5d6c312eec3bfeaa1e102f1ffede110d99 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_512/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_512/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..b665c102dc0bc568f42c9d8aaf2423845b8a46ab --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_512/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:836ac7a5d5c56bd4ffe55692157cae0749e61e4b3b4b3fd2ccd39885f7a10100 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_64/actor/optim_world_size_4_rank_0.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_64/actor/optim_world_size_4_rank_0.pt new file mode 100644 index 0000000000000000000000000000000000000000..c82880e5b3ee9ff5afc8685a78c288fdf152d0d3 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_64/actor/optim_world_size_4_rank_0.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21a4f2c1d2fe318f79337df36e788b212e0969e9ce6d7507af0d9de7144a7bdd +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_64/actor/optim_world_size_4_rank_1.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_64/actor/optim_world_size_4_rank_1.pt new file mode 100644 index 0000000000000000000000000000000000000000..d37a3e66b2462517ced286cad74d7b55f8e93008 --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_64/actor/optim_world_size_4_rank_1.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74fbc77625712e3e59ade3aa4ae98acf935c7d333fc5e03d7e9b02ce6c153661 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_64/actor/optim_world_size_4_rank_2.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_64/actor/optim_world_size_4_rank_2.pt new file mode 100644 index 0000000000000000000000000000000000000000..87e5cb1311c8d76c911e940f661b00c23b5c771d --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_64/actor/optim_world_size_4_rank_2.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4254f19d405e2c571485764cb421f5d6c312eec3bfeaa1e102f1ffede110d99 +size 2713 diff --git a/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_64/actor/optim_world_size_4_rank_3.pt b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_64/actor/optim_world_size_4_rank_3.pt new file mode 100644 index 0000000000000000000000000000000000000000..b665c102dc0bc568f42c9d8aaf2423845b8a46ab --- /dev/null +++ b/qwen-3b-lora/Qwen/Qwen2.5-3B-Instruct-polaris-GRPO-SGD-5e-3/global_step_64/actor/optim_world_size_4_rank_3.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:836ac7a5d5c56bd4ffe55692157cae0749e61e4b3b4b3fd2ccd39885f7a10100 +size 2713