Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/extra_state_world_size_4_rank_0.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/extra_state_world_size_4_rank_1.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/extra_state_world_size_4_rank_2.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/extra_state_world_size_4_rank_3.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/fsdp_config.json +4 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/huggingface/config.json +27 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/huggingface/generation_config.json +6 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/huggingface/special_tokens_map.json +24 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/huggingface/tokenizer.json +0 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/huggingface/tokenizer.model +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/huggingface/tokenizer_config.json +43 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/model_world_size_4_rank_0.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/model_world_size_4_rank_1.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/model_world_size_4_rank_2.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/model_world_size_4_rank_3.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/optim_world_size_4_rank_0.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/optim_world_size_4_rank_1.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/optim_world_size_4_rank_2.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/optim_world_size_4_rank_3.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/data.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/extra_state_world_size_4_rank_0.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/extra_state_world_size_4_rank_1.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/extra_state_world_size_4_rank_2.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/extra_state_world_size_4_rank_3.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/fsdp_config.json +4 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/huggingface/config.json +27 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/huggingface/generation_config.json +6 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/huggingface/special_tokens_map.json +24 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/huggingface/tokenizer.json +0 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/huggingface/tokenizer.model +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/huggingface/tokenizer_config.json +43 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/model_world_size_4_rank_0.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/model_world_size_4_rank_1.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/model_world_size_4_rank_2.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/model_world_size_4_rank_3.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/optim_world_size_4_rank_0.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/optim_world_size_4_rank_1.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/optim_world_size_4_rank_2.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/optim_world_size_4_rank_3.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/data.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_500/actor/extra_state_world_size_4_rank_0.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_500/actor/extra_state_world_size_4_rank_1.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_500/actor/extra_state_world_size_4_rank_2.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_500/actor/extra_state_world_size_4_rank_3.pt +3 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_500/actor/fsdp_config.json +4 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_500/actor/huggingface/config.json +27 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_500/actor/huggingface/generation_config.json +6 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_500/actor/huggingface/special_tokens_map.json +24 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_500/actor/huggingface/tokenizer.json +0 -0
- models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_500/actor/huggingface/tokenizer.model +3 -0
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/extra_state_world_size_4_rank_0.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:15e52594085f2834c419cad4c2e090ca52dcd6f44aaf0f85964e0dfccad7f3e9
|
| 3 |
+
size 14632
|
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/extra_state_world_size_4_rank_1.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ced4b0873f5bff0d22a9d07881c350edc848fc69c539714f7c560a626cf2538b
|
| 3 |
+
size 14632
|
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/extra_state_world_size_4_rank_2.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cf1646cbd6b054443bee01d29bcff790fcfd073b96136d65907c6ca5a0b00310
|
| 3 |
+
size 14632
|
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/extra_state_world_size_4_rank_3.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c4b054797c3d8cae7ba7c726a492bc8f99f7921a3c41e23972aad8b6dce519d8
|
| 3 |
+
size 14632
|
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/fsdp_config.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"FSDP_version": 2,
|
| 3 |
+
"world_size": 4
|
| 4 |
+
}
|
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/huggingface/config.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"OlmoForCausalLM"
|
| 4 |
+
],
|
| 5 |
+
"attention_bias": false,
|
| 6 |
+
"attention_dropout": 0.0,
|
| 7 |
+
"bos_token_id": 1,
|
| 8 |
+
"clip_qkv": null,
|
| 9 |
+
"dtype": "float32",
|
| 10 |
+
"eos_token_id": 2,
|
| 11 |
+
"hidden_act": "silu",
|
| 12 |
+
"hidden_size": 2048,
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 8192,
|
| 15 |
+
"max_position_embeddings": 2048,
|
| 16 |
+
"model_type": "olmo",
|
| 17 |
+
"num_attention_heads": 16,
|
| 18 |
+
"num_hidden_layers": 16,
|
| 19 |
+
"num_key_value_heads": 16,
|
| 20 |
+
"pad_token_id": 2,
|
| 21 |
+
"rope_scaling": null,
|
| 22 |
+
"rope_theta": 10000.0,
|
| 23 |
+
"tie_word_embeddings": true,
|
| 24 |
+
"transformers_version": "4.57.3",
|
| 25 |
+
"use_cache": true,
|
| 26 |
+
"vocab_size": 32000
|
| 27 |
+
}
|
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/huggingface/generation_config.json
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_from_model_config": true,
|
| 3 |
+
"eos_token_id": 50279,
|
| 4 |
+
"pad_token_id": 1,
|
| 5 |
+
"transformers_version": "4.57.3"
|
| 6 |
+
}
|
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/huggingface/special_tokens_map.json
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": {
|
| 3 |
+
"content": "<s>",
|
| 4 |
+
"lstrip": false,
|
| 5 |
+
"normalized": false,
|
| 6 |
+
"rstrip": false,
|
| 7 |
+
"single_word": false
|
| 8 |
+
},
|
| 9 |
+
"eos_token": {
|
| 10 |
+
"content": "</s>",
|
| 11 |
+
"lstrip": false,
|
| 12 |
+
"normalized": false,
|
| 13 |
+
"rstrip": false,
|
| 14 |
+
"single_word": false
|
| 15 |
+
},
|
| 16 |
+
"pad_token": "</s>",
|
| 17 |
+
"unk_token": {
|
| 18 |
+
"content": "<unk>",
|
| 19 |
+
"lstrip": false,
|
| 20 |
+
"normalized": false,
|
| 21 |
+
"rstrip": false,
|
| 22 |
+
"single_word": false
|
| 23 |
+
}
|
| 24 |
+
}
|
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/huggingface/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/huggingface/tokenizer.model
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
|
| 3 |
+
size 499723
|
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/huggingface/tokenizer_config.json
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_bos_token": true,
|
| 3 |
+
"add_eos_token": false,
|
| 4 |
+
"add_prefix_space": null,
|
| 5 |
+
"added_tokens_decoder": {
|
| 6 |
+
"0": {
|
| 7 |
+
"content": "<unk>",
|
| 8 |
+
"lstrip": false,
|
| 9 |
+
"normalized": false,
|
| 10 |
+
"rstrip": false,
|
| 11 |
+
"single_word": false,
|
| 12 |
+
"special": true
|
| 13 |
+
},
|
| 14 |
+
"1": {
|
| 15 |
+
"content": "<s>",
|
| 16 |
+
"lstrip": false,
|
| 17 |
+
"normalized": false,
|
| 18 |
+
"rstrip": false,
|
| 19 |
+
"single_word": false,
|
| 20 |
+
"special": true
|
| 21 |
+
},
|
| 22 |
+
"2": {
|
| 23 |
+
"content": "</s>",
|
| 24 |
+
"lstrip": false,
|
| 25 |
+
"normalized": false,
|
| 26 |
+
"rstrip": false,
|
| 27 |
+
"single_word": false,
|
| 28 |
+
"special": true
|
| 29 |
+
}
|
| 30 |
+
},
|
| 31 |
+
"bos_token": "<s>",
|
| 32 |
+
"clean_up_tokenization_spaces": false,
|
| 33 |
+
"eos_token": "</s>",
|
| 34 |
+
"extra_special_tokens": {},
|
| 35 |
+
"legacy": false,
|
| 36 |
+
"model_max_length": 1000000000000000019884624838656,
|
| 37 |
+
"pad_token": "</s>",
|
| 38 |
+
"padding_side": "right",
|
| 39 |
+
"sp_model_kwargs": {},
|
| 40 |
+
"tokenizer_class": "LlamaTokenizer",
|
| 41 |
+
"unk_token": "<unk>",
|
| 42 |
+
"use_default_system_prompt": false
|
| 43 |
+
}
|
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/model_world_size_4_rank_0.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a3b63b67daea91f208ecf5b1dc3191e918a5a45ef036cfedf5d462af55857424
|
| 3 |
+
size 1139342746
|
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/model_world_size_4_rank_1.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:25263959fe3fba722e12ed174353768fd717f98923ff83d9bc2a9249d9a4f929
|
| 3 |
+
size 1139342746
|
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/model_world_size_4_rank_2.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:14555b6f6dc160df9d8d04c30492de8319e01da0330982652b94d1670fc6b828
|
| 3 |
+
size 1139342746
|
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/model_world_size_4_rank_3.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e50c5dc10c55bed84102e1c1c75f9b067000a4b2815fcf581f0bbf699bdee9b5
|
| 3 |
+
size 1139342746
|
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/optim_world_size_4_rank_0.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:475873b866d94a2096a4c55136ae833069514f365efced488689479e84da45da
|
| 3 |
+
size 2278786874
|
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/optim_world_size_4_rank_1.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2b5fc9fdf1233c62ff80b42a06f54add91fea552b0f609709b1d4a51634d3f35
|
| 3 |
+
size 2278786874
|
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/optim_world_size_4_rank_2.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0f8d2f7ea6d983b1ecac4056cda59bb78af9d27b8ad3bb1ef295834d68a3bf6f
|
| 3 |
+
size 2278786874
|
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/optim_world_size_4_rank_3.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:323007cc2e5bfc1dd59008e8f64c9f7d831b9e527a307d76e9632a70b659931d
|
| 3 |
+
size 2278786874
|
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/data.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6c796d145decbe527c0837a07586b707ff00f51b007caefa06c58c9e772f9af7
|
| 3 |
+
size 1492
|
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/extra_state_world_size_4_rank_0.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eaf702286540c18b6fa73179bddf04d6dfbf098439eea6849033528709e84367
|
| 3 |
+
size 14632
|
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/extra_state_world_size_4_rank_1.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ed16f9149bcbf3db8218004285f9e3edafa1be1e66092e39d8b5e7603a923303
|
| 3 |
+
size 14632
|
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/extra_state_world_size_4_rank_2.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c154864129db82ca383b05ca4facd6fb144a12b2b857c053946574c5b9e6fc7a
|
| 3 |
+
size 14632
|
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/extra_state_world_size_4_rank_3.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:01e573691c0a9182271df11cc6e9eafb4366c13d37f656d511cbe1bd2f0834a7
|
| 3 |
+
size 14632
|
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/fsdp_config.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"FSDP_version": 2,
|
| 3 |
+
"world_size": 4
|
| 4 |
+
}
|
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/huggingface/config.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"OlmoForCausalLM"
|
| 4 |
+
],
|
| 5 |
+
"attention_bias": false,
|
| 6 |
+
"attention_dropout": 0.0,
|
| 7 |
+
"bos_token_id": 1,
|
| 8 |
+
"clip_qkv": null,
|
| 9 |
+
"dtype": "float32",
|
| 10 |
+
"eos_token_id": 2,
|
| 11 |
+
"hidden_act": "silu",
|
| 12 |
+
"hidden_size": 2048,
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 8192,
|
| 15 |
+
"max_position_embeddings": 2048,
|
| 16 |
+
"model_type": "olmo",
|
| 17 |
+
"num_attention_heads": 16,
|
| 18 |
+
"num_hidden_layers": 16,
|
| 19 |
+
"num_key_value_heads": 16,
|
| 20 |
+
"pad_token_id": 2,
|
| 21 |
+
"rope_scaling": null,
|
| 22 |
+
"rope_theta": 10000.0,
|
| 23 |
+
"tie_word_embeddings": true,
|
| 24 |
+
"transformers_version": "4.57.3",
|
| 25 |
+
"use_cache": true,
|
| 26 |
+
"vocab_size": 32000
|
| 27 |
+
}
|
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/huggingface/generation_config.json
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_from_model_config": true,
|
| 3 |
+
"eos_token_id": 50279,
|
| 4 |
+
"pad_token_id": 1,
|
| 5 |
+
"transformers_version": "4.57.3"
|
| 6 |
+
}
|
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/huggingface/special_tokens_map.json
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": {
|
| 3 |
+
"content": "<s>",
|
| 4 |
+
"lstrip": false,
|
| 5 |
+
"normalized": false,
|
| 6 |
+
"rstrip": false,
|
| 7 |
+
"single_word": false
|
| 8 |
+
},
|
| 9 |
+
"eos_token": {
|
| 10 |
+
"content": "</s>",
|
| 11 |
+
"lstrip": false,
|
| 12 |
+
"normalized": false,
|
| 13 |
+
"rstrip": false,
|
| 14 |
+
"single_word": false
|
| 15 |
+
},
|
| 16 |
+
"pad_token": "</s>",
|
| 17 |
+
"unk_token": {
|
| 18 |
+
"content": "<unk>",
|
| 19 |
+
"lstrip": false,
|
| 20 |
+
"normalized": false,
|
| 21 |
+
"rstrip": false,
|
| 22 |
+
"single_word": false
|
| 23 |
+
}
|
| 24 |
+
}
|
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/huggingface/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/huggingface/tokenizer.model
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
|
| 3 |
+
size 499723
|
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/huggingface/tokenizer_config.json
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_bos_token": true,
|
| 3 |
+
"add_eos_token": false,
|
| 4 |
+
"add_prefix_space": null,
|
| 5 |
+
"added_tokens_decoder": {
|
| 6 |
+
"0": {
|
| 7 |
+
"content": "<unk>",
|
| 8 |
+
"lstrip": false,
|
| 9 |
+
"normalized": false,
|
| 10 |
+
"rstrip": false,
|
| 11 |
+
"single_word": false,
|
| 12 |
+
"special": true
|
| 13 |
+
},
|
| 14 |
+
"1": {
|
| 15 |
+
"content": "<s>",
|
| 16 |
+
"lstrip": false,
|
| 17 |
+
"normalized": false,
|
| 18 |
+
"rstrip": false,
|
| 19 |
+
"single_word": false,
|
| 20 |
+
"special": true
|
| 21 |
+
},
|
| 22 |
+
"2": {
|
| 23 |
+
"content": "</s>",
|
| 24 |
+
"lstrip": false,
|
| 25 |
+
"normalized": false,
|
| 26 |
+
"rstrip": false,
|
| 27 |
+
"single_word": false,
|
| 28 |
+
"special": true
|
| 29 |
+
}
|
| 30 |
+
},
|
| 31 |
+
"bos_token": "<s>",
|
| 32 |
+
"clean_up_tokenization_spaces": false,
|
| 33 |
+
"eos_token": "</s>",
|
| 34 |
+
"extra_special_tokens": {},
|
| 35 |
+
"legacy": false,
|
| 36 |
+
"model_max_length": 1000000000000000019884624838656,
|
| 37 |
+
"pad_token": "</s>",
|
| 38 |
+
"padding_side": "right",
|
| 39 |
+
"sp_model_kwargs": {},
|
| 40 |
+
"tokenizer_class": "LlamaTokenizer",
|
| 41 |
+
"unk_token": "<unk>",
|
| 42 |
+
"use_default_system_prompt": false
|
| 43 |
+
}
|
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/model_world_size_4_rank_0.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3de66b369a44f40f7fe23968ea23896c7c0a45d431a8ccfab9dce99394f093b5
|
| 3 |
+
size 1139342746
|
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/model_world_size_4_rank_1.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0a503c537d71fe133ea1003713ea6359c132b7e16c3ea03071741d6b6deb1185
|
| 3 |
+
size 1139342746
|
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/model_world_size_4_rank_2.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a04c44282a47e6a99e42fcc4857437ee43f975a130705a34c77310293434faeb
|
| 3 |
+
size 1139342746
|
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/model_world_size_4_rank_3.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:437b146a5039dfc25aa9852dd4c18d51e585e09db4208f0f4e7dc61d3ad8c57f
|
| 3 |
+
size 1139342746
|
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/optim_world_size_4_rank_0.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6a9d24966d7d56c0ecf75f82c2186913d427b3f74339ddea0e749e8fe9204e26
|
| 3 |
+
size 2278786874
|
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/optim_world_size_4_rank_1.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:35eb5a18b072f783a1e160307f28e2932f3c2902211129d16b18b7ba9900b5d4
|
| 3 |
+
size 2278786874
|
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/optim_world_size_4_rank_2.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7c2283cf1776d6c0eb3d4d61502fd09e5f1f78cfa6e210d07f31f10c69f04f63
|
| 3 |
+
size 2278786874
|
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/optim_world_size_4_rank_3.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d49d5110fed1dc5c9a694fc04067624bf42faf5240d484cfb424af6c759fb957
|
| 3 |
+
size 2278786874
|
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/data.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2182280655cacb51b3535095d34ff488c2a59de5928aa64e382cf29090ee1e83
|
| 3 |
+
size 1492
|
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_500/actor/extra_state_world_size_4_rank_0.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6cdbfdfac642bab167b124d8505210c8fc1947ce4481e8b5241cc4e53356005a
|
| 3 |
+
size 14632
|
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_500/actor/extra_state_world_size_4_rank_1.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ed08f5edcc4eb4e3e45cf52e13417ba503ffcf0e49ea1ec01dbc83888632fa29
|
| 3 |
+
size 14632
|
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_500/actor/extra_state_world_size_4_rank_2.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b1c09733dece55d4db1c9da26f25368d2a712b7347dfb0914966f0a81de2a269
|
| 3 |
+
size 14632
|
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_500/actor/extra_state_world_size_4_rank_3.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4a31c37f2130e6848550f37ca2b66ca664598332fe410912906c9e59eeb6423e
|
| 3 |
+
size 14632
|
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_500/actor/fsdp_config.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"FSDP_version": 2,
|
| 3 |
+
"world_size": 4
|
| 4 |
+
}
|
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_500/actor/huggingface/config.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"OlmoForCausalLM"
|
| 4 |
+
],
|
| 5 |
+
"attention_bias": false,
|
| 6 |
+
"attention_dropout": 0.0,
|
| 7 |
+
"bos_token_id": 1,
|
| 8 |
+
"clip_qkv": null,
|
| 9 |
+
"dtype": "float32",
|
| 10 |
+
"eos_token_id": 2,
|
| 11 |
+
"hidden_act": "silu",
|
| 12 |
+
"hidden_size": 2048,
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 8192,
|
| 15 |
+
"max_position_embeddings": 2048,
|
| 16 |
+
"model_type": "olmo",
|
| 17 |
+
"num_attention_heads": 16,
|
| 18 |
+
"num_hidden_layers": 16,
|
| 19 |
+
"num_key_value_heads": 16,
|
| 20 |
+
"pad_token_id": 2,
|
| 21 |
+
"rope_scaling": null,
|
| 22 |
+
"rope_theta": 10000.0,
|
| 23 |
+
"tie_word_embeddings": true,
|
| 24 |
+
"transformers_version": "4.57.3",
|
| 25 |
+
"use_cache": true,
|
| 26 |
+
"vocab_size": 32000
|
| 27 |
+
}
|
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_500/actor/huggingface/generation_config.json
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_from_model_config": true,
|
| 3 |
+
"eos_token_id": 50279,
|
| 4 |
+
"pad_token_id": 1,
|
| 5 |
+
"transformers_version": "4.57.3"
|
| 6 |
+
}
|
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_500/actor/huggingface/special_tokens_map.json
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": {
|
| 3 |
+
"content": "<s>",
|
| 4 |
+
"lstrip": false,
|
| 5 |
+
"normalized": false,
|
| 6 |
+
"rstrip": false,
|
| 7 |
+
"single_word": false
|
| 8 |
+
},
|
| 9 |
+
"eos_token": {
|
| 10 |
+
"content": "</s>",
|
| 11 |
+
"lstrip": false,
|
| 12 |
+
"normalized": false,
|
| 13 |
+
"rstrip": false,
|
| 14 |
+
"single_word": false
|
| 15 |
+
},
|
| 16 |
+
"pad_token": "</s>",
|
| 17 |
+
"unk_token": {
|
| 18 |
+
"content": "<unk>",
|
| 19 |
+
"lstrip": false,
|
| 20 |
+
"normalized": false,
|
| 21 |
+
"rstrip": false,
|
| 22 |
+
"single_word": false
|
| 23 |
+
}
|
| 24 |
+
}
|
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_500/actor/huggingface/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_500/actor/huggingface/tokenizer.model
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
|
| 3 |
+
size 499723
|