DatPySci committed on
Commit
55699fa
·
verified ·
1 Parent(s): c97bcea

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/extra_state_world_size_4_rank_0.pt +3 -0
  2. models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/extra_state_world_size_4_rank_1.pt +3 -0
  3. models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/extra_state_world_size_4_rank_2.pt +3 -0
  4. models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/extra_state_world_size_4_rank_3.pt +3 -0
  5. models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/fsdp_config.json +4 -0
  6. models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/huggingface/config.json +27 -0
  7. models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/huggingface/generation_config.json +6 -0
  8. models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/huggingface/special_tokens_map.json +24 -0
  9. models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/huggingface/tokenizer.json +0 -0
  10. models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/huggingface/tokenizer.model +3 -0
  11. models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/huggingface/tokenizer_config.json +43 -0
  12. models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/model_world_size_4_rank_0.pt +3 -0
  13. models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/model_world_size_4_rank_1.pt +3 -0
  14. models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/model_world_size_4_rank_2.pt +3 -0
  15. models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/model_world_size_4_rank_3.pt +3 -0
  16. models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/optim_world_size_4_rank_0.pt +3 -0
  17. models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/optim_world_size_4_rank_1.pt +3 -0
  18. models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/optim_world_size_4_rank_2.pt +3 -0
  19. models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/optim_world_size_4_rank_3.pt +3 -0
  20. models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/data.pt +3 -0
  21. models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/extra_state_world_size_4_rank_0.pt +3 -0
  22. models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/extra_state_world_size_4_rank_1.pt +3 -0
  23. models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/extra_state_world_size_4_rank_2.pt +3 -0
  24. models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/extra_state_world_size_4_rank_3.pt +3 -0
  25. models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/fsdp_config.json +4 -0
  26. models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/huggingface/config.json +27 -0
  27. models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/huggingface/generation_config.json +6 -0
  28. models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/huggingface/special_tokens_map.json +24 -0
  29. models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/huggingface/tokenizer.json +0 -0
  30. models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/huggingface/tokenizer.model +3 -0
  31. models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/huggingface/tokenizer_config.json +43 -0
  32. models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/model_world_size_4_rank_0.pt +3 -0
  33. models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/model_world_size_4_rank_1.pt +3 -0
  34. models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/model_world_size_4_rank_2.pt +3 -0
  35. models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/model_world_size_4_rank_3.pt +3 -0
  36. models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/optim_world_size_4_rank_0.pt +3 -0
  37. models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/optim_world_size_4_rank_1.pt +3 -0
  38. models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/optim_world_size_4_rank_2.pt +3 -0
  39. models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/optim_world_size_4_rank_3.pt +3 -0
  40. models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/data.pt +3 -0
  41. models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_500/actor/extra_state_world_size_4_rank_0.pt +3 -0
  42. models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_500/actor/extra_state_world_size_4_rank_1.pt +3 -0
  43. models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_500/actor/extra_state_world_size_4_rank_2.pt +3 -0
  44. models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_500/actor/extra_state_world_size_4_rank_3.pt +3 -0
  45. models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_500/actor/fsdp_config.json +4 -0
  46. models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_500/actor/huggingface/config.json +27 -0
  47. models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_500/actor/huggingface/generation_config.json +6 -0
  48. models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_500/actor/huggingface/special_tokens_map.json +24 -0
  49. models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_500/actor/huggingface/tokenizer.json +0 -0
  50. models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_500/actor/huggingface/tokenizer.model +3 -0
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/extra_state_world_size_4_rank_0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15e52594085f2834c419cad4c2e090ca52dcd6f44aaf0f85964e0dfccad7f3e9
3
+ size 14632
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/extra_state_world_size_4_rank_1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ced4b0873f5bff0d22a9d07881c350edc848fc69c539714f7c560a626cf2538b
3
+ size 14632
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/extra_state_world_size_4_rank_2.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf1646cbd6b054443bee01d29bcff790fcfd073b96136d65907c6ca5a0b00310
3
+ size 14632
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/extra_state_world_size_4_rank_3.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4b054797c3d8cae7ba7c726a492bc8f99f7921a3c41e23972aad8b6dce519d8
3
+ size 14632
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/fsdp_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "FSDP_version": 2,
3
+ "world_size": 4
4
+ }
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/huggingface/config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "OlmoForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 1,
8
+ "clip_qkv": null,
9
+ "dtype": "float32",
10
+ "eos_token_id": 2,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 2048,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 8192,
15
+ "max_position_embeddings": 2048,
16
+ "model_type": "olmo",
17
+ "num_attention_heads": 16,
18
+ "num_hidden_layers": 16,
19
+ "num_key_value_heads": 16,
20
+ "pad_token_id": 2,
21
+ "rope_scaling": null,
22
+ "rope_theta": 10000.0,
23
+ "tie_word_embeddings": true,
24
+ "transformers_version": "4.57.3",
25
+ "use_cache": true,
26
+ "vocab_size": 32000
27
+ }
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/huggingface/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "eos_token_id": 50279,
4
+ "pad_token_id": 1,
5
+ "transformers_version": "4.57.3"
6
+ }
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/huggingface/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "</s>",
17
+ "unk_token": {
18
+ "content": "<unk>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/huggingface/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/huggingface/tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
+ size 499723
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/huggingface/tokenizer_config.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "add_prefix_space": null,
5
+ "added_tokens_decoder": {
6
+ "0": {
7
+ "content": "<unk>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false,
12
+ "special": true
13
+ },
14
+ "1": {
15
+ "content": "<s>",
16
+ "lstrip": false,
17
+ "normalized": false,
18
+ "rstrip": false,
19
+ "single_word": false,
20
+ "special": true
21
+ },
22
+ "2": {
23
+ "content": "</s>",
24
+ "lstrip": false,
25
+ "normalized": false,
26
+ "rstrip": false,
27
+ "single_word": false,
28
+ "special": true
29
+ }
30
+ },
31
+ "bos_token": "<s>",
32
+ "clean_up_tokenization_spaces": false,
33
+ "eos_token": "</s>",
34
+ "extra_special_tokens": {},
35
+ "legacy": false,
36
+ "model_max_length": 1000000000000000019884624838656,
37
+ "pad_token": "</s>",
38
+ "padding_side": "right",
39
+ "sp_model_kwargs": {},
40
+ "tokenizer_class": "LlamaTokenizer",
41
+ "unk_token": "<unk>",
42
+ "use_default_system_prompt": false
43
+ }
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/model_world_size_4_rank_0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3b63b67daea91f208ecf5b1dc3191e918a5a45ef036cfedf5d462af55857424
3
+ size 1139342746
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/model_world_size_4_rank_1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25263959fe3fba722e12ed174353768fd717f98923ff83d9bc2a9249d9a4f929
3
+ size 1139342746
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/model_world_size_4_rank_2.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14555b6f6dc160df9d8d04c30492de8319e01da0330982652b94d1670fc6b828
3
+ size 1139342746
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/model_world_size_4_rank_3.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e50c5dc10c55bed84102e1c1c75f9b067000a4b2815fcf581f0bbf699bdee9b5
3
+ size 1139342746
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/optim_world_size_4_rank_0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:475873b866d94a2096a4c55136ae833069514f365efced488689479e84da45da
3
+ size 2278786874
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/optim_world_size_4_rank_1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b5fc9fdf1233c62ff80b42a06f54add91fea552b0f609709b1d4a51634d3f35
3
+ size 2278786874
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/optim_world_size_4_rank_2.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f8d2f7ea6d983b1ecac4056cda59bb78af9d27b8ad3bb1ef295834d68a3bf6f
3
+ size 2278786874
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/actor/optim_world_size_4_rank_3.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:323007cc2e5bfc1dd59008e8f64c9f7d831b9e527a307d76e9632a70b659931d
3
+ size 2278786874
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_1000/data.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c796d145decbe527c0837a07586b707ff00f51b007caefa06c58c9e772f9af7
3
+ size 1492
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/extra_state_world_size_4_rank_0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eaf702286540c18b6fa73179bddf04d6dfbf098439eea6849033528709e84367
3
+ size 14632
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/extra_state_world_size_4_rank_1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed16f9149bcbf3db8218004285f9e3edafa1be1e66092e39d8b5e7603a923303
3
+ size 14632
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/extra_state_world_size_4_rank_2.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c154864129db82ca383b05ca4facd6fb144a12b2b857c053946574c5b9e6fc7a
3
+ size 14632
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/extra_state_world_size_4_rank_3.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01e573691c0a9182271df11cc6e9eafb4366c13d37f656d511cbe1bd2f0834a7
3
+ size 14632
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/fsdp_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "FSDP_version": 2,
3
+ "world_size": 4
4
+ }
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/huggingface/config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "OlmoForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 1,
8
+ "clip_qkv": null,
9
+ "dtype": "float32",
10
+ "eos_token_id": 2,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 2048,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 8192,
15
+ "max_position_embeddings": 2048,
16
+ "model_type": "olmo",
17
+ "num_attention_heads": 16,
18
+ "num_hidden_layers": 16,
19
+ "num_key_value_heads": 16,
20
+ "pad_token_id": 2,
21
+ "rope_scaling": null,
22
+ "rope_theta": 10000.0,
23
+ "tie_word_embeddings": true,
24
+ "transformers_version": "4.57.3",
25
+ "use_cache": true,
26
+ "vocab_size": 32000
27
+ }
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/huggingface/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "eos_token_id": 50279,
4
+ "pad_token_id": 1,
5
+ "transformers_version": "4.57.3"
6
+ }
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/huggingface/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "</s>",
17
+ "unk_token": {
18
+ "content": "<unk>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/huggingface/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/huggingface/tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
+ size 499723
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/huggingface/tokenizer_config.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "add_prefix_space": null,
5
+ "added_tokens_decoder": {
6
+ "0": {
7
+ "content": "<unk>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false,
12
+ "special": true
13
+ },
14
+ "1": {
15
+ "content": "<s>",
16
+ "lstrip": false,
17
+ "normalized": false,
18
+ "rstrip": false,
19
+ "single_word": false,
20
+ "special": true
21
+ },
22
+ "2": {
23
+ "content": "</s>",
24
+ "lstrip": false,
25
+ "normalized": false,
26
+ "rstrip": false,
27
+ "single_word": false,
28
+ "special": true
29
+ }
30
+ },
31
+ "bos_token": "<s>",
32
+ "clean_up_tokenization_spaces": false,
33
+ "eos_token": "</s>",
34
+ "extra_special_tokens": {},
35
+ "legacy": false,
36
+ "model_max_length": 1000000000000000019884624838656,
37
+ "pad_token": "</s>",
38
+ "padding_side": "right",
39
+ "sp_model_kwargs": {},
40
+ "tokenizer_class": "LlamaTokenizer",
41
+ "unk_token": "<unk>",
42
+ "use_default_system_prompt": false
43
+ }
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/model_world_size_4_rank_0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3de66b369a44f40f7fe23968ea23896c7c0a45d431a8ccfab9dce99394f093b5
3
+ size 1139342746
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/model_world_size_4_rank_1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a503c537d71fe133ea1003713ea6359c132b7e16c3ea03071741d6b6deb1185
3
+ size 1139342746
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/model_world_size_4_rank_2.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a04c44282a47e6a99e42fcc4857437ee43f975a130705a34c77310293434faeb
3
+ size 1139342746
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/model_world_size_4_rank_3.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:437b146a5039dfc25aa9852dd4c18d51e585e09db4208f0f4e7dc61d3ad8c57f
3
+ size 1139342746
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/optim_world_size_4_rank_0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a9d24966d7d56c0ecf75f82c2186913d427b3f74339ddea0e749e8fe9204e26
3
+ size 2278786874
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/optim_world_size_4_rank_1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35eb5a18b072f783a1e160307f28e2932f3c2902211129d16b18b7ba9900b5d4
3
+ size 2278786874
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/optim_world_size_4_rank_2.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c2283cf1776d6c0eb3d4d61502fd09e5f1f78cfa6e210d07f31f10c69f04f63
3
+ size 2278786874
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/actor/optim_world_size_4_rank_3.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d49d5110fed1dc5c9a694fc04067624bf42faf5240d484cfb424af6c759fb957
3
+ size 2278786874
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_250/data.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2182280655cacb51b3535095d34ff488c2a59de5928aa64e382cf29090ee1e83
3
+ size 1492
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_500/actor/extra_state_world_size_4_rank_0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6cdbfdfac642bab167b124d8505210c8fc1947ce4481e8b5241cc4e53356005a
3
+ size 14632
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_500/actor/extra_state_world_size_4_rank_1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed08f5edcc4eb4e3e45cf52e13417ba503ffcf0e49ea1ec01dbc83888632fa29
3
+ size 14632
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_500/actor/extra_state_world_size_4_rank_2.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1c09733dece55d4db1c9da26f25368d2a712b7347dfb0914966f0a81de2a269
3
+ size 14632
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_500/actor/extra_state_world_size_4_rank_3.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a31c37f2130e6848550f37ca2b66ca664598332fe410912906c9e59eeb6423e
3
+ size 14632
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_500/actor/fsdp_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "FSDP_version": 2,
3
+ "world_size": 4
4
+ }
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_500/actor/huggingface/config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "OlmoForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 1,
8
+ "clip_qkv": null,
9
+ "dtype": "float32",
10
+ "eos_token_id": 2,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 2048,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 8192,
15
+ "max_position_embeddings": 2048,
16
+ "model_type": "olmo",
17
+ "num_attention_heads": 16,
18
+ "num_hidden_layers": 16,
19
+ "num_key_value_heads": 16,
20
+ "pad_token_id": 2,
21
+ "rope_scaling": null,
22
+ "rope_theta": 10000.0,
23
+ "tie_word_embeddings": true,
24
+ "transformers_version": "4.57.3",
25
+ "use_cache": true,
26
+ "vocab_size": 32000
27
+ }
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_500/actor/huggingface/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "eos_token_id": 50279,
4
+ "pad_token_id": 1,
5
+ "transformers_version": "4.57.3"
6
+ }
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_500/actor/huggingface/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "</s>",
17
+ "unk_token": {
18
+ "content": "<unk>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_500/actor/huggingface/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
models/OLMo-1B-RL/OLMo-1B-gsm8k-step12000-GRPO/global_step_500/actor/huggingface/tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
+ size 499723