DatPySci committed on
Commit
df60c7b
·
verified ·
1 Parent(s): 55699fa

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/extra_state_world_size_4_rank_0.pt +3 -0
  2. models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/extra_state_world_size_4_rank_1.pt +3 -0
  3. models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/extra_state_world_size_4_rank_2.pt +3 -0
  4. models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/extra_state_world_size_4_rank_3.pt +3 -0
  5. models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/fsdp_config.json +4 -0
  6. models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/huggingface/config.json +27 -0
  7. models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/huggingface/generation_config.json +6 -0
  8. models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/huggingface/special_tokens_map.json +24 -0
  9. models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/huggingface/tokenizer.json +0 -0
  10. models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/huggingface/tokenizer.model +3 -0
  11. models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/huggingface/tokenizer_config.json +43 -0
  12. models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/model_world_size_4_rank_0.pt +3 -0
  13. models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/model_world_size_4_rank_1.pt +3 -0
  14. models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/model_world_size_4_rank_2.pt +3 -0
  15. models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/model_world_size_4_rank_3.pt +3 -0
  16. models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/optim_world_size_4_rank_0.pt +3 -0
  17. models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/optim_world_size_4_rank_1.pt +3 -0
  18. models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/optim_world_size_4_rank_2.pt +3 -0
  19. models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/optim_world_size_4_rank_3.pt +3 -0
  20. models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/data.pt +3 -0
  21. models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/extra_state_world_size_4_rank_0.pt +3 -0
  22. models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/extra_state_world_size_4_rank_1.pt +3 -0
  23. models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/extra_state_world_size_4_rank_2.pt +3 -0
  24. models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/extra_state_world_size_4_rank_3.pt +3 -0
  25. models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/fsdp_config.json +4 -0
  26. models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/huggingface/config.json +27 -0
  27. models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/huggingface/generation_config.json +6 -0
  28. models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/huggingface/special_tokens_map.json +24 -0
  29. models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/huggingface/tokenizer.json +0 -0
  30. models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/huggingface/tokenizer.model +3 -0
  31. models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/huggingface/tokenizer_config.json +43 -0
  32. models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/model_world_size_4_rank_0.pt +3 -0
  33. models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/model_world_size_4_rank_1.pt +3 -0
  34. models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/model_world_size_4_rank_2.pt +3 -0
  35. models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/model_world_size_4_rank_3.pt +3 -0
  36. models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/optim_world_size_4_rank_0.pt +3 -0
  37. models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/optim_world_size_4_rank_1.pt +3 -0
  38. models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/optim_world_size_4_rank_2.pt +3 -0
  39. models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/optim_world_size_4_rank_3.pt +3 -0
  40. models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/data.pt +3 -0
  41. models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_500/actor/extra_state_world_size_4_rank_0.pt +3 -0
  42. models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_500/actor/extra_state_world_size_4_rank_1.pt +3 -0
  43. models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_500/actor/extra_state_world_size_4_rank_2.pt +3 -0
  44. models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_500/actor/extra_state_world_size_4_rank_3.pt +3 -0
  45. models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_500/actor/fsdp_config.json +4 -0
  46. models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_500/actor/huggingface/config.json +27 -0
  47. models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_500/actor/huggingface/generation_config.json +6 -0
  48. models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_500/actor/huggingface/special_tokens_map.json +24 -0
  49. models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_500/actor/huggingface/tokenizer.json +0 -0
  50. models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_500/actor/huggingface/tokenizer.model +3 -0
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/extra_state_world_size_4_rank_0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ef32b0629e0b30bbdb397b53597aa2e25a49add874131dea247b0df72bca03a
3
+ size 14632
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/extra_state_world_size_4_rank_1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79ad9fc8d772a19aa7933ce95b505db7aafe290b1bb87c22e686ae468eb1acb8
3
+ size 14632
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/extra_state_world_size_4_rank_2.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42bcf184d06a6b3332fc8013ee2cb6818ca684ee7020fb39d9ae6df18d941dd8
3
+ size 14632
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/extra_state_world_size_4_rank_3.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e43e3d64c9c0d0222115cce784d417cf35dc1ab705ed140f182f0b6ae38b4de
3
+ size 14632
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/fsdp_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "FSDP_version": 2,
3
+ "world_size": 4
4
+ }
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/huggingface/config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "OlmoForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 1,
8
+ "clip_qkv": null,
9
+ "dtype": "float32",
10
+ "eos_token_id": 2,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 2048,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 8192,
15
+ "max_position_embeddings": 2048,
16
+ "model_type": "olmo",
17
+ "num_attention_heads": 16,
18
+ "num_hidden_layers": 16,
19
+ "num_key_value_heads": 16,
20
+ "pad_token_id": 2,
21
+ "rope_scaling": null,
22
+ "rope_theta": 10000.0,
23
+ "tie_word_embeddings": true,
24
+ "transformers_version": "4.57.3",
25
+ "use_cache": true,
26
+ "vocab_size": 32000
27
+ }
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/huggingface/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "eos_token_id": 50279,
4
+ "pad_token_id": 1,
5
+ "transformers_version": "4.57.3"
6
+ }
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/huggingface/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "</s>",
17
+ "unk_token": {
18
+ "content": "<unk>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/huggingface/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/huggingface/tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
+ size 499723
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/huggingface/tokenizer_config.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "add_prefix_space": null,
5
+ "added_tokens_decoder": {
6
+ "0": {
7
+ "content": "<unk>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false,
12
+ "special": true
13
+ },
14
+ "1": {
15
+ "content": "<s>",
16
+ "lstrip": false,
17
+ "normalized": false,
18
+ "rstrip": false,
19
+ "single_word": false,
20
+ "special": true
21
+ },
22
+ "2": {
23
+ "content": "</s>",
24
+ "lstrip": false,
25
+ "normalized": false,
26
+ "rstrip": false,
27
+ "single_word": false,
28
+ "special": true
29
+ }
30
+ },
31
+ "bos_token": "<s>",
32
+ "clean_up_tokenization_spaces": false,
33
+ "eos_token": "</s>",
34
+ "extra_special_tokens": {},
35
+ "legacy": false,
36
+ "model_max_length": 1000000000000000019884624838656,
37
+ "pad_token": "</s>",
38
+ "padding_side": "right",
39
+ "sp_model_kwargs": {},
40
+ "tokenizer_class": "LlamaTokenizer",
41
+ "unk_token": "<unk>",
42
+ "use_default_system_prompt": false
43
+ }
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/model_world_size_4_rank_0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ad0d73bf0b511601a43fb98d50e184ef58f6595a39a6dda9d60bf0bafa55f18
3
+ size 1139342746
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/model_world_size_4_rank_1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c484a4d6bdbe165301a37aaf103796e594253163a8d6c73e264b745f6358a4b0
3
+ size 1139342746
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/model_world_size_4_rank_2.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97ad6fe76f8d9e2c075745f98222747d4981e0f6513b452ba9c21eeda3a0d5f5
3
+ size 1139342746
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/model_world_size_4_rank_3.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03d58068b1376ec4bb52629bd2a650c7ad7bf7f2ba6def2b4e2855c6f6e20272
3
+ size 1139342746
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/optim_world_size_4_rank_0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8dcf647cb8a2c50abfaabac25f6bcab8b51a41778b303ed53db88b00702077cb
3
+ size 2278786746
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/optim_world_size_4_rank_1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c428ec33be1506e0721570d249843d7cb1d4347851e1e5cf861a880d9db5f6d7
3
+ size 2278786746
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/optim_world_size_4_rank_2.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bea523b93ff86f87515cd5c25ba37a055d1d686c5945fa781b2f918d8f153570
3
+ size 2278786746
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/actor/optim_world_size_4_rank_3.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5c5e67afe258967e898d5659ec89192545addd4192befb3116f776fa70fcb6f
3
+ size 2278786746
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_1000/data.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:427b58a85ba4be279e72ef011f1bd23c3f5a5bc8e30a64b5d6c103b57ea59d8a
3
+ size 1492
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/extra_state_world_size_4_rank_0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a45820130574fe7834cd0517c241dbd9002dcf31d01e0f29883a9e6e4d7e1661
3
+ size 14632
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/extra_state_world_size_4_rank_1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6da9f947eaf434f2cd11e52af4256ecd9315c0397ca6e6f86bc04f67ae2f8be5
3
+ size 14632
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/extra_state_world_size_4_rank_2.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f887a4ac969b3cd71ec62da615bae59d890bb43f740c76ee80dff718ee001d4
3
+ size 14632
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/extra_state_world_size_4_rank_3.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2479c6e7450c04ed7b87d52c879629d33fe5e56cc1d8fff60d63b933c172d150
3
+ size 14632
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/fsdp_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "FSDP_version": 2,
3
+ "world_size": 4
4
+ }
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/huggingface/config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "OlmoForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 1,
8
+ "clip_qkv": null,
9
+ "dtype": "float32",
10
+ "eos_token_id": 2,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 2048,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 8192,
15
+ "max_position_embeddings": 2048,
16
+ "model_type": "olmo",
17
+ "num_attention_heads": 16,
18
+ "num_hidden_layers": 16,
19
+ "num_key_value_heads": 16,
20
+ "pad_token_id": 2,
21
+ "rope_scaling": null,
22
+ "rope_theta": 10000.0,
23
+ "tie_word_embeddings": true,
24
+ "transformers_version": "4.57.3",
25
+ "use_cache": true,
26
+ "vocab_size": 32000
27
+ }
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/huggingface/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "eos_token_id": 50279,
4
+ "pad_token_id": 1,
5
+ "transformers_version": "4.57.3"
6
+ }
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/huggingface/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "</s>",
17
+ "unk_token": {
18
+ "content": "<unk>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/huggingface/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/huggingface/tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
+ size 499723
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/huggingface/tokenizer_config.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "add_prefix_space": null,
5
+ "added_tokens_decoder": {
6
+ "0": {
7
+ "content": "<unk>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false,
12
+ "special": true
13
+ },
14
+ "1": {
15
+ "content": "<s>",
16
+ "lstrip": false,
17
+ "normalized": false,
18
+ "rstrip": false,
19
+ "single_word": false,
20
+ "special": true
21
+ },
22
+ "2": {
23
+ "content": "</s>",
24
+ "lstrip": false,
25
+ "normalized": false,
26
+ "rstrip": false,
27
+ "single_word": false,
28
+ "special": true
29
+ }
30
+ },
31
+ "bos_token": "<s>",
32
+ "clean_up_tokenization_spaces": false,
33
+ "eos_token": "</s>",
34
+ "extra_special_tokens": {},
35
+ "legacy": false,
36
+ "model_max_length": 1000000000000000019884624838656,
37
+ "pad_token": "</s>",
38
+ "padding_side": "right",
39
+ "sp_model_kwargs": {},
40
+ "tokenizer_class": "LlamaTokenizer",
41
+ "unk_token": "<unk>",
42
+ "use_default_system_prompt": false
43
+ }
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/model_world_size_4_rank_0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6ec41a699da9f76949e01a3e1b883a70ec1250ae6659eaa31cf427af0fc3cfc
3
+ size 1139342746
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/model_world_size_4_rank_1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:603e588223f39b3d6ad4781bc515b68c56a2c354df0c3cb3b7b009c90b6512bb
3
+ size 1139342746
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/model_world_size_4_rank_2.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9854ab6bb52b47c7f0386d54f7c0249730ed24050aef7ce7c3ea8400f3ec0aa7
3
+ size 1139342746
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/model_world_size_4_rank_3.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8f4cea1ec7e384eac774c872d7f301458849e2f9d6c3530279c46f300338f0f
3
+ size 1139342746
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/optim_world_size_4_rank_0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d75a88ca52377e3de1df4601642c7ad2c4c2166dfa3e42832fdba87c413450c
3
+ size 2278786746
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/optim_world_size_4_rank_1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a41b07e52ee80f3b3184d9cca7bf3f9076bc8cce6fb34cdbf5f85ba44979d616
3
+ size 2278786746
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/optim_world_size_4_rank_2.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99b95ca64220f240fca6e8540f0a1bc79e6f0c883bc813823ae5f02aeedba69a
3
+ size 2278786746
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/actor/optim_world_size_4_rank_3.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc6a04b89daa83b0d642f29950d28d63aa6165386145a2b001fe6057fad8181d
3
+ size 2278786746
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_250/data.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9c59089524c193d50f366c2c4ea80593f2c1ce90bdafe2244cc45c67b9f4bc9
3
+ size 1492
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_500/actor/extra_state_world_size_4_rank_0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f47cf8f463fadbf107624a37f9a76395da4b400a4e629bd60e58c241911a9f4
3
+ size 14632
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_500/actor/extra_state_world_size_4_rank_1.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f382d5ff96d18e30fa84172caa7cfa5bd962986c132059e65bde49939632c806
3
+ size 14632
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_500/actor/extra_state_world_size_4_rank_2.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94f6b3b6643a66656d1108b285a2c589a53094c3a8e621379bf30da489212bf0
3
+ size 14632
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_500/actor/extra_state_world_size_4_rank_3.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cccb1f5dc44de358675f5f2de593c756271a623ee55352fc1cf13e5d3a164593
3
+ size 14632
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_500/actor/fsdp_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "FSDP_version": 2,
3
+ "world_size": 4
4
+ }
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_500/actor/huggingface/config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "OlmoForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 1,
8
+ "clip_qkv": null,
9
+ "dtype": "float32",
10
+ "eos_token_id": 2,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 2048,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 8192,
15
+ "max_position_embeddings": 2048,
16
+ "model_type": "olmo",
17
+ "num_attention_heads": 16,
18
+ "num_hidden_layers": 16,
19
+ "num_key_value_heads": 16,
20
+ "pad_token_id": 2,
21
+ "rope_scaling": null,
22
+ "rope_theta": 10000.0,
23
+ "tie_word_embeddings": true,
24
+ "transformers_version": "4.57.3",
25
+ "use_cache": true,
26
+ "vocab_size": 32000
27
+ }
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_500/actor/huggingface/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "eos_token_id": 50279,
4
+ "pad_token_id": 1,
5
+ "transformers_version": "4.57.3"
6
+ }
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_500/actor/huggingface/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "</s>",
17
+ "unk_token": {
18
+ "content": "<unk>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_500/actor/huggingface/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
models/OLMo-1B-RL/OLMo-1B-gsm8k-step15000-GRPO/global_step_500/actor/huggingface/tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
+ size 499723