youuor7r committed on
Commit
d20e7f0
·
1 Parent(s): 8bc77ef

Add logs and weights via LFS

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +1 -0
  2. gemma3-1B-ddm/added_tokens.json +4 -0
  3. gemma3-1B-ddm/all_results.json +13 -0
  4. gemma3-1B-ddm/checkpoint-120000/added_tokens.json +4 -0
  5. gemma3-1B-ddm/checkpoint-120000/config.json +34 -0
  6. gemma3-1B-ddm/checkpoint-120000/optimizer.pt +3 -0
  7. gemma3-1B-ddm/checkpoint-120000/pytorch_model.bin +3 -0
  8. gemma3-1B-ddm/checkpoint-120000/rng_state_0.pth +3 -0
  9. gemma3-1B-ddm/checkpoint-120000/rng_state_1.pth +3 -0
  10. gemma3-1B-ddm/checkpoint-120000/rng_state_2.pth +3 -0
  11. gemma3-1B-ddm/checkpoint-120000/rng_state_3.pth +3 -0
  12. gemma3-1B-ddm/checkpoint-120000/scheduler.pt +3 -0
  13. gemma3-1B-ddm/checkpoint-120000/special_tokens_map.json +43 -0
  14. gemma3-1B-ddm/checkpoint-120000/tokenizer.json +3 -0
  15. gemma3-1B-ddm/checkpoint-120000/tokenizer.model +3 -0
  16. gemma3-1B-ddm/checkpoint-120000/tokenizer_config.json +0 -0
  17. gemma3-1B-ddm/checkpoint-120000/trainer_state.json +0 -0
  18. gemma3-1B-ddm/checkpoint-120000/training_args.bin +3 -0
  19. gemma3-1B-ddm/checkpoint-240000/added_tokens.json +4 -0
  20. gemma3-1B-ddm/checkpoint-240000/config.json +34 -0
  21. gemma3-1B-ddm/checkpoint-240000/optimizer.pt +3 -0
  22. gemma3-1B-ddm/checkpoint-240000/pytorch_model.bin +3 -0
  23. gemma3-1B-ddm/checkpoint-240000/rng_state_0.pth +3 -0
  24. gemma3-1B-ddm/checkpoint-240000/rng_state_1.pth +3 -0
  25. gemma3-1B-ddm/checkpoint-240000/rng_state_2.pth +3 -0
  26. gemma3-1B-ddm/checkpoint-240000/rng_state_3.pth +3 -0
  27. gemma3-1B-ddm/checkpoint-240000/scheduler.pt +3 -0
  28. gemma3-1B-ddm/checkpoint-240000/special_tokens_map.json +43 -0
  29. gemma3-1B-ddm/checkpoint-240000/tokenizer.json +3 -0
  30. gemma3-1B-ddm/checkpoint-240000/tokenizer.model +3 -0
  31. gemma3-1B-ddm/checkpoint-240000/tokenizer_config.json +0 -0
  32. gemma3-1B-ddm/checkpoint-240000/trainer_state.json +0 -0
  33. gemma3-1B-ddm/checkpoint-240000/training_args.bin +3 -0
  34. gemma3-1B-ddm/checkpoint-360000/added_tokens.json +4 -0
  35. gemma3-1B-ddm/checkpoint-360000/config.json +34 -0
  36. gemma3-1B-ddm/checkpoint-360000/optimizer.pt +3 -0
  37. gemma3-1B-ddm/checkpoint-360000/pytorch_model.bin +3 -0
  38. gemma3-1B-ddm/checkpoint-360000/rng_state_0.pth +3 -0
  39. gemma3-1B-ddm/checkpoint-360000/rng_state_1.pth +3 -0
  40. gemma3-1B-ddm/checkpoint-360000/rng_state_2.pth +3 -0
  41. gemma3-1B-ddm/checkpoint-360000/rng_state_3.pth +3 -0
  42. gemma3-1B-ddm/checkpoint-360000/scheduler.pt +3 -0
  43. gemma3-1B-ddm/checkpoint-360000/special_tokens_map.json +43 -0
  44. gemma3-1B-ddm/checkpoint-360000/tokenizer.json +3 -0
  45. gemma3-1B-ddm/checkpoint-360000/tokenizer.model +3 -0
  46. gemma3-1B-ddm/checkpoint-360000/tokenizer_config.json +0 -0
  47. gemma3-1B-ddm/checkpoint-360000/trainer_state.json +0 -0
  48. gemma3-1B-ddm/checkpoint-360000/training_args.bin +3 -0
  49. gemma3-1B-ddm/checkpoint-480000/added_tokens.json +4 -0
  50. gemma3-1B-ddm/checkpoint-480000/config.json +34 -0
.gitattributes CHANGED
@@ -35,3 +35,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  gemma3-1B-pt/checkpoint-120000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
  *tokenizer.json filter=lfs diff=lfs merge=lfs -text
 
 
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  gemma3-1B-pt/checkpoint-120000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
  *tokenizer.json filter=lfs diff=lfs merge=lfs -text
38
+ *.jsonl filter=lfs diff=lfs merge=lfs -text
gemma3-1B-ddm/added_tokens.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "<image_soft_token>": 262144,
3
+ "[MASK]": 262145
4
+ }
gemma3-1B-ddm/all_results.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "eval_loss": 6.96756649017334,
4
+ "eval_runtime": 8.0075,
5
+ "eval_samples_per_second": 0.375,
6
+ "eval_steps_per_second": 0.125,
7
+ "perplexity": 1061.6361026099812,
8
+ "total_flos": 1.6465434123436032e+19,
9
+ "train_loss": 1.934643552494049,
10
+ "train_runtime": 49285.6778,
11
+ "train_samples_per_second": 155.826,
12
+ "train_steps_per_second": 9.739
13
+ }
gemma3-1B-ddm/checkpoint-120000/added_tokens.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "<image_soft_token>": 262144,
3
+ "[MASK]": 262145
4
+ }
gemma3-1B-ddm/checkpoint-120000/config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "DiscreteDiffusionModel"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "attn_logit_softcapping": null,
8
+ "bos_token_id": 2,
9
+ "cache_implementation": "hybrid",
10
+ "eos_token_id": 1,
11
+ "final_logit_softcapping": null,
12
+ "head_dim": 256,
13
+ "hidden_activation": "gelu_pytorch_tanh",
14
+ "hidden_size": 1152,
15
+ "initializer_range": 0.02,
16
+ "intermediate_size": 6912,
17
+ "max_position_embeddings": 32768,
18
+ "model_type": "gemma3_text",
19
+ "num_attention_heads": 4,
20
+ "num_hidden_layers": 26,
21
+ "num_key_value_heads": 1,
22
+ "pad_token_id": 0,
23
+ "query_pre_attn_scalar": 256,
24
+ "rms_norm_eps": 1e-06,
25
+ "rope_local_base_freq": 10000,
26
+ "rope_scaling": null,
27
+ "rope_theta": 1000000,
28
+ "sliding_window": 512,
29
+ "sliding_window_pattern": 6,
30
+ "torch_dtype": "float32",
31
+ "transformers_version": "4.50.3",
32
+ "use_cache": true,
33
+ "vocab_size": 262144
34
+ }
gemma3-1B-ddm/checkpoint-120000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:430fdc91ffa83fa4472ea0f3b01ffb9bf1056a70fc3237ad4c11704eda34eb86
3
+ size 7999400602
gemma3-1B-ddm/checkpoint-120000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b01dbd3ec2a0fb70c1b9bd9c0c5a8a3dcacc97b5c4db06e18837264333fa49b
3
+ size 3999670746
gemma3-1B-ddm/checkpoint-120000/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e894b7e50fa42075917102d282ab5b9c6306d1d9e60e1fdaa52f656fba147d9
3
+ size 15024
gemma3-1B-ddm/checkpoint-120000/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b296ea55c7f783b1daa5e92b0ad057bc4a2d0e55449b0b2be2144ca07f57569d
3
+ size 15024
gemma3-1B-ddm/checkpoint-120000/rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e4140e7db1b230a40394c2fc9604e2bc53e71ec0472f783ef6af3428b27ed6a
3
+ size 15024
gemma3-1B-ddm/checkpoint-120000/rng_state_3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3dac173b6861602b7be0fbc84d79ba5d5c14651a1aa0db57575d4c3d2c1bcb5d
3
+ size 15024
gemma3-1B-ddm/checkpoint-120000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2173489f9f315292c7c9c49aae56528ab60b0229c1e1e1fba1e26464b44e85cf
3
+ size 1064
gemma3-1B-ddm/checkpoint-120000/special_tokens_map.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ {
4
+ "content": "[MASK]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ }
10
+ ],
11
+ "boi_token": "<start_of_image>",
12
+ "bos_token": {
13
+ "content": "<bos>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false
18
+ },
19
+ "eoi_token": "<end_of_image>",
20
+ "eos_token": {
21
+ "content": "<eos>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false
26
+ },
27
+ "image_token": "<image_soft_token>",
28
+ "mask_token": "[MASK]",
29
+ "pad_token": {
30
+ "content": "<pad>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false
35
+ },
36
+ "unk_token": {
37
+ "content": "<unk>",
38
+ "lstrip": false,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false
42
+ }
43
+ }
gemma3-1B-ddm/checkpoint-120000/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88ec6df915623f4b307188dbb6fe60ddb8a1ef273c864ba38de97a320dd17dea
3
+ size 33384751
gemma3-1B-ddm/checkpoint-120000/tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1299c11d7cf632ef3b4e11937501358ada021bbdf7c47638d13c0ee982f2e79c
3
+ size 4689074
gemma3-1B-ddm/checkpoint-120000/tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff
 
gemma3-1B-ddm/checkpoint-120000/trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
gemma3-1B-ddm/checkpoint-120000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ed1453815a1e170e9687bada2064f5bb86de85ac97665fc738f4373685cb5f2
3
+ size 5560
gemma3-1B-ddm/checkpoint-240000/added_tokens.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "<image_soft_token>": 262144,
3
+ "[MASK]": 262145
4
+ }
gemma3-1B-ddm/checkpoint-240000/config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "DiscreteDiffusionModel"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "attn_logit_softcapping": null,
8
+ "bos_token_id": 2,
9
+ "cache_implementation": "hybrid",
10
+ "eos_token_id": 1,
11
+ "final_logit_softcapping": null,
12
+ "head_dim": 256,
13
+ "hidden_activation": "gelu_pytorch_tanh",
14
+ "hidden_size": 1152,
15
+ "initializer_range": 0.02,
16
+ "intermediate_size": 6912,
17
+ "max_position_embeddings": 32768,
18
+ "model_type": "gemma3_text",
19
+ "num_attention_heads": 4,
20
+ "num_hidden_layers": 26,
21
+ "num_key_value_heads": 1,
22
+ "pad_token_id": 0,
23
+ "query_pre_attn_scalar": 256,
24
+ "rms_norm_eps": 1e-06,
25
+ "rope_local_base_freq": 10000,
26
+ "rope_scaling": null,
27
+ "rope_theta": 1000000,
28
+ "sliding_window": 512,
29
+ "sliding_window_pattern": 6,
30
+ "torch_dtype": "float32",
31
+ "transformers_version": "4.50.3",
32
+ "use_cache": true,
33
+ "vocab_size": 262144
34
+ }
gemma3-1B-ddm/checkpoint-240000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34c54e12fc1870292424af15e56b96c876a1c85d5562a60c9e186049eb09b721
3
+ size 7999400602
gemma3-1B-ddm/checkpoint-240000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59e02e1a04e54dda53c0160d00a45dee1880b1cce1e2c914599aea09d74c23e5
3
+ size 3999670746
gemma3-1B-ddm/checkpoint-240000/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68c9e3b83ce71f9827dc396ecbd67988cf9ea3d66952c77ee055a422e9b661fd
3
+ size 15024
gemma3-1B-ddm/checkpoint-240000/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0960ec35cd3f25dad206f3aa4facdf51b93cd3c5bba76304f53595c5da58bc30
3
+ size 15024
gemma3-1B-ddm/checkpoint-240000/rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b0d19526c545ff371fee64088dc86447f743531f3c38187afb54260ebd50861
3
+ size 15024
gemma3-1B-ddm/checkpoint-240000/rng_state_3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:355736795ed0c22e385ae4358c372730a386e0e4c449db70d4d161b9a2128328
3
+ size 15024
gemma3-1B-ddm/checkpoint-240000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:125e71a4d2f4439d4912051c36abe8fcb76a0edb9d5fefe8edbadf2cca30613e
3
+ size 1064
gemma3-1B-ddm/checkpoint-240000/special_tokens_map.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ {
4
+ "content": "[MASK]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ }
10
+ ],
11
+ "boi_token": "<start_of_image>",
12
+ "bos_token": {
13
+ "content": "<bos>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false
18
+ },
19
+ "eoi_token": "<end_of_image>",
20
+ "eos_token": {
21
+ "content": "<eos>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false
26
+ },
27
+ "image_token": "<image_soft_token>",
28
+ "mask_token": "[MASK]",
29
+ "pad_token": {
30
+ "content": "<pad>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false
35
+ },
36
+ "unk_token": {
37
+ "content": "<unk>",
38
+ "lstrip": false,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false
42
+ }
43
+ }
gemma3-1B-ddm/checkpoint-240000/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88ec6df915623f4b307188dbb6fe60ddb8a1ef273c864ba38de97a320dd17dea
3
+ size 33384751
gemma3-1B-ddm/checkpoint-240000/tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1299c11d7cf632ef3b4e11937501358ada021bbdf7c47638d13c0ee982f2e79c
3
+ size 4689074
gemma3-1B-ddm/checkpoint-240000/tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff
 
gemma3-1B-ddm/checkpoint-240000/trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
gemma3-1B-ddm/checkpoint-240000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ed1453815a1e170e9687bada2064f5bb86de85ac97665fc738f4373685cb5f2
3
+ size 5560
gemma3-1B-ddm/checkpoint-360000/added_tokens.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "<image_soft_token>": 262144,
3
+ "[MASK]": 262145
4
+ }
gemma3-1B-ddm/checkpoint-360000/config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "DiscreteDiffusionModel"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "attn_logit_softcapping": null,
8
+ "bos_token_id": 2,
9
+ "cache_implementation": "hybrid",
10
+ "eos_token_id": 1,
11
+ "final_logit_softcapping": null,
12
+ "head_dim": 256,
13
+ "hidden_activation": "gelu_pytorch_tanh",
14
+ "hidden_size": 1152,
15
+ "initializer_range": 0.02,
16
+ "intermediate_size": 6912,
17
+ "max_position_embeddings": 32768,
18
+ "model_type": "gemma3_text",
19
+ "num_attention_heads": 4,
20
+ "num_hidden_layers": 26,
21
+ "num_key_value_heads": 1,
22
+ "pad_token_id": 0,
23
+ "query_pre_attn_scalar": 256,
24
+ "rms_norm_eps": 1e-06,
25
+ "rope_local_base_freq": 10000,
26
+ "rope_scaling": null,
27
+ "rope_theta": 1000000,
28
+ "sliding_window": 512,
29
+ "sliding_window_pattern": 6,
30
+ "torch_dtype": "float32",
31
+ "transformers_version": "4.50.3",
32
+ "use_cache": true,
33
+ "vocab_size": 262144
34
+ }
gemma3-1B-ddm/checkpoint-360000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5606663fb2c713f7872faea34b9a04ef2d64b8d66524fc648c6dc16b5f32d0ed
3
+ size 7999400602
gemma3-1B-ddm/checkpoint-360000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd686f1931ecc6545320617d366da822a4cce6f5af12501c4a775a7551ade70d
3
+ size 3999670746
gemma3-1B-ddm/checkpoint-360000/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92a967c21dce6495ab72c951463ab42ebf26b2cfaa8dfd1fe15aded1b4f77b89
3
+ size 15024
gemma3-1B-ddm/checkpoint-360000/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:214be0da0c26984a5c4b665e3ce7b02a22471a0e5b32d021baade307b66e298d
3
+ size 15024
gemma3-1B-ddm/checkpoint-360000/rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b006ef34673b6ab5b272b75393124ad1f31683a13e6f1f4e2630d779f91f0f3
3
+ size 15024
gemma3-1B-ddm/checkpoint-360000/rng_state_3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a045675926bf97f5dbb71ec5c8d3eb2a73978fe65eb1e0978a4489243f38215
3
+ size 15024
gemma3-1B-ddm/checkpoint-360000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5541de360ff9f209dfc8db93cd15be6870a749a69f7a2ec6722274d6424c495b
3
+ size 1064
gemma3-1B-ddm/checkpoint-360000/special_tokens_map.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ {
4
+ "content": "[MASK]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ }
10
+ ],
11
+ "boi_token": "<start_of_image>",
12
+ "bos_token": {
13
+ "content": "<bos>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false
18
+ },
19
+ "eoi_token": "<end_of_image>",
20
+ "eos_token": {
21
+ "content": "<eos>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false
26
+ },
27
+ "image_token": "<image_soft_token>",
28
+ "mask_token": "[MASK]",
29
+ "pad_token": {
30
+ "content": "<pad>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false
35
+ },
36
+ "unk_token": {
37
+ "content": "<unk>",
38
+ "lstrip": false,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false
42
+ }
43
+ }
gemma3-1B-ddm/checkpoint-360000/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88ec6df915623f4b307188dbb6fe60ddb8a1ef273c864ba38de97a320dd17dea
3
+ size 33384751
gemma3-1B-ddm/checkpoint-360000/tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1299c11d7cf632ef3b4e11937501358ada021bbdf7c47638d13c0ee982f2e79c
3
+ size 4689074
gemma3-1B-ddm/checkpoint-360000/tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff
 
gemma3-1B-ddm/checkpoint-360000/trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
gemma3-1B-ddm/checkpoint-360000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ed1453815a1e170e9687bada2064f5bb86de85ac97665fc738f4373685cb5f2
3
+ size 5560
gemma3-1B-ddm/checkpoint-480000/added_tokens.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "<image_soft_token>": 262144,
3
+ "[MASK]": 262145
4
+ }
gemma3-1B-ddm/checkpoint-480000/config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "DiscreteDiffusionModel"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "attn_logit_softcapping": null,
8
+ "bos_token_id": 2,
9
+ "cache_implementation": "hybrid",
10
+ "eos_token_id": 1,
11
+ "final_logit_softcapping": null,
12
+ "head_dim": 256,
13
+ "hidden_activation": "gelu_pytorch_tanh",
14
+ "hidden_size": 1152,
15
+ "initializer_range": 0.02,
16
+ "intermediate_size": 6912,
17
+ "max_position_embeddings": 32768,
18
+ "model_type": "gemma3_text",
19
+ "num_attention_heads": 4,
20
+ "num_hidden_layers": 26,
21
+ "num_key_value_heads": 1,
22
+ "pad_token_id": 0,
23
+ "query_pre_attn_scalar": 256,
24
+ "rms_norm_eps": 1e-06,
25
+ "rope_local_base_freq": 10000,
26
+ "rope_scaling": null,
27
+ "rope_theta": 1000000,
28
+ "sliding_window": 512,
29
+ "sliding_window_pattern": 6,
30
+ "torch_dtype": "float32",
31
+ "transformers_version": "4.50.3",
32
+ "use_cache": true,
33
+ "vocab_size": 262144
34
+ }