youuor7r committed
Commit 17e4d32 · 0 Parent(s)

Clean initial commit with LFS

This view is limited to 50 files because the commit contains too many changes.

Files changed (50):
  1. .gitattributes +37 -0
  2. README.md +3 -0
  3. gemma3-1B-ddm/trainer_log.jsonl +0 -0
  4. gemma3-1B-pt/checkpoint-120000/added_tokens.json +4 -0
  5. gemma3-1B-pt/checkpoint-120000/config.json +34 -0
  6. gemma3-1B-pt/checkpoint-120000/generation_config.json +13 -0
  7. gemma3-1B-pt/checkpoint-120000/optimizer.pt +3 -0
  8. gemma3-1B-pt/checkpoint-120000/pytorch_model.bin +3 -0
  9. gemma3-1B-pt/checkpoint-120000/rng_state_0.pth +3 -0
  10. gemma3-1B-pt/checkpoint-120000/rng_state_1.pth +3 -0
  11. gemma3-1B-pt/checkpoint-120000/rng_state_2.pth +3 -0
  12. gemma3-1B-pt/checkpoint-120000/rng_state_3.pth +3 -0
  13. gemma3-1B-pt/checkpoint-120000/scheduler.pt +3 -0
  14. gemma3-1B-pt/checkpoint-120000/special_tokens_map.json +43 -0
  15. gemma3-1B-pt/checkpoint-120000/tokenizer.json +3 -0
  16. gemma3-1B-pt/checkpoint-120000/tokenizer.model +3 -0
  17. gemma3-1B-pt/checkpoint-120000/tokenizer_config.json +0 -0
  18. gemma3-1B-pt/checkpoint-120000/trainer_state.json +0 -0
  19. gemma3-1B-pt/checkpoint-120000/training_args.bin +3 -0
  20. gemma3-1B-pt/trainer_log.jsonl +0 -0
  21. gemma3-270m-ddm/added_tokens.json +4 -0
  22. gemma3-270m-ddm/all_results.json +13 -0
  23. gemma3-270m-ddm/checkpoint-120000/added_tokens.json +4 -0
  24. gemma3-270m-ddm/checkpoint-120000/config.json +56 -0
  25. gemma3-270m-ddm/checkpoint-120000/optimizer.pt +3 -0
  26. gemma3-270m-ddm/checkpoint-120000/pytorch_model.bin +3 -0
  27. gemma3-270m-ddm/checkpoint-120000/rng_state.pth +3 -0
  28. gemma3-270m-ddm/checkpoint-120000/scheduler.pt +3 -0
  29. gemma3-270m-ddm/checkpoint-120000/special_tokens_map.json +43 -0
  30. gemma3-270m-ddm/checkpoint-120000/tokenizer.json +3 -0
  31. gemma3-270m-ddm/checkpoint-120000/tokenizer.model +3 -0
  32. gemma3-270m-ddm/checkpoint-120000/tokenizer_config.json +0 -0
  33. gemma3-270m-ddm/checkpoint-120000/trainer_state.json +0 -0
  34. gemma3-270m-ddm/checkpoint-120000/training_args.bin +3 -0
  35. gemma3-270m-ddm/checkpoint-240000/added_tokens.json +4 -0
  36. gemma3-270m-ddm/checkpoint-240000/config.json +56 -0
  37. gemma3-270m-ddm/checkpoint-240000/optimizer.pt +3 -0
  38. gemma3-270m-ddm/checkpoint-240000/pytorch_model.bin +3 -0
  39. gemma3-270m-ddm/checkpoint-240000/rng_state.pth +3 -0
  40. gemma3-270m-ddm/checkpoint-240000/scheduler.pt +3 -0
  41. gemma3-270m-ddm/checkpoint-240000/special_tokens_map.json +43 -0
  42. gemma3-270m-ddm/checkpoint-240000/tokenizer.json +3 -0
  43. gemma3-270m-ddm/checkpoint-240000/tokenizer.model +3 -0
  44. gemma3-270m-ddm/checkpoint-240000/tokenizer_config.json +0 -0
  45. gemma3-270m-ddm/checkpoint-240000/trainer_state.json +0 -0
  46. gemma3-270m-ddm/checkpoint-240000/training_args.bin +3 -0
  47. gemma3-270m-ddm/checkpoint-360000/added_tokens.json +4 -0
  48. gemma3-270m-ddm/checkpoint-360000/config.json +56 -0
  49. gemma3-270m-ddm/checkpoint-360000/optimizer.pt +3 -0
  50. gemma3-270m-ddm/checkpoint-360000/pytorch_model.bin +3 -0
.gitattributes ADDED
@@ -0,0 +1,37 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
+ gemma3-1B-pt/checkpoint-120000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+ *tokenizer.json filter=lfs diff=lfs merge=lfs -text
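
These are the stock Hugging Face LFS rules plus two repo-specific entries at the end that route the large tokenizer.json files through LFS as well. As a rough illustration of how the glob patterns select files, here is a sketch using Python's fnmatch, which only approximates Git's attribute matching (the semantics of `**` and path anchoring differ):

```python
from fnmatch import fnmatch

# A few of the patterns from the .gitattributes above.
LFS_PATTERNS = ["*.bin", "*.pt", "*.pth", "*.model", "*tokenizer.json"]

def routed_through_lfs(path: str) -> bool:
    # Match against the full path and the basename; an approximation of
    # real gitattributes semantics, which differ for '**' and anchoring.
    basename = path.rsplit("/", 1)[-1]
    return any(fnmatch(path, p) or fnmatch(basename, p) for p in LFS_PATTERNS)

print(routed_through_lfs("gemma3-1B-pt/checkpoint-120000/pytorch_model.bin"))  # True
print(routed_through_lfs("gemma3-1B-pt/checkpoint-120000/tokenizer.json"))     # True
print(routed_through_lfs("gemma3-1B-pt/checkpoint-120000/config.json"))        # False
```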
README.md ADDED
@@ -0,0 +1,3 @@
+ ---
+ license: apache-2.0
+ ---
gemma3-1B-ddm/trainer_log.jsonl ADDED
The diff for this file is too large to render.
gemma3-1B-pt/checkpoint-120000/added_tokens.json ADDED
@@ -0,0 +1,4 @@
+ {
+ "<image_soft_token>": 262144,
+ "[MASK]": 262145
+ }
gemma3-1B-pt/checkpoint-120000/config.json ADDED
@@ -0,0 +1,34 @@
+ {
+ "architectures": [
+ "Gemma3ForCausalLM"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "attn_logit_softcapping": null,
+ "bos_token_id": 2,
+ "cache_implementation": "hybrid",
+ "eos_token_id": 1,
+ "final_logit_softcapping": null,
+ "head_dim": 256,
+ "hidden_activation": "gelu_pytorch_tanh",
+ "hidden_size": 1152,
+ "initializer_range": 0.02,
+ "intermediate_size": 6912,
+ "max_position_embeddings": 32768,
+ "model_type": "gemma3_text",
+ "num_attention_heads": 4,
+ "num_hidden_layers": 26,
+ "num_key_value_heads": 1,
+ "pad_token_id": 0,
+ "query_pre_attn_scalar": 256,
+ "rms_norm_eps": 1e-06,
+ "rope_local_base_freq": 10000,
+ "rope_scaling": null,
+ "rope_theta": 1000000,
+ "sliding_window": 512,
+ "sliding_window_pattern": 6,
+ "torch_dtype": "float32",
+ "transformers_version": "4.50.3",
+ "use_cache": false,
+ "vocab_size": 262146
+ }
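
Note the vocab_size of 262146: the stock Gemma 3 text vocabulary of 262144 plus the two entries from added_tokens.json above (<image_soft_token> at 262144 and [MASK] at 262145). A minimal sanity-check sketch, assuming the checkpoint directory from this commit is available locally:

```python
from transformers import AutoConfig, AutoTokenizer

ckpt = "gemma3-1B-pt/checkpoint-120000"  # path as laid out in this commit

config = AutoConfig.from_pretrained(ckpt)
tokenizer = AutoTokenizer.from_pretrained(ckpt)

assert config.vocab_size == 262144 + 2  # base vocab + the two added tokens
print(tokenizer.convert_tokens_to_ids("[MASK]"))  # expected: 262145
```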
gemma3-1B-pt/checkpoint-120000/generation_config.json ADDED
@@ -0,0 +1,13 @@
+ {
+ "bos_token_id": 2,
+ "cache_implementation": "hybrid",
+ "do_sample": true,
+ "eos_token_id": [
+ 1,
+ 106
+ ],
+ "pad_token_id": 0,
+ "top_k": 64,
+ "top_p": 0.95,
+ "transformers_version": "4.50.3"
+ }
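
These sampling defaults (do_sample with top_k=64 and top_p=0.95) are what generate() picks up automatically from this file. A rough usage sketch, assuming the checkpoint loads as a standard Gemma3ForCausalLM per the config above:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

ckpt = "gemma3-1B-pt/checkpoint-120000"
tokenizer = AutoTokenizer.from_pretrained(ckpt)
model = AutoModelForCausalLM.from_pretrained(ckpt)

inputs = tokenizer("The quick brown fox", return_tensors="pt")
# do_sample=True, top_k=64, top_p=0.95 come from generation_config.json above.
output = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```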
gemma3-1B-pt/checkpoint-120000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0b8a92151db63a81a698d230e1b20dec67caa1a26870a7a8bd2fd5f402f62327
+ size 7999400602
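
Only this three-line Git LFS pointer (spec v1: version, sha256 oid, byte size) is stored in Git; the ~8.0 GB optimizer state itself lives in LFS storage. The size is consistent with Adam-style optimizers on an fp32 model: two fp32 moment buffers per parameter, roughly twice the ~4.0 GB pytorch_model.bin below. A minimal pointer parser, for illustration:

```python
def parse_lfs_pointer(text: str) -> dict:
    """Parse a Git LFS spec-v1 pointer into its key/value fields."""
    fields = dict(line.split(" ", 1) for line in text.strip().splitlines())
    assert fields["version"] == "https://git-lfs.github.com/spec/v1"
    algo, digest = fields["oid"].split(":", 1)
    return {"algo": algo, "digest": digest, "size": int(fields["size"])}

pointer = (
    "version https://git-lfs.github.com/spec/v1\n"
    "oid sha256:0b8a92151db63a81a698d230e1b20dec67caa1a26870a7a8bd2fd5f402f62327\n"
    "size 7999400602\n"
)
print(parse_lfs_pointer(pointer)["size"] / 1e9)  # ~8.0 (GB)
```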
gemma3-1B-pt/checkpoint-120000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:324b917d15865905000bc576786eeda6e6180ee9c697c17db5933983253cae70
+ size 3999668058
gemma3-1B-pt/checkpoint-120000/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6b5fa4985ea13e683f6cfcd107c46260dfd8a9a5411fcb6ecf90747bafbae7b6
+ size 15024
gemma3-1B-pt/checkpoint-120000/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a391ee6404bb07343ff7b9ec9578ba2e77224ea20f62687c8cd109e35c607754
+ size 15024
gemma3-1B-pt/checkpoint-120000/rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:de3b87ab1f04ccb08812d6d65103ffcb7504a5c5d8dbcead8ccbedbbd3f42242
+ size 15024
gemma3-1B-pt/checkpoint-120000/rng_state_3.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b5e9c65e9398d2f42b1d82790dfb178f12b9e7d25fea491c48aa2cb96a324ce9
+ size 15024
gemma3-1B-pt/checkpoint-120000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2173489f9f315292c7c9c49aae56528ab60b0229c1e1e1fba1e26464b44e85cf
+ size 1064
gemma3-1B-pt/checkpoint-120000/special_tokens_map.json ADDED
@@ -0,0 +1,43 @@
+ {
+ "additional_special_tokens": [
+ {
+ "content": "[MASK]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+ ],
+ "boi_token": "<start_of_image>",
+ "bos_token": {
+ "content": "<bos>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eoi_token": "<end_of_image>",
+ "eos_token": {
+ "content": "<eos>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "image_token": "<image_soft_token>",
+ "mask_token": "[MASK]",
+ "pad_token": {
+ "content": "<pad>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "unk_token": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
gemma3-1B-pt/checkpoint-120000/tokenizer.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:88ec6df915623f4b307188dbb6fe60ddb8a1ef273c864ba38de97a320dd17dea
+ size 33384751
gemma3-1B-pt/checkpoint-120000/tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1299c11d7cf632ef3b4e11937501358ada021bbdf7c47638d13c0ee982f2e79c
+ size 4689074
gemma3-1B-pt/checkpoint-120000/tokenizer_config.json ADDED
The diff for this file is too large to render.
gemma3-1B-pt/checkpoint-120000/trainer_state.json ADDED
The diff for this file is too large to render.
gemma3-1B-pt/checkpoint-120000/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:319ce1c7ab697eca8dcb5848c93a6b3f91f03bb2ecf75c55ac3763290641c176
+ size 5560
gemma3-1B-pt/trainer_log.jsonl ADDED
The diff for this file is too large to render.
gemma3-270m-ddm/added_tokens.json ADDED
@@ -0,0 +1,4 @@
+ {
+ "<image_soft_token>": 262144,
+ "[MASK]": 262145
+ }
gemma3-270m-ddm/all_results.json ADDED
@@ -0,0 +1,13 @@
+ {
+ "epoch": 1.0,
+ "eval_loss": 10.049214363098145,
+ "eval_runtime": 7.0093,
+ "eval_samples_per_second": 1.427,
+ "eval_steps_per_second": 0.428,
+ "perplexity": 23137.601949056367,
+ "total_flos": 5.9174692061184e+17,
+ "train_loss": 9.62966806418101,
+ "train_runtime": 83147.3938,
+ "train_samples_per_second": 23.092,
+ "train_steps_per_second": 5.773
+ }
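
The perplexity field is just exp(eval_loss): exp(10.049214...) ≈ 23137.60, matching the value above. A quick check:

```python
import math

eval_loss = 10.049214363098145
print(math.exp(eval_loss))  # ≈ 23137.60, the reported perplexity
```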
gemma3-270m-ddm/checkpoint-120000/added_tokens.json ADDED
@@ -0,0 +1,4 @@
+ {
+ "<image_soft_token>": 262144,
+ "[MASK]": 262145
+ }
gemma3-270m-ddm/checkpoint-120000/config.json ADDED
@@ -0,0 +1,56 @@
+ {
+ "_sliding_window_pattern": 6,
+ "architectures": [
+ "DiscreteDiffusionModel"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "attn_logit_softcapping": null,
+ "bos_token_id": 2,
+ "cache_implementation": "hybrid",
+ "eos_token_id": 1,
+ "final_logit_softcapping": null,
+ "head_dim": 256,
+ "hidden_activation": "gelu_pytorch_tanh",
+ "hidden_size": 640,
+ "initializer_range": 0.02,
+ "intermediate_size": 2048,
+ "layer_types": [
+ "sliding_attention",
+ "sliding_attention",
+ "sliding_attention",
+ "sliding_attention",
+ "sliding_attention",
+ "full_attention",
+ "sliding_attention",
+ "sliding_attention",
+ "sliding_attention",
+ "sliding_attention",
+ "sliding_attention",
+ "full_attention",
+ "sliding_attention",
+ "sliding_attention",
+ "sliding_attention",
+ "sliding_attention",
+ "sliding_attention",
+ "full_attention"
+ ],
+ "max_position_embeddings": 32768,
+ "model_type": "gemma3_text",
+ "num_attention_heads": 4,
+ "num_hidden_layers": 18,
+ "num_key_value_heads": 1,
+ "pad_token_id": 0,
+ "query_pre_attn_scalar": 256,
+ "rms_norm_eps": 1e-06,
+ "rope_local_base_freq": 10000.0,
+ "rope_scaling": null,
+ "rope_theta": 1000000.0,
+ "sliding_window": 512,
+ "sliding_window_pattern": 6,
+ "torch_dtype": "float32",
+ "transformers_version": "4.50.3",
+ "use_bidirectional_attention": false,
+ "use_cache": true,
+ "vocab_size": 262144
+ }
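
The layer_types list follows sliding_window_pattern: 6 exactly: every sixth layer uses full attention and the rest use 512-token sliding-window attention, so the 18 layers get full attention at indices 5, 11, and 17. A sketch that reproduces the list:

```python
num_hidden_layers, pattern = 18, 6  # values from the config.json above

layer_types = [
    "full_attention" if (i + 1) % pattern == 0 else "sliding_attention"
    for i in range(num_hidden_layers)
]
assert layer_types.count("full_attention") == 3  # indices 5, 11, 17
```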
gemma3-270m-ddm/checkpoint-120000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3e18ab8cf1dd72162887ceddc753d433bda169e7d846e39d368a81563c3fe937
+ size 2144993594
gemma3-270m-ddm/checkpoint-120000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b4ca1ce5e21b0de29aede5a6ded446371f672b692f489540fc9600f870a45317
+ size 1072479482
gemma3-270m-ddm/checkpoint-120000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d56b981fbeeb6b3ce4c934fb4192fb848ef9132e287b0c4bb648efe597ad4418
+ size 14244
gemma3-270m-ddm/checkpoint-120000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2173489f9f315292c7c9c49aae56528ab60b0229c1e1e1fba1e26464b44e85cf
+ size 1064
gemma3-270m-ddm/checkpoint-120000/special_tokens_map.json ADDED
@@ -0,0 +1,43 @@
+ {
+ "additional_special_tokens": [
+ {
+ "content": "[MASK]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+ ],
+ "boi_token": "<start_of_image>",
+ "bos_token": {
+ "content": "<bos>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eoi_token": "<end_of_image>",
+ "eos_token": {
+ "content": "<eos>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "image_token": "<image_soft_token>",
+ "mask_token": "[MASK]",
+ "pad_token": {
+ "content": "<pad>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "unk_token": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
gemma3-270m-ddm/checkpoint-120000/tokenizer.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:88ec6df915623f4b307188dbb6fe60ddb8a1ef273c864ba38de97a320dd17dea
+ size 33384751
gemma3-270m-ddm/checkpoint-120000/tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1299c11d7cf632ef3b4e11937501358ada021bbdf7c47638d13c0ee982f2e79c
+ size 4689074
gemma3-270m-ddm/checkpoint-120000/tokenizer_config.json ADDED
The diff for this file is too large to render.
gemma3-270m-ddm/checkpoint-120000/trainer_state.json ADDED
The diff for this file is too large to render.
gemma3-270m-ddm/checkpoint-120000/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:32fd4894f278427918119990b13eb00d0dc868801bb0657770dc401403f32b22
+ size 5560
gemma3-270m-ddm/checkpoint-240000/added_tokens.json ADDED
@@ -0,0 +1,4 @@
+ {
+ "<image_soft_token>": 262144,
+ "[MASK]": 262145
+ }
gemma3-270m-ddm/checkpoint-240000/config.json ADDED
@@ -0,0 +1,56 @@
+ {
+ "_sliding_window_pattern": 6,
+ "architectures": [
+ "DiscreteDiffusionModel"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "attn_logit_softcapping": null,
+ "bos_token_id": 2,
+ "cache_implementation": "hybrid",
+ "eos_token_id": 1,
+ "final_logit_softcapping": null,
+ "head_dim": 256,
+ "hidden_activation": "gelu_pytorch_tanh",
+ "hidden_size": 640,
+ "initializer_range": 0.02,
+ "intermediate_size": 2048,
+ "layer_types": [
+ "sliding_attention",
+ "sliding_attention",
+ "sliding_attention",
+ "sliding_attention",
+ "sliding_attention",
+ "full_attention",
+ "sliding_attention",
+ "sliding_attention",
+ "sliding_attention",
+ "sliding_attention",
+ "sliding_attention",
+ "full_attention",
+ "sliding_attention",
+ "sliding_attention",
+ "sliding_attention",
+ "sliding_attention",
+ "sliding_attention",
+ "full_attention"
+ ],
+ "max_position_embeddings": 32768,
+ "model_type": "gemma3_text",
+ "num_attention_heads": 4,
+ "num_hidden_layers": 18,
+ "num_key_value_heads": 1,
+ "pad_token_id": 0,
+ "query_pre_attn_scalar": 256,
+ "rms_norm_eps": 1e-06,
+ "rope_local_base_freq": 10000.0,
+ "rope_scaling": null,
+ "rope_theta": 1000000.0,
+ "sliding_window": 512,
+ "sliding_window_pattern": 6,
+ "torch_dtype": "float32",
+ "transformers_version": "4.50.3",
+ "use_bidirectional_attention": false,
+ "use_cache": true,
+ "vocab_size": 262144
+ }
gemma3-270m-ddm/checkpoint-240000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:571d6bca660c39a1203f10b8cf646e5998bec4503e222eb488bf0c255489337f
+ size 2144993594
gemma3-270m-ddm/checkpoint-240000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:848669456fd3423f3d625c78bea22dde5526d6d8c4141376b983af90e7249914
+ size 1072479482
gemma3-270m-ddm/checkpoint-240000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7f91ad6dea72095bead20cef99aae007cbafa1ca2ea31bd6504f91f84388d5b6
+ size 14244
gemma3-270m-ddm/checkpoint-240000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:125e71a4d2f4439d4912051c36abe8fcb76a0edb9d5fefe8edbadf2cca30613e
+ size 1064
gemma3-270m-ddm/checkpoint-240000/special_tokens_map.json ADDED
@@ -0,0 +1,43 @@
+ {
+ "additional_special_tokens": [
+ {
+ "content": "[MASK]",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+ ],
+ "boi_token": "<start_of_image>",
+ "bos_token": {
+ "content": "<bos>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eoi_token": "<end_of_image>",
+ "eos_token": {
+ "content": "<eos>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "image_token": "<image_soft_token>",
+ "mask_token": "[MASK]",
+ "pad_token": {
+ "content": "<pad>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "unk_token": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
gemma3-270m-ddm/checkpoint-240000/tokenizer.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:88ec6df915623f4b307188dbb6fe60ddb8a1ef273c864ba38de97a320dd17dea
+ size 33384751
gemma3-270m-ddm/checkpoint-240000/tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1299c11d7cf632ef3b4e11937501358ada021bbdf7c47638d13c0ee982f2e79c
+ size 4689074
gemma3-270m-ddm/checkpoint-240000/tokenizer_config.json ADDED
The diff for this file is too large to render.
gemma3-270m-ddm/checkpoint-240000/trainer_state.json ADDED
The diff for this file is too large to render.
gemma3-270m-ddm/checkpoint-240000/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:32fd4894f278427918119990b13eb00d0dc868801bb0657770dc401403f32b22
+ size 5560
gemma3-270m-ddm/checkpoint-360000/added_tokens.json ADDED
@@ -0,0 +1,4 @@
+ {
+ "<image_soft_token>": 262144,
+ "[MASK]": 262145
+ }
gemma3-270m-ddm/checkpoint-360000/config.json ADDED
@@ -0,0 +1,56 @@
+ {
+ "_sliding_window_pattern": 6,
+ "architectures": [
+ "DiscreteDiffusionModel"
+ ],
+ "attention_bias": false,
+ "attention_dropout": 0.0,
+ "attn_logit_softcapping": null,
+ "bos_token_id": 2,
+ "cache_implementation": "hybrid",
+ "eos_token_id": 1,
+ "final_logit_softcapping": null,
+ "head_dim": 256,
+ "hidden_activation": "gelu_pytorch_tanh",
+ "hidden_size": 640,
+ "initializer_range": 0.02,
+ "intermediate_size": 2048,
+ "layer_types": [
+ "sliding_attention",
+ "sliding_attention",
+ "sliding_attention",
+ "sliding_attention",
+ "sliding_attention",
+ "full_attention",
+ "sliding_attention",
+ "sliding_attention",
+ "sliding_attention",
+ "sliding_attention",
+ "sliding_attention",
+ "full_attention",
+ "sliding_attention",
+ "sliding_attention",
+ "sliding_attention",
+ "sliding_attention",
+ "sliding_attention",
+ "full_attention"
+ ],
+ "max_position_embeddings": 32768,
+ "model_type": "gemma3_text",
+ "num_attention_heads": 4,
+ "num_hidden_layers": 18,
+ "num_key_value_heads": 1,
+ "pad_token_id": 0,
+ "query_pre_attn_scalar": 256,
+ "rms_norm_eps": 1e-06,
+ "rope_local_base_freq": 10000.0,
+ "rope_scaling": null,
+ "rope_theta": 1000000.0,
+ "sliding_window": 512,
+ "sliding_window_pattern": 6,
+ "torch_dtype": "float32",
+ "transformers_version": "4.50.3",
+ "use_bidirectional_attention": false,
+ "use_cache": true,
+ "vocab_size": 262144
+ }
gemma3-270m-ddm/checkpoint-360000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:01087dc495739e657664892673626e968813189351bf655d830e018f7efda58a
+ size 2144993594
gemma3-270m-ddm/checkpoint-360000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5790e80e0075c33f208358b7cf3d9b34b342a9b14c6cbdd3c22255a0625cc32b
+ size 1072479482