Add logs and weights via LFS
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- .gitattributes +1 -0
- gemma3-1B-ddm/added_tokens.json +4 -0
- gemma3-1B-ddm/all_results.json +13 -0
- gemma3-1B-ddm/checkpoint-120000/added_tokens.json +4 -0
- gemma3-1B-ddm/checkpoint-120000/config.json +34 -0
- gemma3-1B-ddm/checkpoint-120000/optimizer.pt +3 -0
- gemma3-1B-ddm/checkpoint-120000/pytorch_model.bin +3 -0
- gemma3-1B-ddm/checkpoint-120000/rng_state_0.pth +3 -0
- gemma3-1B-ddm/checkpoint-120000/rng_state_1.pth +3 -0
- gemma3-1B-ddm/checkpoint-120000/rng_state_2.pth +3 -0
- gemma3-1B-ddm/checkpoint-120000/rng_state_3.pth +3 -0
- gemma3-1B-ddm/checkpoint-120000/scheduler.pt +3 -0
- gemma3-1B-ddm/checkpoint-120000/special_tokens_map.json +43 -0
- gemma3-1B-ddm/checkpoint-120000/tokenizer.json +3 -0
- gemma3-1B-ddm/checkpoint-120000/tokenizer.model +3 -0
- gemma3-1B-ddm/checkpoint-120000/tokenizer_config.json +0 -0
- gemma3-1B-ddm/checkpoint-120000/trainer_state.json +0 -0
- gemma3-1B-ddm/checkpoint-120000/training_args.bin +3 -0
- gemma3-1B-ddm/checkpoint-240000/added_tokens.json +4 -0
- gemma3-1B-ddm/checkpoint-240000/config.json +34 -0
- gemma3-1B-ddm/checkpoint-240000/optimizer.pt +3 -0
- gemma3-1B-ddm/checkpoint-240000/pytorch_model.bin +3 -0
- gemma3-1B-ddm/checkpoint-240000/rng_state_0.pth +3 -0
- gemma3-1B-ddm/checkpoint-240000/rng_state_1.pth +3 -0
- gemma3-1B-ddm/checkpoint-240000/rng_state_2.pth +3 -0
- gemma3-1B-ddm/checkpoint-240000/rng_state_3.pth +3 -0
- gemma3-1B-ddm/checkpoint-240000/scheduler.pt +3 -0
- gemma3-1B-ddm/checkpoint-240000/special_tokens_map.json +43 -0
- gemma3-1B-ddm/checkpoint-240000/tokenizer.json +3 -0
- gemma3-1B-ddm/checkpoint-240000/tokenizer.model +3 -0
- gemma3-1B-ddm/checkpoint-240000/tokenizer_config.json +0 -0
- gemma3-1B-ddm/checkpoint-240000/trainer_state.json +0 -0
- gemma3-1B-ddm/checkpoint-240000/training_args.bin +3 -0
- gemma3-1B-ddm/checkpoint-360000/added_tokens.json +4 -0
- gemma3-1B-ddm/checkpoint-360000/config.json +34 -0
- gemma3-1B-ddm/checkpoint-360000/optimizer.pt +3 -0
- gemma3-1B-ddm/checkpoint-360000/pytorch_model.bin +3 -0
- gemma3-1B-ddm/checkpoint-360000/rng_state_0.pth +3 -0
- gemma3-1B-ddm/checkpoint-360000/rng_state_1.pth +3 -0
- gemma3-1B-ddm/checkpoint-360000/rng_state_2.pth +3 -0
- gemma3-1B-ddm/checkpoint-360000/rng_state_3.pth +3 -0
- gemma3-1B-ddm/checkpoint-360000/scheduler.pt +3 -0
- gemma3-1B-ddm/checkpoint-360000/special_tokens_map.json +43 -0
- gemma3-1B-ddm/checkpoint-360000/tokenizer.json +3 -0
- gemma3-1B-ddm/checkpoint-360000/tokenizer.model +3 -0
- gemma3-1B-ddm/checkpoint-360000/tokenizer_config.json +0 -0
- gemma3-1B-ddm/checkpoint-360000/trainer_state.json +0 -0
- gemma3-1B-ddm/checkpoint-360000/training_args.bin +3 -0
- gemma3-1B-ddm/checkpoint-480000/added_tokens.json +4 -0
- gemma3-1B-ddm/checkpoint-480000/config.json +34 -0
.gitattributes
CHANGED
|
@@ -35,3 +35,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
gemma3-1B-pt/checkpoint-120000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 37 |
*tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
gemma3-1B-pt/checkpoint-120000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 37 |
*tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
*.jsonl filter=lfs diff=lfs merge=lfs -text
|
gemma3-1B-ddm/added_tokens.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"<image_soft_token>": 262144,
|
| 3 |
+
"[MASK]": 262145
|
| 4 |
+
}
|
gemma3-1B-ddm/all_results.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"epoch": 1.0,
|
| 3 |
+
"eval_loss": 6.96756649017334,
|
| 4 |
+
"eval_runtime": 8.0075,
|
| 5 |
+
"eval_samples_per_second": 0.375,
|
| 6 |
+
"eval_steps_per_second": 0.125,
|
| 7 |
+
"perplexity": 1061.6361026099812,
|
| 8 |
+
"total_flos": 1.6465434123436032e+19,
|
| 9 |
+
"train_loss": 1.934643552494049,
|
| 10 |
+
"train_runtime": 49285.6778,
|
| 11 |
+
"train_samples_per_second": 155.826,
|
| 12 |
+
"train_steps_per_second": 9.739
|
| 13 |
+
}
|
gemma3-1B-ddm/checkpoint-120000/added_tokens.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"<image_soft_token>": 262144,
|
| 3 |
+
"[MASK]": 262145
|
| 4 |
+
}
|
gemma3-1B-ddm/checkpoint-120000/config.json
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"DiscreteDiffusionModel"
|
| 4 |
+
],
|
| 5 |
+
"attention_bias": false,
|
| 6 |
+
"attention_dropout": 0.0,
|
| 7 |
+
"attn_logit_softcapping": null,
|
| 8 |
+
"bos_token_id": 2,
|
| 9 |
+
"cache_implementation": "hybrid",
|
| 10 |
+
"eos_token_id": 1,
|
| 11 |
+
"final_logit_softcapping": null,
|
| 12 |
+
"head_dim": 256,
|
| 13 |
+
"hidden_activation": "gelu_pytorch_tanh",
|
| 14 |
+
"hidden_size": 1152,
|
| 15 |
+
"initializer_range": 0.02,
|
| 16 |
+
"intermediate_size": 6912,
|
| 17 |
+
"max_position_embeddings": 32768,
|
| 18 |
+
"model_type": "gemma3_text",
|
| 19 |
+
"num_attention_heads": 4,
|
| 20 |
+
"num_hidden_layers": 26,
|
| 21 |
+
"num_key_value_heads": 1,
|
| 22 |
+
"pad_token_id": 0,
|
| 23 |
+
"query_pre_attn_scalar": 256,
|
| 24 |
+
"rms_norm_eps": 1e-06,
|
| 25 |
+
"rope_local_base_freq": 10000,
|
| 26 |
+
"rope_scaling": null,
|
| 27 |
+
"rope_theta": 1000000,
|
| 28 |
+
"sliding_window": 512,
|
| 29 |
+
"sliding_window_pattern": 6,
|
| 30 |
+
"torch_dtype": "float32",
|
| 31 |
+
"transformers_version": "4.50.3",
|
| 32 |
+
"use_cache": true,
|
| 33 |
+
"vocab_size": 262144
|
| 34 |
+
}
|
gemma3-1B-ddm/checkpoint-120000/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:430fdc91ffa83fa4472ea0f3b01ffb9bf1056a70fc3237ad4c11704eda34eb86
|
| 3 |
+
size 7999400602
|
gemma3-1B-ddm/checkpoint-120000/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5b01dbd3ec2a0fb70c1b9bd9c0c5a8a3dcacc97b5c4db06e18837264333fa49b
|
| 3 |
+
size 3999670746
|
gemma3-1B-ddm/checkpoint-120000/rng_state_0.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3e894b7e50fa42075917102d282ab5b9c6306d1d9e60e1fdaa52f656fba147d9
|
| 3 |
+
size 15024
|
gemma3-1B-ddm/checkpoint-120000/rng_state_1.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b296ea55c7f783b1daa5e92b0ad057bc4a2d0e55449b0b2be2144ca07f57569d
|
| 3 |
+
size 15024
|
gemma3-1B-ddm/checkpoint-120000/rng_state_2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3e4140e7db1b230a40394c2fc9604e2bc53e71ec0472f783ef6af3428b27ed6a
|
| 3 |
+
size 15024
|
gemma3-1B-ddm/checkpoint-120000/rng_state_3.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3dac173b6861602b7be0fbc84d79ba5d5c14651a1aa0db57575d4c3d2c1bcb5d
|
| 3 |
+
size 15024
|
gemma3-1B-ddm/checkpoint-120000/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2173489f9f315292c7c9c49aae56528ab60b0229c1e1e1fba1e26464b44e85cf
|
| 3 |
+
size 1064
|
gemma3-1B-ddm/checkpoint-120000/special_tokens_map.json
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"additional_special_tokens": [
|
| 3 |
+
{
|
| 4 |
+
"content": "[MASK]",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false
|
| 9 |
+
}
|
| 10 |
+
],
|
| 11 |
+
"boi_token": "<start_of_image>",
|
| 12 |
+
"bos_token": {
|
| 13 |
+
"content": "<bos>",
|
| 14 |
+
"lstrip": false,
|
| 15 |
+
"normalized": false,
|
| 16 |
+
"rstrip": false,
|
| 17 |
+
"single_word": false
|
| 18 |
+
},
|
| 19 |
+
"eoi_token": "<end_of_image>",
|
| 20 |
+
"eos_token": {
|
| 21 |
+
"content": "<eos>",
|
| 22 |
+
"lstrip": false,
|
| 23 |
+
"normalized": false,
|
| 24 |
+
"rstrip": false,
|
| 25 |
+
"single_word": false
|
| 26 |
+
},
|
| 27 |
+
"image_token": "<image_soft_token>",
|
| 28 |
+
"mask_token": "[MASK]",
|
| 29 |
+
"pad_token": {
|
| 30 |
+
"content": "<pad>",
|
| 31 |
+
"lstrip": false,
|
| 32 |
+
"normalized": false,
|
| 33 |
+
"rstrip": false,
|
| 34 |
+
"single_word": false
|
| 35 |
+
},
|
| 36 |
+
"unk_token": {
|
| 37 |
+
"content": "<unk>",
|
| 38 |
+
"lstrip": false,
|
| 39 |
+
"normalized": false,
|
| 40 |
+
"rstrip": false,
|
| 41 |
+
"single_word": false
|
| 42 |
+
}
|
| 43 |
+
}
|
gemma3-1B-ddm/checkpoint-120000/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:88ec6df915623f4b307188dbb6fe60ddb8a1ef273c864ba38de97a320dd17dea
|
| 3 |
+
size 33384751
|
gemma3-1B-ddm/checkpoint-120000/tokenizer.model
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1299c11d7cf632ef3b4e11937501358ada021bbdf7c47638d13c0ee982f2e79c
|
| 3 |
+
size 4689074
|
gemma3-1B-ddm/checkpoint-120000/tokenizer_config.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
gemma3-1B-ddm/checkpoint-120000/trainer_state.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
gemma3-1B-ddm/checkpoint-120000/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3ed1453815a1e170e9687bada2064f5bb86de85ac97665fc738f4373685cb5f2
|
| 3 |
+
size 5560
|
gemma3-1B-ddm/checkpoint-240000/added_tokens.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"<image_soft_token>": 262144,
|
| 3 |
+
"[MASK]": 262145
|
| 4 |
+
}
|
gemma3-1B-ddm/checkpoint-240000/config.json
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"DiscreteDiffusionModel"
|
| 4 |
+
],
|
| 5 |
+
"attention_bias": false,
|
| 6 |
+
"attention_dropout": 0.0,
|
| 7 |
+
"attn_logit_softcapping": null,
|
| 8 |
+
"bos_token_id": 2,
|
| 9 |
+
"cache_implementation": "hybrid",
|
| 10 |
+
"eos_token_id": 1,
|
| 11 |
+
"final_logit_softcapping": null,
|
| 12 |
+
"head_dim": 256,
|
| 13 |
+
"hidden_activation": "gelu_pytorch_tanh",
|
| 14 |
+
"hidden_size": 1152,
|
| 15 |
+
"initializer_range": 0.02,
|
| 16 |
+
"intermediate_size": 6912,
|
| 17 |
+
"max_position_embeddings": 32768,
|
| 18 |
+
"model_type": "gemma3_text",
|
| 19 |
+
"num_attention_heads": 4,
|
| 20 |
+
"num_hidden_layers": 26,
|
| 21 |
+
"num_key_value_heads": 1,
|
| 22 |
+
"pad_token_id": 0,
|
| 23 |
+
"query_pre_attn_scalar": 256,
|
| 24 |
+
"rms_norm_eps": 1e-06,
|
| 25 |
+
"rope_local_base_freq": 10000,
|
| 26 |
+
"rope_scaling": null,
|
| 27 |
+
"rope_theta": 1000000,
|
| 28 |
+
"sliding_window": 512,
|
| 29 |
+
"sliding_window_pattern": 6,
|
| 30 |
+
"torch_dtype": "float32",
|
| 31 |
+
"transformers_version": "4.50.3",
|
| 32 |
+
"use_cache": true,
|
| 33 |
+
"vocab_size": 262144
|
| 34 |
+
}
|
gemma3-1B-ddm/checkpoint-240000/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:34c54e12fc1870292424af15e56b96c876a1c85d5562a60c9e186049eb09b721
|
| 3 |
+
size 7999400602
|
gemma3-1B-ddm/checkpoint-240000/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:59e02e1a04e54dda53c0160d00a45dee1880b1cce1e2c914599aea09d74c23e5
|
| 3 |
+
size 3999670746
|
gemma3-1B-ddm/checkpoint-240000/rng_state_0.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:68c9e3b83ce71f9827dc396ecbd67988cf9ea3d66952c77ee055a422e9b661fd
|
| 3 |
+
size 15024
|
gemma3-1B-ddm/checkpoint-240000/rng_state_1.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0960ec35cd3f25dad206f3aa4facdf51b93cd3c5bba76304f53595c5da58bc30
|
| 3 |
+
size 15024
|
gemma3-1B-ddm/checkpoint-240000/rng_state_2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7b0d19526c545ff371fee64088dc86447f743531f3c38187afb54260ebd50861
|
| 3 |
+
size 15024
|
gemma3-1B-ddm/checkpoint-240000/rng_state_3.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:355736795ed0c22e385ae4358c372730a386e0e4c449db70d4d161b9a2128328
|
| 3 |
+
size 15024
|
gemma3-1B-ddm/checkpoint-240000/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:125e71a4d2f4439d4912051c36abe8fcb76a0edb9d5fefe8edbadf2cca30613e
|
| 3 |
+
size 1064
|
gemma3-1B-ddm/checkpoint-240000/special_tokens_map.json
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"additional_special_tokens": [
|
| 3 |
+
{
|
| 4 |
+
"content": "[MASK]",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false
|
| 9 |
+
}
|
| 10 |
+
],
|
| 11 |
+
"boi_token": "<start_of_image>",
|
| 12 |
+
"bos_token": {
|
| 13 |
+
"content": "<bos>",
|
| 14 |
+
"lstrip": false,
|
| 15 |
+
"normalized": false,
|
| 16 |
+
"rstrip": false,
|
| 17 |
+
"single_word": false
|
| 18 |
+
},
|
| 19 |
+
"eoi_token": "<end_of_image>",
|
| 20 |
+
"eos_token": {
|
| 21 |
+
"content": "<eos>",
|
| 22 |
+
"lstrip": false,
|
| 23 |
+
"normalized": false,
|
| 24 |
+
"rstrip": false,
|
| 25 |
+
"single_word": false
|
| 26 |
+
},
|
| 27 |
+
"image_token": "<image_soft_token>",
|
| 28 |
+
"mask_token": "[MASK]",
|
| 29 |
+
"pad_token": {
|
| 30 |
+
"content": "<pad>",
|
| 31 |
+
"lstrip": false,
|
| 32 |
+
"normalized": false,
|
| 33 |
+
"rstrip": false,
|
| 34 |
+
"single_word": false
|
| 35 |
+
},
|
| 36 |
+
"unk_token": {
|
| 37 |
+
"content": "<unk>",
|
| 38 |
+
"lstrip": false,
|
| 39 |
+
"normalized": false,
|
| 40 |
+
"rstrip": false,
|
| 41 |
+
"single_word": false
|
| 42 |
+
}
|
| 43 |
+
}
|
gemma3-1B-ddm/checkpoint-240000/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:88ec6df915623f4b307188dbb6fe60ddb8a1ef273c864ba38de97a320dd17dea
|
| 3 |
+
size 33384751
|
gemma3-1B-ddm/checkpoint-240000/tokenizer.model
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1299c11d7cf632ef3b4e11937501358ada021bbdf7c47638d13c0ee982f2e79c
|
| 3 |
+
size 4689074
|
gemma3-1B-ddm/checkpoint-240000/tokenizer_config.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
gemma3-1B-ddm/checkpoint-240000/trainer_state.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
gemma3-1B-ddm/checkpoint-240000/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3ed1453815a1e170e9687bada2064f5bb86de85ac97665fc738f4373685cb5f2
|
| 3 |
+
size 5560
|
gemma3-1B-ddm/checkpoint-360000/added_tokens.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"<image_soft_token>": 262144,
|
| 3 |
+
"[MASK]": 262145
|
| 4 |
+
}
|
gemma3-1B-ddm/checkpoint-360000/config.json
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"DiscreteDiffusionModel"
|
| 4 |
+
],
|
| 5 |
+
"attention_bias": false,
|
| 6 |
+
"attention_dropout": 0.0,
|
| 7 |
+
"attn_logit_softcapping": null,
|
| 8 |
+
"bos_token_id": 2,
|
| 9 |
+
"cache_implementation": "hybrid",
|
| 10 |
+
"eos_token_id": 1,
|
| 11 |
+
"final_logit_softcapping": null,
|
| 12 |
+
"head_dim": 256,
|
| 13 |
+
"hidden_activation": "gelu_pytorch_tanh",
|
| 14 |
+
"hidden_size": 1152,
|
| 15 |
+
"initializer_range": 0.02,
|
| 16 |
+
"intermediate_size": 6912,
|
| 17 |
+
"max_position_embeddings": 32768,
|
| 18 |
+
"model_type": "gemma3_text",
|
| 19 |
+
"num_attention_heads": 4,
|
| 20 |
+
"num_hidden_layers": 26,
|
| 21 |
+
"num_key_value_heads": 1,
|
| 22 |
+
"pad_token_id": 0,
|
| 23 |
+
"query_pre_attn_scalar": 256,
|
| 24 |
+
"rms_norm_eps": 1e-06,
|
| 25 |
+
"rope_local_base_freq": 10000,
|
| 26 |
+
"rope_scaling": null,
|
| 27 |
+
"rope_theta": 1000000,
|
| 28 |
+
"sliding_window": 512,
|
| 29 |
+
"sliding_window_pattern": 6,
|
| 30 |
+
"torch_dtype": "float32",
|
| 31 |
+
"transformers_version": "4.50.3",
|
| 32 |
+
"use_cache": true,
|
| 33 |
+
"vocab_size": 262144
|
| 34 |
+
}
|
gemma3-1B-ddm/checkpoint-360000/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5606663fb2c713f7872faea34b9a04ef2d64b8d66524fc648c6dc16b5f32d0ed
|
| 3 |
+
size 7999400602
|
gemma3-1B-ddm/checkpoint-360000/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cd686f1931ecc6545320617d366da822a4cce6f5af12501c4a775a7551ade70d
|
| 3 |
+
size 3999670746
|
gemma3-1B-ddm/checkpoint-360000/rng_state_0.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:92a967c21dce6495ab72c951463ab42ebf26b2cfaa8dfd1fe15aded1b4f77b89
|
| 3 |
+
size 15024
|
gemma3-1B-ddm/checkpoint-360000/rng_state_1.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:214be0da0c26984a5c4b665e3ce7b02a22471a0e5b32d021baade307b66e298d
|
| 3 |
+
size 15024
|
gemma3-1B-ddm/checkpoint-360000/rng_state_2.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4b006ef34673b6ab5b272b75393124ad1f31683a13e6f1f4e2630d779f91f0f3
|
| 3 |
+
size 15024
|
gemma3-1B-ddm/checkpoint-360000/rng_state_3.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5a045675926bf97f5dbb71ec5c8d3eb2a73978fe65eb1e0978a4489243f38215
|
| 3 |
+
size 15024
|
gemma3-1B-ddm/checkpoint-360000/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5541de360ff9f209dfc8db93cd15be6870a749a69f7a2ec6722274d6424c495b
|
| 3 |
+
size 1064
|
gemma3-1B-ddm/checkpoint-360000/special_tokens_map.json
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"additional_special_tokens": [
|
| 3 |
+
{
|
| 4 |
+
"content": "[MASK]",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false
|
| 9 |
+
}
|
| 10 |
+
],
|
| 11 |
+
"boi_token": "<start_of_image>",
|
| 12 |
+
"bos_token": {
|
| 13 |
+
"content": "<bos>",
|
| 14 |
+
"lstrip": false,
|
| 15 |
+
"normalized": false,
|
| 16 |
+
"rstrip": false,
|
| 17 |
+
"single_word": false
|
| 18 |
+
},
|
| 19 |
+
"eoi_token": "<end_of_image>",
|
| 20 |
+
"eos_token": {
|
| 21 |
+
"content": "<eos>",
|
| 22 |
+
"lstrip": false,
|
| 23 |
+
"normalized": false,
|
| 24 |
+
"rstrip": false,
|
| 25 |
+
"single_word": false
|
| 26 |
+
},
|
| 27 |
+
"image_token": "<image_soft_token>",
|
| 28 |
+
"mask_token": "[MASK]",
|
| 29 |
+
"pad_token": {
|
| 30 |
+
"content": "<pad>",
|
| 31 |
+
"lstrip": false,
|
| 32 |
+
"normalized": false,
|
| 33 |
+
"rstrip": false,
|
| 34 |
+
"single_word": false
|
| 35 |
+
},
|
| 36 |
+
"unk_token": {
|
| 37 |
+
"content": "<unk>",
|
| 38 |
+
"lstrip": false,
|
| 39 |
+
"normalized": false,
|
| 40 |
+
"rstrip": false,
|
| 41 |
+
"single_word": false
|
| 42 |
+
}
|
| 43 |
+
}
|
gemma3-1B-ddm/checkpoint-360000/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:88ec6df915623f4b307188dbb6fe60ddb8a1ef273c864ba38de97a320dd17dea
|
| 3 |
+
size 33384751
|
gemma3-1B-ddm/checkpoint-360000/tokenizer.model
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1299c11d7cf632ef3b4e11937501358ada021bbdf7c47638d13c0ee982f2e79c
|
| 3 |
+
size 4689074
|
gemma3-1B-ddm/checkpoint-360000/tokenizer_config.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
gemma3-1B-ddm/checkpoint-360000/trainer_state.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
gemma3-1B-ddm/checkpoint-360000/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3ed1453815a1e170e9687bada2064f5bb86de85ac97665fc738f4373685cb5f2
|
| 3 |
+
size 5560
|
gemma3-1B-ddm/checkpoint-480000/added_tokens.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"<image_soft_token>": 262144,
|
| 3 |
+
"[MASK]": 262145
|
| 4 |
+
}
|
gemma3-1B-ddm/checkpoint-480000/config.json
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"DiscreteDiffusionModel"
|
| 4 |
+
],
|
| 5 |
+
"attention_bias": false,
|
| 6 |
+
"attention_dropout": 0.0,
|
| 7 |
+
"attn_logit_softcapping": null,
|
| 8 |
+
"bos_token_id": 2,
|
| 9 |
+
"cache_implementation": "hybrid",
|
| 10 |
+
"eos_token_id": 1,
|
| 11 |
+
"final_logit_softcapping": null,
|
| 12 |
+
"head_dim": 256,
|
| 13 |
+
"hidden_activation": "gelu_pytorch_tanh",
|
| 14 |
+
"hidden_size": 1152,
|
| 15 |
+
"initializer_range": 0.02,
|
| 16 |
+
"intermediate_size": 6912,
|
| 17 |
+
"max_position_embeddings": 32768,
|
| 18 |
+
"model_type": "gemma3_text",
|
| 19 |
+
"num_attention_heads": 4,
|
| 20 |
+
"num_hidden_layers": 26,
|
| 21 |
+
"num_key_value_heads": 1,
|
| 22 |
+
"pad_token_id": 0,
|
| 23 |
+
"query_pre_attn_scalar": 256,
|
| 24 |
+
"rms_norm_eps": 1e-06,
|
| 25 |
+
"rope_local_base_freq": 10000,
|
| 26 |
+
"rope_scaling": null,
|
| 27 |
+
"rope_theta": 1000000,
|
| 28 |
+
"sliding_window": 512,
|
| 29 |
+
"sliding_window_pattern": 6,
|
| 30 |
+
"torch_dtype": "float32",
|
| 31 |
+
"transformers_version": "4.50.3",
|
| 32 |
+
"use_cache": true,
|
| 33 |
+
"vocab_size": 262144
|
| 34 |
+
}
|