youuor7r committed on Nov 28, 2025

Commit

17e4d32

0 Parent(s):

Clean initial commit with LFS

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +37 -0
README.md +3 -0
gemma3-1B-ddm/trainer_log.jsonl +0 -0
gemma3-1B-pt/checkpoint-120000/added_tokens.json +4 -0
gemma3-1B-pt/checkpoint-120000/config.json +34 -0
gemma3-1B-pt/checkpoint-120000/generation_config.json +13 -0
gemma3-1B-pt/checkpoint-120000/optimizer.pt +3 -0
gemma3-1B-pt/checkpoint-120000/pytorch_model.bin +3 -0
gemma3-1B-pt/checkpoint-120000/rng_state_0.pth +3 -0
gemma3-1B-pt/checkpoint-120000/rng_state_1.pth +3 -0
gemma3-1B-pt/checkpoint-120000/rng_state_2.pth +3 -0
gemma3-1B-pt/checkpoint-120000/rng_state_3.pth +3 -0
gemma3-1B-pt/checkpoint-120000/scheduler.pt +3 -0
gemma3-1B-pt/checkpoint-120000/special_tokens_map.json +43 -0
gemma3-1B-pt/checkpoint-120000/tokenizer.json +3 -0
gemma3-1B-pt/checkpoint-120000/tokenizer.model +3 -0
gemma3-1B-pt/checkpoint-120000/tokenizer_config.json +0 -0
gemma3-1B-pt/checkpoint-120000/trainer_state.json +0 -0
gemma3-1B-pt/checkpoint-120000/training_args.bin +3 -0
gemma3-1B-pt/trainer_log.jsonl +0 -0
gemma3-270m-ddm/added_tokens.json +4 -0
gemma3-270m-ddm/all_results.json +13 -0
gemma3-270m-ddm/checkpoint-120000/added_tokens.json +4 -0
gemma3-270m-ddm/checkpoint-120000/config.json +56 -0
gemma3-270m-ddm/checkpoint-120000/optimizer.pt +3 -0
gemma3-270m-ddm/checkpoint-120000/pytorch_model.bin +3 -0
gemma3-270m-ddm/checkpoint-120000/rng_state.pth +3 -0
gemma3-270m-ddm/checkpoint-120000/scheduler.pt +3 -0
gemma3-270m-ddm/checkpoint-120000/special_tokens_map.json +43 -0
gemma3-270m-ddm/checkpoint-120000/tokenizer.json +3 -0
gemma3-270m-ddm/checkpoint-120000/tokenizer.model +3 -0
gemma3-270m-ddm/checkpoint-120000/tokenizer_config.json +0 -0
gemma3-270m-ddm/checkpoint-120000/trainer_state.json +0 -0
gemma3-270m-ddm/checkpoint-120000/training_args.bin +3 -0
gemma3-270m-ddm/checkpoint-240000/added_tokens.json +4 -0
gemma3-270m-ddm/checkpoint-240000/config.json +56 -0
gemma3-270m-ddm/checkpoint-240000/optimizer.pt +3 -0
gemma3-270m-ddm/checkpoint-240000/pytorch_model.bin +3 -0
gemma3-270m-ddm/checkpoint-240000/rng_state.pth +3 -0
gemma3-270m-ddm/checkpoint-240000/scheduler.pt +3 -0
gemma3-270m-ddm/checkpoint-240000/special_tokens_map.json +43 -0
gemma3-270m-ddm/checkpoint-240000/tokenizer.json +3 -0
gemma3-270m-ddm/checkpoint-240000/tokenizer.model +3 -0
gemma3-270m-ddm/checkpoint-240000/tokenizer_config.json +0 -0
gemma3-270m-ddm/checkpoint-240000/trainer_state.json +0 -0
gemma3-270m-ddm/checkpoint-240000/training_args.bin +3 -0
gemma3-270m-ddm/checkpoint-360000/added_tokens.json +4 -0
gemma3-270m-ddm/checkpoint-360000/config.json +56 -0
gemma3-270m-ddm/checkpoint-360000/optimizer.pt +3 -0
gemma3-270m-ddm/checkpoint-360000/pytorch_model.bin +3 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,37 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+gemma3-1B-pt/checkpoint-120000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+*tokenizer.json filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,3 @@

+---
+license: apache-2.0
+---

gemma3-1B-ddm/trainer_log.jsonl ADDED Viewed

The diff for this file is too large to render. See raw diff

gemma3-1B-pt/checkpoint-120000/added_tokens.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "<image_soft_token>": 262144,
+  "[MASK]": 262145
+}

gemma3-1B-pt/checkpoint-120000/config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "architectures": [
+    "Gemma3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "attn_logit_softcapping": null,
+  "bos_token_id": 2,
+  "cache_implementation": "hybrid",
+  "eos_token_id": 1,
+  "final_logit_softcapping": null,
+  "head_dim": 256,
+  "hidden_activation": "gelu_pytorch_tanh",
+  "hidden_size": 1152,
+  "initializer_range": 0.02,
+  "intermediate_size": 6912,
+  "max_position_embeddings": 32768,
+  "model_type": "gemma3_text",
+  "num_attention_heads": 4,
+  "num_hidden_layers": 26,
+  "num_key_value_heads": 1,
+  "pad_token_id": 0,
+  "query_pre_attn_scalar": 256,
+  "rms_norm_eps": 1e-06,
+  "rope_local_base_freq": 10000,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": 512,
+  "sliding_window_pattern": 6,
+  "torch_dtype": "float32",
+  "transformers_version": "4.50.3",
+  "use_cache": false,
+  "vocab_size": 262146
+}

gemma3-1B-pt/checkpoint-120000/generation_config.json ADDED Viewed

	@@ -0,0 +1,13 @@

+{
+  "bos_token_id": 2,
+  "cache_implementation": "hybrid",
+  "do_sample": true,
+  "eos_token_id": [
+    1,
+    106
+  ],
+  "pad_token_id": 0,
+  "top_k": 64,
+  "top_p": 0.95,
+  "transformers_version": "4.50.3"
+}

gemma3-1B-pt/checkpoint-120000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0b8a92151db63a81a698d230e1b20dec67caa1a26870a7a8bd2fd5f402f62327
+size 7999400602

gemma3-1B-pt/checkpoint-120000/pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:324b917d15865905000bc576786eeda6e6180ee9c697c17db5933983253cae70
+size 3999668058

gemma3-1B-pt/checkpoint-120000/rng_state_0.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6b5fa4985ea13e683f6cfcd107c46260dfd8a9a5411fcb6ecf90747bafbae7b6
+size 15024

gemma3-1B-pt/checkpoint-120000/rng_state_1.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a391ee6404bb07343ff7b9ec9578ba2e77224ea20f62687c8cd109e35c607754
+size 15024

gemma3-1B-pt/checkpoint-120000/rng_state_2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:de3b87ab1f04ccb08812d6d65103ffcb7504a5c5d8dbcead8ccbedbbd3f42242
+size 15024

gemma3-1B-pt/checkpoint-120000/rng_state_3.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b5e9c65e9398d2f42b1d82790dfb178f12b9e7d25fea491c48aa2cb96a324ce9
+size 15024

gemma3-1B-pt/checkpoint-120000/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2173489f9f315292c7c9c49aae56528ab60b0229c1e1e1fba1e26464b44e85cf
+size 1064

gemma3-1B-pt/checkpoint-120000/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+  "additional_special_tokens": [
+    {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    }
+  ],
+  "boi_token": "<start_of_image>",
+  "bos_token": {
+    "content": "<bos>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eoi_token": "<end_of_image>",
+  "eos_token": {
+    "content": "<eos>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "image_token": "<image_soft_token>",
+  "mask_token": "[MASK]",
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

gemma3-1B-pt/checkpoint-120000/tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:88ec6df915623f4b307188dbb6fe60ddb8a1ef273c864ba38de97a320dd17dea
+size 33384751

gemma3-1B-pt/checkpoint-120000/tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1299c11d7cf632ef3b4e11937501358ada021bbdf7c47638d13c0ee982f2e79c
+size 4689074

gemma3-1B-pt/checkpoint-120000/tokenizer_config.json ADDED Viewed

The diff for this file is too large to render. See raw diff

gemma3-1B-pt/checkpoint-120000/trainer_state.json ADDED Viewed

The diff for this file is too large to render. See raw diff

gemma3-1B-pt/checkpoint-120000/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:319ce1c7ab697eca8dcb5848c93a6b3f91f03bb2ecf75c55ac3763290641c176
+size 5560

gemma3-1B-pt/trainer_log.jsonl ADDED Viewed

The diff for this file is too large to render. See raw diff

gemma3-270m-ddm/added_tokens.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "<image_soft_token>": 262144,
+  "[MASK]": 262145
+}

gemma3-270m-ddm/all_results.json ADDED Viewed

	@@ -0,0 +1,13 @@

+{
+    "epoch": 1.0,
+    "eval_loss": 10.049214363098145,
+    "eval_runtime": 7.0093,
+    "eval_samples_per_second": 1.427,
+    "eval_steps_per_second": 0.428,
+    "perplexity": 23137.601949056367,
+    "total_flos": 5.9174692061184e+17,
+    "train_loss": 9.62966806418101,
+    "train_runtime": 83147.3938,
+    "train_samples_per_second": 23.092,
+    "train_steps_per_second": 5.773
+}

gemma3-270m-ddm/checkpoint-120000/added_tokens.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "<image_soft_token>": 262144,
+  "[MASK]": 262145
+}

gemma3-270m-ddm/checkpoint-120000/config.json ADDED Viewed

	@@ -0,0 +1,56 @@

+{
+  "_sliding_window_pattern": 6,
+  "architectures": [
+    "DiscreteDiffusionModel"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "attn_logit_softcapping": null,
+  "bos_token_id": 2,
+  "cache_implementation": "hybrid",
+  "eos_token_id": 1,
+  "final_logit_softcapping": null,
+  "head_dim": 256,
+  "hidden_activation": "gelu_pytorch_tanh",
+  "hidden_size": 640,
+  "initializer_range": 0.02,
+  "intermediate_size": 2048,
+  "layer_types": [
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 32768,
+  "model_type": "gemma3_text",
+  "num_attention_heads": 4,
+  "num_hidden_layers": 18,
+  "num_key_value_heads": 1,
+  "pad_token_id": 0,
+  "query_pre_attn_scalar": 256,
+  "rms_norm_eps": 1e-06,
+  "rope_local_base_freq": 10000.0,
+  "rope_scaling": null,
+  "rope_theta": 1000000.0,
+  "sliding_window": 512,
+  "sliding_window_pattern": 6,
+  "torch_dtype": "float32",
+  "transformers_version": "4.50.3",
+  "use_bidirectional_attention": false,
+  "use_cache": true,
+  "vocab_size": 262144
+}

gemma3-270m-ddm/checkpoint-120000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3e18ab8cf1dd72162887ceddc753d433bda169e7d846e39d368a81563c3fe937
+size 2144993594

gemma3-270m-ddm/checkpoint-120000/pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b4ca1ce5e21b0de29aede5a6ded446371f672b692f489540fc9600f870a45317
+size 1072479482

gemma3-270m-ddm/checkpoint-120000/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d56b981fbeeb6b3ce4c934fb4192fb848ef9132e287b0c4bb648efe597ad4418
+size 14244

gemma3-270m-ddm/checkpoint-120000/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2173489f9f315292c7c9c49aae56528ab60b0229c1e1e1fba1e26464b44e85cf
+size 1064

gemma3-270m-ddm/checkpoint-120000/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+  "additional_special_tokens": [
+    {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    }
+  ],
+  "boi_token": "<start_of_image>",
+  "bos_token": {
+    "content": "<bos>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eoi_token": "<end_of_image>",
+  "eos_token": {
+    "content": "<eos>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "image_token": "<image_soft_token>",
+  "mask_token": "[MASK]",
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

gemma3-270m-ddm/checkpoint-120000/tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:88ec6df915623f4b307188dbb6fe60ddb8a1ef273c864ba38de97a320dd17dea
+size 33384751

gemma3-270m-ddm/checkpoint-120000/tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1299c11d7cf632ef3b4e11937501358ada021bbdf7c47638d13c0ee982f2e79c
+size 4689074

gemma3-270m-ddm/checkpoint-120000/tokenizer_config.json ADDED Viewed

The diff for this file is too large to render. See raw diff

gemma3-270m-ddm/checkpoint-120000/trainer_state.json ADDED Viewed

The diff for this file is too large to render. See raw diff

gemma3-270m-ddm/checkpoint-120000/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:32fd4894f278427918119990b13eb00d0dc868801bb0657770dc401403f32b22
+size 5560

gemma3-270m-ddm/checkpoint-240000/added_tokens.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "<image_soft_token>": 262144,
+  "[MASK]": 262145
+}

gemma3-270m-ddm/checkpoint-240000/config.json ADDED Viewed

	@@ -0,0 +1,56 @@

+{
+  "_sliding_window_pattern": 6,
+  "architectures": [
+    "DiscreteDiffusionModel"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "attn_logit_softcapping": null,
+  "bos_token_id": 2,
+  "cache_implementation": "hybrid",
+  "eos_token_id": 1,
+  "final_logit_softcapping": null,
+  "head_dim": 256,
+  "hidden_activation": "gelu_pytorch_tanh",
+  "hidden_size": 640,
+  "initializer_range": 0.02,
+  "intermediate_size": 2048,
+  "layer_types": [
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 32768,
+  "model_type": "gemma3_text",
+  "num_attention_heads": 4,
+  "num_hidden_layers": 18,
+  "num_key_value_heads": 1,
+  "pad_token_id": 0,
+  "query_pre_attn_scalar": 256,
+  "rms_norm_eps": 1e-06,
+  "rope_local_base_freq": 10000.0,
+  "rope_scaling": null,
+  "rope_theta": 1000000.0,
+  "sliding_window": 512,
+  "sliding_window_pattern": 6,
+  "torch_dtype": "float32",
+  "transformers_version": "4.50.3",
+  "use_bidirectional_attention": false,
+  "use_cache": true,
+  "vocab_size": 262144
+}

gemma3-270m-ddm/checkpoint-240000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:571d6bca660c39a1203f10b8cf646e5998bec4503e222eb488bf0c255489337f
+size 2144993594

gemma3-270m-ddm/checkpoint-240000/pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:848669456fd3423f3d625c78bea22dde5526d6d8c4141376b983af90e7249914
+size 1072479482

gemma3-270m-ddm/checkpoint-240000/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7f91ad6dea72095bead20cef99aae007cbafa1ca2ea31bd6504f91f84388d5b6
+size 14244

gemma3-270m-ddm/checkpoint-240000/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:125e71a4d2f4439d4912051c36abe8fcb76a0edb9d5fefe8edbadf2cca30613e
+size 1064

gemma3-270m-ddm/checkpoint-240000/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+  "additional_special_tokens": [
+    {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    }
+  ],
+  "boi_token": "<start_of_image>",
+  "bos_token": {
+    "content": "<bos>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eoi_token": "<end_of_image>",
+  "eos_token": {
+    "content": "<eos>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "image_token": "<image_soft_token>",
+  "mask_token": "[MASK]",
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

gemma3-270m-ddm/checkpoint-240000/tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:88ec6df915623f4b307188dbb6fe60ddb8a1ef273c864ba38de97a320dd17dea
+size 33384751

gemma3-270m-ddm/checkpoint-240000/tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1299c11d7cf632ef3b4e11937501358ada021bbdf7c47638d13c0ee982f2e79c
+size 4689074

gemma3-270m-ddm/checkpoint-240000/tokenizer_config.json ADDED Viewed

The diff for this file is too large to render. See raw diff

gemma3-270m-ddm/checkpoint-240000/trainer_state.json ADDED Viewed

The diff for this file is too large to render. See raw diff

gemma3-270m-ddm/checkpoint-240000/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:32fd4894f278427918119990b13eb00d0dc868801bb0657770dc401403f32b22
+size 5560

gemma3-270m-ddm/checkpoint-360000/added_tokens.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "<image_soft_token>": 262144,
+  "[MASK]": 262145
+}

gemma3-270m-ddm/checkpoint-360000/config.json ADDED Viewed

	@@ -0,0 +1,56 @@

+{
+  "_sliding_window_pattern": 6,
+  "architectures": [
+    "DiscreteDiffusionModel"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "attn_logit_softcapping": null,
+  "bos_token_id": 2,
+  "cache_implementation": "hybrid",
+  "eos_token_id": 1,
+  "final_logit_softcapping": null,
+  "head_dim": 256,
+  "hidden_activation": "gelu_pytorch_tanh",
+  "hidden_size": 640,
+  "initializer_range": 0.02,
+  "intermediate_size": 2048,
+  "layer_types": [
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "full_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "sliding_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 32768,
+  "model_type": "gemma3_text",
+  "num_attention_heads": 4,
+  "num_hidden_layers": 18,
+  "num_key_value_heads": 1,
+  "pad_token_id": 0,
+  "query_pre_attn_scalar": 256,
+  "rms_norm_eps": 1e-06,
+  "rope_local_base_freq": 10000.0,
+  "rope_scaling": null,
+  "rope_theta": 1000000.0,
+  "sliding_window": 512,
+  "sliding_window_pattern": 6,
+  "torch_dtype": "float32",
+  "transformers_version": "4.50.3",
+  "use_bidirectional_attention": false,
+  "use_cache": true,
+  "vocab_size": 262144
+}

gemma3-270m-ddm/checkpoint-360000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:01087dc495739e657664892673626e968813189351bf655d830e018f7efda58a
+size 2144993594

gemma3-270m-ddm/checkpoint-360000/pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5790e80e0075c33f208358b7cf3d9b34b342a9b14c6cbdd3c22255a0625cc32b
+size 1072479482