jianzongwu committed
Commit
4271c2c
1 Parent(s): 18c56fa

Upload 42 files

Files changed (42)
  1. image_generator/clip_image_encoder/config.json +23 -0
  2. image_generator/clip_image_encoder/model.safetensors +3 -0
  3. image_generator/image_proj_model/pytorch_model.bin +3 -0
  4. image_generator/magi_image_encoder/config.json +84 -0
  5. image_generator/magi_image_encoder/model.safetensors +3 -0
  6. image_generator/model_index.json +34 -0
  7. image_generator/scheduler/scheduler_config.json +18 -0
  8. image_generator/text_encoder/config.json +24 -0
  9. image_generator/text_encoder/model.safetensors +3 -0
  10. image_generator/text_encoder_2/config.json +24 -0
  11. image_generator/text_encoder_2/model.safetensors +3 -0
  12. image_generator/tokenizer/merges.txt +0 -0
  13. image_generator/tokenizer/special_tokens_map.json +24 -0
  14. image_generator/tokenizer/tokenizer_config.json +33 -0
  15. image_generator/tokenizer/vocab.json +0 -0
  16. image_generator/tokenizer_2/merges.txt +0 -0
  17. image_generator/tokenizer_2/special_tokens_map.json +24 -0
  18. image_generator/tokenizer_2/tokenizer_config.json +33 -0
  19. image_generator/tokenizer_2/vocab.json +0 -0
  20. image_generator/unet/config.json +70 -0
  21. image_generator/unet/diffsensei_unet.safetensors +3 -0
  22. image_generator/vae/config.json +32 -0
  23. image_generator/vae/diffusion_pytorch_model.safetensors +3 -0
  24. mllm/agent/pytorch_model.bin +3 -0
  25. mllm/llm/config.json +31 -0
  26. mllm/llm/generation_config.json +9 -0
  27. mllm/llm/model-00001-of-00011.safetensors +3 -0
  28. mllm/llm/model-00002-of-00011.safetensors +3 -0
  29. mllm/llm/model-00003-of-00011.safetensors +3 -0
  30. mllm/llm/model-00004-of-00011.safetensors +3 -0
  31. mllm/llm/model-00005-of-00011.safetensors +3 -0
  32. mllm/llm/model-00006-of-00011.safetensors +3 -0
  33. mllm/llm/model-00007-of-00011.safetensors +3 -0
  34. mllm/llm/model-00008-of-00011.safetensors +3 -0
  35. mllm/llm/model-00009-of-00011.safetensors +3 -0
  36. mllm/llm/model-00010-of-00011.safetensors +3 -0
  37. mllm/llm/model-00011-of-00011.safetensors +3 -0
  38. mllm/llm/model.safetensors.index.json +410 -0
  39. mllm/tokenizer/added_tokens.json +332 -0
  40. mllm/tokenizer/special_tokens_map.json +24 -0
  41. mllm/tokenizer/tokenizer.model +3 -0
  42. mllm/tokenizer/tokenizer_config.json +34 -0
image_generator/clip_image_encoder/config.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "_name_or_path": "./image_encoder",
+   "architectures": [
+     "CLIPVisionModelWithProjection"
+   ],
+   "attention_dropout": 0.0,
+   "dropout": 0.0,
+   "hidden_act": "gelu",
+   "hidden_size": 1280,
+   "image_size": 224,
+   "initializer_factor": 1.0,
+   "initializer_range": 0.02,
+   "intermediate_size": 5120,
+   "layer_norm_eps": 1e-05,
+   "model_type": "clip_vision_model",
+   "num_attention_heads": 16,
+   "num_channels": 3,
+   "num_hidden_layers": 32,
+   "patch_size": 14,
+   "projection_dim": 1024,
+   "torch_dtype": "float16",
+   "transformers_version": "4.28.0.dev0"
+ }
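This is a ViT-H/14-sized CLIP vision tower (32 layers, hidden size 1280, 1024-d projection). A minimal loading sketch, assuming a local checkout of this repo at checkpoints/diffsensei (a hypothetical path; only mllm/llm/config.json below records such a prefix):

import torch
from transformers import CLIPVisionModelWithProjection

# Load the image encoder from its subfolder; dtype matches "torch_dtype": "float16".
clip_image_encoder = CLIPVisionModelWithProjection.from_pretrained(
    "checkpoints/diffsensei/image_generator/clip_image_encoder",
    torch_dtype=torch.float16,
)
# image_embeds from a forward pass are 1024-d, per "projection_dim": 1024.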
image_generator/clip_image_encoder/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6ca9667da1ca9e0b0f75e46bb030f7e011f44f86cbfb8d5a36590fcd7507b030
+ size 2528373448
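The .safetensors and .bin entries in this commit are Git LFS pointer files: three lines recording the LFS spec version, the blob's sha256, and its byte size. A minimal sketch for checking a pulled blob against its pointer (paths assumed relative to the repo root):

import hashlib

def sha256_of(path: str) -> str:
    # Stream the file in 1 MiB chunks so multi-GB blobs don't load into RAM.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest()

expected = "6ca9667da1ca9e0b0f75e46bb030f7e011f44f86cbfb8d5a36590fcd7507b030"
assert sha256_of("image_generator/clip_image_encoder/model.safetensors") == expected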
image_generator/image_proj_model/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e5620c45d343deb1ac090419de50f87c51af77b85890adfa06318ed195949aa3
+ size 335934224
image_generator/magi_image_encoder/config.json ADDED
@@ -0,0 +1,84 @@
+ {
+   "_name_or_path": "checkpoints/vit-mae-base",
+   "add_cross_attention": false,
+   "architectures": [
+     "ViTMAEForPreTraining"
+   ],
+   "attention_probs_dropout_prob": 0.0,
+   "bad_words_ids": null,
+   "begin_suppress_tokens": null,
+   "bos_token_id": null,
+   "chunk_size_feed_forward": 0,
+   "cross_attention_hidden_size": null,
+   "decoder_hidden_size": 512,
+   "decoder_intermediate_size": 2048,
+   "decoder_num_attention_heads": 16,
+   "decoder_num_hidden_layers": 8,
+   "decoder_start_token_id": null,
+   "diversity_penalty": 0.0,
+   "do_sample": false,
+   "early_stopping": false,
+   "encoder_no_repeat_ngram_size": 0,
+   "eos_token_id": null,
+   "exponential_decay_length_penalty": null,
+   "finetuning_task": null,
+   "forced_bos_token_id": null,
+   "forced_eos_token_id": null,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.0,
+   "hidden_size": 768,
+   "id2label": {
+     "0": "LABEL_0",
+     "1": "LABEL_1"
+   },
+   "image_size": 224,
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "is_decoder": false,
+   "is_encoder_decoder": false,
+   "label2id": {
+     "LABEL_0": 0,
+     "LABEL_1": 1
+   },
+   "layer_norm_eps": 1e-12,
+   "length_penalty": 1.0,
+   "mask_ratio": 0.75,
+   "max_length": 20,
+   "min_length": 0,
+   "model_type": "",
+   "no_repeat_ngram_size": 0,
+   "norm_pix_loss": false,
+   "num_attention_heads": 12,
+   "num_beam_groups": 1,
+   "num_beams": 1,
+   "num_channels": 3,
+   "num_hidden_layers": 12,
+   "num_return_sequences": 1,
+   "output_attentions": false,
+   "output_hidden_states": false,
+   "output_scores": false,
+   "pad_token_id": null,
+   "patch_size": 16,
+   "prefix": null,
+   "problem_type": null,
+   "pruned_heads": {},
+   "qkv_bias": true,
+   "remove_invalid_values": false,
+   "repetition_penalty": 1.0,
+   "return_dict": true,
+   "return_dict_in_generate": false,
+   "sep_token_id": null,
+   "suppress_tokens": null,
+   "task_specific_params": null,
+   "temperature": 1.0,
+   "tf_legacy_loss": false,
+   "tie_encoder_decoder": false,
+   "tie_word_embeddings": true,
+   "tokenizer_class": null,
+   "top_k": 50,
+   "top_p": 1.0,
+   "torch_dtype": "float32",
+   "torchscript": false,
+   "typical_p": 1.0,
+   "use_bfloat16": false
+ }
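Per "_name_or_path", this is a vit-mae-base encoder (the Magi character image encoder). Note the empty "model_type", which can trip up the Auto* classes, so a sketch would load the concrete ViT-MAE class directly (same assumed local checkout as above; transformers may warn about the blank model_type):

from transformers import ViTMAEModel

magi_image_encoder = ViTMAEModel.from_pretrained(
    "checkpoints/diffsensei/image_generator/magi_image_encoder"
)
# 12 layers, hidden size 768, 16x16 patches, per the config above.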
image_generator/magi_image_encoder/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a64913540d219494187ec8ed6716a99bd8b050f33118087f1537d36c22f5ef74
+ size 343216848
image_generator/model_index.json ADDED
@@ -0,0 +1,34 @@
+ {
+   "_class_name": "StableDiffusionXLPipeline",
+   "_diffusers_version": "0.19.0.dev0",
+   "force_zeros_for_empty_prompt": true,
+   "add_watermarker": null,
+   "scheduler": [
+     "diffusers",
+     "EulerDiscreteScheduler"
+   ],
+   "text_encoder": [
+     "transformers",
+     "CLIPTextModel"
+   ],
+   "text_encoder_2": [
+     "transformers",
+     "CLIPTextModelWithProjection"
+   ],
+   "tokenizer": [
+     "transformers",
+     "CLIPTokenizer"
+   ],
+   "tokenizer_2": [
+     "transformers",
+     "CLIPTokenizer"
+   ],
+   "unet": [
+     "diffusers",
+     "UNet2DConditionModel"
+   ],
+   "vae": [
+     "diffusers",
+     "AutoencoderKL"
+   ]
+ }
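model_index.json makes image_generator/ loadable as a stock SDXL pipeline. One caveat: the unet weights below ship as diffsensei_unet.safetensors rather than the default diffusion_pytorch_model.safetensors, so a plain from_pretrained may not find them. A minimal sketch under the same assumed local checkout, passing the unet in as an override (see the unet section below for building it from the renamed weights):

import torch
from diffusers import StableDiffusionXLPipeline

pipe = StableDiffusionXLPipeline.from_pretrained(
    "checkpoints/diffsensei/image_generator",  # assumed local checkout
    unet=unet,  # built separately; see the sketch after unet/config.json
    torch_dtype=torch.float16,
)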
image_generator/scheduler/scheduler_config.json ADDED
@@ -0,0 +1,18 @@
+ {
+   "_class_name": "EulerDiscreteScheduler",
+   "_diffusers_version": "0.19.0.dev0",
+   "beta_end": 0.012,
+   "beta_schedule": "scaled_linear",
+   "beta_start": 0.00085,
+   "clip_sample": false,
+   "interpolation_type": "linear",
+   "num_train_timesteps": 1000,
+   "prediction_type": "epsilon",
+   "sample_max_value": 1.0,
+   "set_alpha_to_one": false,
+   "skip_prk_steps": true,
+   "steps_offset": 1,
+   "timestep_spacing": "leading",
+   "trained_betas": null,
+   "use_karras_sigmas": false
+ }
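The scheduler is fully determined by this config; a minimal sketch rebuilding it and preparing a 30-step schedule (same assumed local checkout):

from diffusers import EulerDiscreteScheduler

scheduler = EulerDiscreteScheduler.from_pretrained(
    "checkpoints/diffsensei/image_generator",
    subfolder="scheduler",
)
# 30 inference steps drawn from 1000 training timesteps with "leading" spacing.
scheduler.set_timesteps(30)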
image_generator/text_encoder/config.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "architectures": [
+     "CLIPTextModel"
+   ],
+   "attention_dropout": 0.0,
+   "bos_token_id": 0,
+   "dropout": 0.0,
+   "eos_token_id": 2,
+   "hidden_act": "quick_gelu",
+   "hidden_size": 768,
+   "initializer_factor": 1.0,
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "layer_norm_eps": 1e-05,
+   "max_position_embeddings": 77,
+   "model_type": "clip_text_model",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "pad_token_id": 1,
+   "projection_dim": 768,
+   "torch_dtype": "float16",
+   "transformers_version": "4.32.0.dev0",
+   "vocab_size": 49408
+ }
image_generator/text_encoder/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5c3d6454dd2d23414b56aa1b5858a72487a656937847b6fea8d0606d7a42cdbc
+ size 492265168
image_generator/text_encoder_2/config.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "architectures": [
+     "CLIPTextModelWithProjection"
+   ],
+   "attention_dropout": 0.0,
+   "bos_token_id": 0,
+   "dropout": 0.0,
+   "eos_token_id": 2,
+   "hidden_act": "gelu",
+   "hidden_size": 1280,
+   "initializer_factor": 1.0,
+   "initializer_range": 0.02,
+   "intermediate_size": 5120,
+   "layer_norm_eps": 1e-05,
+   "max_position_embeddings": 77,
+   "model_type": "clip_text_model",
+   "num_attention_heads": 20,
+   "num_hidden_layers": 32,
+   "pad_token_id": 1,
+   "projection_dim": 1280,
+   "torch_dtype": "float16",
+   "transformers_version": "4.32.0.dev0",
+   "vocab_size": 49408
+ }
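The two text encoders follow the usual SDXL split: a ViT-L-sized encoder (hidden 768) and a bigG-sized encoder with projection (hidden 1280). A minimal sketch loading both from this layout (same assumed local checkout):

from transformers import CLIPTextModel, CLIPTextModelWithProjection

text_encoder = CLIPTextModel.from_pretrained(
    "checkpoints/diffsensei/image_generator/text_encoder"
)
text_encoder_2 = CLIPTextModelWithProjection.from_pretrained(
    "checkpoints/diffsensei/image_generator/text_encoder_2"
)
# Their hidden states (768 + 1280) concatenate to the unet's
# cross_attention_dim of 2048 (see image_generator/unet/config.json below).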
image_generator/text_encoder_2/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3a6032f63d37ae02bbc74ccd6a27440578cd71701f96532229d0154f55a8d3ff
+ size 2778702264
image_generator/tokenizer/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
image_generator/tokenizer/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "bos_token": {
+     "content": "<|startoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": "<|endoftext|>",
+   "unk_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
image_generator/tokenizer/tokenizer_config.json ADDED
@@ -0,0 +1,33 @@
+ {
+   "add_prefix_space": false,
+   "bos_token": {
+     "__type": "AddedToken",
+     "content": "<|startoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "clean_up_tokenization_spaces": true,
+   "do_lower_case": true,
+   "eos_token": {
+     "__type": "AddedToken",
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "errors": "replace",
+   "model_max_length": 77,
+   "pad_token": "<|endoftext|>",
+   "tokenizer_class": "CLIPTokenizer",
+   "unk_token": {
+     "__type": "AddedToken",
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
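Both tokenizers are standard CLIP BPE tokenizers capped at 77 tokens; they differ only in pad_token ("<|endoftext|>" here; tokenizer_2 below uses "!"). A minimal sketch (same assumed local checkout):

from transformers import CLIPTokenizer

tokenizer = CLIPTokenizer.from_pretrained(
    "checkpoints/diffsensei/image_generator/tokenizer"
)
# Pad to the full 77-token context, as SDXL's text encoders expect.
ids = tokenizer("a manga panel", padding="max_length", max_length=77).input_ids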
image_generator/tokenizer/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
image_generator/tokenizer_2/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
image_generator/tokenizer_2/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "bos_token": {
+     "content": "<|startoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": "!",
+   "unk_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
image_generator/tokenizer_2/tokenizer_config.json ADDED
@@ -0,0 +1,33 @@
+ {
+   "add_prefix_space": false,
+   "bos_token": {
+     "__type": "AddedToken",
+     "content": "<|startoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "clean_up_tokenization_spaces": true,
+   "do_lower_case": true,
+   "eos_token": {
+     "__type": "AddedToken",
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "errors": "replace",
+   "model_max_length": 77,
+   "pad_token": "!",
+   "tokenizer_class": "CLIPTokenizer",
+   "unk_token": {
+     "__type": "AddedToken",
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
image_generator/tokenizer_2/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
image_generator/unet/config.json ADDED
@@ -0,0 +1,70 @@
+ {
+   "_class_name": "UNet2DConditionModel",
+   "_diffusers_version": "0.19.0.dev0",
+   "act_fn": "silu",
+   "addition_embed_type": "text_time",
+   "addition_embed_type_num_heads": 64,
+   "addition_time_embed_dim": 256,
+   "attention_head_dim": [
+     5,
+     10,
+     20
+   ],
+   "block_out_channels": [
+     320,
+     640,
+     1280
+   ],
+   "center_input_sample": false,
+   "class_embed_type": null,
+   "class_embeddings_concat": false,
+   "conv_in_kernel": 3,
+   "conv_out_kernel": 3,
+   "cross_attention_dim": 2048,
+   "cross_attention_norm": null,
+   "down_block_types": [
+     "DownBlock2D",
+     "CrossAttnDownBlock2D",
+     "CrossAttnDownBlock2D"
+   ],
+   "downsample_padding": 1,
+   "dual_cross_attention": false,
+   "encoder_hid_dim": null,
+   "encoder_hid_dim_type": null,
+   "flip_sin_to_cos": true,
+   "freq_shift": 0,
+   "in_channels": 4,
+   "layers_per_block": 2,
+   "mid_block_only_cross_attention": null,
+   "mid_block_scale_factor": 1,
+   "mid_block_type": "UNetMidBlock2DCrossAttn",
+   "norm_eps": 1e-05,
+   "norm_num_groups": 32,
+   "num_attention_heads": null,
+   "num_class_embeds": null,
+   "only_cross_attention": false,
+   "out_channels": 4,
+   "projection_class_embeddings_input_dim": 2816,
+   "resnet_out_scale_factor": 1.0,
+   "resnet_skip_time_act": false,
+   "resnet_time_scale_shift": "default",
+   "sample_size": 128,
+   "time_cond_proj_dim": null,
+   "time_embedding_act_fn": null,
+   "time_embedding_dim": null,
+   "time_embedding_type": "positional",
+   "timestep_post_act": null,
+   "transformer_layers_per_block": [
+     1,
+     2,
+     10
+   ],
+   "up_block_types": [
+     "CrossAttnUpBlock2D",
+     "CrossAttnUpBlock2D",
+     "UpBlock2D"
+   ],
+   "upcast_attention": null,
+   "use_linear_projection": true
+ }
+
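Because the weights ship under the non-default name diffsensei_unet.safetensors, a sketch that builds the unet from this config and loads the state dict explicitly (same assumed local checkout; strict=False hedges for any DiffSensei-specific attention weights beyond the base SDXL config):

import torch
from diffusers import UNet2DConditionModel
from safetensors.torch import load_file

unet_dir = "checkpoints/diffsensei/image_generator/unet"
# Instantiate from config.json, then load the renamed weight file by hand.
unet = UNet2DConditionModel.from_config(UNet2DConditionModel.load_config(unet_dir))
state = load_file(f"{unet_dir}/diffsensei_unet.safetensors")
missing, unexpected = unet.load_state_dict(state, strict=False)
unet = unet.to(torch.float16)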
image_generator/unet/diffsensei_unet.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f5b715d2d9405739999039ef474e28ce6991d7bf8f169186114d97adb8dca630
+ size 11633249944
image_generator/vae/config.json ADDED
@@ -0,0 +1,32 @@
+ {
+   "_class_name": "AutoencoderKL",
+   "_diffusers_version": "0.20.0.dev0",
+   "_name_or_path": "../sdxl-vae/",
+   "act_fn": "silu",
+   "block_out_channels": [
+     128,
+     256,
+     512,
+     512
+   ],
+   "down_block_types": [
+     "DownEncoderBlock2D",
+     "DownEncoderBlock2D",
+     "DownEncoderBlock2D",
+     "DownEncoderBlock2D"
+   ],
+   "force_upcast": true,
+   "in_channels": 3,
+   "latent_channels": 4,
+   "layers_per_block": 2,
+   "norm_num_groups": 32,
+   "out_channels": 3,
+   "sample_size": 1024,
+   "scaling_factor": 0.13025,
+   "up_block_types": [
+     "UpDecoderBlock2D",
+     "UpDecoderBlock2D",
+     "UpDecoderBlock2D",
+     "UpDecoderBlock2D"
+   ]
+ }
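This is the stock SDXL VAE (scaling_factor 0.13025, 8x downsampling from 1024-pixel samples to 128x128 latents). A minimal decoding sketch (same assumed local checkout; the random latents stand in for denoised unet output):

import torch
from diffusers import AutoencoderKL

vae = AutoencoderKL.from_pretrained("checkpoints/diffsensei/image_generator/vae")
latents = torch.randn(1, 4, 128, 128)  # 4 latent channels, 1024 / 8 spatial
with torch.no_grad():
    # Undo the latent scaling before decoding, per diffusers convention.
    image = vae.decode(latents / vae.config.scaling_factor).sample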
image_generator/vae/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1598f3d24932bcfe6634e8b618ea1e30ab1d57f5aad13a6d2de446d2199f2341
+ size 334643268
mllm/agent/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9b3fff6d4968d9a192a8b4476bf3f65a5fbe930e55c280649a943a33202da50d
+ size 574332016
mllm/llm/config.json ADDED
@@ -0,0 +1,31 @@
+ {
+   "_name_or_path": "checkpoints/diffsensei/mllm/llm",
+   "architectures": [
+     "LlamaForCausalLM"
+   ],
+   "attention_bias": false,
+   "attention_dropout": 0.0,
+   "bos_token_id": 1,
+   "eos_token_id": 2,
+   "head_dim": 128,
+   "hidden_act": "silu",
+   "hidden_size": 5120,
+   "initializer_range": 0.02,
+   "intermediate_size": 13824,
+   "max_position_embeddings": 2048,
+   "mlp_bias": false,
+   "model_type": "llama",
+   "num_attention_heads": 40,
+   "num_hidden_layers": 40,
+   "num_key_value_heads": 40,
+   "pad_token_id": 0,
+   "pretraining_tp": 1,
+   "rms_norm_eps": 1e-05,
+   "rope_scaling": null,
+   "rope_theta": 10000.0,
+   "tie_word_embeddings": false,
+   "torch_dtype": "float32",
+   "transformers_version": "4.45.2",
+   "use_cache": true,
+   "vocab_size": 32330
+ }
mllm/llm/generation_config.json ADDED
@@ -0,0 +1,9 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 1,
+   "eos_token_id": 2,
+   "pad_token_id": 0,
+   "temperature": 0.9,
+   "top_p": 0.6,
+   "transformers_version": "4.45.2"
+ }
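The LLM is a LLaMA-style 13B-class model (40 layers, hidden size 5120, vocab extended to 32330 for the added tokens below). The index file after the shards lets transformers resolve all eleven of them automatically; a minimal sketch (same assumed local checkout; the float32 weights total ~52 GB, so halving the dtype is the pragmatic default for inference):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("checkpoints/diffsensei/mllm/tokenizer")
llm = AutoModelForCausalLM.from_pretrained(
    "checkpoints/diffsensei/mllm/llm",
    torch_dtype=torch.float16,
)
# generation_config.json above supplies temperature=0.9 and top_p=0.6,
# which take effect when generate() is called with do_sample=True.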
mllm/llm/model-00001-of-00011.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d905c280ae7f88642fe72152edf6c89b6371e9323bda028da065c29cd155c0c9
+ size 4888007736
mllm/llm/model-00002-of-00011.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:01ecd53547d8eb4b6d4e7b66fd9ad8a06413be551b0b287194bf59e8255c36cf
+ size 4970419224
mllm/llm/model-00003-of-00011.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9f46b554fc1a8b263678319445c1fb5d3607aa17c92bba1a34583d6784c80bfd
+ size 4970419608
mllm/llm/model-00004-of-00011.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:594ecad28d8c3d4d45d53634caa890487e9e3fec0621e3865c4488580811e693
+ size 4970419632
mllm/llm/model-00005-of-00011.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8644d993edf4b955e3ee8e340c2c0a16fc75c3a3e0c380e0e5ca22ebb4d89f6a
+ size 4970419632
mllm/llm/model-00006-of-00011.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:49dd56edadd559d3007129ac7c5f84dfd4c018e899f5f3415610417018f5d5b1
+ size 4792120528
mllm/llm/model-00007-of-00011.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:53e3286378ee51b9e70d671341bbe2e2d3f8e0553bc23b8a98ea02ceff45087a
+ size 4792161712
mllm/llm/model-00008-of-00011.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d118a0ddd4d84efff4c11d9e87732e6de84a928cf691e69032c494ded0930e40
+ size 4792161712
mllm/llm/model-00009-of-00011.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5ce15d5270fd141ca3ac6b825d88fc2674ab41e138324ebe6541a28099a4fd90
+ size 4970419264
mllm/llm/model-00010-of-00011.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7a8ed033aa0e45c32cb4ff5d606b39908a593b73d0b2c279fdc19c34f76b7b26
+ size 4970419632
mllm/llm/model-00011-of-00011.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:98f3a2e11569fbe313f430111f46a08eff47b58c18ffda835ed5ae5276740928
+ size 2990062328
mllm/llm/model.safetensors.index.json ADDED
@@ -0,0 +1,410 @@
+ {
+   "metadata": {
+     "total_size": 52076984320
+   },
+   "weight_map": {
+     "lm_head.weight": "model-00011-of-00011.safetensors",
+     "model.embed_tokens.weight": "model-00001-of-00011.safetensors",
+     "model.layers.0.input_layernorm.weight": "model-00001-of-00011.safetensors",
+     "model.layers.0.mlp.down_proj.weight": "model-00001-of-00011.safetensors",
+     "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00011.safetensors",
+     "model.layers.0.mlp.up_proj.weight": "model-00001-of-00011.safetensors",
+     "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00011.safetensors",
+     "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00011.safetensors",
+     "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00011.safetensors",
+     "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00011.safetensors",
+     "model.layers.0.self_attn.rotary_emb.inv_freq": "model-00001-of-00011.safetensors",
+     "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00011.safetensors",
+     "model.layers.1.input_layernorm.weight": "model-00001-of-00011.safetensors",
+     "model.layers.1.mlp.down_proj.weight": "model-00001-of-00011.safetensors",
+     "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00011.safetensors",
+     "model.layers.1.mlp.up_proj.weight": "model-00001-of-00011.safetensors",
+     "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00011.safetensors",
+     "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00011.safetensors",
+     "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00011.safetensors",
+     "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00011.safetensors",
+     "model.layers.1.self_attn.rotary_emb.inv_freq": "model-00001-of-00011.safetensors",
+     "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00011.safetensors",
+     "model.layers.10.input_layernorm.weight": "model-00003-of-00011.safetensors",
+     "model.layers.10.mlp.down_proj.weight": "model-00003-of-00011.safetensors",
+     "model.layers.10.mlp.gate_proj.weight": "model-00003-of-00011.safetensors",
+     "model.layers.10.mlp.up_proj.weight": "model-00003-of-00011.safetensors",
+     "model.layers.10.post_attention_layernorm.weight": "model-00003-of-00011.safetensors",
+     "model.layers.10.self_attn.k_proj.weight": "model-00003-of-00011.safetensors",
+     "model.layers.10.self_attn.o_proj.weight": "model-00003-of-00011.safetensors",
+     "model.layers.10.self_attn.q_proj.weight": "model-00003-of-00011.safetensors",
+     "model.layers.10.self_attn.rotary_emb.inv_freq": "model-00003-of-00011.safetensors",
+     "model.layers.10.self_attn.v_proj.weight": "model-00003-of-00011.safetensors",
+     "model.layers.11.input_layernorm.weight": "model-00004-of-00011.safetensors",
+     "model.layers.11.mlp.down_proj.weight": "model-00004-of-00011.safetensors",
+     "model.layers.11.mlp.gate_proj.weight": "model-00004-of-00011.safetensors",
+     "model.layers.11.mlp.up_proj.weight": "model-00004-of-00011.safetensors",
+     "model.layers.11.post_attention_layernorm.weight": "model-00004-of-00011.safetensors",
+     "model.layers.11.self_attn.k_proj.weight": "model-00003-of-00011.safetensors",
+     "model.layers.11.self_attn.o_proj.weight": "model-00004-of-00011.safetensors",
+     "model.layers.11.self_attn.q_proj.weight": "model-00003-of-00011.safetensors",
+     "model.layers.11.self_attn.rotary_emb.inv_freq": "model-00004-of-00011.safetensors",
+     "model.layers.11.self_attn.v_proj.weight": "model-00004-of-00011.safetensors",
+     "model.layers.12.input_layernorm.weight": "model-00004-of-00011.safetensors",
+     "model.layers.12.mlp.down_proj.weight": "model-00004-of-00011.safetensors",
+     "model.layers.12.mlp.gate_proj.weight": "model-00004-of-00011.safetensors",
+     "model.layers.12.mlp.up_proj.weight": "model-00004-of-00011.safetensors",
+     "model.layers.12.post_attention_layernorm.weight": "model-00004-of-00011.safetensors",
+     "model.layers.12.self_attn.k_proj.weight": "model-00004-of-00011.safetensors",
+     "model.layers.12.self_attn.o_proj.weight": "model-00004-of-00011.safetensors",
+     "model.layers.12.self_attn.q_proj.weight": "model-00004-of-00011.safetensors",
+     "model.layers.12.self_attn.rotary_emb.inv_freq": "model-00004-of-00011.safetensors",
+     "model.layers.12.self_attn.v_proj.weight": "model-00004-of-00011.safetensors",
+     "model.layers.13.input_layernorm.weight": "model-00004-of-00011.safetensors",
+     "model.layers.13.mlp.down_proj.weight": "model-00004-of-00011.safetensors",
+     "model.layers.13.mlp.gate_proj.weight": "model-00004-of-00011.safetensors",
+     "model.layers.13.mlp.up_proj.weight": "model-00004-of-00011.safetensors",
+     "model.layers.13.post_attention_layernorm.weight": "model-00004-of-00011.safetensors",
+     "model.layers.13.self_attn.k_proj.weight": "model-00004-of-00011.safetensors",
+     "model.layers.13.self_attn.o_proj.weight": "model-00004-of-00011.safetensors",
+     "model.layers.13.self_attn.q_proj.weight": "model-00004-of-00011.safetensors",
+     "model.layers.13.self_attn.rotary_emb.inv_freq": "model-00004-of-00011.safetensors",
+     "model.layers.13.self_attn.v_proj.weight": "model-00004-of-00011.safetensors",
+     "model.layers.14.input_layernorm.weight": "model-00004-of-00011.safetensors",
+     "model.layers.14.mlp.down_proj.weight": "model-00004-of-00011.safetensors",
+     "model.layers.14.mlp.gate_proj.weight": "model-00004-of-00011.safetensors",
+     "model.layers.14.mlp.up_proj.weight": "model-00004-of-00011.safetensors",
+     "model.layers.14.post_attention_layernorm.weight": "model-00004-of-00011.safetensors",
+     "model.layers.14.self_attn.k_proj.weight": "model-00004-of-00011.safetensors",
+     "model.layers.14.self_attn.o_proj.weight": "model-00004-of-00011.safetensors",
+     "model.layers.14.self_attn.q_proj.weight": "model-00004-of-00011.safetensors",
+     "model.layers.14.self_attn.rotary_emb.inv_freq": "model-00004-of-00011.safetensors",
+     "model.layers.14.self_attn.v_proj.weight": "model-00004-of-00011.safetensors",
+     "model.layers.15.input_layernorm.weight": "model-00005-of-00011.safetensors",
+     "model.layers.15.mlp.down_proj.weight": "model-00005-of-00011.safetensors",
+     "model.layers.15.mlp.gate_proj.weight": "model-00005-of-00011.safetensors",
+     "model.layers.15.mlp.up_proj.weight": "model-00005-of-00011.safetensors",
+     "model.layers.15.post_attention_layernorm.weight": "model-00005-of-00011.safetensors",
+     "model.layers.15.self_attn.k_proj.weight": "model-00005-of-00011.safetensors",
+     "model.layers.15.self_attn.o_proj.weight": "model-00005-of-00011.safetensors",
+     "model.layers.15.self_attn.q_proj.weight": "model-00004-of-00011.safetensors",
+     "model.layers.15.self_attn.rotary_emb.inv_freq": "model-00005-of-00011.safetensors",
+     "model.layers.15.self_attn.v_proj.weight": "model-00005-of-00011.safetensors",
+     "model.layers.16.input_layernorm.weight": "model-00005-of-00011.safetensors",
+     "model.layers.16.mlp.down_proj.weight": "model-00005-of-00011.safetensors",
+     "model.layers.16.mlp.gate_proj.weight": "model-00005-of-00011.safetensors",
+     "model.layers.16.mlp.up_proj.weight": "model-00005-of-00011.safetensors",
+     "model.layers.16.post_attention_layernorm.weight": "model-00005-of-00011.safetensors",
+     "model.layers.16.self_attn.k_proj.weight": "model-00005-of-00011.safetensors",
+     "model.layers.16.self_attn.o_proj.weight": "model-00005-of-00011.safetensors",
+     "model.layers.16.self_attn.q_proj.weight": "model-00005-of-00011.safetensors",
+     "model.layers.16.self_attn.rotary_emb.inv_freq": "model-00005-of-00011.safetensors",
+     "model.layers.16.self_attn.v_proj.weight": "model-00005-of-00011.safetensors",
+     "model.layers.17.input_layernorm.weight": "model-00005-of-00011.safetensors",
+     "model.layers.17.mlp.down_proj.weight": "model-00005-of-00011.safetensors",
+     "model.layers.17.mlp.gate_proj.weight": "model-00005-of-00011.safetensors",
+     "model.layers.17.mlp.up_proj.weight": "model-00005-of-00011.safetensors",
+     "model.layers.17.post_attention_layernorm.weight": "model-00005-of-00011.safetensors",
+     "model.layers.17.self_attn.k_proj.weight": "model-00005-of-00011.safetensors",
+     "model.layers.17.self_attn.o_proj.weight": "model-00005-of-00011.safetensors",
+     "model.layers.17.self_attn.q_proj.weight": "model-00005-of-00011.safetensors",
+     "model.layers.17.self_attn.rotary_emb.inv_freq": "model-00005-of-00011.safetensors",
+     "model.layers.17.self_attn.v_proj.weight": "model-00005-of-00011.safetensors",
+     "model.layers.18.input_layernorm.weight": "model-00005-of-00011.safetensors",
+     "model.layers.18.mlp.down_proj.weight": "model-00005-of-00011.safetensors",
+     "model.layers.18.mlp.gate_proj.weight": "model-00005-of-00011.safetensors",
+     "model.layers.18.mlp.up_proj.weight": "model-00005-of-00011.safetensors",
+     "model.layers.18.post_attention_layernorm.weight": "model-00005-of-00011.safetensors",
+     "model.layers.18.self_attn.k_proj.weight": "model-00005-of-00011.safetensors",
+     "model.layers.18.self_attn.o_proj.weight": "model-00005-of-00011.safetensors",
+     "model.layers.18.self_attn.q_proj.weight": "model-00005-of-00011.safetensors",
+     "model.layers.18.self_attn.rotary_emb.inv_freq": "model-00005-of-00011.safetensors",
+     "model.layers.18.self_attn.v_proj.weight": "model-00005-of-00011.safetensors",
+     "model.layers.19.input_layernorm.weight": "model-00006-of-00011.safetensors",
+     "model.layers.19.mlp.down_proj.weight": "model-00006-of-00011.safetensors",
+     "model.layers.19.mlp.gate_proj.weight": "model-00006-of-00011.safetensors",
+     "model.layers.19.mlp.up_proj.weight": "model-00006-of-00011.safetensors",
+     "model.layers.19.post_attention_layernorm.weight": "model-00006-of-00011.safetensors",
+     "model.layers.19.self_attn.k_proj.weight": "model-00006-of-00011.safetensors",
+     "model.layers.19.self_attn.o_proj.weight": "model-00006-of-00011.safetensors",
+     "model.layers.19.self_attn.q_proj.weight": "model-00006-of-00011.safetensors",
+     "model.layers.19.self_attn.rotary_emb.inv_freq": "model-00006-of-00011.safetensors",
+     "model.layers.19.self_attn.v_proj.weight": "model-00006-of-00011.safetensors",
+     "model.layers.2.input_layernorm.weight": "model-00001-of-00011.safetensors",
+     "model.layers.2.mlp.down_proj.weight": "model-00001-of-00011.safetensors",
+     "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00011.safetensors",
+     "model.layers.2.mlp.up_proj.weight": "model-00001-of-00011.safetensors",
+     "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00011.safetensors",
+     "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00011.safetensors",
+     "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00011.safetensors",
+     "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00011.safetensors",
+     "model.layers.2.self_attn.rotary_emb.inv_freq": "model-00001-of-00011.safetensors",
+     "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00011.safetensors",
+     "model.layers.20.input_layernorm.weight": "model-00006-of-00011.safetensors",
+     "model.layers.20.mlp.down_proj.weight": "model-00006-of-00011.safetensors",
+     "model.layers.20.mlp.gate_proj.weight": "model-00006-of-00011.safetensors",
+     "model.layers.20.mlp.up_proj.weight": "model-00006-of-00011.safetensors",
+     "model.layers.20.post_attention_layernorm.weight": "model-00006-of-00011.safetensors",
+     "model.layers.20.self_attn.k_proj.weight": "model-00006-of-00011.safetensors",
+     "model.layers.20.self_attn.o_proj.weight": "model-00006-of-00011.safetensors",
+     "model.layers.20.self_attn.q_proj.weight": "model-00006-of-00011.safetensors",
+     "model.layers.20.self_attn.rotary_emb.inv_freq": "model-00006-of-00011.safetensors",
+     "model.layers.20.self_attn.v_proj.weight": "model-00006-of-00011.safetensors",
+     "model.layers.21.input_layernorm.weight": "model-00006-of-00011.safetensors",
+     "model.layers.21.mlp.down_proj.weight": "model-00006-of-00011.safetensors",
+     "model.layers.21.mlp.gate_proj.weight": "model-00006-of-00011.safetensors",
+     "model.layers.21.mlp.up_proj.weight": "model-00006-of-00011.safetensors",
+     "model.layers.21.post_attention_layernorm.weight": "model-00006-of-00011.safetensors",
+     "model.layers.21.self_attn.k_proj.weight": "model-00006-of-00011.safetensors",
+     "model.layers.21.self_attn.o_proj.weight": "model-00006-of-00011.safetensors",
+     "model.layers.21.self_attn.q_proj.weight": "model-00006-of-00011.safetensors",
+     "model.layers.21.self_attn.rotary_emb.inv_freq": "model-00006-of-00011.safetensors",
+     "model.layers.21.self_attn.v_proj.weight": "model-00006-of-00011.safetensors",
+     "model.layers.22.input_layernorm.weight": "model-00007-of-00011.safetensors",
+     "model.layers.22.mlp.down_proj.weight": "model-00006-of-00011.safetensors",
+     "model.layers.22.mlp.gate_proj.weight": "model-00006-of-00011.safetensors",
+     "model.layers.22.mlp.up_proj.weight": "model-00007-of-00011.safetensors",
+     "model.layers.22.post_attention_layernorm.weight": "model-00007-of-00011.safetensors",
+     "model.layers.22.self_attn.k_proj.weight": "model-00006-of-00011.safetensors",
+     "model.layers.22.self_attn.o_proj.weight": "model-00006-of-00011.safetensors",
+     "model.layers.22.self_attn.q_proj.weight": "model-00006-of-00011.safetensors",
+     "model.layers.22.self_attn.rotary_emb.inv_freq": "model-00006-of-00011.safetensors",
+     "model.layers.22.self_attn.v_proj.weight": "model-00006-of-00011.safetensors",
+     "model.layers.23.input_layernorm.weight": "model-00007-of-00011.safetensors",
+     "model.layers.23.mlp.down_proj.weight": "model-00007-of-00011.safetensors",
+     "model.layers.23.mlp.gate_proj.weight": "model-00007-of-00011.safetensors",
+     "model.layers.23.mlp.up_proj.weight": "model-00007-of-00011.safetensors",
+     "model.layers.23.post_attention_layernorm.weight": "model-00007-of-00011.safetensors",
+     "model.layers.23.self_attn.k_proj.weight": "model-00007-of-00011.safetensors",
+     "model.layers.23.self_attn.o_proj.weight": "model-00007-of-00011.safetensors",
+     "model.layers.23.self_attn.q_proj.weight": "model-00007-of-00011.safetensors",
+     "model.layers.23.self_attn.rotary_emb.inv_freq": "model-00007-of-00011.safetensors",
+     "model.layers.23.self_attn.v_proj.weight": "model-00007-of-00011.safetensors",
+     "model.layers.24.input_layernorm.weight": "model-00007-of-00011.safetensors",
+     "model.layers.24.mlp.down_proj.weight": "model-00007-of-00011.safetensors",
+     "model.layers.24.mlp.gate_proj.weight": "model-00007-of-00011.safetensors",
+     "model.layers.24.mlp.up_proj.weight": "model-00007-of-00011.safetensors",
+     "model.layers.24.post_attention_layernorm.weight": "model-00007-of-00011.safetensors",
+     "model.layers.24.self_attn.k_proj.weight": "model-00007-of-00011.safetensors",
+     "model.layers.24.self_attn.o_proj.weight": "model-00007-of-00011.safetensors",
+     "model.layers.24.self_attn.q_proj.weight": "model-00007-of-00011.safetensors",
+     "model.layers.24.self_attn.rotary_emb.inv_freq": "model-00007-of-00011.safetensors",
+     "model.layers.24.self_attn.v_proj.weight": "model-00007-of-00011.safetensors",
+     "model.layers.25.input_layernorm.weight": "model-00007-of-00011.safetensors",
+     "model.layers.25.mlp.down_proj.weight": "model-00007-of-00011.safetensors",
+     "model.layers.25.mlp.gate_proj.weight": "model-00007-of-00011.safetensors",
+     "model.layers.25.mlp.up_proj.weight": "model-00007-of-00011.safetensors",
+     "model.layers.25.post_attention_layernorm.weight": "model-00007-of-00011.safetensors",
+     "model.layers.25.self_attn.k_proj.weight": "model-00007-of-00011.safetensors",
+     "model.layers.25.self_attn.o_proj.weight": "model-00007-of-00011.safetensors",
+     "model.layers.25.self_attn.q_proj.weight": "model-00007-of-00011.safetensors",
+     "model.layers.25.self_attn.rotary_emb.inv_freq": "model-00007-of-00011.safetensors",
+     "model.layers.25.self_attn.v_proj.weight": "model-00007-of-00011.safetensors",
+     "model.layers.26.input_layernorm.weight": "model-00008-of-00011.safetensors",
+     "model.layers.26.mlp.down_proj.weight": "model-00008-of-00011.safetensors",
+     "model.layers.26.mlp.gate_proj.weight": "model-00007-of-00011.safetensors",
+     "model.layers.26.mlp.up_proj.weight": "model-00008-of-00011.safetensors",
+     "model.layers.26.post_attention_layernorm.weight": "model-00008-of-00011.safetensors",
+     "model.layers.26.self_attn.k_proj.weight": "model-00007-of-00011.safetensors",
+     "model.layers.26.self_attn.o_proj.weight": "model-00007-of-00011.safetensors",
+     "model.layers.26.self_attn.q_proj.weight": "model-00007-of-00011.safetensors",
+     "model.layers.26.self_attn.rotary_emb.inv_freq": "model-00007-of-00011.safetensors",
+     "model.layers.26.self_attn.v_proj.weight": "model-00007-of-00011.safetensors",
+     "model.layers.27.input_layernorm.weight": "model-00008-of-00011.safetensors",
+     "model.layers.27.mlp.down_proj.weight": "model-00008-of-00011.safetensors",
+     "model.layers.27.mlp.gate_proj.weight": "model-00008-of-00011.safetensors",
+     "model.layers.27.mlp.up_proj.weight": "model-00008-of-00011.safetensors",
+     "model.layers.27.post_attention_layernorm.weight": "model-00008-of-00011.safetensors",
+     "model.layers.27.self_attn.k_proj.weight": "model-00008-of-00011.safetensors",
+     "model.layers.27.self_attn.o_proj.weight": "model-00008-of-00011.safetensors",
+     "model.layers.27.self_attn.q_proj.weight": "model-00008-of-00011.safetensors",
+     "model.layers.27.self_attn.rotary_emb.inv_freq": "model-00008-of-00011.safetensors",
+     "model.layers.27.self_attn.v_proj.weight": "model-00008-of-00011.safetensors",
+     "model.layers.28.input_layernorm.weight": "model-00008-of-00011.safetensors",
+     "model.layers.28.mlp.down_proj.weight": "model-00008-of-00011.safetensors",
+     "model.layers.28.mlp.gate_proj.weight": "model-00008-of-00011.safetensors",
+     "model.layers.28.mlp.up_proj.weight": "model-00008-of-00011.safetensors",
+     "model.layers.28.post_attention_layernorm.weight": "model-00008-of-00011.safetensors",
+     "model.layers.28.self_attn.k_proj.weight": "model-00008-of-00011.safetensors",
+     "model.layers.28.self_attn.o_proj.weight": "model-00008-of-00011.safetensors",
+     "model.layers.28.self_attn.q_proj.weight": "model-00008-of-00011.safetensors",
+     "model.layers.28.self_attn.rotary_emb.inv_freq": "model-00008-of-00011.safetensors",
+     "model.layers.28.self_attn.v_proj.weight": "model-00008-of-00011.safetensors",
+     "model.layers.29.input_layernorm.weight": "model-00008-of-00011.safetensors",
+     "model.layers.29.mlp.down_proj.weight": "model-00008-of-00011.safetensors",
+     "model.layers.29.mlp.gate_proj.weight": "model-00008-of-00011.safetensors",
+     "model.layers.29.mlp.up_proj.weight": "model-00008-of-00011.safetensors",
+     "model.layers.29.post_attention_layernorm.weight": "model-00008-of-00011.safetensors",
+     "model.layers.29.self_attn.k_proj.weight": "model-00008-of-00011.safetensors",
+     "model.layers.29.self_attn.o_proj.weight": "model-00008-of-00011.safetensors",
+     "model.layers.29.self_attn.q_proj.weight": "model-00008-of-00011.safetensors",
+     "model.layers.29.self_attn.rotary_emb.inv_freq": "model-00008-of-00011.safetensors",
+     "model.layers.29.self_attn.v_proj.weight": "model-00008-of-00011.safetensors",
+     "model.layers.3.input_layernorm.weight": "model-00002-of-00011.safetensors",
+     "model.layers.3.mlp.down_proj.weight": "model-00002-of-00011.safetensors",
+     "model.layers.3.mlp.gate_proj.weight": "model-00002-of-00011.safetensors",
+     "model.layers.3.mlp.up_proj.weight": "model-00002-of-00011.safetensors",
+     "model.layers.3.post_attention_layernorm.weight": "model-00002-of-00011.safetensors",
+     "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00011.safetensors",
+     "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00011.safetensors",
+     "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00011.safetensors",
+     "model.layers.3.self_attn.rotary_emb.inv_freq": "model-00001-of-00011.safetensors",
+     "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00011.safetensors",
+     "model.layers.30.input_layernorm.weight": "model-00009-of-00011.safetensors",
+     "model.layers.30.mlp.down_proj.weight": "model-00009-of-00011.safetensors",
+     "model.layers.30.mlp.gate_proj.weight": "model-00009-of-00011.safetensors",
+     "model.layers.30.mlp.up_proj.weight": "model-00009-of-00011.safetensors",
+     "model.layers.30.post_attention_layernorm.weight": "model-00009-of-00011.safetensors",
+     "model.layers.30.self_attn.k_proj.weight": "model-00008-of-00011.safetensors",
+     "model.layers.30.self_attn.o_proj.weight": "model-00008-of-00011.safetensors",
+     "model.layers.30.self_attn.q_proj.weight": "model-00008-of-00011.safetensors",
+     "model.layers.30.self_attn.rotary_emb.inv_freq": "model-00008-of-00011.safetensors",
+     "model.layers.30.self_attn.v_proj.weight": "model-00008-of-00011.safetensors",
+     "model.layers.31.input_layernorm.weight": "model-00009-of-00011.safetensors",
+     "model.layers.31.mlp.down_proj.weight": "model-00009-of-00011.safetensors",
+     "model.layers.31.mlp.gate_proj.weight": "model-00009-of-00011.safetensors",
+     "model.layers.31.mlp.up_proj.weight": "model-00009-of-00011.safetensors",
+     "model.layers.31.post_attention_layernorm.weight": "model-00009-of-00011.safetensors",
+     "model.layers.31.self_attn.k_proj.weight": "model-00009-of-00011.safetensors",
+     "model.layers.31.self_attn.o_proj.weight": "model-00009-of-00011.safetensors",
+     "model.layers.31.self_attn.q_proj.weight": "model-00009-of-00011.safetensors",
+     "model.layers.31.self_attn.rotary_emb.inv_freq": "model-00009-of-00011.safetensors",
+     "model.layers.31.self_attn.v_proj.weight": "model-00009-of-00011.safetensors",
+     "model.layers.32.input_layernorm.weight": "model-00009-of-00011.safetensors",
+     "model.layers.32.mlp.down_proj.weight": "model-00009-of-00011.safetensors",
+     "model.layers.32.mlp.gate_proj.weight": "model-00009-of-00011.safetensors",
+     "model.layers.32.mlp.up_proj.weight": "model-00009-of-00011.safetensors",
+     "model.layers.32.post_attention_layernorm.weight": "model-00009-of-00011.safetensors",
+     "model.layers.32.self_attn.k_proj.weight": "model-00009-of-00011.safetensors",
+     "model.layers.32.self_attn.o_proj.weight": "model-00009-of-00011.safetensors",
+     "model.layers.32.self_attn.q_proj.weight": "model-00009-of-00011.safetensors",
+     "model.layers.32.self_attn.rotary_emb.inv_freq": "model-00009-of-00011.safetensors",
+     "model.layers.32.self_attn.v_proj.weight": "model-00009-of-00011.safetensors",
+     "model.layers.33.input_layernorm.weight": "model-00009-of-00011.safetensors",
+     "model.layers.33.mlp.down_proj.weight": "model-00009-of-00011.safetensors",
+     "model.layers.33.mlp.gate_proj.weight": "model-00009-of-00011.safetensors",
+     "model.layers.33.mlp.up_proj.weight": "model-00009-of-00011.safetensors",
+     "model.layers.33.post_attention_layernorm.weight": "model-00009-of-00011.safetensors",
+     "model.layers.33.self_attn.k_proj.weight": "model-00009-of-00011.safetensors",
+     "model.layers.33.self_attn.o_proj.weight": "model-00009-of-00011.safetensors",
+     "model.layers.33.self_attn.q_proj.weight": "model-00009-of-00011.safetensors",
+     "model.layers.33.self_attn.rotary_emb.inv_freq": "model-00009-of-00011.safetensors",
+     "model.layers.33.self_attn.v_proj.weight": "model-00009-of-00011.safetensors",
+     "model.layers.34.input_layernorm.weight": "model-00010-of-00011.safetensors",
+     "model.layers.34.mlp.down_proj.weight": "model-00010-of-00011.safetensors",
+     "model.layers.34.mlp.gate_proj.weight": "model-00010-of-00011.safetensors",
+     "model.layers.34.mlp.up_proj.weight": "model-00010-of-00011.safetensors",
+     "model.layers.34.post_attention_layernorm.weight": "model-00010-of-00011.safetensors",
+     "model.layers.34.self_attn.k_proj.weight": "model-00009-of-00011.safetensors",
+     "model.layers.34.self_attn.o_proj.weight": "model-00010-of-00011.safetensors",
+     "model.layers.34.self_attn.q_proj.weight": "model-00009-of-00011.safetensors",
+     "model.layers.34.self_attn.rotary_emb.inv_freq": "model-00010-of-00011.safetensors",
+     "model.layers.34.self_attn.v_proj.weight": "model-00009-of-00011.safetensors",
+     "model.layers.35.input_layernorm.weight": "model-00010-of-00011.safetensors",
+     "model.layers.35.mlp.down_proj.weight": "model-00010-of-00011.safetensors",
+     "model.layers.35.mlp.gate_proj.weight": "model-00010-of-00011.safetensors",
+     "model.layers.35.mlp.up_proj.weight": "model-00010-of-00011.safetensors",
+     "model.layers.35.post_attention_layernorm.weight": "model-00010-of-00011.safetensors",
+     "model.layers.35.self_attn.k_proj.weight": "model-00010-of-00011.safetensors",
+     "model.layers.35.self_attn.o_proj.weight": "model-00010-of-00011.safetensors",
+     "model.layers.35.self_attn.q_proj.weight": "model-00010-of-00011.safetensors",
+     "model.layers.35.self_attn.rotary_emb.inv_freq": "model-00010-of-00011.safetensors",
+     "model.layers.35.self_attn.v_proj.weight": "model-00010-of-00011.safetensors",
+     "model.layers.36.input_layernorm.weight": "model-00010-of-00011.safetensors",
+     "model.layers.36.mlp.down_proj.weight": "model-00010-of-00011.safetensors",
+     "model.layers.36.mlp.gate_proj.weight": "model-00010-of-00011.safetensors",
+     "model.layers.36.mlp.up_proj.weight": "model-00010-of-00011.safetensors",
+     "model.layers.36.post_attention_layernorm.weight": "model-00010-of-00011.safetensors",
+     "model.layers.36.self_attn.k_proj.weight": "model-00010-of-00011.safetensors",
+     "model.layers.36.self_attn.o_proj.weight": "model-00010-of-00011.safetensors",
+     "model.layers.36.self_attn.q_proj.weight": "model-00010-of-00011.safetensors",
+     "model.layers.36.self_attn.rotary_emb.inv_freq": "model-00010-of-00011.safetensors",
+     "model.layers.36.self_attn.v_proj.weight": "model-00010-of-00011.safetensors",
+     "model.layers.37.input_layernorm.weight": "model-00010-of-00011.safetensors",
+     "model.layers.37.mlp.down_proj.weight": "model-00010-of-00011.safetensors",
+     "model.layers.37.mlp.gate_proj.weight": "model-00010-of-00011.safetensors",
+     "model.layers.37.mlp.up_proj.weight": "model-00010-of-00011.safetensors",
+     "model.layers.37.post_attention_layernorm.weight": "model-00010-of-00011.safetensors",
+     "model.layers.37.self_attn.k_proj.weight": "model-00010-of-00011.safetensors",
+     "model.layers.37.self_attn.o_proj.weight": "model-00010-of-00011.safetensors",
+     "model.layers.37.self_attn.q_proj.weight": "model-00010-of-00011.safetensors",
+     "model.layers.37.self_attn.rotary_emb.inv_freq": "model-00010-of-00011.safetensors",
+     "model.layers.37.self_attn.v_proj.weight": "model-00010-of-00011.safetensors",
+     "model.layers.38.input_layernorm.weight": "model-00011-of-00011.safetensors",
+     "model.layers.38.mlp.down_proj.weight": "model-00011-of-00011.safetensors",
+     "model.layers.38.mlp.gate_proj.weight": "model-00011-of-00011.safetensors",
+     "model.layers.38.mlp.up_proj.weight": "model-00011-of-00011.safetensors",
+     "model.layers.38.post_attention_layernorm.weight": "model-00011-of-00011.safetensors",
+     "model.layers.38.self_attn.k_proj.weight": "model-00010-of-00011.safetensors",
+     "model.layers.38.self_attn.o_proj.weight": "model-00011-of-00011.safetensors",
+     "model.layers.38.self_attn.q_proj.weight": "model-00010-of-00011.safetensors",
+     "model.layers.38.self_attn.rotary_emb.inv_freq": "model-00011-of-00011.safetensors",
+     "model.layers.38.self_attn.v_proj.weight": "model-00011-of-00011.safetensors",
+     "model.layers.39.input_layernorm.weight": "model-00011-of-00011.safetensors",
+     "model.layers.39.mlp.down_proj.weight": "model-00011-of-00011.safetensors",
+     "model.layers.39.mlp.gate_proj.weight": "model-00011-of-00011.safetensors",
+     "model.layers.39.mlp.up_proj.weight": "model-00011-of-00011.safetensors",
+     "model.layers.39.post_attention_layernorm.weight": "model-00011-of-00011.safetensors",
+     "model.layers.39.self_attn.k_proj.weight": "model-00011-of-00011.safetensors",
+     "model.layers.39.self_attn.o_proj.weight": "model-00011-of-00011.safetensors",
+     "model.layers.39.self_attn.q_proj.weight": "model-00011-of-00011.safetensors",
+     "model.layers.39.self_attn.rotary_emb.inv_freq": "model-00011-of-00011.safetensors",
+     "model.layers.39.self_attn.v_proj.weight": "model-00011-of-00011.safetensors",
+     "model.layers.4.input_layernorm.weight": "model-00002-of-00011.safetensors",
+     "model.layers.4.mlp.down_proj.weight": "model-00002-of-00011.safetensors",
+     "model.layers.4.mlp.gate_proj.weight": "model-00002-of-00011.safetensors",
+     "model.layers.4.mlp.up_proj.weight": "model-00002-of-00011.safetensors",
+     "model.layers.4.post_attention_layernorm.weight": "model-00002-of-00011.safetensors",
+     "model.layers.4.self_attn.k_proj.weight": "model-00002-of-00011.safetensors",
+     "model.layers.4.self_attn.o_proj.weight": "model-00002-of-00011.safetensors",
+     "model.layers.4.self_attn.q_proj.weight": "model-00002-of-00011.safetensors",
+     "model.layers.4.self_attn.rotary_emb.inv_freq": "model-00002-of-00011.safetensors",
+     "model.layers.4.self_attn.v_proj.weight": "model-00002-of-00011.safetensors",
+     "model.layers.5.input_layernorm.weight": "model-00002-of-00011.safetensors",
+     "model.layers.5.mlp.down_proj.weight": "model-00002-of-00011.safetensors",
+     "model.layers.5.mlp.gate_proj.weight": "model-00002-of-00011.safetensors",
+     "model.layers.5.mlp.up_proj.weight": "model-00002-of-00011.safetensors",
+     "model.layers.5.post_attention_layernorm.weight": "model-00002-of-00011.safetensors",
+     "model.layers.5.self_attn.k_proj.weight": "model-00002-of-00011.safetensors",
+     "model.layers.5.self_attn.o_proj.weight": "model-00002-of-00011.safetensors",
+     "model.layers.5.self_attn.q_proj.weight": "model-00002-of-00011.safetensors",
+     "model.layers.5.self_attn.rotary_emb.inv_freq": "model-00002-of-00011.safetensors",
+     "model.layers.5.self_attn.v_proj.weight": "model-00002-of-00011.safetensors",
+     "model.layers.6.input_layernorm.weight": "model-00002-of-00011.safetensors",
+     "model.layers.6.mlp.down_proj.weight": "model-00002-of-00011.safetensors",
+     "model.layers.6.mlp.gate_proj.weight": "model-00002-of-00011.safetensors",
+     "model.layers.6.mlp.up_proj.weight": "model-00002-of-00011.safetensors",
+     "model.layers.6.post_attention_layernorm.weight": "model-00002-of-00011.safetensors",
+     "model.layers.6.self_attn.k_proj.weight": "model-00002-of-00011.safetensors",
+     "model.layers.6.self_attn.o_proj.weight": "model-00002-of-00011.safetensors",
+     "model.layers.6.self_attn.q_proj.weight": "model-00002-of-00011.safetensors",
+     "model.layers.6.self_attn.rotary_emb.inv_freq": "model-00002-of-00011.safetensors",
+     "model.layers.6.self_attn.v_proj.weight": "model-00002-of-00011.safetensors",
+     "model.layers.7.input_layernorm.weight": "model-00003-of-00011.safetensors",
+     "model.layers.7.mlp.down_proj.weight": "model-00003-of-00011.safetensors",
+     "model.layers.7.mlp.gate_proj.weight": "model-00003-of-00011.safetensors",
+     "model.layers.7.mlp.up_proj.weight": "model-00003-of-00011.safetensors",
+     "model.layers.7.post_attention_layernorm.weight": "model-00003-of-00011.safetensors",
+     "model.layers.7.self_attn.k_proj.weight": "model-00002-of-00011.safetensors",
+     "model.layers.7.self_attn.o_proj.weight": "model-00003-of-00011.safetensors",
+     "model.layers.7.self_attn.q_proj.weight": "model-00002-of-00011.safetensors",
+     "model.layers.7.self_attn.rotary_emb.inv_freq": "model-00003-of-00011.safetensors",
+     "model.layers.7.self_attn.v_proj.weight": "model-00002-of-00011.safetensors",
+     "model.layers.8.input_layernorm.weight": "model-00003-of-00011.safetensors",
+     "model.layers.8.mlp.down_proj.weight": "model-00003-of-00011.safetensors",
+     "model.layers.8.mlp.gate_proj.weight": "model-00003-of-00011.safetensors",
+     "model.layers.8.mlp.up_proj.weight": "model-00003-of-00011.safetensors",
+     "model.layers.8.post_attention_layernorm.weight": "model-00003-of-00011.safetensors",
+     "model.layers.8.self_attn.k_proj.weight": "model-00003-of-00011.safetensors",
+     "model.layers.8.self_attn.o_proj.weight": "model-00003-of-00011.safetensors",
+     "model.layers.8.self_attn.q_proj.weight": "model-00003-of-00011.safetensors",
+     "model.layers.8.self_attn.rotary_emb.inv_freq": "model-00003-of-00011.safetensors",
+     "model.layers.8.self_attn.v_proj.weight": "model-00003-of-00011.safetensors",
+     "model.layers.9.input_layernorm.weight": "model-00003-of-00011.safetensors",
+     "model.layers.9.mlp.down_proj.weight": "model-00003-of-00011.safetensors",
+     "model.layers.9.mlp.gate_proj.weight": "model-00003-of-00011.safetensors",
+     "model.layers.9.mlp.up_proj.weight": "model-00003-of-00011.safetensors",
+     "model.layers.9.post_attention_layernorm.weight": "model-00003-of-00011.safetensors",
+     "model.layers.9.self_attn.k_proj.weight": "model-00003-of-00011.safetensors",
+     "model.layers.9.self_attn.o_proj.weight": "model-00003-of-00011.safetensors",
+     "model.layers.9.self_attn.q_proj.weight": "model-00003-of-00011.safetensors",
+     "model.layers.9.self_attn.rotary_emb.inv_freq": "model-00003-of-00011.safetensors",
+     "model.layers.9.self_attn.v_proj.weight": "model-00003-of-00011.safetensors",
+     "model.norm.weight": "model-00011-of-00011.safetensors"
+   }
+ }
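The weight map also allows reading single tensors without materializing the whole ~52 GB model; a minimal sketch (same assumed local checkout):

import json
from safetensors import safe_open

base = "checkpoints/diffsensei/mllm/llm"
with open(f"{base}/model.safetensors.index.json") as f:
    index = json.load(f)

# Look up which of the eleven shards holds the embedding table, then read it alone.
shard = index["weight_map"]["model.embed_tokens.weight"]  # model-00001-of-00011.safetensors
with safe_open(f"{base}/{shard}", framework="pt") as f:
    embed = f.get_tensor("model.embed_tokens.weight")  # shape (32330, 5120)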
mllm/tokenizer/added_tokens.json ADDED
@@ -0,0 +1,332 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
+ {
+ "</img>": 32101,
+ "</patch>": 32103,
+ "<box_end>": 32329,
+ "<box_start>": 32328,
+ "<img>": 32100,
+ "<img_00000>": 32000,
+ "<img_00001>": 32001,
+ "<img_00002>": 32002,
+ "<img_00003>": 32003,
+ "<img_00004>": 32004,
+ "<img_00005>": 32005,
+ "<img_00006>": 32006,
+ "<img_00007>": 32007,
+ "<img_00008>": 32008,
+ "<img_00009>": 32009,
+ "<img_00010>": 32010,
+ "<img_00011>": 32011,
+ "<img_00012>": 32012,
+ "<img_00013>": 32013,
+ "<img_00014>": 32014,
+ "<img_00015>": 32015,
+ "<img_00016>": 32016,
+ "<img_00017>": 32017,
+ "<img_00018>": 32018,
+ "<img_00019>": 32019,
+ "<img_00020>": 32020,
+ "<img_00021>": 32021,
+ "<img_00022>": 32022,
+ "<img_00023>": 32023,
+ "<img_00024>": 32024,
+ "<img_00025>": 32025,
+ "<img_00026>": 32026,
+ "<img_00027>": 32027,
+ "<img_00028>": 32028,
+ "<img_00029>": 32029,
+ "<img_00030>": 32030,
+ "<img_00031>": 32031,
+ "<img_00032>": 32032,
+ "<img_00033>": 32033,
+ "<img_00034>": 32034,
+ "<img_00035>": 32035,
+ "<img_00036>": 32036,
+ "<img_00037>": 32037,
+ "<img_00038>": 32038,
+ "<img_00039>": 32039,
+ "<img_00040>": 32040,
+ "<img_00041>": 32041,
+ "<img_00042>": 32042,
+ "<img_00043>": 32043,
+ "<img_00044>": 32044,
+ "<img_00045>": 32045,
+ "<img_00046>": 32046,
+ "<img_00047>": 32047,
+ "<img_00048>": 32048,
+ "<img_00049>": 32049,
+ "<img_00050>": 32050,
+ "<img_00051>": 32051,
+ "<img_00052>": 32052,
+ "<img_00053>": 32053,
+ "<img_00054>": 32054,
+ "<img_00055>": 32055,
+ "<img_00056>": 32056,
+ "<img_00057>": 32057,
+ "<img_00058>": 32058,
+ "<img_00059>": 32059,
+ "<img_00060>": 32060,
+ "<img_00061>": 32061,
+ "<img_00062>": 32062,
+ "<img_00063>": 32063,
+ "<img_00064>": 32064,
+ "<img_00065>": 32065,
+ "<img_00066>": 32066,
+ "<img_00067>": 32067,
+ "<img_00068>": 32068,
+ "<img_00069>": 32069,
+ "<img_00070>": 32070,
+ "<img_00071>": 32071,
+ "<img_00072>": 32072,
+ "<img_00073>": 32073,
+ "<img_00074>": 32074,
+ "<img_00075>": 32075,
+ "<img_00076>": 32076,
+ "<img_00077>": 32077,
+ "<img_00078>": 32078,
+ "<img_00079>": 32079,
+ "<img_00080>": 32080,
+ "<img_00081>": 32081,
+ "<img_00082>": 32082,
+ "<img_00083>": 32083,
+ "<img_00084>": 32084,
+ "<img_00085>": 32085,
+ "<img_00086>": 32086,
+ "<img_00087>": 32087,
+ "<img_00088>": 32088,
+ "<img_00089>": 32089,
+ "<img_00090>": 32090,
+ "<img_00091>": 32091,
+ "<img_00092>": 32092,
+ "<img_00093>": 32093,
+ "<img_00094>": 32094,
+ "<img_00095>": 32095,
+ "<img_00096>": 32096,
+ "<img_00097>": 32097,
+ "<img_00098>": 32098,
+ "<img_00099>": 32099,
+ "<loc-0>": 32104,
+ "<loc-100>": 32204,
+ "<loc-101>": 32205,
+ "<loc-102>": 32206,
+ "<loc-103>": 32207,
+ "<loc-104>": 32208,
+ "<loc-105>": 32209,
+ "<loc-106>": 32210,
+ "<loc-107>": 32211,
+ "<loc-108>": 32212,
+ "<loc-109>": 32213,
+ "<loc-10>": 32114,
+ "<loc-110>": 32214,
+ "<loc-111>": 32215,
+ "<loc-112>": 32216,
+ "<loc-113>": 32217,
+ "<loc-114>": 32218,
+ "<loc-115>": 32219,
+ "<loc-116>": 32220,
+ "<loc-117>": 32221,
+ "<loc-118>": 32222,
+ "<loc-119>": 32223,
+ "<loc-11>": 32115,
+ "<loc-120>": 32224,
+ "<loc-121>": 32225,
+ "<loc-122>": 32226,
+ "<loc-123>": 32227,
+ "<loc-124>": 32228,
+ "<loc-125>": 32229,
+ "<loc-126>": 32230,
+ "<loc-127>": 32231,
+ "<loc-128>": 32232,
+ "<loc-129>": 32233,
+ "<loc-12>": 32116,
+ "<loc-130>": 32234,
+ "<loc-131>": 32235,
+ "<loc-132>": 32236,
+ "<loc-133>": 32237,
+ "<loc-134>": 32238,
+ "<loc-135>": 32239,
+ "<loc-136>": 32240,
+ "<loc-137>": 32241,
+ "<loc-138>": 32242,
+ "<loc-139>": 32243,
+ "<loc-13>": 32117,
+ "<loc-140>": 32244,
+ "<loc-141>": 32245,
+ "<loc-142>": 32246,
+ "<loc-143>": 32247,
+ "<loc-144>": 32248,
+ "<loc-145>": 32249,
+ "<loc-146>": 32250,
+ "<loc-147>": 32251,
+ "<loc-148>": 32252,
+ "<loc-149>": 32253,
+ "<loc-14>": 32118,
+ "<loc-150>": 32254,
+ "<loc-151>": 32255,
+ "<loc-152>": 32256,
+ "<loc-153>": 32257,
+ "<loc-154>": 32258,
+ "<loc-155>": 32259,
+ "<loc-156>": 32260,
+ "<loc-157>": 32261,
+ "<loc-158>": 32262,
+ "<loc-159>": 32263,
+ "<loc-15>": 32119,
+ "<loc-160>": 32264,
+ "<loc-161>": 32265,
+ "<loc-162>": 32266,
+ "<loc-163>": 32267,
+ "<loc-164>": 32268,
+ "<loc-165>": 32269,
+ "<loc-166>": 32270,
+ "<loc-167>": 32271,
+ "<loc-168>": 32272,
+ "<loc-169>": 32273,
+ "<loc-16>": 32120,
+ "<loc-170>": 32274,
+ "<loc-171>": 32275,
+ "<loc-172>": 32276,
+ "<loc-173>": 32277,
+ "<loc-174>": 32278,
+ "<loc-175>": 32279,
+ "<loc-176>": 32280,
+ "<loc-177>": 32281,
+ "<loc-178>": 32282,
+ "<loc-179>": 32283,
+ "<loc-17>": 32121,
+ "<loc-180>": 32284,
+ "<loc-181>": 32285,
+ "<loc-182>": 32286,
+ "<loc-183>": 32287,
+ "<loc-184>": 32288,
+ "<loc-185>": 32289,
+ "<loc-186>": 32290,
+ "<loc-187>": 32291,
+ "<loc-188>": 32292,
+ "<loc-189>": 32293,
+ "<loc-18>": 32122,
+ "<loc-190>": 32294,
+ "<loc-191>": 32295,
+ "<loc-192>": 32296,
+ "<loc-193>": 32297,
+ "<loc-194>": 32298,
+ "<loc-195>": 32299,
+ "<loc-196>": 32300,
+ "<loc-197>": 32301,
+ "<loc-198>": 32302,
+ "<loc-199>": 32303,
+ "<loc-19>": 32123,
+ "<loc-1>": 32105,
+ "<loc-200>": 32304,
+ "<loc-201>": 32305,
+ "<loc-202>": 32306,
+ "<loc-203>": 32307,
+ "<loc-204>": 32308,
+ "<loc-205>": 32309,
+ "<loc-206>": 32310,
+ "<loc-207>": 32311,
+ "<loc-208>": 32312,
+ "<loc-209>": 32313,
+ "<loc-20>": 32124,
+ "<loc-210>": 32314,
+ "<loc-211>": 32315,
+ "<loc-212>": 32316,
+ "<loc-213>": 32317,
+ "<loc-214>": 32318,
+ "<loc-215>": 32319,
+ "<loc-216>": 32320,
+ "<loc-217>": 32321,
+ "<loc-218>": 32322,
+ "<loc-219>": 32323,
+ "<loc-21>": 32125,
+ "<loc-220>": 32324,
+ "<loc-221>": 32325,
+ "<loc-222>": 32326,
+ "<loc-223>": 32327,
+ "<loc-22>": 32126,
+ "<loc-23>": 32127,
+ "<loc-24>": 32128,
+ "<loc-25>": 32129,
+ "<loc-26>": 32130,
+ "<loc-27>": 32131,
+ "<loc-28>": 32132,
+ "<loc-29>": 32133,
+ "<loc-2>": 32106,
+ "<loc-30>": 32134,
+ "<loc-31>": 32135,
+ "<loc-32>": 32136,
+ "<loc-33>": 32137,
+ "<loc-34>": 32138,
+ "<loc-35>": 32139,
+ "<loc-36>": 32140,
+ "<loc-37>": 32141,
+ "<loc-38>": 32142,
+ "<loc-39>": 32143,
+ "<loc-3>": 32107,
+ "<loc-40>": 32144,
+ "<loc-41>": 32145,
+ "<loc-42>": 32146,
+ "<loc-43>": 32147,
+ "<loc-44>": 32148,
+ "<loc-45>": 32149,
+ "<loc-46>": 32150,
+ "<loc-47>": 32151,
+ "<loc-48>": 32152,
+ "<loc-49>": 32153,
+ "<loc-4>": 32108,
+ "<loc-50>": 32154,
+ "<loc-51>": 32155,
+ "<loc-52>": 32156,
+ "<loc-53>": 32157,
+ "<loc-54>": 32158,
+ "<loc-55>": 32159,
+ "<loc-56>": 32160,
+ "<loc-57>": 32161,
+ "<loc-58>": 32162,
+ "<loc-59>": 32163,
+ "<loc-5>": 32109,
+ "<loc-60>": 32164,
+ "<loc-61>": 32165,
+ "<loc-62>": 32166,
+ "<loc-63>": 32167,
+ "<loc-64>": 32168,
+ "<loc-65>": 32169,
+ "<loc-66>": 32170,
+ "<loc-67>": 32171,
+ "<loc-68>": 32172,
+ "<loc-69>": 32173,
+ "<loc-6>": 32110,
+ "<loc-70>": 32174,
+ "<loc-71>": 32175,
+ "<loc-72>": 32176,
+ "<loc-73>": 32177,
+ "<loc-74>": 32178,
+ "<loc-75>": 32179,
+ "<loc-76>": 32180,
+ "<loc-77>": 32181,
+ "<loc-78>": 32182,
+ "<loc-79>": 32183,
+ "<loc-7>": 32111,
+ "<loc-80>": 32184,
+ "<loc-81>": 32185,
+ "<loc-82>": 32186,
+ "<loc-83>": 32187,
+ "<loc-84>": 32188,
+ "<loc-85>": 32189,
+ "<loc-86>": 32190,
+ "<loc-87>": 32191,
+ "<loc-88>": 32192,
+ "<loc-89>": 32193,
+ "<loc-8>": 32112,
+ "<loc-90>": 32194,
+ "<loc-91>": 32195,
+ "<loc-92>": 32196,
+ "<loc-93>": 32197,
+ "<loc-94>": 32198,
+ "<loc-95>": 32199,
+ "<loc-96>": 32200,
+ "<loc-97>": 32201,
+ "<loc-98>": 32202,
+ "<loc-99>": 32203,
+ "<loc-9>": 32113,
+ "<patch>": 32102
+ }
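A minimal sketch of what this file adds, assuming the tokenizer directory above is checked out locally as mllm/tokenizer: the base 32000-entry Llama vocabulary gains 330 tokens, namely <img_00000>..<img_00099> (ids 32000-32099) as image-embedding placeholders, <img>/</img> and <patch>/</patch> delimiters (32100-32103), 224 location bins <loc-0>..<loc-223> (32104-32327), and <box_start>/<box_end> (32328/32329). The box string below is a hypothetical illustration of spelling a quantized bounding box with these tokens, not a format confirmed by the source.

from transformers import LlamaTokenizer

tokenizer = LlamaTokenizer.from_pretrained("mllm/tokenizer")

print(tokenizer.convert_tokens_to_ids("<img>"))        # 32100
print(tokenizer.convert_tokens_to_ids("<box_start>"))  # 32328

# Hypothetical: bracket four quantized coordinates with the box markers.
box = "<box_start><loc-12><loc-34><loc-120><loc-200><box_end>"
print(tokenizer.convert_tokens_to_ids(tokenizer.tokenize(box)))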
mllm/tokenizer/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
+ {
+ "bos_token": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "<unk>",
+ "unk_token": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
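A short sketch of the effect of this map at load time (same assumed local mllm/tokenizer path): padding is aliased to <unk>, so padded batches reuse the unknown-token id instead of adding a dedicated pad token to the vocabulary.

from transformers import LlamaTokenizer

tokenizer = LlamaTokenizer.from_pretrained("mllm/tokenizer")
print(tokenizer.bos_token, tokenizer.eos_token)      # <s> </s>
print(tokenizer.pad_token_id == tokenizer.unk_token_id)  # True: pad aliased to <unk>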
mllm/tokenizer/tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+ size 499723
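The three lines above are a Git LFS pointer, not the SentencePiece model itself. A minimal sketch to check that the real blob was fetched: its byte size and SHA-256 digest must match the pointer's "size" and "oid" fields.

import hashlib
from pathlib import Path

blob = Path("mllm/tokenizer/tokenizer.model").read_bytes()
# A pointer stub is ~130 bytes; the real model is 499723 bytes.
assert len(blob) == 499723, "still a pointer stub? run `git lfs pull`"
assert hashlib.sha256(blob).hexdigest() == (
    "9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347"
)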
mllm/tokenizer/tokenizer_config.json ADDED
@@ -0,0 +1,34 @@
+ {
+ "add_bos_token": true,
+ "add_eos_token": false,
+ "bos_token": {
+ "__type": "AddedToken",
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "clean_up_tokenization_spaces": false,
+ "eos_token": {
+ "__type": "AddedToken",
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "legacy": false,
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": null,
+ "sp_model_kwargs": {},
+ "tokenizer_class": "LlamaTokenizer",
+ "unk_token": {
+ "__type": "AddedToken",
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
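A minimal sketch of the encoding behavior this config pins down, again assuming the local mllm/tokenizer path: "add_bos_token": true and "add_eos_token": false mean every encoded sequence starts with <s> and gets no trailing </s>, and the astronomically large "model_max_length" is the transformers sentinel for "no length limit configured".

from transformers import LlamaTokenizer

tokenizer = LlamaTokenizer.from_pretrained("mllm/tokenizer")
ids = tokenizer("a manga panel").input_ids
print(ids[0] == tokenizer.bos_token_id)   # True: BOS prepended
print(ids[-1] == tokenizer.eos_token_id)  # False: no EOS appended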