roygan committed on
Commit
9af477e
·
verified ·
1 Parent(s): fb67008

update config (#3)

Browse files

- Track tokenizer.json with LFS (71413fe40951bad177f1d00c7b8b09decf87116c)
- Add tokenizer.json via LFS (5a65b912323b849b7c150099db8f881cdde50cba)
- update config (a8cb0eadabec912971979c0eb3ae25667adf84de)

.gitattributes CHANGED
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  assets/logo.png filter=lfs diff=lfs merge=lfs -text
 
 
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  assets/logo.png filter=lfs diff=lfs merge=lfs -text
37
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
chat_template.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"
3
+ }
config.json ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen2_5_VLForConditionalGeneration"
4
+ ],
5
+ "attention_dropout": 0.0,
6
+ "bos_token_id": 151643,
7
+ "eos_token_id": 151645,
8
+ "vision_start_token_id": 151652,
9
+ "vision_end_token_id": 151653,
10
+ "vision_token_id": 151654,
11
+ "image_token_id": 151655,
12
+ "video_token_id": 151656,
13
+ "hidden_act": "silu",
14
+ "hidden_size": 2048,
15
+ "initializer_range": 0.02,
16
+ "intermediate_size": 11008,
17
+ "max_position_embeddings": 128000,
18
+ "max_window_layers": 70,
19
+ "model_type": "qwen2_5_vl",
20
+ "num_attention_heads": 16,
21
+ "num_hidden_layers": 36,
22
+ "num_key_value_heads": 2,
23
+ "rms_norm_eps": 1e-06,
24
+ "rope_theta": 1000000.0,
25
+ "sliding_window": 32768,
26
+ "tie_word_embeddings": true,
27
+ "torch_dtype": "bfloat16",
28
+ "transformers_version": "4.41.2",
29
+ "_attn_implementation": "flash_attention_2",
30
+ "use_cache": true,
31
+ "use_sliding_window": false,
32
+ "vision_config": {
33
+ "depth": 32,
34
+ "hidden_act": "silu",
35
+ "hidden_size": 1280,
36
+ "intermediate_size": 3420,
37
+ "num_heads": 16,
38
+ "in_chans": 3,
39
+ "out_hidden_size": 2048,
40
+ "patch_size": 14,
41
+ "spatial_merge_size": 2,
42
+ "spatial_patch_size": 14,
43
+ "window_size": 112,
44
+ "fullatt_block_indexes": [
45
+ 7,
46
+ 15,
47
+ 23,
48
+ 31
49
+ ],
50
+ "tokens_per_second": 2,
51
+ "temporal_patch_size": 2
52
+ },
53
+ "rope_scaling": {
54
+ "type": "mrope",
55
+ "mrope_section": [
56
+ 16,
57
+ 24,
58
+ 24
59
+ ]
60
+ },
61
+ "vocab_size": 151936,
62
+ "num_experts": 2,
63
+ "experts": [
64
+ {
65
+ "hidden_size": 2048,
66
+ "intermediate_size": 11008,
67
+ "hidden_act": "silu"
68
+ },
69
+ {
70
+ "hidden_size": 2048,
71
+ "intermediate_size": 2048,
72
+ "hidden_act": "silu"
73
+ }
74
+ ],
75
+ "dof_config": {
76
+ "follow_left_ee_cartesian_pos": 3,
77
+ "follow_left_ee_rotation": 3,
78
+ "follow_left_gripper": 1,
79
+ "follow_right_ee_cartesian_pos": 3,
80
+ "follow_right_ee_rotation": 3,
81
+ "follow_right_gripper": 1,
82
+ "head_actions": 2,
83
+ "height": 1,
84
+ "car_pose": 3
85
+ },
86
+ "agent_pos_config": {
87
+ "follow_left_ee_cartesian_pos": 3,
88
+ "follow_left_ee_rotation": 3,
89
+ "follow_left_gripper": 1,
90
+ "follow_right_ee_cartesian_pos": 3,
91
+ "follow_right_ee_rotation": 3,
92
+ "follow_right_gripper": 1,
93
+ "head_actions": 2,
94
+ "height": 1,
95
+ "car_pose": 3
96
+ },
97
+ "noise_scheduler": {
98
+ "beta_alpha": 1.5,
99
+ "beta_beta": 1.0,
100
+ "s": 0.999,
101
+ "num_inference_timesteps": 5
102
+ },
103
+ "dim_inputs": [2048, 2048],
104
+ "attention_moe": false,
105
+ "mlp_moe": true
106
+ }
configuration.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"framework": "pytorch", "task": "vision-understanding", "allow_remote": true}
generation_config.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151643,
3
+ "pad_token_id": 151643,
4
+ "do_sample": true,
5
+ "eos_token_id": [
6
+ 151645,
7
+ 151643
8
+ ],
9
+ "repetition_penalty": 1.05,
10
+ "temperature": 0.000001,
11
+ "transformers_version": "4.49.0"
12
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
preprocessor_config.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "min_pixels": 3136,
3
+ "max_pixels": 12845056,
4
+ "patch_size": 14,
5
+ "temporal_patch_size": 2,
6
+ "merge_size": 2,
7
+ "image_mean": [
8
+ 0.48145466,
9
+ 0.4578275,
10
+ 0.40821073
11
+ ],
12
+ "image_std": [
13
+ 0.26862954,
14
+ 0.26130258,
15
+ 0.27577711
16
+ ],
17
+ "image_processor_type": "Qwen2VLImageProcessor",
18
+ "processor_class": "Qwen2_5_VLProcessor"
19
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a5df236d417e062783cda976a6c21955fe386a1dd8fb9aa06f29694a6d3a4de
3
+ size 11826664
tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff
 
vocab.json ADDED
The diff for this file is too large to render. See raw diff