yifanxu committed
Commit c6b22dd
1 Parent(s): 8475861

model version 1.0

README.md CHANGED
@@ -1,3 +1,15 @@
- ---
- license: apache-2.0
- ---
+ ## Libra-Chat
+ This model was instruction-finetuned from Libra-Base for multi-modal chat.
+
+ ### !!! NOTE !!!
+ In addition to the pretrained weights in this repo, please download the pretrained CLIP model from Hugging Face and place it inside the model directory, as:
+
+ ```
+ libra-chat/
+ ├── ...
+ └── openai-clip-vit-large-patch14-336/
+     └── ...
+ ```
+
+ The CLIP model can be downloaded [here](https://huggingface.co/openai/clip-vit-large-patch14-336).
+
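A minimal sketch of one way to fetch the CLIP encoder into the expected folder with `huggingface_hub` (the `libra-chat/` target path mirrors the tree above and assumes that is where this repo was cloned):

```python
# Sketch: download the CLIP encoder into the folder name Libra-Chat expects.
# Assumes `huggingface_hub` is installed and this repo is cloned to ./libra-chat.
from huggingface_hub import snapshot_download

snapshot_download(
    repo_id="openai/clip-vit-large-patch14-336",
    local_dir="libra-chat/openai-clip-vit-large-patch14-336",
)
```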
config.json ADDED
@@ -0,0 +1,46 @@
+ {
+   "_name_or_path": "/home/yfxu/libra/CHECKPOINTS/Libra/instruction_1",
+   "addition_mode": false,
+   "architectures": [
+     "LibraForCausalLM"
+   ],
+   "attn_pdrop": 0.0,
+   "bos_token_id": 1,
+   "bridge_rank": 8,
+   "concat_signals": true,
+   "contiguous_signal_size": 4096,
+   "embd_pdrop": 0.0,
+   "eos_token_id": 2,
+   "hidden_act": "silu",
+   "hidden_size": 4096,
+   "image_feature_resolution": 24,
+   "initializer_range": 0.02,
+   "intermediate_size": 11008,
+   "max_position_embeddings": 2048,
+   "max_vision_token_length": 578,
+   "model_type": "libra",
+   "newline_token_id": 13,
+   "norm_signals": true,
+   "num_attention_heads": 32,
+   "num_hidden_layers": 32,
+   "num_key_value_heads": 32,
+   "pad_token_id": 0,
+   "resid_pdrop": 0.0,
+   "rms_norm_eps": 1e-06,
+   "rope_theta": 10000.0,
+   "tie_word_embeddings": false,
+   "torch_dtype": "bfloat16",
+   "transformers_version": "4.38.2",
+   "unified_head": false,
+   "use_2d_rope": false,
+   "use_bridge": true,
+   "use_cache": true,
+   "use_vision_position_embedding": false,
+   "vision_codebook_num": 2,
+   "vision_down_ratio": 4,
+   "vision_embd_pdrop": 0.0,
+   "vision_prediction_mode": "1d",
+   "vision_resid_pdrop": 0.0,
+   "vision_vocab_size": 514,
+   "vocab_size": 32000
+ }
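Note that `"model_type": "libra"` and `LibraForCausalLM` are custom, so loading the weights presumably requires the Libra codebase rather than a stock `transformers` auto-class. A minimal sketch for inspecting the key fields with the standard library (the `libra-chat/` path is an assumption about where the repo was cloned):

```python
# Sketch: read config.json and print a few architecture parameters.
import json

with open("libra-chat/config.json") as f:
    cfg = json.load(f)

print(cfg["architectures"])                          # ['LibraForCausalLM']
print(cfg["hidden_size"], cfg["num_hidden_layers"])  # 4096 32 (7B-scale LLaMA-style backbone)
print(cfg["max_vision_token_length"])                # 578 = 24*24 image tokens + 2 delimiters
```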
generation_config.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 1,
+   "eos_token_id": 2,
+   "pad_token_id": 0,
+   "transformers_version": "4.38.2"
+ }
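These defaults only pin the special-token IDs (matching `special_tokens_map.json` below); sampling parameters must be supplied at generation time. A minimal sketch for loading them with `transformers`, assuming a local `libra-chat/` checkout:

```python
# Sketch: load the generation defaults; only bos/eos/pad IDs are set here.
from transformers import GenerationConfig

gen_cfg = GenerationConfig.from_pretrained("libra-chat")
print(gen_cfg.bos_token_id, gen_cfg.eos_token_id, gen_cfg.pad_token_id)  # 1 2 0
```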
model-00001-of-00005.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:345b6c3ec19864928e52d5ef8559d5a32dcbb13be73dc29fe12c5b245e183b3a
+ size 4983174656
model-00002-of-00005.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:490254e3e79aea5fa5e1d45a16e251646511c30bec1a0713a937bcfd90f0f86c
+ size 4945806000
model-00003-of-00005.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:aeb185c67000894f10651a2713562651a5dd2247e425c1c7837509969d408f40
+ size 4981750512
model-00004-of-00005.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:746949af79a9fd564aa847cd64fa72db8eed9f7346429159620151a8da2339de
+ size 4972348888
model-00005-of-00005.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a313056d8006ba9668e01a1220886e6c5cd79af6e1672694d3b908461f16cd5a
+ size 2148920376
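The five shard entries above are git-lfs pointer files; the referenced weights total roughly 22 GB. A small post-clone sanity check (a sketch, assuming the repo lives at `libra-chat/` and was pulled with `git lfs`):

```python
# Sketch: confirm the shards are real weights, not unexpanded LFS pointers
# (a pointer file is only ~130 bytes; the five shards should sum to ~22 GB).
from pathlib import Path

shards = sorted(Path("libra-chat").glob("model-*-of-00005.safetensors"))
total_bytes = sum(p.stat().st_size for p in shards)
print(f"{len(shards)} shards, {total_bytes / 1e9:.1f} GB")  # expect: 5 shards, ~22.0 GB
```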
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+ size 499723
tokenizer_config.json ADDED
@@ -0,0 +1,33 @@
+ {
+   "add_bos_token": true,
+   "add_eos_token": false,
+   "bos_token": {
+     "__type": "AddedToken",
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "clean_up_tokenization_spaces": false,
+   "eos_token": {
+     "__type": "AddedToken",
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "model_max_length": 2048,
+   "pad_token": null,
+   "sp_model_kwargs": {},
+   "tokenizer_class": "LlamaTokenizer",
+   "unk_token": {
+     "__type": "AddedToken",
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
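Since `tokenizer_class` is the stock `LlamaTokenizer` backed by the SentencePiece `tokenizer.model`, the text side can presumably be loaded without the Libra code. A minimal sketch (assuming a local `libra-chat/` checkout and `sentencepiece` installed):

```python
# Sketch: load the text tokenizer directly with transformers.
from transformers import LlamaTokenizer

tok = LlamaTokenizer.from_pretrained("libra-chat")
print(tok.bos_token, tok.eos_token, tok.model_max_length)  # <s> </s> 2048
ids = tok("Describe the image.").input_ids  # BOS (id 1) prepended, per add_bos_token
```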
vision_tokenizer_config.yaml ADDED
@@ -0,0 +1,23 @@
+ freeze: True
+ max_vision_token_length: 578 # 24*24 (resolution) + 2 (<img> and <\img>); corresponding to model_config.max_vision_token_length, dataset_config.image_size
+ params:
+   embed_dim: 1024 # debug
+   ckpt_path: vqgan.ckpt
+   codebook_size: 512
+   num_codebook: 2
+   ddconfig:
+     # only_auto_encoder: True
+     encoder_name: openai-clip-vit-large-patch14-336
+     select_layer: [2,10,18,22]
+     double_z: False
+     z_channels: 1024
+     resolution: 336 # 336
+     in_channels: 3
+     out_ch: 3
+     ch: 128
+     ch_mult: [ 1,1,2,4,8] # num_down = len(ch_mult)-1
+     num_res_blocks: 2
+     attn_resolutions: [24]
+     dropout: 0.0
+     initial_resolution: 24
+     num_attn_head: 8
vqgan.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d01a38fadd81dec3557120ec6e8d36d51758ac1a8a8afe58102f404d03e47a08
+ size 3247360961