stas committed
Commit 878fa94
1 Parent(s): ac8e205
added_tokens.json ADDED
@@ -0,0 +1,4 @@
+ {
+   "<fake_token_around_image>": 50265,
+   "<image>": 50266
+ }
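
A minimal sketch of how these two IDs could be sanity-checked once the tokenizer files from this commit are available locally (the folder name "tiny-random-vopt-clip" is taken from make_tiny_model.py below; loading by hub repo id would work the same way):

# sketch: verify the added-token ids recorded in added_tokens.json
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("tiny-random-vopt-clip")
assert tok.convert_tokens_to_ids("<fake_token_around_image>") == 50265
assert tok.convert_tokens_to_ids("<image>") == 50266
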
config.json ADDED
@@ -0,0 +1,48 @@
+ {
+   "_remove_final_layer_norm": false,
+   "activation_function": "relu",
+   "additional_vocab_size": 2,
+   "alpha_initializer": "ones",
+   "alpha_type": "vector",
+   "alphas_initializer_range": 0.0,
+   "architectures": [
+     "VOPTForCausalLM"
+   ],
+   "attention_dropout": 0.0,
+   "bos_token_id": 2,
+   "cross_layer_interval": 1,
+   "do_layer_norm_before": true,
+   "dropout": 0.1,
+   "eos_token_id": 2,
+   "ffn_dim": 64,
+   "freeze_lm_head": false,
+   "freeze_text_layers": true,
+   "freeze_text_module_exceptions": [],
+   "freeze_vision_layers": true,
+   "freeze_vision_module_exceptions": [],
+   "hidden_size": 16,
+   "image_token_index": 50257,
+   "init_std": 0.02,
+   "layerdrop": 0.0,
+   "max_new_tokens": 100,
+   "max_position_embeddings": 128,
+   "model_type": "opt",
+   "num_attention_heads": 4,
+   "num_hidden_layers": 2,
+   "pad_token_id": 1,
+   "resampler_depth": 2,
+   "resampler_head_dim": 8,
+   "resampler_n_heads": 2,
+   "resampler_n_latents": 16,
+   "tie_word_embeddings": false,
+   "torch_dtype": "float16",
+   "transformers_version": "4.26.0.dev0",
+   "use_cache": true,
+   "use_resampler": true,
+   "vision_embed_dim": 32,
+   "vision_image_size": 30,
+   "vision_model_name": "hf-internal-testing/tiny-random-clip",
+   "vision_model_params": "{}",
+   "vocab_size": 50265,
+   "word_embed_proj_dim": 16
+ }
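
The text-side sizes here are deliberately tiny (hidden_size 16, 2 layers, 4 heads), and the two extra embedding slots from "additional_vocab_size" sit on top of "vocab_size", which is what makes room for the token ids 50265 and 50266 above. A minimal sketch of checking that, assuming the m4 package used in make_tiny_model.py below is importable and that VOPTConfig follows the usual PretrainedConfig interface:

# sketch: load the saved config and check the tiny-model sizes
from m4.models.vopt.modeling_vopt import VOPTConfig

cfg = VOPTConfig.from_pretrained("tiny-random-vopt-clip")
assert cfg.hidden_size == 16 and cfg.num_hidden_layers == 2
# 50265 regular entries + 2 additional slots cover ids 50265 and 50266
assert cfg.vocab_size + cfg.additional_vocab_size == 50267
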
make_tiny_model.py ADDED
@@ -0,0 +1,113 @@
+ #!/usr/bin/env python
+
+ # This script creates a super tiny model that is useful inside tests, when we just want to test that
+ # the machinery works, without needing to check the quality of the outcomes.
+ #
+ # usage: adjust the configs if wanted, but otherwise just run the script
+
+ from pathlib import Path
+ from types import SimpleNamespace
+
+ import torchvision.transforms as transforms
+ from PIL import Image
+
+ from m4.models.vopt.modeling_vopt import VOPTConfig, VOPTForCausalLM
+ from m4.training.packing import image_attention_mask_for_packed_input_ids, incremental_to_binary_attention_mask
+ from m4.training.utils import get_tokenizer
+
+
+ mname_tiny = "tiny-random-vopt-clip"
+
+ path = Path(mname_tiny)
+ path.mkdir(parents=True, exist_ok=True)
+
+ # from the hardcoded https://github.com/huggingface/m4/blob/adf102f0000cb2632cd8a3ebb87398c65e448a97/m4/training/main.py#L80
+ additional_vocab_size = 2
+
+ config = VOPTConfig()
+ config.update(
+     dict(
+         ffn_dim=64,
+         hidden_size=16,
+         max_position_embeddings=128,
+         num_attention_heads=4,
+         num_hidden_layers=2,
+         word_embed_proj_dim=16,
+         max_new_tokens=100,
+         use_resampler=True,
+         resampler_depth=2,
+         resampler_head_dim=8,
+         resampler_n_heads=2,
+         resampler_n_latents=16,
+         vision_embed_dim=32,
+         vision_image_size=30,
+         vision_model_name="hf-internal-testing/tiny-random-clip",
+         vision_model_params="{}",
+         vocab_size=50265,
+         additional_vocab_size=additional_vocab_size,
+     )
+ )
+
+ # print(config)
+ # can now modify config to say tiny values
+
+ model = VOPTForCausalLM.from_config(config)
+ # print(model.config)
+ # print(model)
+
+ tokenizer_config = dict(
+     tokenizer_add_special_tokens="{}",
+     tokenizer_add_tokens=(
+         '[AddedToken("<fake_token_around_image>", rstrip=False, lstrip=False), AddedToken("<image>", rstrip=False,'
+         " lstrip=False)]"
+     ),
+     tokenizer_name="facebook/opt-13b",
+     tokenizer_params='{"use_fast":True}',
+ )
+ tokenizer_config = SimpleNamespace(**tokenizer_config)
+ # print(tokenizer_config)
+
+ tokenizer = get_tokenizer(
+     tokenizer_name=tokenizer_config.tokenizer_name,
+     tokenizer_add_tokens=tokenizer_config.tokenizer_add_tokens,
+     tokenizer_add_special_tokens=tokenizer_config.tokenizer_add_special_tokens,
+     tokenizer_params=tokenizer_config.tokenizer_params,
+     additional_vocab_size=model.config.additional_vocab_size,
+     model_vocab_size=model.config.vocab_size,
+ )
+ assert "<image>" in tokenizer.get_vocab()
+
+ # Test w/ one image and one text
+ query = "<fake_token_around_image><image><fake_token_around_image>This is a picture of a cat."
+ query_tokens = tokenizer(query, return_tensors="pt")
+
+ num_images_per_ex = 1
+ pixel_values = transforms.ToTensor()(Image.new("RGB", (30, 30))).repeat(1, 1, 1, 1).unsqueeze(0)
+ image_attention_mask, _ = image_attention_mask_for_packed_input_ids(query_tokens["input_ids"], tokenizer)
+ image_attention_mask = incremental_to_binary_attention_mask(image_attention_mask, num_classes=num_images_per_ex)
+
+ input = {
+     "input_ids": query_tokens["input_ids"],
+     "attention_mask": query_tokens["attention_mask"],
+     "pixel_values": pixel_values,
+     "image_attention_mask": image_attention_mask,
+ }
+ # debug shapes
+ # print(query_tokens["input_ids"].shape)
+ # print(query_tokens["attention_mask"].shape)
+ # print(pixel_values.shape)
+ # print(image_attention_mask.shape)
+
+ out_gen = model.generate(**input)
+ text = tokenizer.batch_decode(out_gen)
+ # print(text)
+
+ # Save model + config + tokenizer
+ model.half()  # makes it smaller
+ model.save_pretrained(path)
+ tokenizer.save_pretrained(path)
+
+ # test we can load it back
+ model = VOPTForCausalLM.from_pretrained(path)
+
+ print(f"Generated {mname_tiny} - Upload the generated folder to the hub")
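
A short usage sketch of the resulting checkpoint in a test, assuming the same m4 package and the locally generated folder (pixel_values and image_attention_mask would be built exactly as in the script above):

# sketch: reload the tiny checkpoint the way a test would
from transformers import AutoTokenizer
from m4.models.vopt.modeling_vopt import VOPTForCausalLM

model = VOPTForCausalLM.from_pretrained("tiny-random-vopt-clip")
tokenizer = AutoTokenizer.from_pretrained("tiny-random-vopt-clip")
inputs = tokenizer("<fake_token_around_image><image><fake_token_around_image>Hi", return_tensors="pt")
# add pixel_values and image_attention_mask as shown above before calling model.generate(...)
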
merges.txt ADDED
The diff for this file is too large to render. See raw diff
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b026c0c74c4616842ce379de3a50d64c83a6f4a261d9d333e4f115764951a48e
+ size 3435021
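
These three lines are a git-lfs pointer rather than the weights themselves; the real pytorch_model.bin is fetched from LFS storage and should match the recorded digest and size. A minimal sketch of that check on a downloaded copy:

# sketch: verify a downloaded pytorch_model.bin against the LFS pointer above
import hashlib
import os

weights = "pytorch_model.bin"  # the resolved file, not this pointer
assert os.path.getsize(weights) == 3435021
with open(weights, "rb") as f:
    assert hashlib.sha256(f.read()).hexdigest() == (
        "b026c0c74c4616842ce379de3a50d64c83a6f4a261d9d333e4f115764951a48e"
    )
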
special_tokens_map.json ADDED
@@ -0,0 +1,46 @@
+ {
+   "additional_special_tokens": [
+     {
+       "content": "<fake_token_around_image>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false
+     },
+     {
+       "content": "<image>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false
+     }
+   ],
+   "bos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "<pad>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
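
Note that bos, eos, and unk all reuse "</s>" here, matching the facebook/opt-13b tokenizer this checkpoint is derived from. A minimal sketch of checking the mapping, under the same local-folder assumption as above:

# sketch: the special-token strings recorded in special_tokens_map.json
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("tiny-random-vopt-clip")
assert tok.bos_token == tok.eos_token == tok.unk_token == "</s>"
assert tok.pad_token == "<pad>"
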
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
tokenizer_config.json ADDED
@@ -0,0 +1,41 @@
+ {
+   "add_bos_token": true,
+   "add_prefix_space": false,
+   "bos_token": {
+     "__type": "AddedToken",
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "__type": "AddedToken",
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "errors": "replace",
+   "model_max_length": 1000000000000000019884624838656,
+   "name_or_path": "facebook/opt-13b",
+   "pad_token": {
+     "__type": "AddedToken",
+     "content": "<pad>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "special_tokens_map_file": null,
+   "tokenizer_class": "GPT2Tokenizer",
+   "unk_token": {
+     "__type": "AddedToken",
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
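
Since "add_bos_token" is true and the bos token is "</s>" (id 2 per config.json), every encoded sequence should start with that id. A minimal sketch, under the same local-folder assumption:

# sketch: add_bos_token=true should prepend the </s> bos id to every encoding
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("tiny-random-vopt-clip")
ids = tok("a picture of a cat").input_ids
assert ids[0] == tok.bos_token_id == 2
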
vocab.json ADDED
The diff for this file is too large to render. See raw diff