TheKOG committed on
Commit
f04aec5
1 Parent(s): 390cc56

Upload 11 files

Browse files
Files changed (6)
  1. README.md +1 -8
  2. config.json +12 -12
  3. generation_config.json +5 -6
  4. pytorch_model.bin +2 -2
  5. tokenizer.json +0 -0
  6. vocab.json +0 -0
README.md CHANGED
@@ -1,11 +1,4 @@
1
- ---
2
- license: apache-2.0
3
- pipeline_tag: image-to-text
4
- tags:
5
- - image-to-text
6
- - image-captioning
7
- ---
8
- ## Use in Transformers
9
  ```python
10
  from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer
11
  import torch
 
1
+ ## Usage method:
 
 
 
 
 
 
 
2
  ```python
3
  from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer
4
  import torch
config.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "_commit_hash": null,
3
- "_name_or_path": "./best-pth",
4
  "architectures": [
5
  "VisionEncoderDecoderModel"
6
  ],
7
- "bos_token_id": 36,
8
  "decoder": {
9
  "_name_or_path": "",
10
  "activation_function": "gelu_new",
@@ -15,16 +15,16 @@
15
  "attn_pdrop": 0.1,
16
  "bad_words_ids": null,
17
  "begin_suppress_tokens": null,
18
- "bos_token_id": 36,
19
  "chunk_size_feed_forward": 0,
20
  "cross_attention_hidden_size": null,
21
- "decoder_start_token_id": 36,
22
  "diversity_penalty": 0.0,
23
  "do_sample": false,
24
  "early_stopping": false,
25
  "embd_pdrop": 0.1,
26
  "encoder_no_repeat_ngram_size": 0,
27
- "eos_token_id": 36,
28
  "exponential_decay_length_penalty": null,
29
  "finetuning_task": null,
30
  "forced_bos_token_id": null,
@@ -58,7 +58,7 @@
58
  "output_attentions": false,
59
  "output_hidden_states": false,
60
  "output_scores": false,
61
- "pad_token_id": 36,
62
  "prefix": null,
63
  "problem_type": null,
64
  "pruned_heads": {},
@@ -92,13 +92,13 @@
92
  "top_p": 1.0,
93
  "torch_dtype": null,
94
  "torchscript": false,
95
- "transformers_version": "4.28.1",
96
  "typical_p": 1.0,
97
  "use_bfloat16": false,
98
  "use_cache": true,
99
- "vocab_size": 37
100
  },
101
- "decoder_start_token_id": 36,
102
  "encoder": {
103
  "_name_or_path": "",
104
  "add_cross_attention": false,
@@ -175,14 +175,14 @@
175
  "top_p": 1.0,
176
  "torch_dtype": null,
177
  "torchscript": false,
178
- "transformers_version": "4.28.1",
179
  "typical_p": 1.0,
180
  "use_bfloat16": false
181
  },
182
- "eos_token_id": 36,
183
  "is_encoder_decoder": true,
184
  "model_type": "vision-encoder-decoder",
185
- "pad_token_id": 36,
186
  "tie_word_embeddings": false,
187
  "torch_dtype": "float32",
188
  "transformers_version": null
 
1
  {
2
  "_commit_hash": null,
3
+ "_name_or_path": "./image-captioning-output",
4
  "architectures": [
5
  "VisionEncoderDecoderModel"
6
  ],
7
+ "bos_token_id": 50256,
8
  "decoder": {
9
  "_name_or_path": "",
10
  "activation_function": "gelu_new",
 
15
  "attn_pdrop": 0.1,
16
  "bad_words_ids": null,
17
  "begin_suppress_tokens": null,
18
+ "bos_token_id": 50256,
19
  "chunk_size_feed_forward": 0,
20
  "cross_attention_hidden_size": null,
21
+ "decoder_start_token_id": 50256,
22
  "diversity_penalty": 0.0,
23
  "do_sample": false,
24
  "early_stopping": false,
25
  "embd_pdrop": 0.1,
26
  "encoder_no_repeat_ngram_size": 0,
27
+ "eos_token_id": 50256,
28
  "exponential_decay_length_penalty": null,
29
  "finetuning_task": null,
30
  "forced_bos_token_id": null,
 
58
  "output_attentions": false,
59
  "output_hidden_states": false,
60
  "output_scores": false,
61
+ "pad_token_id": 50256,
62
  "prefix": null,
63
  "problem_type": null,
64
  "pruned_heads": {},
 
92
  "top_p": 1.0,
93
  "torch_dtype": null,
94
  "torchscript": false,
95
+ "transformers_version": "4.31.0",
96
  "typical_p": 1.0,
97
  "use_bfloat16": false,
98
  "use_cache": true,
99
+ "vocab_size": 50257
100
  },
101
+ "decoder_start_token_id": 50256,
102
  "encoder": {
103
  "_name_or_path": "",
104
  "add_cross_attention": false,
 
175
  "top_p": 1.0,
176
  "torch_dtype": null,
177
  "torchscript": false,
178
+ "transformers_version": "4.31.0",
179
  "typical_p": 1.0,
180
  "use_bfloat16": false
181
  },
182
+ "eos_token_id": 50256,
183
  "is_encoder_decoder": true,
184
  "model_type": "vision-encoder-decoder",
185
+ "pad_token_id": 50256,
186
  "tie_word_embeddings": false,
187
  "torch_dtype": "float32",
188
  "transformers_version": null
generation_config.json CHANGED
@@ -1,8 +1,7 @@
1
  {
2
- "_from_model_config": true,
3
- "bos_token_id": 36,
4
- "decoder_start_token_id": 36,
5
- "eos_token_id": 36,
6
- "pad_token_id": 36,
7
- "transformers_version": "4.28.1"
8
  }
 
1
  {
2
+ "bos_token_id": 50256,
3
+ "decoder_start_token_id": 50256,
4
+ "eos_token_id": 50256,
5
+ "pad_token_id": 50256,
6
+ "transformers_version": "4.31.0"
 
7
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:85507ef26553b9b7b332a4e7772207bad2b0fec1f6e1842fbbd63096510ce5c5
3
- size 827948609
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12fde92ad7047f4dc316ce88e0a922b872d7b3db12bbb61e60f9de17ada6f201
3
+ size 956935293
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
vocab.json CHANGED
The diff for this file is too large to render. See raw diff