sumet committed on
Commit ce8d8e0
1 Parent(s): c37132c

Upload 9 files

config.json CHANGED
@@ -1,16 +1,17 @@
  {
- "_name_or_path": "microsoft/trocr-base-stage1",
+ "_name_or_path": "microsoft/trocr-base-handwritten",
  "architectures": [
  "VisionEncoderDecoderModel"
  ],
  "decoder": {
  "_name_or_path": "",
  "activation_dropout": 0.0,
- "activation_function": "relu",
+ "activation_function": "gelu",
  "add_cross_attention": true,
  "architectures": null,
  "attention_dropout": 0.0,
  "bad_words_ids": null,
+ "begin_suppress_tokens": null,
  "bos_token_id": 0,
  "chunk_size_feed_forward": 0,
  "classifier_dropout": 0.0,
@@ -27,6 +28,7 @@
  "early_stopping": false,
  "encoder_no_repeat_ngram_size": 0,
  "eos_token_id": 2,
+ "exponential_decay_length_penalty": null,
  "finetuning_task": null,
  "forced_bos_token_id": null,
  "forced_eos_token_id": null,
@@ -41,10 +43,10 @@
  "LABEL_0": 0,
  "LABEL_1": 1
  },
- "layernorm_embedding": false,
+ "layernorm_embedding": true,
  "length_penalty": 1.0,
  "max_length": 20,
- "max_position_embeddings": 1024,
+ "max_position_embeddings": 512,
  "min_length": 0,
  "model_type": "trocr",
  "no_repeat_ngram_size": 0,
@@ -62,21 +64,23 @@
  "repetition_penalty": 1.0,
  "return_dict": true,
  "return_dict_in_generate": false,
- "scale_embedding": true,
+ "scale_embedding": false,
  "sep_token_id": null,
+ "suppress_tokens": null,
  "task_specific_params": null,
  "temperature": 1.0,
+ "tf_legacy_loss": false,
  "tie_encoder_decoder": false,
- "tie_word_embeddings": false,
+ "tie_word_embeddings": true,
  "tokenizer_class": null,
  "top_k": 50,
  "top_p": 1.0,
  "torch_dtype": null,
  "torchscript": false,
- "transformers_version": "4.12.5",
+ "typical_p": 1.0,
  "use_bfloat16": false,
  "use_cache": false,
- "use_learned_position_embeddings": false,
+ "use_learned_position_embeddings": true,
  "vocab_size": 50265
  },
  "decoder_start_token_id": 0,
@@ -87,6 +91,7 @@
  "architectures": null,
  "attention_probs_dropout_prob": 0.0,
  "bad_words_ids": null,
+ "begin_suppress_tokens": null,
  "bos_token_id": null,
  "chunk_size_feed_forward": 0,
  "cross_attention_hidden_size": null,
@@ -95,7 +100,9 @@
  "do_sample": false,
  "early_stopping": false,
  "encoder_no_repeat_ngram_size": 0,
+ "encoder_stride": 16,
  "eos_token_id": null,
+ "exponential_decay_length_penalty": null,
  "finetuning_task": null,
  "forced_bos_token_id": null,
  "forced_eos_token_id": null,
@@ -141,8 +148,10 @@
  "return_dict": true,
  "return_dict_in_generate": false,
  "sep_token_id": null,
+ "suppress_tokens": null,
  "task_specific_params": null,
  "temperature": 1.0,
+ "tf_legacy_loss": false,
  "tie_encoder_decoder": false,
  "tie_word_embeddings": true,
  "tokenizer_class": null,
@@ -150,19 +159,20 @@
  "top_p": 1.0,
  "torch_dtype": null,
  "torchscript": false,
- "transformers_version": "4.12.5",
+ "typical_p": 1.0,
  "use_bfloat16": false
  },
  "eos_token_id": 2,
  "is_encoder_decoder": true,
  "length_penalty": 2.0,
- "max_length": 10,
+ "max_length": 64,
  "model_type": "vision-encoder-decoder",
  "no_repeat_ngram_size": 3,
  "num_beams": 4,
  "pad_token_id": 1,
+ "processor_class": "TrOCRProcessor",
  "tie_word_embeddings": false,
  "torch_dtype": "float32",
- "transformers_version": null,
+ "transformers_version": "4.32.1",
  "vocab_size": 50265
  }
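The new config re-bases the checkpoint on microsoft/trocr-base-handwritten (gelu decoder activation, learned position embeddings, 512 decoder positions) and raises the generation max_length from 10 to 64. A minimal inference sketch for a checkpoint carrying this config is shown below; loading the processor from the base model is an assumption (this commit adds no tokenizer files), and "sumet/trocr-finetuned" is only a hypothetical placeholder for the actual repo id.

# Hedged sketch: run OCR with a checkpoint that uses this config.
# "sumet/trocr-finetuned" is a placeholder repo id; the processor comes from
# the base model because this commit does not upload tokenizer files.
from PIL import Image
from transformers import TrOCRProcessor, VisionEncoderDecoderModel

processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
model = VisionEncoderDecoderModel.from_pretrained("sumet/trocr-finetuned")  # placeholder

image = Image.open("line.png").convert("RGB")               # a single handwritten text line
pixel_values = processor(images=image, return_tensors="pt").pixel_values

generated_ids = model.generate(pixel_values)                # beam search per the config defaults
print(processor.batch_decode(generated_ids, skip_special_tokens=True)[0])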
generation_config.json ADDED
@@ -0,0 +1,13 @@
+ {
+ "bos_token_id": 0,
+ "decoder_start_token_id": 0,
+ "early_stopping": true,
+ "eos_token_id": 2,
+ "length_penalty": 2.0,
+ "max_length": 64,
+ "no_repeat_ngram_size": 3,
+ "num_beams": 4,
+ "pad_token_id": 1,
+ "transformers_version": "4.32.1",
+ "use_cache": false
+ }
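These defaults (4 beams, length penalty 2.0, max_length 64, no repeated 3-grams) are what model.generate() picks up automatically once this file is in the repo. The snippet below is a hedged sketch of inspecting and overriding them; "sumet/trocr-finetuned" is again a hypothetical repo id.

# Sketch: load the generation defaults shipped in generation_config.json.
from transformers import GenerationConfig

gen_config = GenerationConfig.from_pretrained("sumet/trocr-finetuned")  # placeholder id
print(gen_config.num_beams, gen_config.max_length, gen_config.length_penalty)  # 4 64 2.0

# A per-call override leaves the file untouched, e.g. allow longer transcriptions:
# model.generate(pixel_values, generation_config=gen_config, max_length=128)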
optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7616590322292667a9236be8811f1fd731264455df71ce3cee71752f71c90bd0
+ size 2667049975
preprocessor_config.json ADDED
@@ -0,0 +1,22 @@
+ {
+ "do_normalize": true,
+ "do_rescale": true,
+ "do_resize": true,
+ "image_mean": [
+ 0.5,
+ 0.5,
+ 0.5
+ ],
+ "image_processor_type": "ViTImageProcessor",
+ "image_std": [
+ 0.5,
+ 0.5,
+ 0.5
+ ],
+ "resample": 2,
+ "rescale_factor": 0.00392156862745098,
+ "size": {
+ "height": 384,
+ "width": 384
+ }
+ }
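In plain terms, this preprocessor resizes each image to 384x384 with bilinear resampling (resample=2), rescales pixel values by 1/255 (0.00392...), and normalizes with mean=std=0.5, so the model sees values in [-1, 1]. Below is a small sketch rebuilding the same pipeline from these values, assuming a local file line.png.

# Sketch: reconstruct the image preprocessing from the values in this file.
from PIL import Image
from transformers import ViTImageProcessor

image_processor = ViTImageProcessor(
    do_resize=True, size={"height": 384, "width": 384}, resample=2,  # 2 = bilinear
    do_rescale=True, rescale_factor=1 / 255,
    do_normalize=True, image_mean=[0.5, 0.5, 0.5], image_std=[0.5, 0.5, 0.5],
)
pixel_values = image_processor(Image.open("line.png").convert("RGB"), return_tensors="pt").pixel_values
print(pixel_values.shape, pixel_values.min(), pixel_values.max())  # (1, 3, 384, 384), within [-1, 1]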
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:18ec62351d5433a370ad266920c37e02540712b11d0f694abcf1c91a9ddb86b4
- size 1539649667
+ oid sha256:d71cfba588a3059e92e4e4045978fa552e7d94773f78ce87cfbf6c6b0d604415
+ size 1335854221
rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a6bda2751f95a66ee4a22b3ad21795225c1e7a41e2a49ee94036634d40fc6328
+ size 14575
scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:41832dedf57a243e88a5dd568de84f293bb695cd3e7de2787177db6ba2f2c0d7
+ size 627
trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7c993f98f6063ff0cb800e08f158acc0ebb2d3c2af87fa0034b7129d0443e01b
+ size 4219