aravind-selvam committed
Commit 05eb3cf
1 Parent(s): c5d5234

End of training

.gitignore ADDED
@@ -0,0 +1 @@
+ checkpoint-*/
README.md CHANGED
@@ -1,58 +1,63 @@
 ---
 license: mit
 ---
- ### Usage
- Inference code for this model:
-
- ```
- import re
-
- import torch
- from datasets import load_dataset
- from transformers import DonutProcessor, VisionEncoderDecoderModel
-
- fine_tuned_model = VisionEncoderDecoderModel.from_pretrained("aravind-selvam/donut_finetuned_chart")
- processor = DonutProcessor.from_pretrained("aravind-selvam/donut_finetuned_chart")
-
- # Move the model to GPU if one is available
- device = "cuda" if torch.cuda.is_available() else "cpu"
- fine_tuned_model.to(device)
-
- # Load a document image from the test set
- dataset = load_dataset("hf-internal-testing/example-documents", split="test")
- test_sample = dataset[1]
-
- def run_prediction(sample, model=fine_tuned_model, processor=processor):
-     # pixel values for the document image
-     pixel_values = processor(sample["image"], return_tensors="pt").pixel_values
-
-     # prepare decoder inputs
-     task_prompt = "<s>"
-     decoder_input_ids = processor.tokenizer(task_prompt, add_special_tokens=False, return_tensors="pt").input_ids
-
-     # run inference
-     outputs = model.generate(
-         pixel_values.to(device),
-         decoder_input_ids=decoder_input_ids.to(device),
-         max_length=model.decoder.config.max_position_embeddings,
-         early_stopping=True,
-         pad_token_id=processor.tokenizer.pad_token_id,
-         eos_token_id=processor.tokenizer.eos_token_id,
-         use_cache=True,
-         num_beams=2,
-         # bad_words_ids=[[processor.tokenizer.unk_token_id]],
-         return_dict_in_generate=True,
-     )
-
-     # process output
-     prediction = processor.batch_decode(outputs.sequences)[0]
-     prediction = re.sub(r"<one>", "1", prediction)
-     prediction = processor.token2json(prediction)
-
-     # load reference target (assumes the sample provides a "target_sequence" field)
-     target = processor.token2json(sample["target_sequence"])
-     return prediction, target
-
- prediction, target = run_prediction(test_sample)
- print(f"Reference:\n {target}")
- print(f"Prediction:\n {prediction}")
- ```
 
 ---
 license: mit
+ tags:
+ - generated_from_trainer
+ model-index:
+ - name: donut_finetuned_chart
+   results: []
 ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # donut_finetuned_chart
+
+ This model is a fine-tuned version of [naver-clova-ix/donut-base](https://huggingface.co/naver-clova-ix/donut-base) on an unknown dataset.
+ It achieves the following results on the evaluation set:
+ - Loss: 0.4957
+ - Cer: 0.2318
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 2.3e-05
+ - train_batch_size: 8
+ - eval_batch_size: 8
+ - seed: 42
+ - gradient_accumulation_steps: 3
+ - total_train_batch_size: 24
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: linear
+ - num_epochs: 4
+ - mixed_precision_training: Native AMP
+
+ ### Training results
+
+ | Training Loss | Epoch | Step | Validation Loss | Cer    |
+ |:-------------:|:-----:|:----:|:---------------:|:------:|
+ | 3.4943        | 1.0   | 166  | 0.6634          | 0.2341 |
+ | 0.475         | 2.0   | 333  | 0.5370          | 0.2320 |
+ | 0.3009        | 3.0   | 500  | 0.5051          | 0.2318 |
+ | 0.2611        | 3.98  | 664  | 0.4957          | 0.2318 |
+
+
+ ### Framework versions
+
+ - Transformers 4.28.1
+ - Pytorch 2.0.0+cu118
+ - Datasets 2.11.0
+ - Tokenizers 0.13.3
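
The card lists the hyperparameters but not the Trainer setup itself. A minimal sketch of a `Seq2SeqTrainingArguments` that would reproduce the values above (the `output_dir`, evaluation strategy, and `predict_with_generate` flag are assumptions, not taken from this repo):

```
from transformers import Seq2SeqTrainingArguments

training_args = Seq2SeqTrainingArguments(
    output_dir="donut_finetuned_chart",  # assumed name, matching the model id
    learning_rate=2.3e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    gradient_accumulation_steps=3,       # 8 * 3 = 24 effective train batch size
    num_train_epochs=4,
    seed=42,
    lr_scheduler_type="linear",
    fp16=True,                           # "Native AMP" mixed precision
    # Adam betas=(0.9, 0.999) and epsilon=1e-08 are the defaults, as listed above
    evaluation_strategy="epoch",         # assumption: matches the per-epoch result rows
    predict_with_generate=True,          # assumption: needed to compute CER at eval time
)
```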
config.json CHANGED
@@ -1,6 +1,6 @@
 {
- "_commit_hash": null,
- "_name_or_path": "/content/aravind-selvam/donut_finetuned_chart_crop/checkpoint-800",
 "architectures": [
 "VisionEncoderDecoderModel"
 ],
@@ -50,7 +50,7 @@
 "LABEL_1": 1
 },
 "length_penalty": 1.0,
- "max_length": 179,
 "max_position_embeddings": 1536,
 "min_length": 0,
 "model_type": "mbart",
@@ -126,8 +126,8 @@
 "1": "LABEL_1"
 },
 "image_size": [
- 800,
- 800
 ],
 "initializer_range": 0.02,
 "is_decoder": false,
 
 {
+ "_commit_hash": "a959cf33c20e09215873e338299c900f57047c61",
+ "_name_or_path": "naver-clova-ix/donut-base",
 "architectures": [
 "VisionEncoderDecoderModel"
 ],

 "LABEL_1": 1
 },
 "length_penalty": 1.0,
+ "max_length": 198,
 "max_position_embeddings": 1536,
 "min_length": 0,
 "model_type": "mbart",

 "1": "LABEL_1"
 },
 "image_size": [
+ 512,
+ 512
 ],
 "initializer_range": 0.02,
 "is_decoder": false,
generation_config.json CHANGED
@@ -3,7 +3,7 @@
 "decoder_start_token_id": 0,
 "eos_token_id": 2,
 "forced_eos_token_id": 2,
- "max_length": 179,
 "pad_token_id": 1,
 "transformers_version": "4.28.1"
 }
 
 "decoder_start_token_id": 0,
 "eos_token_id": 2,
 "forced_eos_token_id": 2,
+ "max_length": 198,
 "pad_token_id": 1,
 "transformers_version": "4.28.1"
 }
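
The same `max_length` bump is mirrored in the standalone generation config, which recent transformers versions consult at `generate()` time. A minimal check (should print 198 after this commit):

```
from transformers import GenerationConfig

gen_config = GenerationConfig.from_pretrained("aravind-selvam/donut_finetuned_chart")
print(gen_config.max_length)  # 198 (was 179)
```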
preprocessor_config.json CHANGED
@@ -19,8 +19,8 @@
 "processor_class": "DonutProcessor",
 "resample": 2,
 "rescale_factor": 0.00392156862745098,
- "size": {
- "height": 800,
- "width": 800
- }
 }
 
 "processor_class": "DonutProcessor",
 "resample": 2,
 "rescale_factor": 0.00392156862745098,
+ "size": [
+ 512,
+ 512
+ ]
 }
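
Note that `size` also changes shape here, from a `{height, width}` dict to a `[512, 512]` list; Donut's image processor has historically accepted either form. A quick check of the deployed value (the exact field layout can vary across transformers versions):

```
from transformers import DonutProcessor

processor = DonutProcessor.from_pretrained("aravind-selvam/donut_finetuned_chart")
print(processor.image_processor.size)  # [512, 512] after this commit
```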
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:ea66388754c10575c25825bf7e7d904b13aaa60d25b5696062795aaedf71ccfa
- size 809221337
 
 version https://git-lfs.github.com/spec/v1
+ oid sha256:eda6ec1fa729856fbf78befe45d2c0ea6fc0c4feb19044dc0bbdde5530e4c8a2
+ size 809228057
runs/Apr21_07-30-56_6d0fdd897f67/1682062443.6477368/events.out.tfevents.1682062443.6d0fdd897f67.565.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:163b630b279ef32c956ce29e9adba28357aedc0c93e8e5bd5aa0a2b1737429d3
+ size 6183
runs/Apr21_07-30-56_6d0fdd897f67/events.out.tfevents.1682062443.6d0fdd897f67.565.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4f8d0d48db1932f10aac5ff55c0fc05de15bacc7f52cd55bd3c744823466b5e7
+ size 11116
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9e1f780ddb2f4c7a0615c792e2c622d7ef4400050e8deebd9eaba96312757e68
+ size 3771