tarikdincer committed on
Commit
8186ce3
1 Parent(s): 38b33b4

model files are added

Browse files
.DS_Store ADDED
Binary file (6.15 kB). View file
 
config.json ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "facebook/bart-base",
3
+ "activation_dropout": 0.1,
4
+ "activation_function": "gelu",
5
+ "add_bias_logits": false,
6
+ "add_final_layer_norm": false,
7
+ "architectures": [
8
+ "BartForConditionalGeneration"
9
+ ],
10
+ "attention_dropout": 0.1,
11
+ "bos_token_id": 0,
12
+ "classif_dropout": 0.1,
13
+ "classifier_dropout": 0.0,
14
+ "d_model": 768,
15
+ "decoder_attention_heads": 12,
16
+ "decoder_ffn_dim": 3072,
17
+ "decoder_layerdrop": 0.0,
18
+ "decoder_layers": 6,
19
+ "decoder_start_token_id": 2,
20
+ "dropout": 0.1,
21
+ "early_stopping": true,
22
+ "encoder_attention_heads": 12,
23
+ "encoder_ffn_dim": 3072,
24
+ "encoder_layerdrop": 0.0,
25
+ "encoder_layers": 6,
26
+ "eos_token_id": 2,
27
+ "forced_bos_token_id": 0,
28
+ "forced_eos_token_id": 2,
29
+ "gradient_checkpointing": false,
30
+ "id2label": {
31
+ "0": "LABEL_0",
32
+ "1": "LABEL_1",
33
+ "2": "LABEL_2"
34
+ },
35
+ "init_std": 0.02,
36
+ "is_encoder_decoder": true,
37
+ "label2id": {
38
+ "LABEL_0": 0,
39
+ "LABEL_1": 1,
40
+ "LABEL_2": 2
41
+ },
42
+ "max_position_embeddings": 1024,
43
+ "model_type": "bart",
44
+ "no_repeat_ngram_size": 3,
45
+ "normalize_before": false,
46
+ "normalize_embedding": true,
47
+ "num_beams": 4,
48
+ "num_hidden_layers": 6,
49
+ "pad_token_id": 1,
50
+ "scale_embedding": false,
51
+ "task_specific_params": {
52
+ "summarization": {
53
+ "length_penalty": 1.0,
54
+ "max_length": 128,
55
+ "min_length": 12,
56
+ "num_beams": 4
57
+ },
58
+ "summarization_cnn": {
59
+ "length_penalty": 2.0,
60
+ "max_length": 142,
61
+ "min_length": 56,
62
+ "num_beams": 4
63
+ },
64
+ "summarization_xsum": {
65
+ "length_penalty": 1.0,
66
+ "max_length": 62,
67
+ "min_length": 11,
68
+ "num_beams": 6
69
+ }
70
+ },
71
+ "torch_dtype": "float32",
72
+ "transformers_version": "4.38.2",
73
+ "use_cache": true,
74
+ "vocab_size": 50265
75
+ }
generation_config.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 0,
3
+ "decoder_start_token_id": 2,
4
+ "early_stopping": true,
5
+ "eos_token_id": 2,
6
+ "forced_bos_token_id": 0,
7
+ "forced_eos_token_id": 2,
8
+ "no_repeat_ngram_size": 3,
9
+ "num_beams": 4,
10
+ "pad_token_id": 1,
11
+ "transformers_version": "4.38.2"
12
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc96341a118cd5e143be21b2facdd4ab838539a7bba52af1bcbfed486690c664
3
+ size 557912620
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24165cc88b9488949bb4e552f6b04151e6dd6cdde1fab6dd427e8e5a5d80caab
3
+ size 1115579898
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d97f8b6459373c118b1617234bf9c04bd797322db8c55570f4ebf6dbe844bbb
3
+ size 14244
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac36bc24c0dcde8780c73dbb5e6ec02976f9779e9bcd724560176dc11c0c4e85
3
+ size 1064
special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "cls_token": "<s>",
4
+ "eos_token": "</s>",
5
+ "mask_token": {
6
+ "content": "<mask>",
7
+ "lstrip": true,
8
+ "normalized": true,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "pad_token": "<pad>",
13
+ "sep_token": "</s>",
14
+ "unk_token": "<unk>"
15
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "<s>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "<pad>",
14
+ "lstrip": false,
15
+ "normalized": true,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "2": {
21
+ "content": "</s>",
22
+ "lstrip": false,
23
+ "normalized": true,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "3": {
29
+ "content": "<unk>",
30
+ "lstrip": false,
31
+ "normalized": true,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "50264": {
37
+ "content": "<mask>",
38
+ "lstrip": true,
39
+ "normalized": true,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ }
44
+ },
45
+ "bos_token": "<s>",
46
+ "clean_up_tokenization_spaces": true,
47
+ "cls_token": "<s>",
48
+ "eos_token": "</s>",
49
+ "errors": "replace",
50
+ "mask_token": "<mask>",
51
+ "model_max_length": 1024,
52
+ "pad_token": "<pad>",
53
+ "sep_token": "</s>",
54
+ "tokenizer_class": "BartTokenizer",
55
+ "trim_offsets": true,
56
+ "unk_token": "<unk>"
57
+ }
trainer_state.json ADDED
@@ -0,0 +1,739 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.56,
5
+ "eval_steps": 500,
6
+ "global_step": 1000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.03,
13
+ "grad_norm": 8.571518898010254,
14
+ "learning_rate": 0.0003965811965811966,
15
+ "loss": 6.0892,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.05,
20
+ "grad_norm": 2.477086067199707,
21
+ "learning_rate": 0.00039316239316239317,
22
+ "loss": 1.4485,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.08,
27
+ "grad_norm": 0.7137540578842163,
28
+ "learning_rate": 0.00038974358974358975,
29
+ "loss": 0.9732,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 0.1,
34
+ "grad_norm": 0.5362057685852051,
35
+ "learning_rate": 0.0003863247863247863,
36
+ "loss": 0.7804,
37
+ "step": 40
38
+ },
39
+ {
40
+ "epoch": 0.13,
41
+ "grad_norm": 0.6810179948806763,
42
+ "learning_rate": 0.00038290598290598296,
43
+ "loss": 0.665,
44
+ "step": 50
45
+ },
46
+ {
47
+ "epoch": 0.15,
48
+ "grad_norm": 0.4986821115016937,
49
+ "learning_rate": 0.0003794871794871795,
50
+ "loss": 0.6091,
51
+ "step": 60
52
+ },
53
+ {
54
+ "epoch": 0.18,
55
+ "grad_norm": 0.4309682250022888,
56
+ "learning_rate": 0.00037606837606837606,
57
+ "loss": 0.5502,
58
+ "step": 70
59
+ },
60
+ {
61
+ "epoch": 0.2,
62
+ "grad_norm": 0.3824257552623749,
63
+ "learning_rate": 0.0003726495726495727,
64
+ "loss": 0.5164,
65
+ "step": 80
66
+ },
67
+ {
68
+ "epoch": 0.23,
69
+ "grad_norm": 0.3188970386981964,
70
+ "learning_rate": 0.00036923076923076927,
71
+ "loss": 0.4883,
72
+ "step": 90
73
+ },
74
+ {
75
+ "epoch": 0.26,
76
+ "grad_norm": 0.3359103202819824,
77
+ "learning_rate": 0.00036581196581196584,
78
+ "loss": 0.4612,
79
+ "step": 100
80
+ },
81
+ {
82
+ "epoch": 0.28,
83
+ "grad_norm": 0.4327464699745178,
84
+ "learning_rate": 0.0003623931623931624,
85
+ "loss": 0.4351,
86
+ "step": 110
87
+ },
88
+ {
89
+ "epoch": 0.31,
90
+ "grad_norm": 0.4424777030944824,
91
+ "learning_rate": 0.000358974358974359,
92
+ "loss": 0.4217,
93
+ "step": 120
94
+ },
95
+ {
96
+ "epoch": 0.33,
97
+ "grad_norm": 0.520322322845459,
98
+ "learning_rate": 0.00035555555555555557,
99
+ "loss": 0.4076,
100
+ "step": 130
101
+ },
102
+ {
103
+ "epoch": 0.36,
104
+ "grad_norm": 0.48572778701782227,
105
+ "learning_rate": 0.00035213675213675215,
106
+ "loss": 0.3948,
107
+ "step": 140
108
+ },
109
+ {
110
+ "epoch": 0.38,
111
+ "grad_norm": 0.2985605001449585,
112
+ "learning_rate": 0.0003487179487179487,
113
+ "loss": 0.3823,
114
+ "step": 150
115
+ },
116
+ {
117
+ "epoch": 0.41,
118
+ "grad_norm": 0.28738752007484436,
119
+ "learning_rate": 0.00034529914529914536,
120
+ "loss": 0.375,
121
+ "step": 160
122
+ },
123
+ {
124
+ "epoch": 0.44,
125
+ "grad_norm": 0.29423144459724426,
126
+ "learning_rate": 0.0003418803418803419,
127
+ "loss": 0.3591,
128
+ "step": 170
129
+ },
130
+ {
131
+ "epoch": 0.46,
132
+ "grad_norm": 0.26430046558380127,
133
+ "learning_rate": 0.00033846153846153846,
134
+ "loss": 0.3494,
135
+ "step": 180
136
+ },
137
+ {
138
+ "epoch": 0.49,
139
+ "grad_norm": 0.2734215259552002,
140
+ "learning_rate": 0.0003350427350427351,
141
+ "loss": 0.3396,
142
+ "step": 190
143
+ },
144
+ {
145
+ "epoch": 0.51,
146
+ "grad_norm": 0.3005197048187256,
147
+ "learning_rate": 0.00033162393162393166,
148
+ "loss": 0.3352,
149
+ "step": 200
150
+ },
151
+ {
152
+ "epoch": 0.54,
153
+ "grad_norm": 0.2822723686695099,
154
+ "learning_rate": 0.0003282051282051282,
155
+ "loss": 0.3241,
156
+ "step": 210
157
+ },
158
+ {
159
+ "epoch": 0.56,
160
+ "grad_norm": 0.2792316973209381,
161
+ "learning_rate": 0.0003247863247863248,
162
+ "loss": 0.3208,
163
+ "step": 220
164
+ },
165
+ {
166
+ "epoch": 0.59,
167
+ "grad_norm": 0.2761669158935547,
168
+ "learning_rate": 0.0003213675213675214,
169
+ "loss": 0.3148,
170
+ "step": 230
171
+ },
172
+ {
173
+ "epoch": 0.61,
174
+ "grad_norm": 0.2733113467693329,
175
+ "learning_rate": 0.0003179487179487179,
176
+ "loss": 0.311,
177
+ "step": 240
178
+ },
179
+ {
180
+ "epoch": 0.64,
181
+ "grad_norm": 0.3393694758415222,
182
+ "learning_rate": 0.00031452991452991455,
183
+ "loss": 0.3056,
184
+ "step": 250
185
+ },
186
+ {
187
+ "epoch": 0.67,
188
+ "grad_norm": 0.29316985607147217,
189
+ "learning_rate": 0.0003111111111111111,
190
+ "loss": 0.2975,
191
+ "step": 260
192
+ },
193
+ {
194
+ "epoch": 0.69,
195
+ "grad_norm": 0.29134783148765564,
196
+ "learning_rate": 0.0003076923076923077,
197
+ "loss": 0.2898,
198
+ "step": 270
199
+ },
200
+ {
201
+ "epoch": 0.72,
202
+ "grad_norm": 0.41234660148620605,
203
+ "learning_rate": 0.0003042735042735043,
204
+ "loss": 0.2895,
205
+ "step": 280
206
+ },
207
+ {
208
+ "epoch": 0.74,
209
+ "grad_norm": 0.26693716645240784,
210
+ "learning_rate": 0.00030085470085470086,
211
+ "loss": 0.2835,
212
+ "step": 290
213
+ },
214
+ {
215
+ "epoch": 0.77,
216
+ "grad_norm": 0.2862294614315033,
217
+ "learning_rate": 0.00029743589743589743,
218
+ "loss": 0.2747,
219
+ "step": 300
220
+ },
221
+ {
222
+ "epoch": 0.79,
223
+ "grad_norm": 0.2596249282360077,
224
+ "learning_rate": 0.00029401709401709406,
225
+ "loss": 0.2752,
226
+ "step": 310
227
+ },
228
+ {
229
+ "epoch": 0.82,
230
+ "grad_norm": 0.2555866241455078,
231
+ "learning_rate": 0.0002905982905982906,
232
+ "loss": 0.2662,
233
+ "step": 320
234
+ },
235
+ {
236
+ "epoch": 0.84,
237
+ "grad_norm": 0.3845195472240448,
238
+ "learning_rate": 0.0002871794871794872,
239
+ "loss": 0.2625,
240
+ "step": 330
241
+ },
242
+ {
243
+ "epoch": 0.87,
244
+ "grad_norm": 0.23550209403038025,
245
+ "learning_rate": 0.0002837606837606838,
246
+ "loss": 0.256,
247
+ "step": 340
248
+ },
249
+ {
250
+ "epoch": 0.9,
251
+ "grad_norm": 0.2434936910867691,
252
+ "learning_rate": 0.0002803418803418803,
253
+ "loss": 0.2545,
254
+ "step": 350
255
+ },
256
+ {
257
+ "epoch": 0.92,
258
+ "grad_norm": 0.23562268912792206,
259
+ "learning_rate": 0.00027692307692307695,
260
+ "loss": 0.2536,
261
+ "step": 360
262
+ },
263
+ {
264
+ "epoch": 0.95,
265
+ "grad_norm": 0.3110085427761078,
266
+ "learning_rate": 0.0002735042735042735,
267
+ "loss": 0.2497,
268
+ "step": 370
269
+ },
270
+ {
271
+ "epoch": 0.97,
272
+ "grad_norm": 0.2646142244338989,
273
+ "learning_rate": 0.0002700854700854701,
274
+ "loss": 0.2448,
275
+ "step": 380
276
+ },
277
+ {
278
+ "epoch": 1.0,
279
+ "grad_norm": 0.22812116146087646,
280
+ "learning_rate": 0.0002666666666666667,
281
+ "loss": 0.2409,
282
+ "step": 390
283
+ },
284
+ {
285
+ "epoch": 1.02,
286
+ "grad_norm": 0.21481893956661224,
287
+ "learning_rate": 0.00026324786324786326,
288
+ "loss": 0.225,
289
+ "step": 400
290
+ },
291
+ {
292
+ "epoch": 1.05,
293
+ "grad_norm": 0.2561526298522949,
294
+ "learning_rate": 0.00025982905982905983,
295
+ "loss": 0.2194,
296
+ "step": 410
297
+ },
298
+ {
299
+ "epoch": 1.08,
300
+ "grad_norm": 0.2297515720129013,
301
+ "learning_rate": 0.00025641025641025646,
302
+ "loss": 0.216,
303
+ "step": 420
304
+ },
305
+ {
306
+ "epoch": 1.1,
307
+ "grad_norm": 0.25526463985443115,
308
+ "learning_rate": 0.000252991452991453,
309
+ "loss": 0.2171,
310
+ "step": 430
311
+ },
312
+ {
313
+ "epoch": 1.13,
314
+ "grad_norm": 0.24202637374401093,
315
+ "learning_rate": 0.00024957264957264956,
316
+ "loss": 0.2149,
317
+ "step": 440
318
+ },
319
+ {
320
+ "epoch": 1.15,
321
+ "grad_norm": 0.20644807815551758,
322
+ "learning_rate": 0.0002461538461538462,
323
+ "loss": 0.209,
324
+ "step": 450
325
+ },
326
+ {
327
+ "epoch": 1.18,
328
+ "grad_norm": 0.2795998454093933,
329
+ "learning_rate": 0.00024273504273504272,
330
+ "loss": 0.2071,
331
+ "step": 460
332
+ },
333
+ {
334
+ "epoch": 1.2,
335
+ "grad_norm": 0.306149959564209,
336
+ "learning_rate": 0.00023931623931623932,
337
+ "loss": 0.208,
338
+ "step": 470
339
+ },
340
+ {
341
+ "epoch": 1.23,
342
+ "grad_norm": 0.2355523407459259,
343
+ "learning_rate": 0.00023589743589743593,
344
+ "loss": 0.2051,
345
+ "step": 480
346
+ },
347
+ {
348
+ "epoch": 1.25,
349
+ "grad_norm": 0.2909263074398041,
350
+ "learning_rate": 0.0002324786324786325,
351
+ "loss": 0.2023,
352
+ "step": 490
353
+ },
354
+ {
355
+ "epoch": 1.28,
356
+ "grad_norm": 0.5298261642456055,
357
+ "learning_rate": 0.00022905982905982905,
358
+ "loss": 0.2018,
359
+ "step": 500
360
+ },
361
+ {
362
+ "epoch": 1.28,
363
+ "eval_cer": 0.9319083335386478,
364
+ "eval_loss": 0.17038685083389282,
365
+ "eval_runtime": 138.0008,
366
+ "eval_samples_per_second": 14.493,
367
+ "eval_steps_per_second": 0.457,
368
+ "step": 500
369
+ },
370
+ {
371
+ "epoch": 1.31,
372
+ "grad_norm": 0.23548871278762817,
373
+ "learning_rate": 0.00022564102564102566,
374
+ "loss": 0.2008,
375
+ "step": 510
376
+ },
377
+ {
378
+ "epoch": 1.33,
379
+ "grad_norm": 0.20162977278232574,
380
+ "learning_rate": 0.00022222222222222223,
381
+ "loss": 0.1975,
382
+ "step": 520
383
+ },
384
+ {
385
+ "epoch": 1.36,
386
+ "grad_norm": 0.2593408524990082,
387
+ "learning_rate": 0.00021880341880341884,
388
+ "loss": 0.1959,
389
+ "step": 530
390
+ },
391
+ {
392
+ "epoch": 1.38,
393
+ "grad_norm": 0.21452312171459198,
394
+ "learning_rate": 0.0002153846153846154,
395
+ "loss": 0.194,
396
+ "step": 540
397
+ },
398
+ {
399
+ "epoch": 1.41,
400
+ "grad_norm": 0.2637544274330139,
401
+ "learning_rate": 0.000211965811965812,
402
+ "loss": 0.1909,
403
+ "step": 550
404
+ },
405
+ {
406
+ "epoch": 1.43,
407
+ "grad_norm": 0.24357128143310547,
408
+ "learning_rate": 0.00020854700854700857,
409
+ "loss": 0.19,
410
+ "step": 560
411
+ },
412
+ {
413
+ "epoch": 1.46,
414
+ "grad_norm": 0.2084117978811264,
415
+ "learning_rate": 0.00020512820512820512,
416
+ "loss": 0.1857,
417
+ "step": 570
418
+ },
419
+ {
420
+ "epoch": 1.48,
421
+ "grad_norm": 0.23245294392108917,
422
+ "learning_rate": 0.00020170940170940172,
423
+ "loss": 0.1858,
424
+ "step": 580
425
+ },
426
+ {
427
+ "epoch": 1.51,
428
+ "grad_norm": 0.23836293816566467,
429
+ "learning_rate": 0.0001982905982905983,
430
+ "loss": 0.183,
431
+ "step": 590
432
+ },
433
+ {
434
+ "epoch": 1.54,
435
+ "grad_norm": 0.19184565544128418,
436
+ "learning_rate": 0.00019487179487179487,
437
+ "loss": 0.183,
438
+ "step": 600
439
+ },
440
+ {
441
+ "epoch": 1.56,
442
+ "grad_norm": 0.20401564240455627,
443
+ "learning_rate": 0.00019145299145299148,
444
+ "loss": 0.1829,
445
+ "step": 610
446
+ },
447
+ {
448
+ "epoch": 1.59,
449
+ "grad_norm": 0.21579188108444214,
450
+ "learning_rate": 0.00018803418803418803,
451
+ "loss": 0.1812,
452
+ "step": 620
453
+ },
454
+ {
455
+ "epoch": 1.61,
456
+ "grad_norm": 0.23108145594596863,
457
+ "learning_rate": 0.00018461538461538463,
458
+ "loss": 0.1781,
459
+ "step": 630
460
+ },
461
+ {
462
+ "epoch": 1.64,
463
+ "grad_norm": 0.2311713844537735,
464
+ "learning_rate": 0.0001811965811965812,
465
+ "loss": 0.1755,
466
+ "step": 640
467
+ },
468
+ {
469
+ "epoch": 1.66,
470
+ "grad_norm": 0.19794794917106628,
471
+ "learning_rate": 0.00017777777777777779,
472
+ "loss": 0.1768,
473
+ "step": 650
474
+ },
475
+ {
476
+ "epoch": 1.69,
477
+ "grad_norm": 0.2516119182109833,
478
+ "learning_rate": 0.00017435897435897436,
479
+ "loss": 0.1737,
480
+ "step": 660
481
+ },
482
+ {
483
+ "epoch": 1.72,
484
+ "grad_norm": 0.20975567400455475,
485
+ "learning_rate": 0.00017094017094017094,
486
+ "loss": 0.1712,
487
+ "step": 670
488
+ },
489
+ {
490
+ "epoch": 1.74,
491
+ "grad_norm": 0.22168505191802979,
492
+ "learning_rate": 0.00016752136752136754,
493
+ "loss": 0.1693,
494
+ "step": 680
495
+ },
496
+ {
497
+ "epoch": 1.77,
498
+ "grad_norm": 0.22844062745571136,
499
+ "learning_rate": 0.0001641025641025641,
500
+ "loss": 0.168,
501
+ "step": 690
502
+ },
503
+ {
504
+ "epoch": 1.79,
505
+ "grad_norm": 0.22804197669029236,
506
+ "learning_rate": 0.0001606837606837607,
507
+ "loss": 0.1721,
508
+ "step": 700
509
+ },
510
+ {
511
+ "epoch": 1.82,
512
+ "grad_norm": 0.22620578110218048,
513
+ "learning_rate": 0.00015726495726495727,
514
+ "loss": 0.1703,
515
+ "step": 710
516
+ },
517
+ {
518
+ "epoch": 1.84,
519
+ "grad_norm": 0.21445313096046448,
520
+ "learning_rate": 0.00015384615384615385,
521
+ "loss": 0.1673,
522
+ "step": 720
523
+ },
524
+ {
525
+ "epoch": 1.87,
526
+ "grad_norm": 0.207479327917099,
527
+ "learning_rate": 0.00015042735042735043,
528
+ "loss": 0.1648,
529
+ "step": 730
530
+ },
531
+ {
532
+ "epoch": 1.89,
533
+ "grad_norm": 0.22134087979793549,
534
+ "learning_rate": 0.00014700854700854703,
535
+ "loss": 0.1629,
536
+ "step": 740
537
+ },
538
+ {
539
+ "epoch": 1.92,
540
+ "grad_norm": 0.20121484994888306,
541
+ "learning_rate": 0.0001435897435897436,
542
+ "loss": 0.1638,
543
+ "step": 750
544
+ },
545
+ {
546
+ "epoch": 1.95,
547
+ "grad_norm": 0.2002618908882141,
548
+ "learning_rate": 0.00014017094017094016,
549
+ "loss": 0.1621,
550
+ "step": 760
551
+ },
552
+ {
553
+ "epoch": 1.97,
554
+ "grad_norm": 0.19750453531742096,
555
+ "learning_rate": 0.00013675213675213676,
556
+ "loss": 0.1667,
557
+ "step": 770
558
+ },
559
+ {
560
+ "epoch": 2.0,
561
+ "grad_norm": 0.22286508977413177,
562
+ "learning_rate": 0.00013333333333333334,
563
+ "loss": 0.1642,
564
+ "step": 780
565
+ },
566
+ {
567
+ "epoch": 2.02,
568
+ "grad_norm": 0.21668635308742523,
569
+ "learning_rate": 0.00012991452991452992,
570
+ "loss": 0.1482,
571
+ "step": 790
572
+ },
573
+ {
574
+ "epoch": 2.05,
575
+ "grad_norm": 0.233961820602417,
576
+ "learning_rate": 0.0001264957264957265,
577
+ "loss": 0.1453,
578
+ "step": 800
579
+ },
580
+ {
581
+ "epoch": 2.07,
582
+ "grad_norm": 0.1865084022283554,
583
+ "learning_rate": 0.0001230769230769231,
584
+ "loss": 0.1455,
585
+ "step": 810
586
+ },
587
+ {
588
+ "epoch": 2.1,
589
+ "grad_norm": 0.1853141337633133,
590
+ "learning_rate": 0.00011965811965811966,
591
+ "loss": 0.1442,
592
+ "step": 820
593
+ },
594
+ {
595
+ "epoch": 2.12,
596
+ "grad_norm": 0.17371739447116852,
597
+ "learning_rate": 0.00011623931623931625,
598
+ "loss": 0.1382,
599
+ "step": 830
600
+ },
601
+ {
602
+ "epoch": 2.15,
603
+ "grad_norm": 0.19631154835224152,
604
+ "learning_rate": 0.00011282051282051283,
605
+ "loss": 0.1415,
606
+ "step": 840
607
+ },
608
+ {
609
+ "epoch": 2.18,
610
+ "grad_norm": 0.194850891828537,
611
+ "learning_rate": 0.00010940170940170942,
612
+ "loss": 0.141,
613
+ "step": 850
614
+ },
615
+ {
616
+ "epoch": 2.2,
617
+ "grad_norm": 0.18121449649333954,
618
+ "learning_rate": 0.000105982905982906,
619
+ "loss": 0.1388,
620
+ "step": 860
621
+ },
622
+ {
623
+ "epoch": 2.23,
624
+ "grad_norm": 0.2176773101091385,
625
+ "learning_rate": 0.00010256410256410256,
626
+ "loss": 0.1399,
627
+ "step": 870
628
+ },
629
+ {
630
+ "epoch": 2.25,
631
+ "grad_norm": 0.19013133645057678,
632
+ "learning_rate": 9.914529914529915e-05,
633
+ "loss": 0.137,
634
+ "step": 880
635
+ },
636
+ {
637
+ "epoch": 2.28,
638
+ "grad_norm": 0.22148679196834564,
639
+ "learning_rate": 9.572649572649574e-05,
640
+ "loss": 0.139,
641
+ "step": 890
642
+ },
643
+ {
644
+ "epoch": 2.3,
645
+ "grad_norm": 0.20861493051052094,
646
+ "learning_rate": 9.230769230769232e-05,
647
+ "loss": 0.139,
648
+ "step": 900
649
+ },
650
+ {
651
+ "epoch": 2.33,
652
+ "grad_norm": 0.17541790008544922,
653
+ "learning_rate": 8.888888888888889e-05,
654
+ "loss": 0.1362,
655
+ "step": 910
656
+ },
657
+ {
658
+ "epoch": 2.36,
659
+ "grad_norm": 0.1971459984779358,
660
+ "learning_rate": 8.547008547008547e-05,
661
+ "loss": 0.1346,
662
+ "step": 920
663
+ },
664
+ {
665
+ "epoch": 2.38,
666
+ "grad_norm": 0.20883004367351532,
667
+ "learning_rate": 8.205128205128205e-05,
668
+ "loss": 0.1351,
669
+ "step": 930
670
+ },
671
+ {
672
+ "epoch": 2.41,
673
+ "grad_norm": 0.18058577179908752,
674
+ "learning_rate": 7.863247863247864e-05,
675
+ "loss": 0.1363,
676
+ "step": 940
677
+ },
678
+ {
679
+ "epoch": 2.43,
680
+ "grad_norm": 0.19193512201309204,
681
+ "learning_rate": 7.521367521367521e-05,
682
+ "loss": 0.1359,
683
+ "step": 950
684
+ },
685
+ {
686
+ "epoch": 2.46,
687
+ "grad_norm": 0.17777132987976074,
688
+ "learning_rate": 7.17948717948718e-05,
689
+ "loss": 0.1363,
690
+ "step": 960
691
+ },
692
+ {
693
+ "epoch": 2.48,
694
+ "grad_norm": 0.1730206310749054,
695
+ "learning_rate": 6.837606837606838e-05,
696
+ "loss": 0.1339,
697
+ "step": 970
698
+ },
699
+ {
700
+ "epoch": 2.51,
701
+ "grad_norm": 0.172698512673378,
702
+ "learning_rate": 6.495726495726496e-05,
703
+ "loss": 0.1317,
704
+ "step": 980
705
+ },
706
+ {
707
+ "epoch": 2.53,
708
+ "grad_norm": 0.1746242642402649,
709
+ "learning_rate": 6.153846153846155e-05,
710
+ "loss": 0.132,
711
+ "step": 990
712
+ },
713
+ {
714
+ "epoch": 2.56,
715
+ "grad_norm": 0.1631608009338379,
716
+ "learning_rate": 5.8119658119658126e-05,
717
+ "loss": 0.1292,
718
+ "step": 1000
719
+ },
720
+ {
721
+ "epoch": 2.56,
722
+ "eval_cer": 0.9320702386692806,
723
+ "eval_loss": 0.11450555920600891,
724
+ "eval_runtime": 136.638,
725
+ "eval_samples_per_second": 14.637,
726
+ "eval_steps_per_second": 0.461,
727
+ "step": 1000
728
+ }
729
+ ],
730
+ "logging_steps": 10,
731
+ "max_steps": 1170,
732
+ "num_input_tokens_seen": 0,
733
+ "num_train_epochs": 3,
734
+ "save_steps": 500,
735
+ "total_flos": 6.737241075941376e+16,
736
+ "train_batch_size": 8,
737
+ "trial_name": null,
738
+ "trial_params": null
739
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c287b31111cf124ac69c0e9b53bdaf288743eceab9254160487882b78a64250
3
+ size 5048
vocab.json ADDED
The diff for this file is too large to render. See raw diff