Thanathorn committed on May 13, 2022
Commit 2c7698e
Parent: 545494a

Update model to latest version May 13, 2022

Files changed (6)
  1. README.md +1 -1
  2. model_args.json +1 -1
  3. optimizer.pt +1 -1
  4. pytorch_model.bin +1 -1
  5. scheduler.pt +1 -1
  6. training_args.bin +1 -1
README.md CHANGED
@@ -10,7 +10,7 @@ widget:
 ---
 
 # mt5-kmutt-thai-sentence-sum
- This repository contains the finetuned mT5 model for Thai sentence summarization. The architecture of the model is based on mT5 model and fine-tuned on text-summarization pairs in Thai. Also, this project is a Senior Project of Computer Engineering Student at King Mongkut’s University of Technology Thonburi.
+ This repository contains the finetuned mT5-base model for Thai sentence summarization. The architecture of the model is based on mT5 model and fine-tuned on text-summarization pairs in Thai. Also, this project is a Senior Project of Computer Engineering Student at King Mongkut’s University of Technology Thonburi.
 
 ## Usage on SimpleTransformer (Tested on version 0.63.4)
 ```python
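The README's SimpleTransformers usage snippet is cut off at the opening code fence in this diff. As a minimal sketch of how the checkpoint might be loaded with SimpleTransformers 0.63.4 (the Hub repository id and the "summarize: " task prefix below are assumptions, not taken from this commit):

```python
# Minimal sketch, not the repository's own snippet: the repo id and the
# "summarize: " prefix are assumptions.
from simpletransformers.t5 import T5Model

model = T5Model(
    "mt5",                                         # model_type, matching model_args.json
    "thanathorn/mt5-cpe-kmutt-thai-sentence-sum",  # assumed Hub repository id
    use_cuda=False,                                # flip to True on a GPU machine
)

# predict() takes a list of "prefix: input" strings and returns decoded summaries.
summaries = model.predict(["summarize: ข้อความภาษาไทยที่ต้องการสรุป"])
print(summaries)
```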
model_args.json CHANGED
@@ -1 +1 @@
- {"adafactor_beta1": null, "adafactor_clip_threshold": 1.0, "adafactor_decay_rate": -0.8, "adafactor_eps": [1e-30, 0.001], "adafactor_relative_step": false, "adafactor_scale_parameter": false, "adafactor_warmup_init": false, "adam_epsilon": 1e-08, "best_model_dir": "outputs/best_model", "cache_dir": "cache_dir/", "config": {}, "cosine_schedule_num_cycles": 0.5, "custom_layer_parameters": [], "custom_parameter_groups": [], "dataloader_num_workers": 0, "do_lower_case": false, "dynamic_quantize": false, "early_stopping_consider_epochs": false, "early_stopping_delta": 0, "early_stopping_metric": "eval_loss", "early_stopping_metric_minimize": true, "early_stopping_patience": 3, "encoding": null, "eval_batch_size": 10, "evaluate_during_training": false, "evaluate_during_training_silent": true, "evaluate_during_training_steps": 30000, "evaluate_during_training_verbose": false, "evaluate_each_epoch": true, "fp16": false, "gradient_accumulation_steps": 1, "learning_rate": 0.0003, "local_rank": -1, "logging_steps": 50, "loss_type": null, "loss_args": {}, "manual_seed": 32, "max_grad_norm": 1.0, "max_seq_length": 100, "model_name": "google/mt5-base", "model_type": "mt5", "multiprocessing_chunksize": -1, "n_gpu": 1, "no_cache": false, "no_save": false, "not_saved_args": [], "num_train_epochs": 50, "optimizer": "Adafactor", "output_dir": "outputs/", "overwrite_output_dir": true, "polynomial_decay_schedule_lr_end": 1e-07, "polynomial_decay_schedule_power": 1.0, "process_count": 1, "quantized_model": false, "reprocess_input_data": true, "save_best_model": true, "save_eval_checkpoints": true, "save_model_every_epoch": true, "save_optimizer_and_scheduler": true, "save_steps": 2000, "scheduler": "constant_schedule_with_warmup", "silent": false, "skip_special_tokens": true, "tensorboard_dir": null, "thread_count": null, "tokenizer_name": null, "tokenizer_type": null, "train_batch_size": 10, "train_custom_parameters_only": false, "use_cached_eval_features": false, "use_early_stopping": false, "use_hf_datasets": false, "use_multiprocessing": true, "use_multiprocessing_for_evaluation": true, "wandb_kwargs": {}, "wandb_project": "Senior-Project", "warmup_ratio": 0.06, "warmup_steps": 1743, "weight_decay": 0.0, "model_class": "T5Model", "dataset_class": null, "do_sample": false, "early_stopping": true, "evaluate_generated_text": false, "length_penalty": 2.0, "max_length": 20, "max_steps": -1, "num_beams": 1, "num_return_sequences": 1, "preprocess_inputs": true, "repetition_penalty": 1.0, "special_tokens_list": [], "top_k": null, "top_p": null, "use_multiprocessed_decoding": true}
+ {"adafactor_beta1": null, "adafactor_clip_threshold": 1.0, "adafactor_decay_rate": -0.8, "adafactor_eps": [1e-30, 0.001], "adafactor_relative_step": false, "adafactor_scale_parameter": false, "adafactor_warmup_init": false, "adam_epsilon": 1e-08, "best_model_dir": "outputs/best_model", "cache_dir": "cache_dir/", "config": {}, "cosine_schedule_num_cycles": 0.5, "custom_layer_parameters": [], "custom_parameter_groups": [], "dataloader_num_workers": 0, "do_lower_case": false, "dynamic_quantize": false, "early_stopping_consider_epochs": false, "early_stopping_delta": 0, "early_stopping_metric": "eval_loss", "early_stopping_metric_minimize": true, "early_stopping_patience": 3, "encoding": null, "eval_batch_size": 8, "evaluate_during_training": false, "evaluate_during_training_silent": true, "evaluate_during_training_steps": 30000, "evaluate_during_training_verbose": false, "evaluate_each_epoch": true, "fp16": false, "gradient_accumulation_steps": 1, "learning_rate": 0.0003, "local_rank": -1, "logging_steps": 50, "loss_type": null, "loss_args": {}, "manual_seed": 32, "max_grad_norm": 1.0, "max_seq_length": 128, "model_name": "google/mt5-base", "model_type": "mt5", "multiprocessing_chunksize": -1, "n_gpu": 1, "no_cache": false, "no_save": false, "not_saved_args": [], "num_train_epochs": 50, "optimizer": "Adafactor", "output_dir": "outputs/", "overwrite_output_dir": true, "polynomial_decay_schedule_lr_end": 1e-07, "polynomial_decay_schedule_power": 1.0, "process_count": 2, "quantized_model": false, "reprocess_input_data": true, "save_best_model": true, "save_eval_checkpoints": true, "save_model_every_epoch": true, "save_optimizer_and_scheduler": true, "save_steps": 2000, "scheduler": "constant_schedule_with_warmup", "silent": false, "skip_special_tokens": true, "tensorboard_dir": null, "thread_count": null, "tokenizer_name": null, "tokenizer_type": null, "train_batch_size": 8, "train_custom_parameters_only": false, "use_cached_eval_features": false, "use_early_stopping": false, "use_hf_datasets": false, "use_multiprocessing": true, "use_multiprocessing_for_evaluation": true, "wandb_kwargs": {}, "wandb_project": "Senior-Project", "warmup_ratio": 0.06, "warmup_steps": 3261, "weight_decay": 0.0, "model_class": "T5Model", "dataset_class": null, "do_sample": false, "early_stopping": true, "evaluate_generated_text": false, "length_penalty": 2.0, "max_length": 20, "max_steps": -1, "num_beams": 1, "num_return_sequences": 1, "preprocess_inputs": true, "repetition_penalty": 1.0, "special_tokens_list": [], "top_k": null, "top_p": null, "use_multiprocessed_decoding": true}
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:79930045193209ab76a2c219da3bf02da92d7b631b71507015fb6f50a4206cd5
+ oid sha256:bb60fb834d2456b1fce2d7697f4493d30004a2fcc3e8b25ae30e2ba6004ad8e6
 size 4113057
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:8483302282aa9b0cc45024a6ef0c9271ce1d1ec035584b4f0001ba39a71665bd
+ oid sha256:45c9061f60caff70432802f8a0801cdb7353357c0f106435308a729da4502f7c
 size 2329700301
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:e4510591288454f820aaa49a3c831f868ff1dd9a9eadb9ac12caeb5408db9b7d
+ oid sha256:8887b20465b4bdb3e275d3209eae62915addf06ce6f472507dd4317c4e080ea8
 size 623
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:02ce6db2a3237d254df7f4222c9095506cea3ff337a8d1384fbf9ffbe27d49fa
+ oid sha256:6c5c336a8a9bc18ce6adfc7e9047009e33a6fb33d849b6d02851cd8734600eb9
 size 3119
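The four binary files above are Git LFS pointers: the repository tracks only a version line, a SHA-256 oid, and a byte size, and this commit swaps each oid while every size stays identical, which is consistent with re-saving the same architecture after further training. A small stdlib-only sketch for checking a downloaded weight file against the oid recorded in its pointer:

```python
# Verify a downloaded LFS object against the sha256 oid in its pointer file.
# The expected digest is the new pytorch_model.bin oid from this commit.
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Stream the file in 1 MiB chunks so large checkpoints fit in memory."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

expected = "45c9061f60caff70432802f8a0801cdb7353357c0f106435308a729da4502f7c"
if sha256_of("pytorch_model.bin") != expected:
    raise ValueError("pytorch_model.bin does not match its LFS pointer oid")
```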