krplt committed on Feb 16

Commit

fd52a0b

•

1 Parent(s): 1002bb5

feat: v1 model

Browse files

Files changed (36) hide show

.idea/csv-editor.xml +16 -0
.idea/misc.xml +1 -1
.idea/trocr-handwritten-mathematical-expressions.iml +4 -2
dataset/annotations.csv +23 -0
dataset/images/0001.png +0 -0
dataset/images/0002.png +0 -0
dataset/images/0003.png +0 -0
dataset/images/0004.png +0 -0
dataset/images/0005.png +0 -0
dataset/images/0006.png +0 -0
dataset/images/0007.png +0 -0
dataset/images/0008.png +0 -0
dataset/images/0009.png +0 -0
dataset/images/0010.png +0 -0
dataset/images/0011.png +0 -0
dataset/images/0012.png +0 -0
dataset/images/0013.png +0 -0
dataset/images/0014.png +0 -0
dataset/images/0015.png +0 -0
dataset/images/0016.png +0 -0
dataset/images/0017.png +0 -0
dataset/images/0018.png +0 -0
dataset/images/0019.png +0 -0
dataset/images/0020.png +0 -0
dataset/images/0021.png +0 -0
dataset/images/0022.png +0 -0
model/checkpoint-500/config.json +171 -0
model/checkpoint-500/generation_config.json +9 -0
model/checkpoint-500/model.safetensors +3 -0
model/checkpoint-500/optimizer.pt +3 -0
model/checkpoint-500/rng_state.pth +3 -0
model/checkpoint-500/scheduler.pt +3 -0
model/checkpoint-500/trainer_state.json +321 -0
model/checkpoint-500/training_args.bin +3 -0
requirements.txt +6 -0
train.py +87 -0

.idea/csv-editor.xml ADDED Viewed

	@@ -0,0 +1,16 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="CsvFileAttributes">
+    <option name="attributeMap">
+      <map>
+        <entry key="\dataset\annotations.csv">
+          <value>
+            <Attribute>
+              <option name="separator" value="," />
+            </Attribute>
+          </value>
+        </entry>
+      </map>
+    </option>
+  </component>
+</project>

.idea/misc.xml CHANGED Viewed

@@ -3,5 +3,5 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (Задачи на семинар 8. Ответы)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (Задачи на семинар 8. Ответы)" project-jdk-type="Python SDK" />
 </project>

   <component name="Black">
     <option name="sdkName" value="Python 3.10 (Задачи на семинар 8. Ответы)" />
   </component>
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.12 (trocr-handwritten-mathematical-expressions)" project-jdk-type="Python SDK" />
 </project>

.idea/trocr-handwritten-mathematical-expressions.iml CHANGED Viewed

@@ -1,8 +1,10 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <module type="PYTHON_MODULE" version="4">
   <component name="NewModuleRootManager">
-    <content url="file://$MODULE_DIR$" />
-    <orderEntry type="inheritedJdk" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
 </module>

 <?xml version="1.0" encoding="UTF-8"?>
 <module type="PYTHON_MODULE" version="4">
   <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$">
+      <excludeFolder url="file://$MODULE_DIR$/venv" />
+    </content>
+    <orderEntry type="jdk" jdkName="Python 3.12 (trocr-handwritten-mathematical-expressions)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
 </module>

dataset/annotations.csv CHANGED Viewed

	@@ -0,0 +1,23 @@

+filename,expression
+./dataset/images/0001.png,0.9 + 0.1
+./dataset/images/0002.png,1 + 1
+./dataset/images/0003.png,2 + 2 * 2
+./dataset/images/0004.png,13 / 3 + 3^2
+./dataset/images/0005.png,26 * 3
+./dataset/images/0006.png,52 + 100
+./dataset/images/0007.png,6.5 * 9
+./dataset/images/0008.png,89 * 9
+./dataset/images/0009.png,46 - 2
+./dataset/images/0010.png,28 - 9
+./dataset/images/0011.png,4^3 / 7
+./dataset/images/0012.png,73 / 4 + (3 * 3)
+./dataset/images/0013.png,0.123 + 0
+./dataset/images/0014.png,1.34 + 5.67
+./dataset/images/0015.png,123 - 49 + 7
+./dataset/images/0016.png,1426 = 62x
+./dataset/images/0017.png,103 = a + 91
+./dataset/images/0018.png,799x + 22 = 426688
+./dataset/images/0019.png,901 + 315 = 302a - 274027
+./dataset/images/0020.png,20(58x + 78) = 115240
+./dataset/images/0021.png,46(4c + 2) = -4508 / 2
+./dataset/images/0022.png,65 = 5915 / x

dataset/images/0001.png ADDED Viewed

dataset/images/0002.png ADDED Viewed

dataset/images/0003.png ADDED Viewed

dataset/images/0004.png ADDED Viewed

dataset/images/0005.png ADDED Viewed

dataset/images/0006.png ADDED Viewed

dataset/images/0007.png ADDED Viewed

dataset/images/0008.png ADDED Viewed

dataset/images/0009.png ADDED Viewed

dataset/images/0010.png ADDED Viewed

dataset/images/0011.png ADDED Viewed

dataset/images/0012.png ADDED Viewed

dataset/images/0013.png ADDED Viewed

dataset/images/0014.png ADDED Viewed

dataset/images/0015.png ADDED Viewed

dataset/images/0016.png ADDED Viewed

dataset/images/0017.png ADDED Viewed

dataset/images/0018.png ADDED Viewed

dataset/images/0019.png ADDED Viewed

dataset/images/0020.png ADDED Viewed

dataset/images/0021.png ADDED Viewed

dataset/images/0022.png ADDED Viewed

model/checkpoint-500/config.json ADDED Viewed

	@@ -0,0 +1,171 @@

+{
+  "_name_or_path": "microsoft/trocr-base-handwritten",
+  "architectures": [
+    "VisionEncoderDecoderModel"
+  ],
+  "decoder": {
+    "_name_or_path": "",
+    "activation_dropout": 0.0,
+    "activation_function": "gelu",
+    "add_cross_attention": true,
+    "architectures": null,
+    "attention_dropout": 0.0,
+    "bad_words_ids": null,
+    "begin_suppress_tokens": null,
+    "bos_token_id": 0,
+    "chunk_size_feed_forward": 0,
+    "classifier_dropout": 0.0,
+    "cross_attention_hidden_size": 768,
+    "d_model": 1024,
+    "decoder_attention_heads": 16,
+    "decoder_ffn_dim": 4096,
+    "decoder_layerdrop": 0.0,
+    "decoder_layers": 12,
+    "decoder_start_token_id": 2,
+    "diversity_penalty": 0.0,
+    "do_sample": false,
+    "dropout": 0.1,
+    "early_stopping": false,
+    "encoder_no_repeat_ngram_size": 0,
+    "eos_token_id": 2,
+    "exponential_decay_length_penalty": null,
+    "finetuning_task": null,
+    "forced_bos_token_id": null,
+    "forced_eos_token_id": null,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "init_std": 0.02,
+    "is_decoder": true,
+    "is_encoder_decoder": false,
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "layernorm_embedding": true,
+    "length_penalty": 1.0,
+    "max_length": 20,
+    "max_position_embeddings": 512,
+    "min_length": 0,
+    "model_type": "trocr",
+    "no_repeat_ngram_size": 0,
+    "num_beam_groups": 1,
+    "num_beams": 1,
+    "num_return_sequences": 1,
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "output_scores": false,
+    "pad_token_id": 1,
+    "prefix": null,
+    "problem_type": null,
+    "pruned_heads": {},
+    "remove_invalid_values": false,
+    "repetition_penalty": 1.0,
+    "return_dict": true,
+    "return_dict_in_generate": false,
+    "scale_embedding": false,
+    "sep_token_id": null,
+    "suppress_tokens": null,
+    "task_specific_params": null,
+    "temperature": 1.0,
+    "tf_legacy_loss": false,
+    "tie_encoder_decoder": false,
+    "tie_word_embeddings": true,
+    "tokenizer_class": null,
+    "top_k": 50,
+    "top_p": 1.0,
+    "torch_dtype": null,
+    "torchscript": false,
+    "typical_p": 1.0,
+    "use_bfloat16": false,
+    "use_cache": false,
+    "use_learned_position_embeddings": true,
+    "vocab_size": 50265
+  },
+  "decoder_start_token_id": 0,
+  "encoder": {
+    "_name_or_path": "",
+    "add_cross_attention": false,
+    "architectures": null,
+    "attention_probs_dropout_prob": 0.0,
+    "bad_words_ids": null,
+    "begin_suppress_tokens": null,
+    "bos_token_id": null,
+    "chunk_size_feed_forward": 0,
+    "cross_attention_hidden_size": null,
+    "decoder_start_token_id": null,
+    "diversity_penalty": 0.0,
+    "do_sample": false,
+    "early_stopping": false,
+    "encoder_no_repeat_ngram_size": 0,
+    "encoder_stride": 16,
+    "eos_token_id": null,
+    "exponential_decay_length_penalty": null,
+    "finetuning_task": null,
+    "forced_bos_token_id": null,
+    "forced_eos_token_id": null,
+    "hidden_act": "gelu",
+    "hidden_dropout_prob": 0.0,
+    "hidden_size": 768,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "image_size": 384,
+    "initializer_range": 0.02,
+    "intermediate_size": 3072,
+    "is_decoder": false,
+    "is_encoder_decoder": false,
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "layer_norm_eps": 1e-12,
+    "length_penalty": 1.0,
+    "max_length": 20,
+    "min_length": 0,
+    "model_type": "vit",
+    "no_repeat_ngram_size": 0,
+    "num_attention_heads": 12,
+    "num_beam_groups": 1,
+    "num_beams": 1,
+    "num_channels": 3,
+    "num_hidden_layers": 12,
+    "num_return_sequences": 1,
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "output_scores": false,
+    "pad_token_id": null,
+    "patch_size": 16,
+    "prefix": null,
+    "problem_type": null,
+    "pruned_heads": {},
+    "qkv_bias": false,
+    "remove_invalid_values": false,
+    "repetition_penalty": 1.0,
+    "return_dict": true,
+    "return_dict_in_generate": false,
+    "sep_token_id": null,
+    "suppress_tokens": null,
+    "task_specific_params": null,
+    "temperature": 1.0,
+    "tf_legacy_loss": false,
+    "tie_encoder_decoder": false,
+    "tie_word_embeddings": true,
+    "tokenizer_class": null,
+    "top_k": 50,
+    "top_p": 1.0,
+    "torch_dtype": null,
+    "torchscript": false,
+    "typical_p": 1.0,
+    "use_bfloat16": false
+  },
+  "is_encoder_decoder": true,
+  "model_type": "vision-encoder-decoder",
+  "pad_token_id": 1,
+  "processor_class": "TrOCRProcessor",
+  "tie_word_embeddings": false,
+  "torch_dtype": "float32",
+  "transformers_version": "4.37.2"
+}

model/checkpoint-500/generation_config.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 0,
+  "decoder_start_token_id": 2,
+  "eos_token_id": 2,
+  "pad_token_id": 1,
+  "transformers_version": "4.37.2",
+  "use_cache": false
+}

model/checkpoint-500/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b40685900fc767af31aef4a3a62d0f2fc964f910e9d5d6eb3a6accc9c83324f2
+size 1335747032

model/checkpoint-500/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:277ee31979f928e16dacf35727b361540431efc2179066178de8aff98fade57e
+size 2667050412

model/checkpoint-500/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ae815a4262cf019aa02c8291a4c9c2a2b22f9c4534ccef44ebbef7835c2c5e48
+size 14244

model/checkpoint-500/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d02c16cc82c2dd3c41a58b5d09ae955539e7b9165124433ee976a0bb0323a2ee
+size 1064

model/checkpoint-500/trainer_state.json ADDED Viewed

	@@ -0,0 +1,321 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 100.0,
+  "eval_steps": 500,
+  "global_step": 500,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 2.0,
+      "learning_rate": 9.8e-05,
+      "loss": 8.2056,
+      "step": 10
+    },
+    {
+      "epoch": 4.0,
+      "learning_rate": 9.6e-05,
+      "loss": 4.4086,
+      "step": 20
+    },
+    {
+      "epoch": 6.0,
+      "learning_rate": 9.4e-05,
+      "loss": 3.1851,
+      "step": 30
+    },
+    {
+      "epoch": 8.0,
+      "learning_rate": 9.200000000000001e-05,
+      "loss": 3.2542,
+      "step": 40
+    },
+    {
+      "epoch": 10.0,
+      "learning_rate": 9e-05,
+      "loss": 2.6913,
+      "step": 50
+    },
+    {
+      "epoch": 12.0,
+      "learning_rate": 8.800000000000001e-05,
+      "loss": 2.3765,
+      "step": 60
+    },
+    {
+      "epoch": 14.0,
+      "learning_rate": 8.6e-05,
+      "loss": 2.2853,
+      "step": 70
+    },
+    {
+      "epoch": 16.0,
+      "learning_rate": 8.4e-05,
+      "loss": 2.3182,
+      "step": 80
+    },
+    {
+      "epoch": 18.0,
+      "learning_rate": 8.2e-05,
+      "loss": 2.156,
+      "step": 90
+    },
+    {
+      "epoch": 20.0,
+      "learning_rate": 8e-05,
+      "loss": 1.9019,
+      "step": 100
+    },
+    {
+      "epoch": 22.0,
+      "learning_rate": 7.800000000000001e-05,
+      "loss": 1.8288,
+      "step": 110
+    },
+    {
+      "epoch": 24.0,
+      "learning_rate": 7.6e-05,
+      "loss": 1.7968,
+      "step": 120
+    },
+    {
+      "epoch": 26.0,
+      "learning_rate": 7.4e-05,
+      "loss": 1.6366,
+      "step": 130
+    },
+    {
+      "epoch": 28.0,
+      "learning_rate": 7.2e-05,
+      "loss": 1.5084,
+      "step": 140
+    },
+    {
+      "epoch": 30.0,
+      "learning_rate": 7e-05,
+      "loss": 1.3425,
+      "step": 150
+    },
+    {
+      "epoch": 32.0,
+      "learning_rate": 6.800000000000001e-05,
+      "loss": 1.3157,
+      "step": 160
+    },
+    {
+      "epoch": 34.0,
+      "learning_rate": 6.6e-05,
+      "loss": 1.1184,
+      "step": 170
+    },
+    {
+      "epoch": 36.0,
+      "learning_rate": 6.400000000000001e-05,
+      "loss": 0.8982,
+      "step": 180
+    },
+    {
+      "epoch": 38.0,
+      "learning_rate": 6.2e-05,
+      "loss": 0.7471,
+      "step": 190
+    },
+    {
+      "epoch": 40.0,
+      "learning_rate": 6e-05,
+      "loss": 0.7546,
+      "step": 200
+    },
+    {
+      "epoch": 42.0,
+      "learning_rate": 5.8e-05,
+      "loss": 0.5103,
+      "step": 210
+    },
+    {
+      "epoch": 44.0,
+      "learning_rate": 5.6000000000000006e-05,
+      "loss": 0.4532,
+      "step": 220
+    },
+    {
+      "epoch": 46.0,
+      "learning_rate": 5.4000000000000005e-05,
+      "loss": 0.4687,
+      "step": 230
+    },
+    {
+      "epoch": 48.0,
+      "learning_rate": 5.2000000000000004e-05,
+      "loss": 0.4073,
+      "step": 240
+    },
+    {
+      "epoch": 50.0,
+      "learning_rate": 5e-05,
+      "loss": 0.5488,
+      "step": 250
+    },
+    {
+      "epoch": 52.0,
+      "learning_rate": 4.8e-05,
+      "loss": 0.5888,
+      "step": 260
+    },
+    {
+      "epoch": 54.0,
+      "learning_rate": 4.600000000000001e-05,
+      "loss": 0.3194,
+      "step": 270
+    },
+    {
+      "epoch": 56.0,
+      "learning_rate": 4.4000000000000006e-05,
+      "loss": 0.47,
+      "step": 280
+    },
+    {
+      "epoch": 58.0,
+      "learning_rate": 4.2e-05,
+      "loss": 0.2736,
+      "step": 290
+    },
+    {
+      "epoch": 60.0,
+      "learning_rate": 4e-05,
+      "loss": 0.2798,
+      "step": 300
+    },
+    {
+      "epoch": 62.0,
+      "learning_rate": 3.8e-05,
+      "loss": 0.322,
+      "step": 310
+    },
+    {
+      "epoch": 64.0,
+      "learning_rate": 3.6e-05,
+      "loss": 0.1707,
+      "step": 320
+    },
+    {
+      "epoch": 66.0,
+      "learning_rate": 3.4000000000000007e-05,
+      "loss": 0.1222,
+      "step": 330
+    },
+    {
+      "epoch": 68.0,
+      "learning_rate": 3.2000000000000005e-05,
+      "loss": 0.1246,
+      "step": 340
+    },
+    {
+      "epoch": 70.0,
+      "learning_rate": 3e-05,
+      "loss": 0.1404,
+      "step": 350
+    },
+    {
+      "epoch": 72.0,
+      "learning_rate": 2.8000000000000003e-05,
+      "loss": 0.1098,
+      "step": 360
+    },
+    {
+      "epoch": 74.0,
+      "learning_rate": 2.6000000000000002e-05,
+      "loss": 0.1441,
+      "step": 370
+    },
+    {
+      "epoch": 76.0,
+      "learning_rate": 2.4e-05,
+      "loss": 0.1531,
+      "step": 380
+    },
+    {
+      "epoch": 78.0,
+      "learning_rate": 2.2000000000000003e-05,
+      "loss": 0.1241,
+      "step": 390
+    },
+    {
+      "epoch": 80.0,
+      "learning_rate": 2e-05,
+      "loss": 0.103,
+      "step": 400
+    },
+    {
+      "epoch": 82.0,
+      "learning_rate": 1.8e-05,
+      "loss": 0.0907,
+      "step": 410
+    },
+    {
+      "epoch": 84.0,
+      "learning_rate": 1.6000000000000003e-05,
+      "loss": 0.0909,
+      "step": 420
+    },
+    {
+      "epoch": 86.0,
+      "learning_rate": 1.4000000000000001e-05,
+      "loss": 0.0874,
+      "step": 430
+    },
+    {
+      "epoch": 88.0,
+      "learning_rate": 1.2e-05,
+      "loss": 0.0757,
+      "step": 440
+    },
+    {
+      "epoch": 90.0,
+      "learning_rate": 1e-05,
+      "loss": 0.0753,
+      "step": 450
+    },
+    {
+      "epoch": 92.0,
+      "learning_rate": 8.000000000000001e-06,
+      "loss": 0.0763,
+      "step": 460
+    },
+    {
+      "epoch": 94.0,
+      "learning_rate": 6e-06,
+      "loss": 0.0714,
+      "step": 470
+    },
+    {
+      "epoch": 96.0,
+      "learning_rate": 4.000000000000001e-06,
+      "loss": 0.0736,
+      "step": 480
+    },
+    {
+      "epoch": 98.0,
+      "learning_rate": 2.0000000000000003e-06,
+      "loss": 0.0601,
+      "step": 490
+    },
+    {
+      "epoch": 100.0,
+      "learning_rate": 0.0,
+      "loss": 0.0731,
+      "step": 500
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 500,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 100,
+  "save_steps": 500,
+  "total_flos": 1.4217418628923392e+18,
+  "train_batch_size": 2,
+  "trial_name": null,
+  "trial_params": null
+}

model/checkpoint-500/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:04deb0d096a36c5f7da938e19ec51b9bd341c0c139090e278562f90413d3087e
+size 4664

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+transformers
+torch # pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
+pandas
+pillow
+scikit-learn
+accelerate

train.py CHANGED Viewed

	@@ -0,0 +1,87 @@

+from torch.utils.data import Dataset
+from transformers import TrOCRProcessor, VisionEncoderDecoderModel, Trainer, TrainingArguments
+from PIL import Image
+import pandas as pd
+from sklearn.model_selection import train_test_split
+class HandwrittenMathDataset(Dataset):
+    """
+    Initialize the class with the provided annotations file, image directory, and processor.
+    Parameters:
+        annotations_file (str): The file path to the annotations file.
+        img_dir (str): The directory path to the images.
+        processor: The processor object to be used for image processing.
+    """
+    def __init__(self, annotations_file, img_dir, processor, subset="train"):
+        self.img_labels = pd.read_csv(annotations_file)
+        self.train_data, self.test_data = train_test_split(self.img_labels, test_size=0.1, random_state=42)
+        self.data = self.train_data if subset == "train" else self.test_data
+        self.img_dir = img_dir
+        self.processor = processor
+    def __len__(self):
+        return len(self.data)
+    def __getitem__(self, idx):
+        img_path = self.data.iloc[idx, 0]
+        image = Image.open(img_path).convert("RGB")
+        # Ensure the image is processed correctly
+        pixel_values = self.processor(images=image, return_tensors="pt").pixel_values
+        label = self.data.iloc[idx, 1]
+        # Process labels correctly
+        labels = self.processor.tokenizer(label, padding="max_length", max_length=128, truncation=True,
+                                          return_tensors="pt").input_ids
+        # Replace -100 in the labels as they are not to be computed for loss
+        labels[labels == self.processor.tokenizer.pad_token_id] = -100
+        return {"pixel_values": pixel_values.squeeze(), "labels": labels.squeeze()}
+def main():
+    """
+    A function to train a model for handwritten text recognition using TrOCRProcessor and VisionEncoderDecoderModel.
+    """
+    annotations_file = './dataset/annotations.csv'
+    img_dir = './dataset/images/'
+    model_id = 'microsoft/trocr-base-handwritten'
+    processor = TrOCRProcessor.from_pretrained(model_id)
+    model = VisionEncoderDecoderModel.from_pretrained(model_id).to("cuda")
+    # Set the decoder_start_token_id
+    model.config.decoder_start_token_id = processor.tokenizer.cls_token_id
+    model.config.pad_token_id = processor.tokenizer.pad_token_id
+    train_dataset = HandwrittenMathDataset(annotations_file=annotations_file, img_dir=img_dir, processor=processor,
+                                           subset="train")
+    test_dataset = HandwrittenMathDataset(annotations_file=annotations_file, img_dir=img_dir, processor=processor,
+                                          subset="test")
+    training_args = TrainingArguments(
+        output_dir='./model',
+        per_device_train_batch_size=2,
+        num_train_epochs=100,
+        logging_dir='./training_logs',
+        logging_steps=10,
+        save_strategy="epoch",
+        save_total_limit=1,
+        weight_decay=0.01,
+        learning_rate=1e-4,
+        gradient_checkpointing=True,
+        gradient_accumulation_steps=2
+    )
+    trainer = Trainer(
+        model=model,
+        args=training_args,
+        train_dataset=train_dataset,
+        eval_dataset=test_dataset
+    )
+    trainer.train()
+# Run training only when this file is executed as a script, not when it is imported.
+if __name__ == '__main__':
+    main()