abarbosa committed
Commit 21d0aef · verified · 1 parent: c52adb1

Pushing fine-tuned model to Hugging Face Hub
README.md ADDED
@@ -0,0 +1,48 @@
+
+ ---
+ language:
+ - pt
+ - en
+ tags:
+ - aes
+ datasets:
+ - kamel-usp/aes_enem_dataset
+ base_model: TucanoBR/Tucano-2b4-Instruct
+ metrics:
+ - accuracy
+ - qwk
+ library_name: peft
+ model-index:
+ - name: Tucano-2b4-Instruct-tucano_classification_lora-C2-full_context-r8
+   results:
+   - task:
+       type: text-classification
+       name: Automated Essay Score
+     dataset:
+       name: Automated Essay Score ENEM Dataset
+       type: kamel-usp/aes_enem_dataset
+       config: JBCS2025
+       split: test
+     metrics:
+     - name: Macro F1
+       type: f1
+       value: 0.2120028944740006
+     - name: QWK
+       type: qwk
+       value: 0.2794486215538846
+     - name: Weighted Macro F1
+       type: f1
+       value: 0.3298139684514741
+ ---
+ # Model ID: Tucano-2b4-Instruct-tucano_classification_lora-C2-full_context-r8
+ ## Results
+ |                  |   test_data |
+ |:-----------------|------------:|
+ | eval_accuracy    |     0.34058 |
+ | eval_RMSE        |     68.1005 |
+ | eval_QWK         |    0.279449 |
+ | eval_Macro_F1    |    0.212003 |
+ | eval_Weighted_F1 |    0.329814 |
+ | eval_Micro_F1    |     0.34058 |
+ | eval_HDIV        |    0.130435 |
+
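The card above names the base model, the PEFT/LoRA setup, and a 6-label classification head, but includes no loading snippet. Below is a minimal sketch of how an adapter like this is typically loaded with `transformers` and `peft`; the adapter repo id is a placeholder, and the exact prompt construction used in training (prompt, supporting text, essay) is not reproduced here.

```python
# Minimal loading sketch (assumptions: placeholder repo id; the prompt format
# used during training is not part of this commit and is not reproduced here).
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from peft import PeftModel

BASE_ID = "TucanoBR/Tucano-2b4-Instruct"
ADAPTER_ID = "<org>/Tucano-2b4-Instruct-tucano_classification_lora-C2-full_context-r8"  # placeholder

tokenizer = AutoTokenizer.from_pretrained(ADAPTER_ID)
base = AutoModelForSequenceClassification.from_pretrained(
    BASE_ID,
    num_labels=6,                # matches adapter_config.json / training config below
    torch_dtype=torch.bfloat16,  # training ran in bf16
)
model = PeftModel.from_pretrained(base, ADAPTER_ID)
model.eval()

text = "...prompt, supporting text and essay, formatted as in the training pipeline..."
inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=4096)
with torch.no_grad():
    logits = model(**inputs).logits
print("predicted grade index (0-5):", logits.argmax(dim=-1).item())
```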
adapter_config.json ADDED
@@ -0,0 +1,44 @@
+ {
+   "alpha_pattern": {},
+   "auto_mapping": null,
+   "base_model_name_or_path": "TucanoBR/Tucano-2b4-Instruct",
+   "bias": "none",
+   "corda_config": null,
+   "eva_config": null,
+   "exclude_modules": null,
+   "fan_in_fan_out": false,
+   "inference_mode": true,
+   "init_lora_weights": true,
+   "layer_replication": null,
+   "layers_pattern": null,
+   "layers_to_transform": null,
+   "loftq_config": {},
+   "lora_alpha": 16,
+   "lora_bias": false,
+   "lora_dropout": 0.05,
+   "megatron_config": null,
+   "megatron_core": "megatron.core",
+   "modules_to_save": [
+     "classifier",
+     "score"
+   ],
+   "peft_type": "LORA",
+   "qalora_group_size": 16,
+   "r": 8,
+   "rank_pattern": {},
+   "revision": null,
+   "target_modules": [
+     "up_proj",
+     "v_proj",
+     "gate_proj",
+     "k_proj",
+     "q_proj",
+     "o_proj",
+     "down_proj"
+   ],
+   "task_type": "SEQ_CLS",
+   "trainable_token_indices": null,
+   "use_dora": false,
+   "use_qalora": false,
+   "use_rslora": false
+ }
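For reference, a rough `peft.LoraConfig` equivalent of the JSON above, with values copied from the file. This is an illustrative sketch, not the original training script; fields serialized by newer `peft` releases (e.g. `lora_bias`, `qalora_group_size`) are left at their defaults.

```python
# Illustrative LoraConfig mirroring adapter_config.json above (not the training script).
from peft import LoraConfig, TaskType

lora_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    modules_to_save=["classifier", "score"],  # keep the new classification head trainable
)
# Applied to a sequence-classification base model, e.g.:
# model = get_peft_model(base_model, lora_config)
```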
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bc4961d22bbe3616d2e829c51c853506324e3dba713db233d95b69f05064b6ef
+ size 42376520
chat_template.jinja ADDED
@@ -0,0 +1 @@
+ {% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '<instruction>' + message['content'].strip() + '</instruction>'}}{% elif message['role'] == 'assistant' %}{{ message['content'].strip() + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}
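The template only accepts alternating user/assistant turns, wraps user content in `<instruction>` … `</instruction>`, and appends `eos_token` after assistant turns. A small rendering sketch, assuming the tokenizer shipped in this commit (placeholder repo id):

```python
# Rendering sketch for the chat template above (placeholder repo id).
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(
    "<org>/Tucano-2b4-Instruct-tucano_classification_lora-C2-full_context-r8"  # placeholder
)
messages = [{"role": "user", "content": "Avalie a redação a seguir."}]
prompt = tokenizer.apply_chat_template(messages, tokenize=False)
# prompt == "<instruction>Avalie a redação a seguir.</instruction>"
```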
emissions.csv ADDED
@@ -0,0 +1,2 @@
+ timestamp,project_name,run_id,experiment_id,duration,emissions,emissions_rate,cpu_power,gpu_power,ram_power,cpu_energy,gpu_energy,ram_energy,energy_consumed,country_name,country_iso_code,region,cloud_provider,cloud_region,os,python_version,codecarbon_version,cpu_count,cpu_model,gpu_count,gpu_model,longitude,latitude,ram_total_size,tracking_mode,on_cloud,pue
+ 2025-07-12T21:33:52,jbcs2025,deab63e8-20ab-426e-a5ad-6e5c82cd6e46,Tucano-2b4-Instruct-tucano_classification_lora-C2-full_context-r8,2158.6262983139604,0.21850978474264723,0.00010122631458410323,66.0,589.0909003578051,70.0,0.0390071097300183,0.36989872230765286,0.0412659564191786,0.4501717884568498,Japan,JPN,,,,Linux-5.15.0-130-generic-x86_64-with-glibc2.35,3.12.11,3.0.2,192,INTEL(R) XEON(R) PLATINUM 8558,1,1 x NVIDIA H200,139.69,35.69,2015.3516235351562,machine,N,1.0
evaluation_results.csv ADDED
@@ -0,0 +1,4 @@
+ eval_loss,eval_model_preparation_time,eval_accuracy,eval_RMSE,eval_QWK,eval_HDIV,eval_Macro_F1,eval_Micro_F1,eval_Weighted_F1,eval_TP_0,eval_TN_0,eval_FP_0,eval_FN_0,eval_TP_1,eval_TN_1,eval_FP_1,eval_FN_1,eval_TP_2,eval_TN_2,eval_FP_2,eval_FN_2,eval_TP_3,eval_TN_3,eval_FP_3,eval_FN_3,eval_TP_4,eval_TN_4,eval_FP_4,eval_FN_4,eval_TP_5,eval_TN_5,eval_FP_5,eval_FN_5,eval_runtime,eval_samples_per_second,eval_steps_per_second,epoch,reference,timestamp,id
+ 2.232393741607666,0.0094,0.08333333333333333,108.37644829691803,-0.13261077462543813,0.3787878787878788,0.03826086956521739,0.08333333333333333,0.02173913043478261,0,103,28,1,0,103,4,25,0,132,0,0,0,71,0,61,0,102,0,30,11,28,89,4,9.6864,13.627,3.407,-1,validation_before_training,2025-07-12 20:58:06,Tucano-2b4-Instruct-tucano_classification_lora-C2-full_context-r8
+ 1.7800564765930176,0.0094,0.32575757575757575,51.99067515459193,0.49125475285171105,0.030303030303030276,0.21556364147726273,0.32575757575757575,0.34292473860580175,0,131,0,1,7,96,11,18,0,120,12,0,22,49,22,39,6,84,18,24,8,91,26,7,9.351,14.116,3.529,16.0,validation_after_training,2025-07-12 20:58:06,Tucano-2b4-Instruct-tucano_classification_lora-C2-full_context-r8
+ 1.835325837135315,0.0094,0.34057971014492755,68.10052246069989,0.2794486215538846,0.13043478260869568,0.21200289447400067,0.34057971014492755,0.32981396845147415,0,137,0,1,15,74,29,20,0,127,6,5,22,61,26,29,2,100,12,24,8,100,18,12,9.9378,13.886,3.522,16.0,test_results,2025-07-12 20:58:06,Tucano-2b4-Instruct-tucano_classification_lora-C2-full_context-r8
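QWK in these results is quadratic weighted kappa, the model-selection metric (`metric_for_best_model: QWK` in the training config below). A toy sketch of the metric, assuming the scikit-learn implementation rather than the project's own metric code:

```python
# Toy QWK example; assumes sklearn's cohen_kappa_score with quadratic weights,
# not the project's metric implementation.
from sklearn.metrics import cohen_kappa_score

y_true = [0, 1, 2, 3, 4, 5, 3, 2]
y_pred = [0, 1, 1, 3, 5, 5, 3, 2]
print(cohen_kappa_score(y_true, y_pred, weights="quadratic"))
```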
run_experiment.log ADDED
@@ -0,0 +1,840 @@
1
+ [2025-07-12 20:57:50,312][__main__][INFO] - cache_dir: /tmp/
2
+ dataset:
3
+ name: kamel-usp/aes_enem_dataset
4
+ split: JBCS2025
5
+ training_params:
6
+ seed: 42
7
+ num_train_epochs: 20
8
+ logging_steps: 100
9
+ metric_for_best_model: QWK
10
+ bf16: true
11
+ bootstrap:
12
+ enabled: true
13
+ n_bootstrap: 10000
14
+ bootstrap_seed: 42
15
+ metrics:
16
+ - QWK
17
+ - Macro_F1
18
+ - Weighted_F1
19
+ post_training_results:
20
+ model_path: /workspace/jbcs2025/outputs/2025-03-24/20-42-59
21
+ experiments:
22
+ model:
23
+ name: TucanoBR/Tucano-2b4-Instruct
24
+ type: tucano_classification_lora
25
+ num_labels: 6
26
+ output_dir: ./results/
27
+ logging_dir: ./logs/
28
+ best_model_dir: ./results/best_model
29
+ lora_r: 8
30
+ lora_dropout: 0.05
31
+ lora_alpha: 16
32
+ lora_target_modules: all-linear
33
+ checkpoint_path: ''
34
+ tokenizer:
35
+ name: TucanoBR/Tucano-2b4-Instruct
36
+ dataset:
37
+ grade_index: 1
38
+ use_full_context: true
39
+ training_params:
40
+ weight_decay: 0.01
41
+ warmup_ratio: 0.1
42
+ learning_rate: 5.0e-05
43
+ train_batch_size: 8
44
+ eval_batch_size: 4
45
+ gradient_accumulation_steps: 2
46
+ gradient_checkpointing: true
47
+
48
+ [2025-07-12 20:57:54,284][__main__][INFO] - GPU 0: NVIDIA H200 | TDP ≈ 700 W
49
+ [2025-07-12 20:57:54,284][__main__][INFO] - Starting the Fine Tuning training process.
50
+ [2025-07-12 20:57:58,826][transformers.tokenization_utils_base][INFO] - loading file tokenizer.model from cache at None
51
+ [2025-07-12 20:57:58,826][transformers.tokenization_utils_base][INFO] - loading file tokenizer.json from cache at /tmp/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/tokenizer.json
52
+ [2025-07-12 20:57:58,826][transformers.tokenization_utils_base][INFO] - loading file added_tokens.json from cache at None
53
+ [2025-07-12 20:57:58,826][transformers.tokenization_utils_base][INFO] - loading file special_tokens_map.json from cache at /tmp/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/special_tokens_map.json
54
+ [2025-07-12 20:57:58,826][transformers.tokenization_utils_base][INFO] - loading file tokenizer_config.json from cache at /tmp/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/tokenizer_config.json
55
+ [2025-07-12 20:57:58,826][transformers.tokenization_utils_base][INFO] - loading file chat_template.jinja from cache at None
56
+ [2025-07-12 20:57:58,868][transformers.tokenization_utils_base][INFO] - Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
57
+ [2025-07-12 20:57:58,877][__main__][INFO] - Tokenizer function parameters- Padding:longest; Truncation: False; Use Full Context: True
58
+ [2025-07-12 20:58:01,043][__main__][INFO] -
59
+ Token statistics for 'train' split:
60
+ [2025-07-12 20:58:01,043][__main__][INFO] - Total examples: 500
61
+ [2025-07-12 20:58:01,043][__main__][INFO] - Min tokens: 3557
62
+ [2025-07-12 20:58:01,043][__main__][INFO] - Max tokens: 3557
63
+ [2025-07-12 20:58:01,043][__main__][INFO] - Avg tokens: 3557.00
64
+ [2025-07-12 20:58:01,043][__main__][INFO] - Std tokens: 0.00
65
+ [2025-07-12 20:58:01,377][__main__][INFO] -
66
+ Token statistics for 'validation' split:
67
+ [2025-07-12 20:58:01,377][__main__][INFO] - Total examples: 132
68
+ [2025-07-12 20:58:01,377][__main__][INFO] - Min tokens: 3759
69
+ [2025-07-12 20:58:01,377][__main__][INFO] - Max tokens: 3759
70
+ [2025-07-12 20:58:01,377][__main__][INFO] - Avg tokens: 3759.00
71
+ [2025-07-12 20:58:01,377][__main__][INFO] - Std tokens: 0.00
72
+ [2025-07-12 20:58:01,715][__main__][INFO] -
73
+ Token statistics for 'test' split:
74
+ [2025-07-12 20:58:01,715][__main__][INFO] - Total examples: 138
75
+ [2025-07-12 20:58:01,715][__main__][INFO] - Min tokens: 3782
76
+ [2025-07-12 20:58:01,715][__main__][INFO] - Max tokens: 3782
77
+ [2025-07-12 20:58:01,715][__main__][INFO] - Avg tokens: 3782.00
78
+ [2025-07-12 20:58:01,716][__main__][INFO] - Std tokens: 0.00
79
+ [2025-07-12 20:58:01,716][__main__][INFO] - If token statistics are the same (max, avg, min) keep in mind that this is due to batched tokenization and padding.
80
+ [2025-07-12 20:58:01,716][__main__][INFO] - Model max length: 4096. If it is the same as stats, then there is a high chance that sequences are being truncated.
81
+ [2025-07-12 20:58:01,901][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /tmp/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
82
+ [2025-07-12 20:58:01,902][transformers.configuration_utils][INFO] - Model config LlamaConfig {
83
+ "architectures": [
84
+ "LlamaForCausalLM"
85
+ ],
86
+ "attention_bias": false,
87
+ "attention_dropout": 0.0,
88
+ "bos_token_id": 1,
89
+ "eos_token_id": 2,
90
+ "head_dim": 160,
91
+ "hidden_act": "silu",
92
+ "hidden_size": 2560,
93
+ "id2label": {
94
+ "0": "LABEL_0",
95
+ "1": "LABEL_1",
96
+ "2": "LABEL_2",
97
+ "3": "LABEL_3",
98
+ "4": "LABEL_4",
99
+ "5": "LABEL_5"
100
+ },
101
+ "initializer_range": 0.02,
102
+ "intermediate_size": 10240,
103
+ "label2id": {
104
+ "LABEL_0": 0,
105
+ "LABEL_1": 1,
106
+ "LABEL_2": 2,
107
+ "LABEL_3": 3,
108
+ "LABEL_4": 4,
109
+ "LABEL_5": 5
110
+ },
111
+ "max_position_embeddings": 4096,
112
+ "mlp_bias": false,
113
+ "model_type": "llama",
114
+ "num_attention_heads": 16,
115
+ "num_hidden_layers": 24,
116
+ "num_key_value_heads": 4,
117
+ "pad_token_id": 3,
118
+ "pretraining_tp": 1,
119
+ "rms_norm_eps": 1e-05,
120
+ "rope_scaling": null,
121
+ "rope_theta": 10000.0,
122
+ "tie_word_embeddings": false,
123
+ "torch_dtype": "float32",
124
+ "transformers_version": "4.53.2",
125
+ "use_cache": false,
126
+ "vocab_size": 32002
127
+ }
128
+
129
+ [2025-07-12 20:58:02,050][transformers.modeling_utils][INFO] - loading weights file model.safetensors from cache at /tmp/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/model.safetensors.index.json
130
+ [2025-07-12 20:58:02,050][transformers.modeling_utils][INFO] - Will use torch_dtype=torch.float32 as defined in model's config object
131
+ [2025-07-12 20:58:02,050][transformers.modeling_utils][INFO] - Instantiating LlamaForSequenceClassification model under default dtype torch.float32.
132
+ [2025-07-12 20:58:02,051][transformers.modeling_utils][WARNING] - Flash Attention 2.0 only supports torch.float16 and torch.bfloat16 dtypes, but the current dype in LlamaForSequenceClassification is torch.float32. You should run training or inference using Automatic Mixed-Precision via the `with torch.autocast(device_type='torch_device'):` decorator, or load the model with the `torch_dtype` argument. Example: `model = AutoModel.from_pretrained("openai/whisper-tiny", attn_implementation="flash_attention_2", torch_dtype=torch.float16)`
133
+ [2025-07-12 20:58:06,170][transformers.modeling_utils][INFO] - Some weights of the model checkpoint at TucanoBR/Tucano-2b4-Instruct were not used when initializing LlamaForSequenceClassification: ['lm_head.weight']
134
+ - This IS expected if you are initializing LlamaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
135
+ - This IS NOT expected if you are initializing LlamaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
136
+ [2025-07-12 20:58:06,170][transformers.modeling_utils][WARNING] - Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at TucanoBR/Tucano-2b4-Instruct and are newly initialized: ['score.weight']
137
+ You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
138
+ [2025-07-12 20:58:06,872][__main__][INFO] - Initialized new PEFT model for ce loss
139
+ [2025-07-12 20:58:06,874][__main__][INFO] - None
140
+ [2025-07-12 20:58:06,875][transformers.training_args][INFO] - PyTorch: setting up devices
141
+ [2025-07-12 20:58:06,932][__main__][INFO] - Total steps: 620. Number of warmup steps: 62
142
+ [2025-07-12 20:58:06,946][transformers.trainer][INFO] - You have loaded a model on multiple GPUs. `is_model_parallel` attribute will be force-set to `True` to avoid any unexpected behavior such as device placement mismatching.
143
+ [2025-07-12 20:58:06,968][transformers.trainer][INFO] - Using auto half precision backend
144
+ [2025-07-12 20:58:06,968][transformers.trainer][WARNING] - No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
145
+ [2025-07-12 20:58:06,970][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference. If grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
146
+ [2025-07-12 20:58:06,982][transformers.trainer][INFO] -
147
+ ***** Running Evaluation *****
148
+ [2025-07-12 20:58:06,982][transformers.trainer][INFO] - Num examples = 132
149
+ [2025-07-12 20:58:06,982][transformers.trainer][INFO] - Batch size = 4
150
+ [2025-07-12 20:58:07,197][transformers.modeling_flash_attention_utils][WARNING] - The input hidden states seems to be silently casted in float32, this might be related to the fact you have upcasted embedding or layer norm layers in float32. We will cast back the input in torch.bfloat16.
151
+ [2025-07-12 20:58:16,877][transformers.trainer][INFO] - The following columns in the Training set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference. If grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
152
+ [2025-07-12 20:58:16,919][transformers.trainer][INFO] - ***** Running training *****
153
+ [2025-07-12 20:58:16,919][transformers.trainer][INFO] - Num examples = 500
154
+ [2025-07-12 20:58:16,919][transformers.trainer][INFO] - Num Epochs = 20
155
+ [2025-07-12 20:58:16,919][transformers.trainer][INFO] - Instantaneous batch size per device = 8
156
+ [2025-07-12 20:58:16,919][transformers.trainer][INFO] - Total train batch size (w. parallel, distributed & accumulation) = 16
157
+ [2025-07-12 20:58:16,919][transformers.trainer][INFO] - Gradient Accumulation steps = 2
158
+ [2025-07-12 20:58:16,919][transformers.trainer][INFO] - Total optimization steps = 640
159
+ [2025-07-12 20:58:16,921][transformers.trainer][INFO] - Number of trainable parameters = 10,583,040
160
+ [2025-07-12 21:00:19,314][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference. If grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
161
+ [2025-07-12 21:00:19,317][transformers.trainer][INFO] -
162
+ ***** Running Evaluation *****
163
+ [2025-07-12 21:00:19,317][transformers.trainer][INFO] - Num examples = 132
164
+ [2025-07-12 21:00:19,317][transformers.trainer][INFO] - Batch size = 4
165
+ [2025-07-12 21:00:28,703][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-32
166
+ [2025-07-12 21:00:29,127][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
167
+ [2025-07-12 21:00:29,128][transformers.configuration_utils][INFO] - Model config LlamaConfig {
168
+ "architectures": [
169
+ "LlamaForCausalLM"
170
+ ],
171
+ "attention_bias": false,
172
+ "attention_dropout": 0.0,
173
+ "bos_token_id": 1,
174
+ "eos_token_id": 2,
175
+ "head_dim": 160,
176
+ "hidden_act": "silu",
177
+ "hidden_size": 2560,
178
+ "initializer_range": 0.02,
179
+ "intermediate_size": 10240,
180
+ "max_position_embeddings": 4096,
181
+ "mlp_bias": false,
182
+ "model_type": "llama",
183
+ "num_attention_heads": 16,
184
+ "num_hidden_layers": 24,
185
+ "num_key_value_heads": 4,
186
+ "pad_token_id": 3,
187
+ "pretraining_tp": 1,
188
+ "rms_norm_eps": 1e-05,
189
+ "rope_scaling": null,
190
+ "rope_theta": 10000.0,
191
+ "tie_word_embeddings": false,
192
+ "torch_dtype": "float32",
193
+ "transformers_version": "4.53.2",
194
+ "use_cache": false,
195
+ "vocab_size": 32002
196
+ }
197
+
198
+ [2025-07-12 21:02:31,483][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference. If grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
199
+ [2025-07-12 21:02:31,486][transformers.trainer][INFO] -
200
+ ***** Running Evaluation *****
201
+ [2025-07-12 21:02:31,486][transformers.trainer][INFO] - Num examples = 132
202
+ [2025-07-12 21:02:31,486][transformers.trainer][INFO] - Batch size = 4
203
+ [2025-07-12 21:02:40,854][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-64
204
+ [2025-07-12 21:02:41,216][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
205
+ [2025-07-12 21:02:41,216][transformers.configuration_utils][INFO] - Model config LlamaConfig {
206
+ "architectures": [
207
+ "LlamaForCausalLM"
208
+ ],
209
+ "attention_bias": false,
210
+ "attention_dropout": 0.0,
211
+ "bos_token_id": 1,
212
+ "eos_token_id": 2,
213
+ "head_dim": 160,
214
+ "hidden_act": "silu",
215
+ "hidden_size": 2560,
216
+ "initializer_range": 0.02,
217
+ "intermediate_size": 10240,
218
+ "max_position_embeddings": 4096,
219
+ "mlp_bias": false,
220
+ "model_type": "llama",
221
+ "num_attention_heads": 16,
222
+ "num_hidden_layers": 24,
223
+ "num_key_value_heads": 4,
224
+ "pad_token_id": 3,
225
+ "pretraining_tp": 1,
226
+ "rms_norm_eps": 1e-05,
227
+ "rope_scaling": null,
228
+ "rope_theta": 10000.0,
229
+ "tie_word_embeddings": false,
230
+ "torch_dtype": "float32",
231
+ "transformers_version": "4.53.2",
232
+ "use_cache": false,
233
+ "vocab_size": 32002
234
+ }
235
+
236
+ [2025-07-12 21:02:41,383][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-32] due to args.save_total_limit
237
+ [2025-07-12 21:04:43,618][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference. If grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
238
+ [2025-07-12 21:04:43,621][transformers.trainer][INFO] -
239
+ ***** Running Evaluation *****
240
+ [2025-07-12 21:04:43,621][transformers.trainer][INFO] - Num examples = 132
241
+ [2025-07-12 21:04:43,621][transformers.trainer][INFO] - Batch size = 4
242
+ [2025-07-12 21:04:53,000][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-96
243
+ [2025-07-12 21:04:53,360][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
244
+ [2025-07-12 21:04:53,361][transformers.configuration_utils][INFO] - Model config LlamaConfig {
245
+ "architectures": [
246
+ "LlamaForCausalLM"
247
+ ],
248
+ "attention_bias": false,
249
+ "attention_dropout": 0.0,
250
+ "bos_token_id": 1,
251
+ "eos_token_id": 2,
252
+ "head_dim": 160,
253
+ "hidden_act": "silu",
254
+ "hidden_size": 2560,
255
+ "initializer_range": 0.02,
256
+ "intermediate_size": 10240,
257
+ "max_position_embeddings": 4096,
258
+ "mlp_bias": false,
259
+ "model_type": "llama",
260
+ "num_attention_heads": 16,
261
+ "num_hidden_layers": 24,
262
+ "num_key_value_heads": 4,
263
+ "pad_token_id": 3,
264
+ "pretraining_tp": 1,
265
+ "rms_norm_eps": 1e-05,
266
+ "rope_scaling": null,
267
+ "rope_theta": 10000.0,
268
+ "tie_word_embeddings": false,
269
+ "torch_dtype": "float32",
270
+ "transformers_version": "4.53.2",
271
+ "use_cache": false,
272
+ "vocab_size": 32002
273
+ }
274
+
275
+ [2025-07-12 21:04:53,582][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-64] due to args.save_total_limit
276
+ [2025-07-12 21:06:55,791][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference. If grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
277
+ [2025-07-12 21:06:55,794][transformers.trainer][INFO] -
278
+ ***** Running Evaluation *****
279
+ [2025-07-12 21:06:55,794][transformers.trainer][INFO] - Num examples = 132
280
+ [2025-07-12 21:06:55,794][transformers.trainer][INFO] - Batch size = 4
281
+ [2025-07-12 21:07:05,175][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-128
282
+ [2025-07-12 21:07:05,551][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
283
+ [2025-07-12 21:07:05,552][transformers.configuration_utils][INFO] - Model config LlamaConfig {
284
+ "architectures": [
285
+ "LlamaForCausalLM"
286
+ ],
287
+ "attention_bias": false,
288
+ "attention_dropout": 0.0,
289
+ "bos_token_id": 1,
290
+ "eos_token_id": 2,
291
+ "head_dim": 160,
292
+ "hidden_act": "silu",
293
+ "hidden_size": 2560,
294
+ "initializer_range": 0.02,
295
+ "intermediate_size": 10240,
296
+ "max_position_embeddings": 4096,
297
+ "mlp_bias": false,
298
+ "model_type": "llama",
299
+ "num_attention_heads": 16,
300
+ "num_hidden_layers": 24,
301
+ "num_key_value_heads": 4,
302
+ "pad_token_id": 3,
303
+ "pretraining_tp": 1,
304
+ "rms_norm_eps": 1e-05,
305
+ "rope_scaling": null,
306
+ "rope_theta": 10000.0,
307
+ "tie_word_embeddings": false,
308
+ "torch_dtype": "float32",
309
+ "transformers_version": "4.53.2",
310
+ "use_cache": false,
311
+ "vocab_size": 32002
312
+ }
313
+
314
+ [2025-07-12 21:07:05,782][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-96] due to args.save_total_limit
315
+ [2025-07-12 21:09:08,127][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference. If grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
316
+ [2025-07-12 21:09:08,131][transformers.trainer][INFO] -
317
+ ***** Running Evaluation *****
318
+ [2025-07-12 21:09:08,131][transformers.trainer][INFO] - Num examples = 132
319
+ [2025-07-12 21:09:08,131][transformers.trainer][INFO] - Batch size = 4
320
+ [2025-07-12 21:09:17,505][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-160
321
+ [2025-07-12 21:09:17,867][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
322
+ [2025-07-12 21:09:17,868][transformers.configuration_utils][INFO] - Model config LlamaConfig {
323
+ "architectures": [
324
+ "LlamaForCausalLM"
325
+ ],
326
+ "attention_bias": false,
327
+ "attention_dropout": 0.0,
328
+ "bos_token_id": 1,
329
+ "eos_token_id": 2,
330
+ "head_dim": 160,
331
+ "hidden_act": "silu",
332
+ "hidden_size": 2560,
333
+ "initializer_range": 0.02,
334
+ "intermediate_size": 10240,
335
+ "max_position_embeddings": 4096,
336
+ "mlp_bias": false,
337
+ "model_type": "llama",
338
+ "num_attention_heads": 16,
339
+ "num_hidden_layers": 24,
340
+ "num_key_value_heads": 4,
341
+ "pad_token_id": 3,
342
+ "pretraining_tp": 1,
343
+ "rms_norm_eps": 1e-05,
344
+ "rope_scaling": null,
345
+ "rope_theta": 10000.0,
346
+ "tie_word_embeddings": false,
347
+ "torch_dtype": "float32",
348
+ "transformers_version": "4.53.2",
349
+ "use_cache": false,
350
+ "vocab_size": 32002
351
+ }
352
+
353
+ [2025-07-12 21:09:18,082][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-128] due to args.save_total_limit
354
+ [2025-07-12 21:11:20,306][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference. If grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
355
+ [2025-07-12 21:11:20,310][transformers.trainer][INFO] -
356
+ ***** Running Evaluation *****
357
+ [2025-07-12 21:11:20,310][transformers.trainer][INFO] - Num examples = 132
358
+ [2025-07-12 21:11:20,310][transformers.trainer][INFO] - Batch size = 4
359
+ [2025-07-12 21:11:29,717][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-192
360
+ [2025-07-12 21:11:30,082][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
361
+ [2025-07-12 21:11:30,082][transformers.configuration_utils][INFO] - Model config LlamaConfig {
362
+ "architectures": [
363
+ "LlamaForCausalLM"
364
+ ],
365
+ "attention_bias": false,
366
+ "attention_dropout": 0.0,
367
+ "bos_token_id": 1,
368
+ "eos_token_id": 2,
369
+ "head_dim": 160,
370
+ "hidden_act": "silu",
371
+ "hidden_size": 2560,
372
+ "initializer_range": 0.02,
373
+ "intermediate_size": 10240,
374
+ "max_position_embeddings": 4096,
375
+ "mlp_bias": false,
376
+ "model_type": "llama",
377
+ "num_attention_heads": 16,
378
+ "num_hidden_layers": 24,
379
+ "num_key_value_heads": 4,
380
+ "pad_token_id": 3,
381
+ "pretraining_tp": 1,
382
+ "rms_norm_eps": 1e-05,
383
+ "rope_scaling": null,
384
+ "rope_theta": 10000.0,
385
+ "tie_word_embeddings": false,
386
+ "torch_dtype": "float32",
387
+ "transformers_version": "4.53.2",
388
+ "use_cache": false,
389
+ "vocab_size": 32002
390
+ }
391
+
392
+ [2025-07-12 21:11:30,290][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-160] due to args.save_total_limit
393
+ [2025-07-12 21:13:32,503][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference. If grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
394
+ [2025-07-12 21:13:32,506][transformers.trainer][INFO] -
395
+ ***** Running Evaluation *****
396
+ [2025-07-12 21:13:32,506][transformers.trainer][INFO] - Num examples = 132
397
+ [2025-07-12 21:13:32,506][transformers.trainer][INFO] - Batch size = 4
398
+ [2025-07-12 21:13:41,882][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-224
399
+ [2025-07-12 21:13:42,259][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
400
+ [2025-07-12 21:13:42,259][transformers.configuration_utils][INFO] - Model config LlamaConfig {
401
+ "architectures": [
402
+ "LlamaForCausalLM"
403
+ ],
404
+ "attention_bias": false,
405
+ "attention_dropout": 0.0,
406
+ "bos_token_id": 1,
407
+ "eos_token_id": 2,
408
+ "head_dim": 160,
409
+ "hidden_act": "silu",
410
+ "hidden_size": 2560,
411
+ "initializer_range": 0.02,
412
+ "intermediate_size": 10240,
413
+ "max_position_embeddings": 4096,
414
+ "mlp_bias": false,
415
+ "model_type": "llama",
416
+ "num_attention_heads": 16,
417
+ "num_hidden_layers": 24,
418
+ "num_key_value_heads": 4,
419
+ "pad_token_id": 3,
420
+ "pretraining_tp": 1,
421
+ "rms_norm_eps": 1e-05,
422
+ "rope_scaling": null,
423
+ "rope_theta": 10000.0,
424
+ "tie_word_embeddings": false,
425
+ "torch_dtype": "float32",
426
+ "transformers_version": "4.53.2",
427
+ "use_cache": false,
428
+ "vocab_size": 32002
429
+ }
430
+
431
+ [2025-07-12 21:15:44,697][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference. If grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
432
+ [2025-07-12 21:15:44,700][transformers.trainer][INFO] -
433
+ ***** Running Evaluation *****
434
+ [2025-07-12 21:15:44,700][transformers.trainer][INFO] - Num examples = 132
435
+ [2025-07-12 21:15:44,700][transformers.trainer][INFO] - Batch size = 4
436
+ [2025-07-12 21:15:54,076][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-256
437
+ [2025-07-12 21:15:54,469][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
438
+ [2025-07-12 21:15:54,470][transformers.configuration_utils][INFO] - Model config LlamaConfig {
439
+ "architectures": [
440
+ "LlamaForCausalLM"
441
+ ],
442
+ "attention_bias": false,
443
+ "attention_dropout": 0.0,
444
+ "bos_token_id": 1,
445
+ "eos_token_id": 2,
446
+ "head_dim": 160,
447
+ "hidden_act": "silu",
448
+ "hidden_size": 2560,
449
+ "initializer_range": 0.02,
450
+ "intermediate_size": 10240,
451
+ "max_position_embeddings": 4096,
452
+ "mlp_bias": false,
453
+ "model_type": "llama",
454
+ "num_attention_heads": 16,
455
+ "num_hidden_layers": 24,
456
+ "num_key_value_heads": 4,
457
+ "pad_token_id": 3,
458
+ "pretraining_tp": 1,
459
+ "rms_norm_eps": 1e-05,
460
+ "rope_scaling": null,
461
+ "rope_theta": 10000.0,
462
+ "tie_word_embeddings": false,
463
+ "torch_dtype": "float32",
464
+ "transformers_version": "4.53.2",
465
+ "use_cache": false,
466
+ "vocab_size": 32002
467
+ }
468
+
469
+ [2025-07-12 21:15:54,684][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-224] due to args.save_total_limit
470
+ [2025-07-12 21:17:56,860][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference. If grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
471
+ [2025-07-12 21:17:56,863][transformers.trainer][INFO] -
472
+ ***** Running Evaluation *****
473
+ [2025-07-12 21:17:56,863][transformers.trainer][INFO] - Num examples = 132
474
+ [2025-07-12 21:17:56,863][transformers.trainer][INFO] - Batch size = 4
475
+ [2025-07-12 21:18:06,254][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-288
476
+ [2025-07-12 21:18:06,608][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
477
+ [2025-07-12 21:18:06,608][transformers.configuration_utils][INFO] - Model config LlamaConfig {
478
+ "architectures": [
479
+ "LlamaForCausalLM"
480
+ ],
481
+ "attention_bias": false,
482
+ "attention_dropout": 0.0,
483
+ "bos_token_id": 1,
484
+ "eos_token_id": 2,
485
+ "head_dim": 160,
486
+ "hidden_act": "silu",
487
+ "hidden_size": 2560,
488
+ "initializer_range": 0.02,
489
+ "intermediate_size": 10240,
490
+ "max_position_embeddings": 4096,
491
+ "mlp_bias": false,
492
+ "model_type": "llama",
493
+ "num_attention_heads": 16,
494
+ "num_hidden_layers": 24,
495
+ "num_key_value_heads": 4,
496
+ "pad_token_id": 3,
497
+ "pretraining_tp": 1,
498
+ "rms_norm_eps": 1e-05,
499
+ "rope_scaling": null,
500
+ "rope_theta": 10000.0,
501
+ "tie_word_embeddings": false,
502
+ "torch_dtype": "float32",
503
+ "transformers_version": "4.53.2",
504
+ "use_cache": false,
505
+ "vocab_size": 32002
506
+ }
507
+
508
+ [2025-07-12 21:18:06,785][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-256] due to args.save_total_limit
509
+ [2025-07-12 21:20:08,982][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference. If grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
510
+ [2025-07-12 21:20:08,985][transformers.trainer][INFO] -
511
+ ***** Running Evaluation *****
512
+ [2025-07-12 21:20:08,986][transformers.trainer][INFO] - Num examples = 132
513
+ [2025-07-12 21:20:08,986][transformers.trainer][INFO] - Batch size = 4
514
+ [2025-07-12 21:20:18,363][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-320
515
+ [2025-07-12 21:20:18,875][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
516
+ [2025-07-12 21:20:18,876][transformers.configuration_utils][INFO] - Model config LlamaConfig {
517
+ "architectures": [
518
+ "LlamaForCausalLM"
519
+ ],
520
+ "attention_bias": false,
521
+ "attention_dropout": 0.0,
522
+ "bos_token_id": 1,
523
+ "eos_token_id": 2,
524
+ "head_dim": 160,
525
+ "hidden_act": "silu",
526
+ "hidden_size": 2560,
527
+ "initializer_range": 0.02,
528
+ "intermediate_size": 10240,
529
+ "max_position_embeddings": 4096,
530
+ "mlp_bias": false,
531
+ "model_type": "llama",
532
+ "num_attention_heads": 16,
533
+ "num_hidden_layers": 24,
534
+ "num_key_value_heads": 4,
535
+ "pad_token_id": 3,
536
+ "pretraining_tp": 1,
537
+ "rms_norm_eps": 1e-05,
538
+ "rope_scaling": null,
539
+ "rope_theta": 10000.0,
540
+ "tie_word_embeddings": false,
541
+ "torch_dtype": "float32",
542
+ "transformers_version": "4.53.2",
543
+ "use_cache": false,
544
+ "vocab_size": 32002
545
+ }
546
+
547
+ [2025-07-12 21:20:19,088][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-192] due to args.save_total_limit
548
+ [2025-07-12 21:20:19,095][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-288] due to args.save_total_limit
549
+ [2025-07-12 21:22:21,320][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference. If grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
550
+ [2025-07-12 21:22:21,324][transformers.trainer][INFO] -
551
+ ***** Running Evaluation *****
552
+ [2025-07-12 21:22:21,324][transformers.trainer][INFO] - Num examples = 132
553
+ [2025-07-12 21:22:21,324][transformers.trainer][INFO] - Batch size = 4
554
+ [2025-07-12 21:22:30,713][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-352
555
+ [2025-07-12 21:22:31,067][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
556
+ [2025-07-12 21:22:31,068][transformers.configuration_utils][INFO] - Model config LlamaConfig {
557
+ "architectures": [
558
+ "LlamaForCausalLM"
559
+ ],
560
+ "attention_bias": false,
561
+ "attention_dropout": 0.0,
562
+ "bos_token_id": 1,
563
+ "eos_token_id": 2,
564
+ "head_dim": 160,
565
+ "hidden_act": "silu",
566
+ "hidden_size": 2560,
567
+ "initializer_range": 0.02,
568
+ "intermediate_size": 10240,
569
+ "max_position_embeddings": 4096,
570
+ "mlp_bias": false,
571
+ "model_type": "llama",
572
+ "num_attention_heads": 16,
573
+ "num_hidden_layers": 24,
574
+ "num_key_value_heads": 4,
575
+ "pad_token_id": 3,
576
+ "pretraining_tp": 1,
577
+ "rms_norm_eps": 1e-05,
578
+ "rope_scaling": null,
579
+ "rope_theta": 10000.0,
580
+ "tie_word_embeddings": false,
581
+ "torch_dtype": "float32",
582
+ "transformers_version": "4.53.2",
583
+ "use_cache": false,
584
+ "vocab_size": 32002
585
+ }
586
+
587
+ [2025-07-12 21:22:31,296][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-320] due to args.save_total_limit
588
+ [2025-07-12 21:24:33,508][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference. If grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
589
+ [2025-07-12 21:24:33,511][transformers.trainer][INFO] -
590
+ ***** Running Evaluation *****
591
+ [2025-07-12 21:24:33,511][transformers.trainer][INFO] - Num examples = 132
592
+ [2025-07-12 21:24:33,511][transformers.trainer][INFO] - Batch size = 4
593
+ [2025-07-12 21:24:42,878][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-384
594
+ [2025-07-12 21:24:43,250][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
595
+ [2025-07-12 21:24:43,250][transformers.configuration_utils][INFO] - Model config LlamaConfig {
596
+ "architectures": [
597
+ "LlamaForCausalLM"
598
+ ],
599
+ "attention_bias": false,
600
+ "attention_dropout": 0.0,
601
+ "bos_token_id": 1,
602
+ "eos_token_id": 2,
603
+ "head_dim": 160,
604
+ "hidden_act": "silu",
605
+ "hidden_size": 2560,
606
+ "initializer_range": 0.02,
607
+ "intermediate_size": 10240,
608
+ "max_position_embeddings": 4096,
609
+ "mlp_bias": false,
610
+ "model_type": "llama",
611
+ "num_attention_heads": 16,
612
+ "num_hidden_layers": 24,
613
+ "num_key_value_heads": 4,
614
+ "pad_token_id": 3,
615
+ "pretraining_tp": 1,
616
+ "rms_norm_eps": 1e-05,
617
+ "rope_scaling": null,
618
+ "rope_theta": 10000.0,
619
+ "tie_word_embeddings": false,
620
+ "torch_dtype": "float32",
621
+ "transformers_version": "4.53.2",
622
+ "use_cache": false,
623
+ "vocab_size": 32002
624
+ }
625
+
626
+ [2025-07-12 21:26:45,668][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference. If grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
627
+ [2025-07-12 21:26:45,671][transformers.trainer][INFO] -
628
+ ***** Running Evaluation *****
629
+ [2025-07-12 21:26:45,671][transformers.trainer][INFO] - Num examples = 132
630
+ [2025-07-12 21:26:45,671][transformers.trainer][INFO] - Batch size = 4
631
+ [2025-07-12 21:26:55,038][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-416
632
+ [2025-07-12 21:26:55,404][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
633
+ [2025-07-12 21:26:55,405][transformers.configuration_utils][INFO] - Model config LlamaConfig {
634
+ "architectures": [
635
+ "LlamaForCausalLM"
636
+ ],
637
+ "attention_bias": false,
638
+ "attention_dropout": 0.0,
639
+ "bos_token_id": 1,
640
+ "eos_token_id": 2,
641
+ "head_dim": 160,
642
+ "hidden_act": "silu",
643
+ "hidden_size": 2560,
644
+ "initializer_range": 0.02,
645
+ "intermediate_size": 10240,
646
+ "max_position_embeddings": 4096,
647
+ "mlp_bias": false,
648
+ "model_type": "llama",
649
+ "num_attention_heads": 16,
650
+ "num_hidden_layers": 24,
651
+ "num_key_value_heads": 4,
652
+ "pad_token_id": 3,
653
+ "pretraining_tp": 1,
654
+ "rms_norm_eps": 1e-05,
655
+ "rope_scaling": null,
656
+ "rope_theta": 10000.0,
657
+ "tie_word_embeddings": false,
658
+ "torch_dtype": "float32",
659
+ "transformers_version": "4.53.2",
660
+ "use_cache": false,
661
+ "vocab_size": 32002
662
+ }
663
+
664
+ [2025-07-12 21:26:55,586][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-384] due to args.save_total_limit
665
+ [2025-07-12 21:28:57,774][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference. If grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
666
+ [2025-07-12 21:28:57,777][transformers.trainer][INFO] -
667
+ ***** Running Evaluation *****
668
+ [2025-07-12 21:28:57,777][transformers.trainer][INFO] - Num examples = 132
669
+ [2025-07-12 21:28:57,777][transformers.trainer][INFO] - Batch size = 4
670
+ [2025-07-12 21:29:07,148][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-448
671
+ [2025-07-12 21:29:07,543][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
672
+ [2025-07-12 21:29:07,544][transformers.configuration_utils][INFO] - Model config LlamaConfig {
673
+ "architectures": [
674
+ "LlamaForCausalLM"
675
+ ],
676
+ "attention_bias": false,
677
+ "attention_dropout": 0.0,
678
+ "bos_token_id": 1,
679
+ "eos_token_id": 2,
680
+ "head_dim": 160,
681
+ "hidden_act": "silu",
682
+ "hidden_size": 2560,
683
+ "initializer_range": 0.02,
684
+ "intermediate_size": 10240,
685
+ "max_position_embeddings": 4096,
686
+ "mlp_bias": false,
687
+ "model_type": "llama",
688
+ "num_attention_heads": 16,
689
+ "num_hidden_layers": 24,
690
+ "num_key_value_heads": 4,
691
+ "pad_token_id": 3,
692
+ "pretraining_tp": 1,
693
+ "rms_norm_eps": 1e-05,
694
+ "rope_scaling": null,
695
+ "rope_theta": 10000.0,
696
+ "tie_word_embeddings": false,
697
+ "torch_dtype": "float32",
698
+ "transformers_version": "4.53.2",
699
+ "use_cache": false,
700
+ "vocab_size": 32002
701
+ }
702
+
703
+ [2025-07-12 21:29:07,700][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-416] due to args.save_total_limit
704
+ [2025-07-12 21:31:09,872][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference. If grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
705
+ [2025-07-12 21:31:09,875][transformers.trainer][INFO] -
706
+ ***** Running Evaluation *****
707
+ [2025-07-12 21:31:09,875][transformers.trainer][INFO] - Num examples = 132
708
+ [2025-07-12 21:31:09,876][transformers.trainer][INFO] - Batch size = 4
709
+ [2025-07-12 21:31:19,247][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-480
710
+ [2025-07-12 21:31:19,601][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
711
+ [2025-07-12 21:31:19,602][transformers.configuration_utils][INFO] - Model config LlamaConfig {
712
+ "architectures": [
713
+ "LlamaForCausalLM"
714
+ ],
715
+ "attention_bias": false,
716
+ "attention_dropout": 0.0,
717
+ "bos_token_id": 1,
718
+ "eos_token_id": 2,
719
+ "head_dim": 160,
720
+ "hidden_act": "silu",
721
+ "hidden_size": 2560,
722
+ "initializer_range": 0.02,
723
+ "intermediate_size": 10240,
724
+ "max_position_embeddings": 4096,
725
+ "mlp_bias": false,
726
+ "model_type": "llama",
727
+ "num_attention_heads": 16,
728
+ "num_hidden_layers": 24,
729
+ "num_key_value_heads": 4,
730
+ "pad_token_id": 3,
731
+ "pretraining_tp": 1,
732
+ "rms_norm_eps": 1e-05,
733
+ "rope_scaling": null,
734
+ "rope_theta": 10000.0,
735
+ "tie_word_embeddings": false,
736
+ "torch_dtype": "float32",
737
+ "transformers_version": "4.53.2",
738
+ "use_cache": false,
739
+ "vocab_size": 32002
740
+ }
741
+
742
+ [2025-07-12 21:31:19,791][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-448] due to args.save_total_limit
743
+ [2025-07-12 21:33:22,017][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference. If grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
744
+ [2025-07-12 21:33:22,020][transformers.trainer][INFO] -
745
+ ***** Running Evaluation *****
746
+ [2025-07-12 21:33:22,020][transformers.trainer][INFO] - Num examples = 132
747
+ [2025-07-12 21:33:22,020][transformers.trainer][INFO] - Batch size = 4
748
+ [2025-07-12 21:33:31,389][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-512
749
+ [2025-07-12 21:33:31,748][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
750
+ [2025-07-12 21:33:31,748][transformers.configuration_utils][INFO] - Model config LlamaConfig {
751
+ "architectures": [
752
+ "LlamaForCausalLM"
753
+ ],
754
+ "attention_bias": false,
755
+ "attention_dropout": 0.0,
756
+ "bos_token_id": 1,
757
+ "eos_token_id": 2,
758
+ "head_dim": 160,
759
+ "hidden_act": "silu",
760
+ "hidden_size": 2560,
761
+ "initializer_range": 0.02,
762
+ "intermediate_size": 10240,
763
+ "max_position_embeddings": 4096,
764
+ "mlp_bias": false,
765
+ "model_type": "llama",
766
+ "num_attention_heads": 16,
767
+ "num_hidden_layers": 24,
768
+ "num_key_value_heads": 4,
769
+ "pad_token_id": 3,
770
+ "pretraining_tp": 1,
771
+ "rms_norm_eps": 1e-05,
772
+ "rope_scaling": null,
773
+ "rope_theta": 10000.0,
774
+ "tie_word_embeddings": false,
775
+ "torch_dtype": "float32",
776
+ "transformers_version": "4.53.2",
777
+ "use_cache": false,
778
+ "vocab_size": 32002
779
+ }
780
+
781
+ [2025-07-12 21:33:31,981][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-480] due to args.save_total_limit
782
+ [2025-07-12 21:33:31,990][transformers.trainer][INFO] -
783
+
784
+ Training completed. Do not forget to share your model on huggingface.co/models =)
785
+
786
+
787
+ [2025-07-12 21:33:31,990][transformers.trainer][INFO] - Loading best model from /workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-352 (score: 0.49125475285171105).
788
+ [2025-07-12 21:33:32,079][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-512] due to args.save_total_limit
789
+ [2025-07-12 21:33:32,089][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference. If grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
790
+ [2025-07-12 21:33:32,093][transformers.trainer][INFO] -
791
+ ***** Running Evaluation *****
792
+ [2025-07-12 21:33:32,093][transformers.trainer][INFO] - Num examples = 132
793
+ [2025-07-12 21:33:32,093][transformers.trainer][INFO] - Batch size = 4
794
+ [2025-07-12 21:33:41,455][__main__][INFO] - Training completed successfully.
795
+ [2025-07-12 21:33:41,455][__main__][INFO] - Running on Test
796
+ [2025-07-12 21:33:41,455][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference. If grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
797
+ [2025-07-12 21:33:41,458][transformers.trainer][INFO] -
798
+ ***** Running Evaluation *****
799
+ [2025-07-12 21:33:41,458][transformers.trainer][INFO] - Num examples = 138
800
+ [2025-07-12 21:33:41,458][transformers.trainer][INFO] - Batch size = 4
801
+ [2025-07-12 21:33:51,406][__main__][INFO] - Test metrics: {'eval_loss': 1.835325837135315, 'eval_model_preparation_time': 0.0094, 'eval_accuracy': 0.34057971014492755, 'eval_RMSE': 68.10052246069989, 'eval_QWK': 0.2794486215538846, 'eval_HDIV': 0.13043478260869568, 'eval_Macro_F1': 0.21200289447400067, 'eval_Micro_F1': 0.34057971014492755, 'eval_Weighted_F1': 0.32981396845147415, 'eval_TP_0': 0, 'eval_TN_0': 137, 'eval_FP_0': 0, 'eval_FN_0': 1, 'eval_TP_1': 15, 'eval_TN_1': 74, 'eval_FP_1': 29, 'eval_FN_1': 20, 'eval_TP_2': 0, 'eval_TN_2': 127, 'eval_FP_2': 6, 'eval_FN_2': 5, 'eval_TP_3': 22, 'eval_TN_3': 61, 'eval_FP_3': 26, 'eval_FN_3': 29, 'eval_TP_4': 2, 'eval_TN_4': 100, 'eval_FP_4': 12, 'eval_FN_4': 24, 'eval_TP_5': 8, 'eval_TN_5': 100, 'eval_FP_5': 18, 'eval_FN_5': 12, 'eval_runtime': 9.9378, 'eval_samples_per_second': 13.886, 'eval_steps_per_second': 3.522, 'epoch': 16.0}
802
+ [2025-07-12 21:33:51,407][transformers.trainer][INFO] - Saving model checkpoint to ./results/best_model
803
+ [2025-07-12 21:33:51,760][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
804
+ [2025-07-12 21:33:51,760][transformers.configuration_utils][INFO] - Model config LlamaConfig {
805
+ "architectures": [
806
+ "LlamaForCausalLM"
807
+ ],
808
+ "attention_bias": false,
809
+ "attention_dropout": 0.0,
810
+ "bos_token_id": 1,
811
+ "eos_token_id": 2,
812
+ "head_dim": 160,
813
+ "hidden_act": "silu",
814
+ "hidden_size": 2560,
815
+ "initializer_range": 0.02,
816
+ "intermediate_size": 10240,
817
+ "max_position_embeddings": 4096,
818
+ "mlp_bias": false,
819
+ "model_type": "llama",
820
+ "num_attention_heads": 16,
821
+ "num_hidden_layers": 24,
822
+ "num_key_value_heads": 4,
823
+ "pad_token_id": 3,
824
+ "pretraining_tp": 1,
825
+ "rms_norm_eps": 1e-05,
826
+ "rope_scaling": null,
827
+ "rope_theta": 10000.0,
828
+ "tie_word_embeddings": false,
829
+ "torch_dtype": "float32",
830
+ "transformers_version": "4.53.2",
831
+ "use_cache": false,
832
+ "vocab_size": 32002
833
+ }
834
+
835
+ [2025-07-12 21:33:51,879][transformers.tokenization_utils_base][INFO] - chat template saved in ./results/best_model/chat_template.jinja
836
+ [2025-07-12 21:33:51,880][transformers.tokenization_utils_base][INFO] - tokenizer config file saved in ./results/best_model/tokenizer_config.json
837
+ [2025-07-12 21:33:51,880][transformers.tokenization_utils_base][INFO] - Special tokens file saved in ./results/best_model/special_tokens_map.json
838
+ [2025-07-12 21:33:51,894][__main__][INFO] - Model and tokenizer saved to ./results/best_model
839
+ [2025-07-12 21:33:51,897][__main__][INFO] - Fine Tuning Finished.
840
+ [2025-07-12 21:33:52,406][__main__][INFO] - Total emissions: 0.2185 kg CO2eq
special_tokens_map.json ADDED
@@ -0,0 +1,28 @@
+ {
+   "additional_special_tokens": [
+     "<instruction>",
+     "</instruction>"
+   ],
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": "<|finetune_right_pad_id|>",
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,75 @@
+ {
+   "add_bos_token": false,
+   "add_eos_token": false,
+   "add_prefix_space": null,
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "3": {
+       "content": "<pad>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "32000": {
+       "content": "<instruction>",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "32001": {
+       "content": "</instruction>",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "additional_special_tokens": [
+     "<instruction>",
+     "</instruction>"
+   ],
+   "bos_token": "<s>",
+   "bos_token_id": 1,
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "</s>",
+   "eos_token_id": 2,
+   "extra_special_tokens": {},
+   "legacy": false,
+   "model_max_length": 4096,
+   "pad_token": "<|finetune_right_pad_id|>",
+   "pad_token_id": 0,
+   "padding_side": "right",
+   "sp_model_kwargs": {},
+   "tokenizer_class": "LlamaTokenizerFast",
+   "unk_token": "<unk>",
+   "unk_token_id": 0,
+   "use_default_system_prompt": false
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8f03cf58751278993265b02ab20b716edf7560b4c55c2b4382d317dd90ba1f7f
+ size 5777