Pushing fine-tuned model to Hugging Face Hub
- README.md +48 -0
- adapter_config.json +44 -0
- adapter_model.safetensors +3 -0
- chat_template.jinja +1 -0
- emissions.csv +2 -0
- evaluation_results.csv +4 -0
- run_experiment.log +840 -0
- special_tokens_map.json +28 -0
- tokenizer.json +0 -0
- tokenizer_config.json +75 -0
- training_args.bin +3 -0
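
As a point of reference, a commit like this one can be produced with the `huggingface_hub` client. The sketch below is illustrative only; the local folder path and target repo id are placeholders, not values taken from this commit.

```python
# Illustrative sketch: upload a fine-tuned adapter folder to the Hub as a single commit.
# folder_path and repo_id are hypothetical placeholders.
from huggingface_hub import HfApi

api = HfApi()
api.upload_folder(
    folder_path="./results/best_model",
    repo_id="<user>/Tucano-2b4-Instruct-tucano_classification_lora-C2-full_context-r8",
    repo_type="model",
    commit_message="Pushing fine-tuned model to Hugging Face Hub",
)
```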
README.md
ADDED
@@ -0,0 +1,48 @@
---
language:
- pt
- en
tags:
- aes
datasets:
- kamel-usp/aes_enem_dataset
base_model: TucanoBR/Tucano-2b4-Instruct
metrics:
- accuracy
- qwk
library_name: peft
model-index:
- name: Tucano-2b4-Instruct-tucano_classification_lora-C2-full_context-r8
  results:
  - task:
      type: text-classification
      name: Automated Essay Score
    dataset:
      name: Automated Essay Score ENEM Dataset
      type: kamel-usp/aes_enem_dataset
      config: JBCS2025
      split: test
    metrics:
    - name: Macro F1
      type: f1
      value: 0.2120028944740006
    - name: QWK
      type: qwk
      value: 0.2794486215538846
    - name: Weighted Macro F1
      type: f1
      value: 0.3298139684514741
---

# Model ID: Tucano-2b4-Instruct-tucano_classification_lora-C2-full_context-r8

## Results

|                  |   test_data |
|:-----------------|------------:|
| eval_accuracy    |    0.34058  |
| eval_RMSE        |   68.1005   |
| eval_QWK         |    0.279449 |
| eval_Macro_F1    |    0.212003 |
| eval_Weighted_F1 |    0.329814 |
| eval_Micro_F1    |    0.34058  |
| eval_HDIV        |    0.130435 |
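
For reference, a minimal inference sketch for an adapter with this card; the adapter repo id below is a placeholder, and the base model and `num_labels=6` come from the training log further down.

```python
# Minimal sketch: load the LoRA adapter on top of the base model for essay scoring.
# adapter_id is a hypothetical placeholder; replace it with the actual Hub repo id.
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from peft import PeftModel

base_id = "TucanoBR/Tucano-2b4-Instruct"
adapter_id = "<user>/Tucano-2b4-Instruct-tucano_classification_lora-C2-full_context-r8"

tokenizer = AutoTokenizer.from_pretrained(base_id)
base = AutoModelForSequenceClassification.from_pretrained(
    base_id, num_labels=6, torch_dtype=torch.bfloat16
)
model = PeftModel.from_pretrained(base, adapter_id)
model.eval()

text = "<instruction>...prompt, supporting text and essay...</instruction>"
inputs = tokenizer(text, return_tensors="pt")
with torch.no_grad():
    predicted_bin = model(**inputs).logits.argmax(dim=-1).item()  # class index 0-5
print(predicted_bin)
```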
adapter_config.json
ADDED
@@ -0,0 +1,44 @@
{
  "alpha_pattern": {},
  "auto_mapping": null,
  "base_model_name_or_path": "TucanoBR/Tucano-2b4-Instruct",
  "bias": "none",
  "corda_config": null,
  "eva_config": null,
  "exclude_modules": null,
  "fan_in_fan_out": false,
  "inference_mode": true,
  "init_lora_weights": true,
  "layer_replication": null,
  "layers_pattern": null,
  "layers_to_transform": null,
  "loftq_config": {},
  "lora_alpha": 16,
  "lora_bias": false,
  "lora_dropout": 0.05,
  "megatron_config": null,
  "megatron_core": "megatron.core",
  "modules_to_save": [
    "classifier",
    "score"
  ],
  "peft_type": "LORA",
  "qalora_group_size": 16,
  "r": 8,
  "rank_pattern": {},
  "revision": null,
  "target_modules": [
    "up_proj",
    "v_proj",
    "gate_proj",
    "k_proj",
    "q_proj",
    "o_proj",
    "down_proj"
  ],
  "task_type": "SEQ_CLS",
  "trainable_token_indices": null,
  "use_dora": false,
  "use_qalora": false,
  "use_rslora": false
}
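
Read back as a `peft` config, the JSON above corresponds roughly to the following `LoraConfig`. This is a sketch for illustration, not the training code; per the run log, the target modules were selected with `lora_target_modules: all-linear`, which expands to the projection layers listed here.

```python
# Sketch of a LoraConfig matching adapter_config.json above.
from peft import LoraConfig, TaskType

lora_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,   # sequence-classification head
    r=8,                          # LoRA rank
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    modules_to_save=["classifier", "score"],  # keep the new classification head trainable
)
```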
adapter_model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:bc4961d22bbe3616d2e829c51c853506324e3dba713db233d95b69f05064b6ef
size 42376520
chat_template.jinja
ADDED
@@ -0,0 +1 @@
{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '<instruction>' + message['content'].strip() + '</instruction>'}}{% elif message['role'] == 'assistant' %}{{ message['content'].strip() + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}
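
The single-line Jinja template above wraps user turns in `<instruction>` tags and appends `eos_token` after assistant turns. A short usage sketch follows; the message content is a placeholder.

```python
# Sketch: render a conversation with the chat template above.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("TucanoBR/Tucano-2b4-Instruct")
messages = [{"role": "user", "content": "Corrija a redação a seguir..."}]
text = tokenizer.apply_chat_template(messages, tokenize=False)
print(text)  # "<instruction>Corrija a redação a seguir...</instruction>"
```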
emissions.csv
ADDED
@@ -0,0 +1,2 @@
timestamp,project_name,run_id,experiment_id,duration,emissions,emissions_rate,cpu_power,gpu_power,ram_power,cpu_energy,gpu_energy,ram_energy,energy_consumed,country_name,country_iso_code,region,cloud_provider,cloud_region,os,python_version,codecarbon_version,cpu_count,cpu_model,gpu_count,gpu_model,longitude,latitude,ram_total_size,tracking_mode,on_cloud,pue
2025-07-12T21:33:52,jbcs2025,deab63e8-20ab-426e-a5ad-6e5c82cd6e46,Tucano-2b4-Instruct-tucano_classification_lora-C2-full_context-r8,2158.6262983139604,0.21850978474264723,0.00010122631458410323,66.0,589.0909003578051,70.0,0.0390071097300183,0.36989872230765286,0.0412659564191786,0.4501717884568498,Japan,JPN,,,,Linux-5.15.0-130-generic-x86_64-with-glibc2.35,3.12.11,3.0.2,192,INTEL(R) XEON(R) PLATINUM 8558,1,1 x NVIDIA H200,139.69,35.69,2015.3516235351562,machine,N,1.0
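
The CSV above is CodeCarbon output for this run (about 2159 s, roughly 0.22 kg CO2eq and 0.45 kWh on a single H200). A minimal sketch of how such a file is produced around a training run; the `train()` call is a placeholder.

```python
# Sketch: track emissions for a training run with CodeCarbon.
from codecarbon import EmissionsTracker

tracker = EmissionsTracker(project_name="jbcs2025", output_dir=".")
tracker.start()
try:
    train()  # placeholder for the fine-tuning loop
finally:
    emissions_kg = tracker.stop()  # writes/appends a row to emissions.csv
print(f"Estimated emissions: {emissions_kg:.3f} kg CO2eq")
```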
evaluation_results.csv
ADDED
@@ -0,0 +1,4 @@
eval_loss,eval_model_preparation_time,eval_accuracy,eval_RMSE,eval_QWK,eval_HDIV,eval_Macro_F1,eval_Micro_F1,eval_Weighted_F1,eval_TP_0,eval_TN_0,eval_FP_0,eval_FN_0,eval_TP_1,eval_TN_1,eval_FP_1,eval_FN_1,eval_TP_2,eval_TN_2,eval_FP_2,eval_FN_2,eval_TP_3,eval_TN_3,eval_FP_3,eval_FN_3,eval_TP_4,eval_TN_4,eval_FP_4,eval_FN_4,eval_TP_5,eval_TN_5,eval_FP_5,eval_FN_5,eval_runtime,eval_samples_per_second,eval_steps_per_second,epoch,reference,timestamp,id
2.232393741607666,0.0094,0.08333333333333333,108.37644829691803,-0.13261077462543813,0.3787878787878788,0.03826086956521739,0.08333333333333333,0.02173913043478261,0,103,28,1,0,103,4,25,0,132,0,0,0,71,0,61,0,102,0,30,11,28,89,4,9.6864,13.627,3.407,-1,validation_before_training,2025-07-12 20:58:06,Tucano-2b4-Instruct-tucano_classification_lora-C2-full_context-r8
1.7800564765930176,0.0094,0.32575757575757575,51.99067515459193,0.49125475285171105,0.030303030303030276,0.21556364147726273,0.32575757575757575,0.34292473860580175,0,131,0,1,7,96,11,18,0,120,12,0,22,49,22,39,6,84,18,24,8,91,26,7,9.351,14.116,3.529,16.0,validation_after_training,2025-07-12 20:58:06,Tucano-2b4-Instruct-tucano_classification_lora-C2-full_context-r8
1.835325837135315,0.0094,0.34057971014492755,68.10052246069989,0.2794486215538846,0.13043478260869568,0.21200289447400067,0.34057971014492755,0.32981396845147415,0,137,0,1,15,74,29,20,0,127,6,5,22,61,26,29,2,100,12,24,8,100,18,12,9.9378,13.886,3.522,16.0,test_results,2025-07-12 20:58:06,Tucano-2b4-Instruct-tucano_classification_lora-C2-full_context-r8
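
Each row reports loss, accuracy, RMSE, QWK, HDIV, F1 variants and per-class TP/TN/FP/FN counts for one evaluation pass (validation before training, validation after training, and the test set). One way to recompute the headline metrics from predictions, as a sketch with placeholder labels; QWK is Cohen's kappa with quadratic weights.

```python
# Sketch: recompute accuracy, QWK and F1 variants from gold/predicted grade bins.
from sklearn.metrics import accuracy_score, cohen_kappa_score, f1_score

y_true = [0, 2, 3, 5, 1]  # placeholder gold labels
y_pred = [0, 2, 2, 5, 2]  # placeholder predictions

metrics = {
    "eval_accuracy": accuracy_score(y_true, y_pred),
    "eval_QWK": cohen_kappa_score(y_true, y_pred, weights="quadratic"),
    "eval_Macro_F1": f1_score(y_true, y_pred, average="macro"),
    "eval_Weighted_F1": f1_score(y_true, y_pred, average="weighted"),
    "eval_Micro_F1": f1_score(y_true, y_pred, average="micro"),
}
print(metrics)
```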
run_experiment.log
ADDED
@@ -0,0 +1,840 @@
[2025-07-12 20:57:50,312][__main__][INFO] - cache_dir: /tmp/
dataset:
  name: kamel-usp/aes_enem_dataset
  split: JBCS2025
training_params:
  seed: 42
  num_train_epochs: 20
  logging_steps: 100
  metric_for_best_model: QWK
  bf16: true
bootstrap:
  enabled: true
  n_bootstrap: 10000
  bootstrap_seed: 42
  metrics:
  - QWK
  - Macro_F1
  - Weighted_F1
post_training_results:
  model_path: /workspace/jbcs2025/outputs/2025-03-24/20-42-59
experiments:
  model:
    name: TucanoBR/Tucano-2b4-Instruct
    type: tucano_classification_lora
    num_labels: 6
    output_dir: ./results/
    logging_dir: ./logs/
    best_model_dir: ./results/best_model
    lora_r: 8
    lora_dropout: 0.05
    lora_alpha: 16
    lora_target_modules: all-linear
    checkpoint_path: ''
  tokenizer:
    name: TucanoBR/Tucano-2b4-Instruct
  dataset:
    grade_index: 1
    use_full_context: true
  training_params:
    weight_decay: 0.01
    warmup_ratio: 0.1
    learning_rate: 5.0e-05
    train_batch_size: 8
    eval_batch_size: 4
    gradient_accumulation_steps: 2
    gradient_checkpointing: true

[2025-07-12 20:57:54,284][__main__][INFO] - GPU 0: NVIDIA H200 | TDP ≈ 700 W
[2025-07-12 20:57:54,284][__main__][INFO] - Starting the Fine Tuning training process.
[2025-07-12 20:57:58,826][transformers.tokenization_utils_base][INFO] - loading file tokenizer.model from cache at None
[2025-07-12 20:57:58,826][transformers.tokenization_utils_base][INFO] - loading file tokenizer.json from cache at /tmp/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/tokenizer.json
[2025-07-12 20:57:58,826][transformers.tokenization_utils_base][INFO] - loading file added_tokens.json from cache at None
[2025-07-12 20:57:58,826][transformers.tokenization_utils_base][INFO] - loading file special_tokens_map.json from cache at /tmp/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/special_tokens_map.json
[2025-07-12 20:57:58,826][transformers.tokenization_utils_base][INFO] - loading file tokenizer_config.json from cache at /tmp/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/tokenizer_config.json
[2025-07-12 20:57:58,826][transformers.tokenization_utils_base][INFO] - loading file chat_template.jinja from cache at None
[2025-07-12 20:57:58,868][transformers.tokenization_utils_base][INFO] - Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
[2025-07-12 20:57:58,877][__main__][INFO] - Tokenizer function parameters- Padding:longest; Truncation: False; Use Full Context: True
[2025-07-12 20:58:01,043][__main__][INFO] -
Token statistics for 'train' split:
[2025-07-12 20:58:01,043][__main__][INFO] - Total examples: 500
[2025-07-12 20:58:01,043][__main__][INFO] - Min tokens: 3557
[2025-07-12 20:58:01,043][__main__][INFO] - Max tokens: 3557
[2025-07-12 20:58:01,043][__main__][INFO] - Avg tokens: 3557.00
[2025-07-12 20:58:01,043][__main__][INFO] - Std tokens: 0.00
[2025-07-12 20:58:01,377][__main__][INFO] -
Token statistics for 'validation' split:
[2025-07-12 20:58:01,377][__main__][INFO] - Total examples: 132
[2025-07-12 20:58:01,377][__main__][INFO] - Min tokens: 3759
[2025-07-12 20:58:01,377][__main__][INFO] - Max tokens: 3759
[2025-07-12 20:58:01,377][__main__][INFO] - Avg tokens: 3759.00
[2025-07-12 20:58:01,377][__main__][INFO] - Std tokens: 0.00
[2025-07-12 20:58:01,715][__main__][INFO] -
Token statistics for 'test' split:
[2025-07-12 20:58:01,715][__main__][INFO] - Total examples: 138
[2025-07-12 20:58:01,715][__main__][INFO] - Min tokens: 3782
[2025-07-12 20:58:01,715][__main__][INFO] - Max tokens: 3782
[2025-07-12 20:58:01,715][__main__][INFO] - Avg tokens: 3782.00
[2025-07-12 20:58:01,716][__main__][INFO] - Std tokens: 0.00
[2025-07-12 20:58:01,716][__main__][INFO] - If token statistics are the same (max, avg, min) keep in mind that this is due to batched tokenization and padding.
[2025-07-12 20:58:01,716][__main__][INFO] - Model max length: 4096. If it is the same as stats, then there is a high chance that sequences are being truncated.
[2025-07-12 20:58:01,901][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /tmp/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
[2025-07-12 20:58:01,902][transformers.configuration_utils][INFO] - Model config LlamaConfig {
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "head_dim": 160,
  "hidden_act": "silu",
  "hidden_size": 2560,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4",
    "5": "LABEL_5"
  },
  "initializer_range": 0.02,
  "intermediate_size": 10240,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4,
    "LABEL_5": 5
  },
  "max_position_embeddings": 4096,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "num_key_value_heads": 4,
  "pad_token_id": 3,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": null,
  "rope_theta": 10000.0,
  "tie_word_embeddings": false,
  "torch_dtype": "float32",
  "transformers_version": "4.53.2",
  "use_cache": false,
  "vocab_size": 32002
}

[2025-07-12 20:58:02,050][transformers.modeling_utils][INFO] - loading weights file model.safetensors from cache at /tmp/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/model.safetensors.index.json
[2025-07-12 20:58:02,050][transformers.modeling_utils][INFO] - Will use torch_dtype=torch.float32 as defined in model's config object
[2025-07-12 20:58:02,050][transformers.modeling_utils][INFO] - Instantiating LlamaForSequenceClassification model under default dtype torch.float32.
[2025-07-12 20:58:02,051][transformers.modeling_utils][WARNING] - Flash Attention 2.0 only supports torch.float16 and torch.bfloat16 dtypes, but the current dype in LlamaForSequenceClassification is torch.float32. You should run training or inference using Automatic Mixed-Precision via the `with torch.autocast(device_type='torch_device'):` decorator, or load the model with the `torch_dtype` argument. Example: `model = AutoModel.from_pretrained("openai/whisper-tiny", attn_implementation="flash_attention_2", torch_dtype=torch.float16)`
[2025-07-12 20:58:06,170][transformers.modeling_utils][INFO] - Some weights of the model checkpoint at TucanoBR/Tucano-2b4-Instruct were not used when initializing LlamaForSequenceClassification: ['lm_head.weight']
- This IS expected if you are initializing LlamaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing LlamaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
[2025-07-12 20:58:06,170][transformers.modeling_utils][WARNING] - Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at TucanoBR/Tucano-2b4-Instruct and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[2025-07-12 20:58:06,872][__main__][INFO] - Initialized new PEFT model for ce loss
[2025-07-12 20:58:06,874][__main__][INFO] - None
[2025-07-12 20:58:06,875][transformers.training_args][INFO] - PyTorch: setting up devices
[2025-07-12 20:58:06,932][__main__][INFO] - Total steps: 620. Number of warmup steps: 62
[2025-07-12 20:58:06,946][transformers.trainer][INFO] - You have loaded a model on multiple GPUs. `is_model_parallel` attribute will be force-set to `True` to avoid any unexpected behavior such as device placement mismatching.
[2025-07-12 20:58:06,968][transformers.trainer][INFO] - Using auto half precision backend
[2025-07-12 20:58:06,968][transformers.trainer][WARNING] - No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
[2025-07-12 20:58:06,970][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference. If grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
[2025-07-12 20:58:06,982][transformers.trainer][INFO] -
***** Running Evaluation *****
[2025-07-12 20:58:06,982][transformers.trainer][INFO] - Num examples = 132
[2025-07-12 20:58:06,982][transformers.trainer][INFO] - Batch size = 4
[2025-07-12 20:58:07,197][transformers.modeling_flash_attention_utils][WARNING] - The input hidden states seems to be silently casted in float32, this might be related to the fact you have upcasted embedding or layer norm layers in float32. We will cast back the input in torch.bfloat16.
[2025-07-12 20:58:16,877][transformers.trainer][INFO] - The following columns in the Training set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference. If grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
[2025-07-12 20:58:16,919][transformers.trainer][INFO] - ***** Running training *****
[2025-07-12 20:58:16,919][transformers.trainer][INFO] - Num examples = 500
[2025-07-12 20:58:16,919][transformers.trainer][INFO] - Num Epochs = 20
[2025-07-12 20:58:16,919][transformers.trainer][INFO] - Instantaneous batch size per device = 8
[2025-07-12 20:58:16,919][transformers.trainer][INFO] - Total train batch size (w. parallel, distributed & accumulation) = 16
[2025-07-12 20:58:16,919][transformers.trainer][INFO] - Gradient Accumulation steps = 2
[2025-07-12 20:58:16,919][transformers.trainer][INFO] - Total optimization steps = 640
[2025-07-12 20:58:16,921][transformers.trainer][INFO] - Number of trainable parameters = 10,583,040
| 160 |
+
[2025-07-12 21:00:19,314][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference. If grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
|
| 161 |
+
[2025-07-12 21:00:19,317][transformers.trainer][INFO] -
|
| 162 |
+
***** Running Evaluation *****
|
| 163 |
+
[2025-07-12 21:00:19,317][transformers.trainer][INFO] - Num examples = 132
|
| 164 |
+
[2025-07-12 21:00:19,317][transformers.trainer][INFO] - Batch size = 4
|
| 165 |
+
[2025-07-12 21:00:28,703][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-32
|
| 166 |
+
[2025-07-12 21:00:29,127][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
|
| 167 |
+
[2025-07-12 21:00:29,128][transformers.configuration_utils][INFO] - Model config LlamaConfig {
|
| 168 |
+
"architectures": [
|
| 169 |
+
"LlamaForCausalLM"
|
| 170 |
+
],
|
| 171 |
+
"attention_bias": false,
|
| 172 |
+
"attention_dropout": 0.0,
|
| 173 |
+
"bos_token_id": 1,
|
| 174 |
+
"eos_token_id": 2,
|
| 175 |
+
"head_dim": 160,
|
| 176 |
+
"hidden_act": "silu",
|
| 177 |
+
"hidden_size": 2560,
|
| 178 |
+
"initializer_range": 0.02,
|
| 179 |
+
"intermediate_size": 10240,
|
| 180 |
+
"max_position_embeddings": 4096,
|
| 181 |
+
"mlp_bias": false,
|
| 182 |
+
"model_type": "llama",
|
| 183 |
+
"num_attention_heads": 16,
|
| 184 |
+
"num_hidden_layers": 24,
|
| 185 |
+
"num_key_value_heads": 4,
|
| 186 |
+
"pad_token_id": 3,
|
| 187 |
+
"pretraining_tp": 1,
|
| 188 |
+
"rms_norm_eps": 1e-05,
|
| 189 |
+
"rope_scaling": null,
|
| 190 |
+
"rope_theta": 10000.0,
|
| 191 |
+
"tie_word_embeddings": false,
|
| 192 |
+
"torch_dtype": "float32",
|
| 193 |
+
"transformers_version": "4.53.2",
|
| 194 |
+
"use_cache": false,
|
| 195 |
+
"vocab_size": 32002
|
| 196 |
+
}
|
| 197 |
+
|
| 198 |
+
[2025-07-12 21:02:31,483][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference. If grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
|
| 199 |
+
[2025-07-12 21:02:31,486][transformers.trainer][INFO] -
|
| 200 |
+
***** Running Evaluation *****
|
| 201 |
+
[2025-07-12 21:02:31,486][transformers.trainer][INFO] - Num examples = 132
|
| 202 |
+
[2025-07-12 21:02:31,486][transformers.trainer][INFO] - Batch size = 4
|
| 203 |
+
[2025-07-12 21:02:40,854][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-64
|
| 204 |
+
[2025-07-12 21:02:41,216][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
|
| 205 |
+
[2025-07-12 21:02:41,216][transformers.configuration_utils][INFO] - Model config LlamaConfig {
|
| 206 |
+
"architectures": [
|
| 207 |
+
"LlamaForCausalLM"
|
| 208 |
+
],
|
| 209 |
+
"attention_bias": false,
|
| 210 |
+
"attention_dropout": 0.0,
|
| 211 |
+
"bos_token_id": 1,
|
| 212 |
+
"eos_token_id": 2,
|
| 213 |
+
"head_dim": 160,
|
| 214 |
+
"hidden_act": "silu",
|
| 215 |
+
"hidden_size": 2560,
|
| 216 |
+
"initializer_range": 0.02,
|
| 217 |
+
"intermediate_size": 10240,
|
| 218 |
+
"max_position_embeddings": 4096,
|
| 219 |
+
"mlp_bias": false,
|
| 220 |
+
"model_type": "llama",
|
| 221 |
+
"num_attention_heads": 16,
|
| 222 |
+
"num_hidden_layers": 24,
|
| 223 |
+
"num_key_value_heads": 4,
|
| 224 |
+
"pad_token_id": 3,
|
| 225 |
+
"pretraining_tp": 1,
|
| 226 |
+
"rms_norm_eps": 1e-05,
|
| 227 |
+
"rope_scaling": null,
|
| 228 |
+
"rope_theta": 10000.0,
|
| 229 |
+
"tie_word_embeddings": false,
|
| 230 |
+
"torch_dtype": "float32",
|
| 231 |
+
"transformers_version": "4.53.2",
|
| 232 |
+
"use_cache": false,
|
| 233 |
+
"vocab_size": 32002
|
| 234 |
+
}
|
| 235 |
+
|
| 236 |
+
[2025-07-12 21:02:41,383][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-32] due to args.save_total_limit
|
| 237 |
+
[2025-07-12 21:04:43,618][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference. If grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
|
| 238 |
+
[2025-07-12 21:04:43,621][transformers.trainer][INFO] -
|
| 239 |
+
***** Running Evaluation *****
|
| 240 |
+
[2025-07-12 21:04:43,621][transformers.trainer][INFO] - Num examples = 132
|
| 241 |
+
[2025-07-12 21:04:43,621][transformers.trainer][INFO] - Batch size = 4
|
| 242 |
+
[2025-07-12 21:04:53,000][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-96
|
| 243 |
+
[2025-07-12 21:04:53,360][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
|
| 244 |
+
[2025-07-12 21:04:53,361][transformers.configuration_utils][INFO] - Model config LlamaConfig {
|
| 245 |
+
"architectures": [
|
| 246 |
+
"LlamaForCausalLM"
|
| 247 |
+
],
|
| 248 |
+
"attention_bias": false,
|
| 249 |
+
"attention_dropout": 0.0,
|
| 250 |
+
"bos_token_id": 1,
|
| 251 |
+
"eos_token_id": 2,
|
| 252 |
+
"head_dim": 160,
|
| 253 |
+
"hidden_act": "silu",
|
| 254 |
+
"hidden_size": 2560,
|
| 255 |
+
"initializer_range": 0.02,
|
| 256 |
+
"intermediate_size": 10240,
|
| 257 |
+
"max_position_embeddings": 4096,
|
| 258 |
+
"mlp_bias": false,
|
| 259 |
+
"model_type": "llama",
|
| 260 |
+
"num_attention_heads": 16,
|
| 261 |
+
"num_hidden_layers": 24,
|
| 262 |
+
"num_key_value_heads": 4,
|
| 263 |
+
"pad_token_id": 3,
|
| 264 |
+
"pretraining_tp": 1,
|
| 265 |
+
"rms_norm_eps": 1e-05,
|
| 266 |
+
"rope_scaling": null,
|
| 267 |
+
"rope_theta": 10000.0,
|
| 268 |
+
"tie_word_embeddings": false,
|
| 269 |
+
"torch_dtype": "float32",
|
| 270 |
+
"transformers_version": "4.53.2",
|
| 271 |
+
"use_cache": false,
|
| 272 |
+
"vocab_size": 32002
|
| 273 |
+
}
|
| 274 |
+
|
| 275 |
+
[2025-07-12 21:04:53,582][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-64] due to args.save_total_limit
|
| 276 |
+
[2025-07-12 21:06:55,791][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference. If grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
|
| 277 |
+
[2025-07-12 21:06:55,794][transformers.trainer][INFO] -
|
| 278 |
+
***** Running Evaluation *****
|
| 279 |
+
[2025-07-12 21:06:55,794][transformers.trainer][INFO] - Num examples = 132
|
| 280 |
+
[2025-07-12 21:06:55,794][transformers.trainer][INFO] - Batch size = 4
|
| 281 |
+
[2025-07-12 21:07:05,175][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-128
|
| 282 |
+
[2025-07-12 21:07:05,551][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
|
| 283 |
+
[2025-07-12 21:07:05,552][transformers.configuration_utils][INFO] - Model config LlamaConfig {
|
| 284 |
+
"architectures": [
|
| 285 |
+
"LlamaForCausalLM"
|
| 286 |
+
],
|
| 287 |
+
"attention_bias": false,
|
| 288 |
+
"attention_dropout": 0.0,
|
| 289 |
+
"bos_token_id": 1,
|
| 290 |
+
"eos_token_id": 2,
|
| 291 |
+
"head_dim": 160,
|
| 292 |
+
"hidden_act": "silu",
|
| 293 |
+
"hidden_size": 2560,
|
| 294 |
+
"initializer_range": 0.02,
|
| 295 |
+
"intermediate_size": 10240,
|
| 296 |
+
"max_position_embeddings": 4096,
|
| 297 |
+
"mlp_bias": false,
|
| 298 |
+
"model_type": "llama",
|
| 299 |
+
"num_attention_heads": 16,
|
| 300 |
+
"num_hidden_layers": 24,
|
| 301 |
+
"num_key_value_heads": 4,
|
| 302 |
+
"pad_token_id": 3,
|
| 303 |
+
"pretraining_tp": 1,
|
| 304 |
+
"rms_norm_eps": 1e-05,
|
| 305 |
+
"rope_scaling": null,
|
| 306 |
+
"rope_theta": 10000.0,
|
| 307 |
+
"tie_word_embeddings": false,
|
| 308 |
+
"torch_dtype": "float32",
|
| 309 |
+
"transformers_version": "4.53.2",
|
| 310 |
+
"use_cache": false,
|
| 311 |
+
"vocab_size": 32002
|
| 312 |
+
}
|
| 313 |
+
|
| 314 |
+
[2025-07-12 21:07:05,782][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-96] due to args.save_total_limit
|
| 315 |
+
[2025-07-12 21:09:08,127][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference. If grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
|
| 316 |
+
[2025-07-12 21:09:08,131][transformers.trainer][INFO] -
|
| 317 |
+
***** Running Evaluation *****
|
| 318 |
+
[2025-07-12 21:09:08,131][transformers.trainer][INFO] - Num examples = 132
|
| 319 |
+
[2025-07-12 21:09:08,131][transformers.trainer][INFO] - Batch size = 4
|
| 320 |
+
[2025-07-12 21:09:17,505][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-160
|
| 321 |
+
[2025-07-12 21:09:17,867][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
|
| 322 |
+
[2025-07-12 21:09:17,868][transformers.configuration_utils][INFO] - Model config LlamaConfig {
|
| 323 |
+
"architectures": [
|
| 324 |
+
"LlamaForCausalLM"
|
| 325 |
+
],
|
| 326 |
+
"attention_bias": false,
|
| 327 |
+
"attention_dropout": 0.0,
|
| 328 |
+
"bos_token_id": 1,
|
| 329 |
+
"eos_token_id": 2,
|
| 330 |
+
"head_dim": 160,
|
| 331 |
+
"hidden_act": "silu",
|
| 332 |
+
"hidden_size": 2560,
|
| 333 |
+
"initializer_range": 0.02,
|
| 334 |
+
"intermediate_size": 10240,
|
| 335 |
+
"max_position_embeddings": 4096,
|
| 336 |
+
"mlp_bias": false,
|
| 337 |
+
"model_type": "llama",
|
| 338 |
+
"num_attention_heads": 16,
|
| 339 |
+
"num_hidden_layers": 24,
|
| 340 |
+
"num_key_value_heads": 4,
|
| 341 |
+
"pad_token_id": 3,
|
| 342 |
+
"pretraining_tp": 1,
|
| 343 |
+
"rms_norm_eps": 1e-05,
|
| 344 |
+
"rope_scaling": null,
|
| 345 |
+
"rope_theta": 10000.0,
|
| 346 |
+
"tie_word_embeddings": false,
|
| 347 |
+
"torch_dtype": "float32",
|
| 348 |
+
"transformers_version": "4.53.2",
|
| 349 |
+
"use_cache": false,
|
| 350 |
+
"vocab_size": 32002
|
| 351 |
+
}
|
| 352 |
+
|
| 353 |
+
[2025-07-12 21:09:18,082][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-128] due to args.save_total_limit
|
| 354 |
+
[2025-07-12 21:11:20,306][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference. If grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
|
| 355 |
+
[2025-07-12 21:11:20,310][transformers.trainer][INFO] -
|
| 356 |
+
***** Running Evaluation *****
|
| 357 |
+
[2025-07-12 21:11:20,310][transformers.trainer][INFO] - Num examples = 132
|
| 358 |
+
[2025-07-12 21:11:20,310][transformers.trainer][INFO] - Batch size = 4
|
| 359 |
+
[2025-07-12 21:11:29,717][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-192
|
| 360 |
+
[2025-07-12 21:11:30,082][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
|
| 361 |
+
[2025-07-12 21:11:30,082][transformers.configuration_utils][INFO] - Model config LlamaConfig {
|
| 362 |
+
"architectures": [
|
| 363 |
+
"LlamaForCausalLM"
|
| 364 |
+
],
|
| 365 |
+
"attention_bias": false,
|
| 366 |
+
"attention_dropout": 0.0,
|
| 367 |
+
"bos_token_id": 1,
|
| 368 |
+
"eos_token_id": 2,
|
| 369 |
+
"head_dim": 160,
|
| 370 |
+
"hidden_act": "silu",
|
| 371 |
+
"hidden_size": 2560,
|
| 372 |
+
"initializer_range": 0.02,
|
| 373 |
+
"intermediate_size": 10240,
|
| 374 |
+
"max_position_embeddings": 4096,
|
| 375 |
+
"mlp_bias": false,
|
| 376 |
+
"model_type": "llama",
|
| 377 |
+
"num_attention_heads": 16,
|
| 378 |
+
"num_hidden_layers": 24,
|
| 379 |
+
"num_key_value_heads": 4,
|
| 380 |
+
"pad_token_id": 3,
|
| 381 |
+
"pretraining_tp": 1,
|
| 382 |
+
"rms_norm_eps": 1e-05,
|
| 383 |
+
"rope_scaling": null,
|
| 384 |
+
"rope_theta": 10000.0,
|
| 385 |
+
"tie_word_embeddings": false,
|
| 386 |
+
"torch_dtype": "float32",
|
| 387 |
+
"transformers_version": "4.53.2",
|
| 388 |
+
"use_cache": false,
|
| 389 |
+
"vocab_size": 32002
|
| 390 |
+
}
|
| 391 |
+
|
| 392 |
+
[2025-07-12 21:11:30,290][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-160] due to args.save_total_limit
|
| 393 |
+
[2025-07-12 21:13:32,503][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference. If grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
|
| 394 |
+
[2025-07-12 21:13:32,506][transformers.trainer][INFO] -
|
| 395 |
+
***** Running Evaluation *****
|
| 396 |
+
[2025-07-12 21:13:32,506][transformers.trainer][INFO] - Num examples = 132
|
| 397 |
+
[2025-07-12 21:13:32,506][transformers.trainer][INFO] - Batch size = 4
|
| 398 |
+
[2025-07-12 21:13:41,882][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-224
|
| 399 |
+
[2025-07-12 21:13:42,259][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
|
| 400 |
+
[2025-07-12 21:13:42,259][transformers.configuration_utils][INFO] - Model config LlamaConfig {
|
| 401 |
+
"architectures": [
|
| 402 |
+
"LlamaForCausalLM"
|
| 403 |
+
],
|
| 404 |
+
"attention_bias": false,
|
| 405 |
+
"attention_dropout": 0.0,
|
| 406 |
+
"bos_token_id": 1,
|
| 407 |
+
"eos_token_id": 2,
|
| 408 |
+
"head_dim": 160,
|
| 409 |
+
"hidden_act": "silu",
|
| 410 |
+
"hidden_size": 2560,
|
| 411 |
+
"initializer_range": 0.02,
|
| 412 |
+
"intermediate_size": 10240,
|
| 413 |
+
"max_position_embeddings": 4096,
|
| 414 |
+
"mlp_bias": false,
|
| 415 |
+
"model_type": "llama",
|
| 416 |
+
"num_attention_heads": 16,
|
| 417 |
+
"num_hidden_layers": 24,
|
| 418 |
+
"num_key_value_heads": 4,
|
| 419 |
+
"pad_token_id": 3,
|
| 420 |
+
"pretraining_tp": 1,
|
| 421 |
+
"rms_norm_eps": 1e-05,
|
| 422 |
+
"rope_scaling": null,
|
| 423 |
+
"rope_theta": 10000.0,
|
| 424 |
+
"tie_word_embeddings": false,
|
| 425 |
+
"torch_dtype": "float32",
|
| 426 |
+
"transformers_version": "4.53.2",
|
| 427 |
+
"use_cache": false,
|
| 428 |
+
"vocab_size": 32002
|
| 429 |
+
}
|
| 430 |
+
|
| 431 |
+
[2025-07-12 21:15:44,697][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference. If grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
|
| 432 |
+
[2025-07-12 21:15:44,700][transformers.trainer][INFO] -
|
| 433 |
+
***** Running Evaluation *****
|
| 434 |
+
[2025-07-12 21:15:44,700][transformers.trainer][INFO] - Num examples = 132
|
| 435 |
+
[2025-07-12 21:15:44,700][transformers.trainer][INFO] - Batch size = 4
|
| 436 |
+
[2025-07-12 21:15:54,076][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-256
|
| 437 |
+
[2025-07-12 21:15:54,469][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
|
| 438 |
+
[2025-07-12 21:15:54,470][transformers.configuration_utils][INFO] - Model config LlamaConfig {
|
| 439 |
+
"architectures": [
|
| 440 |
+
"LlamaForCausalLM"
|
| 441 |
+
],
|
| 442 |
+
"attention_bias": false,
|
| 443 |
+
"attention_dropout": 0.0,
|
| 444 |
+
"bos_token_id": 1,
|
| 445 |
+
"eos_token_id": 2,
|
| 446 |
+
"head_dim": 160,
|
| 447 |
+
"hidden_act": "silu",
|
| 448 |
+
"hidden_size": 2560,
|
| 449 |
+
"initializer_range": 0.02,
|
| 450 |
+
"intermediate_size": 10240,
|
| 451 |
+
"max_position_embeddings": 4096,
|
| 452 |
+
"mlp_bias": false,
|
| 453 |
+
"model_type": "llama",
|
| 454 |
+
"num_attention_heads": 16,
|
| 455 |
+
"num_hidden_layers": 24,
|
| 456 |
+
"num_key_value_heads": 4,
|
| 457 |
+
"pad_token_id": 3,
|
| 458 |
+
"pretraining_tp": 1,
|
| 459 |
+
"rms_norm_eps": 1e-05,
|
| 460 |
+
"rope_scaling": null,
|
| 461 |
+
"rope_theta": 10000.0,
|
| 462 |
+
"tie_word_embeddings": false,
|
| 463 |
+
"torch_dtype": "float32",
|
| 464 |
+
"transformers_version": "4.53.2",
|
| 465 |
+
"use_cache": false,
|
| 466 |
+
"vocab_size": 32002
|
| 467 |
+
}
|
| 468 |
+
|
| 469 |
+
[2025-07-12 21:15:54,684][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-224] due to args.save_total_limit
|
| 470 |
+
[2025-07-12 21:17:56,860][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference. If grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
|
| 471 |
+
[2025-07-12 21:17:56,863][transformers.trainer][INFO] -
|
| 472 |
+
***** Running Evaluation *****
|
| 473 |
+
[2025-07-12 21:17:56,863][transformers.trainer][INFO] - Num examples = 132
|
| 474 |
+
[2025-07-12 21:17:56,863][transformers.trainer][INFO] - Batch size = 4
|
| 475 |
+
[2025-07-12 21:18:06,254][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-288
|
| 476 |
+
[2025-07-12 21:18:06,608][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
|
| 477 |
+
[2025-07-12 21:18:06,608][transformers.configuration_utils][INFO] - Model config LlamaConfig {
|
| 478 |
+
"architectures": [
|
| 479 |
+
"LlamaForCausalLM"
|
| 480 |
+
],
|
| 481 |
+
"attention_bias": false,
|
| 482 |
+
"attention_dropout": 0.0,
|
| 483 |
+
"bos_token_id": 1,
|
| 484 |
+
"eos_token_id": 2,
|
| 485 |
+
"head_dim": 160,
|
| 486 |
+
"hidden_act": "silu",
|
| 487 |
+
"hidden_size": 2560,
|
| 488 |
+
"initializer_range": 0.02,
|
| 489 |
+
"intermediate_size": 10240,
|
| 490 |
+
"max_position_embeddings": 4096,
|
| 491 |
+
"mlp_bias": false,
|
| 492 |
+
"model_type": "llama",
|
| 493 |
+
"num_attention_heads": 16,
|
| 494 |
+
"num_hidden_layers": 24,
|
| 495 |
+
"num_key_value_heads": 4,
|
| 496 |
+
"pad_token_id": 3,
|
| 497 |
+
"pretraining_tp": 1,
|
| 498 |
+
"rms_norm_eps": 1e-05,
|
| 499 |
+
"rope_scaling": null,
|
| 500 |
+
"rope_theta": 10000.0,
|
| 501 |
+
"tie_word_embeddings": false,
|
| 502 |
+
"torch_dtype": "float32",
|
| 503 |
+
"transformers_version": "4.53.2",
|
| 504 |
+
"use_cache": false,
|
| 505 |
+
"vocab_size": 32002
|
| 506 |
+
}
|
| 507 |
+
|
| 508 |
+
[2025-07-12 21:18:06,785][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-256] due to args.save_total_limit
|
| 509 |
+
[2025-07-12 21:20:08,982][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference. If grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
|
| 510 |
+
[2025-07-12 21:20:08,985][transformers.trainer][INFO] -
|
| 511 |
+
***** Running Evaluation *****
|
| 512 |
+
[2025-07-12 21:20:08,986][transformers.trainer][INFO] - Num examples = 132
|
| 513 |
+
[2025-07-12 21:20:08,986][transformers.trainer][INFO] - Batch size = 4
|
| 514 |
+
[2025-07-12 21:20:18,363][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-320
|
| 515 |
+
[2025-07-12 21:20:18,875][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
|
| 516 |
+
[2025-07-12 21:20:18,876][transformers.configuration_utils][INFO] - Model config LlamaConfig {
|
| 517 |
+
"architectures": [
|
| 518 |
+
"LlamaForCausalLM"
|
| 519 |
+
],
|
| 520 |
+
"attention_bias": false,
|
| 521 |
+
"attention_dropout": 0.0,
|
| 522 |
+
"bos_token_id": 1,
|
| 523 |
+
"eos_token_id": 2,
|
| 524 |
+
"head_dim": 160,
|
| 525 |
+
"hidden_act": "silu",
|
| 526 |
+
"hidden_size": 2560,
|
| 527 |
+
"initializer_range": 0.02,
|
| 528 |
+
"intermediate_size": 10240,
|
| 529 |
+
"max_position_embeddings": 4096,
|
| 530 |
+
"mlp_bias": false,
|
| 531 |
+
"model_type": "llama",
|
| 532 |
+
"num_attention_heads": 16,
|
| 533 |
+
"num_hidden_layers": 24,
|
| 534 |
+
"num_key_value_heads": 4,
|
| 535 |
+
"pad_token_id": 3,
|
| 536 |
+
"pretraining_tp": 1,
|
| 537 |
+
"rms_norm_eps": 1e-05,
|
| 538 |
+
"rope_scaling": null,
|
| 539 |
+
"rope_theta": 10000.0,
|
| 540 |
+
"tie_word_embeddings": false,
|
| 541 |
+
"torch_dtype": "float32",
|
| 542 |
+
"transformers_version": "4.53.2",
|
| 543 |
+
"use_cache": false,
|
| 544 |
+
"vocab_size": 32002
|
| 545 |
+
}
|
| 546 |
+
|
| 547 |
+
[2025-07-12 21:20:19,088][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-192] due to args.save_total_limit
|
| 548 |
+
[2025-07-12 21:20:19,095][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-288] due to args.save_total_limit
|
| 549 |
+
[2025-07-12 21:22:21,320][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference. If grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
|
| 550 |
+
[2025-07-12 21:22:21,324][transformers.trainer][INFO] -
|
| 551 |
+
***** Running Evaluation *****
|
| 552 |
+
[2025-07-12 21:22:21,324][transformers.trainer][INFO] - Num examples = 132
|
| 553 |
+
[2025-07-12 21:22:21,324][transformers.trainer][INFO] - Batch size = 4
|
| 554 |
+
[2025-07-12 21:22:30,713][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-352
|
| 555 |
+
[2025-07-12 21:22:31,067][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
|
| 556 |
+
[2025-07-12 21:22:31,068][transformers.configuration_utils][INFO] - Model config LlamaConfig {
|
| 557 |
+
"architectures": [
|
| 558 |
+
"LlamaForCausalLM"
|
| 559 |
+
],
|
| 560 |
+
"attention_bias": false,
|
| 561 |
+
"attention_dropout": 0.0,
|
| 562 |
+
"bos_token_id": 1,
|
| 563 |
+
"eos_token_id": 2,
|
| 564 |
+
"head_dim": 160,
|
| 565 |
+
"hidden_act": "silu",
|
| 566 |
+
"hidden_size": 2560,
|
| 567 |
+
"initializer_range": 0.02,
|
| 568 |
+
"intermediate_size": 10240,
|
| 569 |
+
"max_position_embeddings": 4096,
|
| 570 |
+
"mlp_bias": false,
|
| 571 |
+
"model_type": "llama",
|
| 572 |
+
"num_attention_heads": 16,
|
| 573 |
+
"num_hidden_layers": 24,
|
| 574 |
+
"num_key_value_heads": 4,
|
| 575 |
+
"pad_token_id": 3,
|
| 576 |
+
"pretraining_tp": 1,
|
| 577 |
+
"rms_norm_eps": 1e-05,
|
| 578 |
+
"rope_scaling": null,
|
| 579 |
+
"rope_theta": 10000.0,
|
| 580 |
+
"tie_word_embeddings": false,
|
| 581 |
+
"torch_dtype": "float32",
|
| 582 |
+
"transformers_version": "4.53.2",
|
| 583 |
+
"use_cache": false,
|
| 584 |
+
"vocab_size": 32002
|
| 585 |
+
}
|
| 586 |
+
|
| 587 |
+
[2025-07-12 21:22:31,296][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-320] due to args.save_total_limit
|
| 588 |
+
[2025-07-12 21:24:33,508][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference. If grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
|
| 589 |
+
[2025-07-12 21:24:33,511][transformers.trainer][INFO] -
|
| 590 |
+
***** Running Evaluation *****
|
| 591 |
+
[2025-07-12 21:24:33,511][transformers.trainer][INFO] - Num examples = 132
|
| 592 |
+
[2025-07-12 21:24:33,511][transformers.trainer][INFO] - Batch size = 4
|
| 593 |
+
[2025-07-12 21:24:42,878][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-384
|
| 594 |
+
[2025-07-12 21:24:43,250][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
|
| 595 |
+
[2025-07-12 21:24:43,250][transformers.configuration_utils][INFO] - Model config LlamaConfig {
|
| 596 |
+
"architectures": [
|
| 597 |
+
"LlamaForCausalLM"
|
| 598 |
+
],
|
| 599 |
+
"attention_bias": false,
|
| 600 |
+
"attention_dropout": 0.0,
|
| 601 |
+
"bos_token_id": 1,
|
| 602 |
+
"eos_token_id": 2,
|
| 603 |
+
"head_dim": 160,
|
| 604 |
+
"hidden_act": "silu",
|
| 605 |
+
"hidden_size": 2560,
|
| 606 |
+
"initializer_range": 0.02,
|
| 607 |
+
"intermediate_size": 10240,
|
| 608 |
+
"max_position_embeddings": 4096,
|
| 609 |
+
"mlp_bias": false,
|
| 610 |
+
"model_type": "llama",
|
| 611 |
+
"num_attention_heads": 16,
|
| 612 |
+
"num_hidden_layers": 24,
|
| 613 |
+
"num_key_value_heads": 4,
|
| 614 |
+
"pad_token_id": 3,
|
| 615 |
+
"pretraining_tp": 1,
|
| 616 |
+
"rms_norm_eps": 1e-05,
|
| 617 |
+
"rope_scaling": null,
|
| 618 |
+
"rope_theta": 10000.0,
|
| 619 |
+
"tie_word_embeddings": false,
|
| 620 |
+
"torch_dtype": "float32",
|
| 621 |
+
"transformers_version": "4.53.2",
|
| 622 |
+
"use_cache": false,
|
| 623 |
+
"vocab_size": 32002
|
| 624 |
+
}
|
| 625 |
+
|
| 626 |
+
[2025-07-12 21:26:45,668][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference. If grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
|
| 627 |
+
[2025-07-12 21:26:45,671][transformers.trainer][INFO] -
|
| 628 |
+
***** Running Evaluation *****
|
| 629 |
+
[2025-07-12 21:26:45,671][transformers.trainer][INFO] - Num examples = 132
|
| 630 |
+
[2025-07-12 21:26:45,671][transformers.trainer][INFO] - Batch size = 4
|
| 631 |
+
[2025-07-12 21:26:55,038][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-416
|
| 632 |
+
[2025-07-12 21:26:55,404][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
|
| 633 |
+
[2025-07-12 21:26:55,405][transformers.configuration_utils][INFO] - Model config LlamaConfig {
|
| 634 |
+
"architectures": [
|
| 635 |
+
"LlamaForCausalLM"
|
| 636 |
+
],
|
| 637 |
+
"attention_bias": false,
|
| 638 |
+
"attention_dropout": 0.0,
|
| 639 |
+
"bos_token_id": 1,
|
| 640 |
+
"eos_token_id": 2,
|
| 641 |
+
"head_dim": 160,
|
| 642 |
+
"hidden_act": "silu",
|
| 643 |
+
"hidden_size": 2560,
|
| 644 |
+
"initializer_range": 0.02,
|
| 645 |
+
"intermediate_size": 10240,
|
| 646 |
+
"max_position_embeddings": 4096,
|
| 647 |
+
"mlp_bias": false,
|
| 648 |
+
"model_type": "llama",
|
| 649 |
+
"num_attention_heads": 16,
|
| 650 |
+
"num_hidden_layers": 24,
|
| 651 |
+
"num_key_value_heads": 4,
|
| 652 |
+
"pad_token_id": 3,
|
| 653 |
+
"pretraining_tp": 1,
|
| 654 |
+
"rms_norm_eps": 1e-05,
|
| 655 |
+
"rope_scaling": null,
|
| 656 |
+
"rope_theta": 10000.0,
|
| 657 |
+
"tie_word_embeddings": false,
|
| 658 |
+
"torch_dtype": "float32",
|
| 659 |
+
"transformers_version": "4.53.2",
|
| 660 |
+
"use_cache": false,
|
| 661 |
+
"vocab_size": 32002
|
| 662 |
+
}
|
| 663 |
+
|
| 664 |
+
[2025-07-12 21:26:55,586][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-384] due to args.save_total_limit
|
| 665 |
+
[2025-07-12 21:28:57,774][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference. If grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
[2025-07-12 21:28:57,777][transformers.trainer][INFO] -
***** Running Evaluation *****
[2025-07-12 21:28:57,777][transformers.trainer][INFO] - Num examples = 132
[2025-07-12 21:28:57,777][transformers.trainer][INFO] - Batch size = 4
[2025-07-12 21:29:07,148][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-448
[2025-07-12 21:29:07,543][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
[2025-07-12 21:29:07,544][transformers.configuration_utils][INFO] - Model config LlamaConfig {
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "head_dim": 160,
  "hidden_act": "silu",
  "hidden_size": 2560,
  "initializer_range": 0.02,
  "intermediate_size": 10240,
  "max_position_embeddings": 4096,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "num_key_value_heads": 4,
  "pad_token_id": 3,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": null,
  "rope_theta": 10000.0,
  "tie_word_embeddings": false,
  "torch_dtype": "float32",
  "transformers_version": "4.53.2",
  "use_cache": false,
  "vocab_size": 32002
}

[2025-07-12 21:29:07,700][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-416] due to args.save_total_limit
[2025-07-12 21:31:09,872][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference. If grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
[2025-07-12 21:31:09,875][transformers.trainer][INFO] -
***** Running Evaluation *****
[2025-07-12 21:31:09,875][transformers.trainer][INFO] - Num examples = 132
[2025-07-12 21:31:09,876][transformers.trainer][INFO] - Batch size = 4
[2025-07-12 21:31:19,247][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-480
[2025-07-12 21:31:19,601][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
[2025-07-12 21:31:19,602][transformers.configuration_utils][INFO] - Model config LlamaConfig {
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "head_dim": 160,
  "hidden_act": "silu",
  "hidden_size": 2560,
  "initializer_range": 0.02,
  "intermediate_size": 10240,
  "max_position_embeddings": 4096,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "num_key_value_heads": 4,
  "pad_token_id": 3,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": null,
  "rope_theta": 10000.0,
  "tie_word_embeddings": false,
  "torch_dtype": "float32",
  "transformers_version": "4.53.2",
  "use_cache": false,
  "vocab_size": 32002
}

[2025-07-12 21:31:19,791][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-448] due to args.save_total_limit
[2025-07-12 21:33:22,017][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference. If grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
[2025-07-12 21:33:22,020][transformers.trainer][INFO] -
***** Running Evaluation *****
[2025-07-12 21:33:22,020][transformers.trainer][INFO] - Num examples = 132
[2025-07-12 21:33:22,020][transformers.trainer][INFO] - Batch size = 4
[2025-07-12 21:33:31,389][transformers.trainer][INFO] - Saving model checkpoint to /workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-512
[2025-07-12 21:33:31,748][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
[2025-07-12 21:33:31,748][transformers.configuration_utils][INFO] - Model config LlamaConfig {
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "head_dim": 160,
  "hidden_act": "silu",
  "hidden_size": 2560,
  "initializer_range": 0.02,
  "intermediate_size": 10240,
  "max_position_embeddings": 4096,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "num_key_value_heads": 4,
  "pad_token_id": 3,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": null,
  "rope_theta": 10000.0,
  "tie_word_embeddings": false,
  "torch_dtype": "float32",
  "transformers_version": "4.53.2",
  "use_cache": false,
  "vocab_size": 32002
}

[2025-07-12 21:33:31,981][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-480] due to args.save_total_limit
[2025-07-12 21:33:31,990][transformers.trainer][INFO] -

Training completed. Do not forget to share your model on huggingface.co/models =)


[2025-07-12 21:33:31,990][transformers.trainer][INFO] - Loading best model from /workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-352 (score: 0.49125475285171105).
[2025-07-12 21:33:32,079][transformers.trainer][INFO] - Deleting older checkpoint [/workspace/jbcs2025/outputs/2025-07-12/20-57-50/results/checkpoint-512] due to args.save_total_limit
[2025-07-12 21:33:32,089][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference. If grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
[2025-07-12 21:33:32,093][transformers.trainer][INFO] -
***** Running Evaluation *****
[2025-07-12 21:33:32,093][transformers.trainer][INFO] - Num examples = 132
[2025-07-12 21:33:32,093][transformers.trainer][INFO] - Batch size = 4
[2025-07-12 21:33:41,455][__main__][INFO] - Training completed successfully.
[2025-07-12 21:33:41,455][__main__][INFO] - Running on Test
[2025-07-12 21:33:41,455][transformers.trainer][INFO] - The following columns in the Evaluation set don't have a corresponding argument in `PeftModelForSequenceClassification.forward` and have been ignored: grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference. If grades, prompt, id, essay_text, essay_year, id_prompt, supporting_text, reference are not expected by `PeftModelForSequenceClassification.forward`, you can safely ignore this message.
[2025-07-12 21:33:41,458][transformers.trainer][INFO] -
***** Running Evaluation *****
[2025-07-12 21:33:41,458][transformers.trainer][INFO] - Num examples = 138
[2025-07-12 21:33:41,458][transformers.trainer][INFO] - Batch size = 4
[2025-07-12 21:33:51,406][__main__][INFO] - Test metrics: {'eval_loss': 1.835325837135315, 'eval_model_preparation_time': 0.0094, 'eval_accuracy': 0.34057971014492755, 'eval_RMSE': 68.10052246069989, 'eval_QWK': 0.2794486215538846, 'eval_HDIV': 0.13043478260869568, 'eval_Macro_F1': 0.21200289447400067, 'eval_Micro_F1': 0.34057971014492755, 'eval_Weighted_F1': 0.32981396845147415, 'eval_TP_0': 0, 'eval_TN_0': 137, 'eval_FP_0': 0, 'eval_FN_0': 1, 'eval_TP_1': 15, 'eval_TN_1': 74, 'eval_FP_1': 29, 'eval_FN_1': 20, 'eval_TP_2': 0, 'eval_TN_2': 127, 'eval_FP_2': 6, 'eval_FN_2': 5, 'eval_TP_3': 22, 'eval_TN_3': 61, 'eval_FP_3': 26, 'eval_FN_3': 29, 'eval_TP_4': 2, 'eval_TN_4': 100, 'eval_FP_4': 12, 'eval_FN_4': 24, 'eval_TP_5': 8, 'eval_TN_5': 100, 'eval_FP_5': 18, 'eval_FN_5': 12, 'eval_runtime': 9.9378, 'eval_samples_per_second': 13.886, 'eval_steps_per_second': 3.522, 'epoch': 16.0}
[2025-07-12 21:33:51,407][transformers.trainer][INFO] - Saving model checkpoint to ./results/best_model
[2025-07-12 21:33:51,760][transformers.configuration_utils][INFO] - loading configuration file config.json from cache at /workspace/.hf_home/hub/models--TucanoBR--Tucano-2b4-Instruct/snapshots/d763c3ed97909de3b664742dd955bf35d1cca620/config.json
[2025-07-12 21:33:51,760][transformers.configuration_utils][INFO] - Model config LlamaConfig {
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "head_dim": 160,
  "hidden_act": "silu",
  "hidden_size": 2560,
  "initializer_range": 0.02,
  "intermediate_size": 10240,
  "max_position_embeddings": 4096,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "num_key_value_heads": 4,
  "pad_token_id": 3,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": null,
  "rope_theta": 10000.0,
  "tie_word_embeddings": false,
  "torch_dtype": "float32",
  "transformers_version": "4.53.2",
  "use_cache": false,
  "vocab_size": 32002
}

[2025-07-12 21:33:51,879][transformers.tokenization_utils_base][INFO] - chat template saved in ./results/best_model/chat_template.jinja
[2025-07-12 21:33:51,880][transformers.tokenization_utils_base][INFO] - tokenizer config file saved in ./results/best_model/tokenizer_config.json
[2025-07-12 21:33:51,880][transformers.tokenization_utils_base][INFO] - Special tokens file saved in ./results/best_model/special_tokens_map.json
[2025-07-12 21:33:51,894][__main__][INFO] - Model and tokenizer saved to ./results/best_model
[2025-07-12 21:33:51,897][__main__][INFO] - Fine Tuning Finished.
[2025-07-12 21:33:52,406][__main__][INFO] - Total emissions: 0.2185 kg CO2eq
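The test metrics above come from evaluating the LoRA adapter as a six-way classifier (score bins 0–5, matching the eval_TP_0 … eval_TP_5 entries), and eval_QWK is a quadratically weighted Cohen's kappa. The following is a minimal sketch of how the adapter could be loaded and how that metric is defined; the repo id, `num_labels`, and the `predict_bin` helper are assumptions for illustration, not part of this commit.

```python
# Minimal sketch (assumptions: Hub repo id, 6 score bins, CPU/GPU inference).
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from peft import PeftModel
from sklearn.metrics import cohen_kappa_score

base_id = "TucanoBR/Tucano-2b4-Instruct"
adapter_id = "Tucano-2b4-Instruct-tucano_classification_lora-C2-full_context-r8"  # assumed Hub id of this repo

tokenizer = AutoTokenizer.from_pretrained(adapter_id)
base = AutoModelForSequenceClassification.from_pretrained(
    base_id,
    num_labels=6,                         # six score bins, per eval_TP_0..eval_TP_5 above
    pad_token_id=tokenizer.pad_token_id,  # keep padding id consistent with the tokenizer
)
model = PeftModel.from_pretrained(base, adapter_id).eval()

def predict_bin(essay_text: str) -> int:
    """Return the predicted score bin (0-5) for one essay."""
    enc = tokenizer(essay_text, truncation=True, max_length=4096, return_tensors="pt")
    with torch.no_grad():
        logits = model(**enc).logits
    return int(logits.argmax(dim=-1).item())

# eval_QWK in the log is Cohen's kappa with quadratic weights over those bins:
qwk = cohen_kappa_score([3, 1, 5, 3], [3, 2, 4, 3], weights="quadratic")  # dummy labels
```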
special_tokens_map.json
ADDED
@@ -0,0 +1,28 @@
{
  "additional_special_tokens": [
    "<instruction>",
    "</instruction>"
  ],
  "bos_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "eos_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": "<|finetune_right_pad_id|>",
  "unk_token": {
    "content": "<unk>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
}
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
tokenizer_config.json
ADDED
@@ -0,0 +1,75 @@
{
  "add_bos_token": false,
  "add_eos_token": false,
  "add_prefix_space": null,
  "added_tokens_decoder": {
    "0": {
      "content": "<unk>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "1": {
      "content": "<s>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "2": {
      "content": "</s>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "3": {
      "content": "<pad>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "32000": {
      "content": "<instruction>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "32001": {
      "content": "</instruction>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "additional_special_tokens": [
    "<instruction>",
    "</instruction>"
  ],
  "bos_token": "<s>",
  "bos_token_id": 1,
  "clean_up_tokenization_spaces": false,
  "eos_token": "</s>",
  "eos_token_id": 2,
  "extra_special_tokens": {},
  "legacy": false,
  "model_max_length": 4096,
  "pad_token": "<|finetune_right_pad_id|>",
  "pad_token_id": 0,
  "padding_side": "right",
  "sp_model_kwargs": {},
  "tokenizer_class": "LlamaTokenizerFast",
  "unk_token": "<unk>",
  "unk_token_id": 0,
  "use_default_system_prompt": false
}
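This tokenizer config pins model_max_length to 4096, right-side padding, and the `<instruction>` / `</instruction>` special tokens; note that `pad_token` is `<|finetune_right_pad_id|>` while `pad_token_id` is 0, which `added_tokens_decoder` maps to `<unk>`, so padding behaviour is worth verifying before reuse. Below is a minimal loading sketch, assuming this file ships under the same Hub repo id as the adapter.

```python
# Minimal sketch (assumed repo id) of loading and checking the tokenizer settings above.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained(
    "Tucano-2b4-Instruct-tucano_classification_lora-C2-full_context-r8"  # assumed Hub id
)

print(tok.model_max_length)             # 4096, matching the model's max_position_embeddings
print(tok.padding_side)                 # "right"
print(tok.additional_special_tokens)    # ['<instruction>', '</instruction>']
print(tok.pad_token, tok.pad_token_id)  # check these before batching padded inputs

# Full-context essays must fit the 4096-token window:
enc = tok(["texto completo da redação ..."], truncation=True, max_length=4096,
          padding=True, return_tensors="pt")
```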
training_args.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8f03cf58751278993265b02ab20b716edf7560b4c55c2b4382d317dd90ba1f7f
size 5777
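training_args.bin is a Git LFS pointer to the pickled transformers.TrainingArguments used for this run. A minimal inspection sketch follows; the repo id is an assumption, and recent torch versions need `weights_only=False` to unpickle non-tensor objects.

```python
# Minimal sketch: downloading and unpickling training_args.bin (assumed repo id).
import torch
from huggingface_hub import hf_hub_download

path = hf_hub_download(
    repo_id="Tucano-2b4-Instruct-tucano_classification_lora-C2-full_context-r8",  # assumed
    filename="training_args.bin",
)
args = torch.load(path, weights_only=False)  # TrainingArguments is a pickled Python object
print(args.num_train_epochs, args.save_total_limit, args.metric_for_best_model)
```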