beamaia committed
Commit
73e0b72
1 Parent(s): 62a65b4

Training in progress, step 100, checkpoint

checkpoint-100/README.md CHANGED
@@ -1,6 +1,6 @@
  ---
  library_name: peft
- base_model: HuggingFaceH4/zephyr-7b-beta
+ base_model: mistralai/Mixtral-8x7B-Instruct-v0.1
  ---
 
  # Model Card for Model ID
checkpoint-100/adapter_config.json CHANGED
@@ -1,7 +1,7 @@
  {
  "alpha_pattern": {},
  "auto_mapping": null,
- "base_model_name_or_path": "HuggingFaceH4/zephyr-7b-beta",
+ "base_model_name_or_path": "mistralai/Mixtral-8x7B-Instruct-v0.1",
  "bias": "none",
  "fan_in_fan_out": false,
  "inference_mode": true,
@@ -20,10 +20,10 @@
  "rank_pattern": {},
  "revision": null,
  "target_modules": [
- "q_proj",
- "v_proj",
  "k_proj",
- "o_proj"
+ "o_proj",
+ "v_proj",
+ "q_proj"
  ],
  "task_type": "CAUSAL_LM",
  "use_dora": false,
checkpoint-100/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:96e8d01683a4f1dd43f963a096cd007c509e505bcd4c33d14a6bee6a397b17e4
+ oid sha256:0a02f1947ab36f1fff09eb9a70d5c59e7422cfa2414db71c34b73281ebf1f79c
  size 54560368
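This file, like optimizer.pt, scheduler.pt, and training_args.bin below, is a Git LFS pointer: only the sha256 oid changes in the diff, while the byte size stays fixed. After pulling the actual blob, the oid can be re-checked locally; a minimal sketch (the local path is an assumption):

```python
import hashlib

def lfs_sha256(path: str, chunk_size: int = 1 << 20) -> str:
    """Hash the file in 1 MiB chunks so large weight files never sit in memory."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

# Expected to match the oid in the pointer file:
# 0a02f1947ab36f1fff09eb9a70d5c59e7422cfa2414db71c34b73281ebf1f79c
print(lfs_sha256("checkpoint-100/adapter_model.safetensors"))
```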
checkpoint-100/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b66811faa2c5d0dac760eb6a8ae05ac3df86f401503ea5dcb5f2c1bdfb59ab9d
+ oid sha256:f59d1470a8bb6b190be0ef0269a1eca8dac817719f679bee82a9cff3a2078f67
  size 109267450
checkpoint-100/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:845227c64f4739d4bbba122720ec7ac903273c61f209a8b9b524f4ffeef9433f
+ oid sha256:b5c5d1c5dfacd2e118f646f270d4987c263d81d3756dd72c85b10cf85d21cad3
  size 1064
checkpoint-100/special_tokens_map.json CHANGED
@@ -1,9 +1,4 @@
  {
- "additional_special_tokens": [
- "<unk>",
- "<s>",
- "</s>"
- ],
  "bos_token": {
  "content": "<s>",
  "lstrip": false,
checkpoint-100/tokenizer_config.json CHANGED
@@ -27,13 +27,9 @@
  "special": true
  }
  },
- "additional_special_tokens": [
- "<unk>",
- "<s>",
- "</s>"
- ],
+ "additional_special_tokens": [],
  "bos_token": "<s>",
- "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
+ "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}",
  "clean_up_tokenization_spaces": false,
  "eos_token": "</s>",
  "legacy": true,
@@ -44,7 +40,6 @@
  "sp_model_kwargs": {},
  "spaces_between_special_tokens": false,
  "tokenizer_class": "LlamaTokenizer",
- "truncation_side": "left",
  "unk_token": "<unk>",
- "use_default_system_prompt": true
+ "use_default_system_prompt": false
  }
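The chat template switches from Zephyr's `<|user|>`/`<|system|>`/`<|assistant|>` turns to Mistral's `[INST] ... [/INST]` format, which raises an exception on system messages or non-alternating roles, and `additional_special_tokens` is emptied to match. A minimal sketch of rendering a prompt with the updated tokenizer (the local path and message contents are assumptions):

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("checkpoint-100")

# Roles must strictly alternate user/assistant; a "system" message would
# hit the template's raise_exception branch.
messages = [
    {"role": "user", "content": "Summarize the change in this checkpoint."},
    {"role": "assistant", "content": "The base model moved to Mixtral."},
    {"role": "user", "content": "And the chat format?"},
]

# Renders: <s>[INST] ... [/INST]...</s>[INST] ... [/INST]
print(tokenizer.apply_chat_template(messages, tokenize=False))
```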
checkpoint-100/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
  {
- "best_metric": 0.022206587716937065,
- "best_model_checkpoint": "./Zephyr/28-03-24-Weni-WeniGPT-QA-Zephyr-7B-5.0.0-KTO_WeniGPT Experiment using KTO trainer with no collator, Mixstral model and random system prompt.-2_max_steps-786_batch_32_2024-03-28_ppid_9/checkpoint-100",
+ "best_metric": 0.041092585772275925,
+ "best_model_checkpoint": "./mixstral/01-04-24-Weni-WeniGPT-QA-Mixstral-7B-5.0.0-KTO_WeniGPT Experiment using KTO trainer with no collator, Mixstral model and random system prompt.-2_max_steps-131_batch_32_2024-04-01_ppid_727/checkpoint-100",
  "epoch": 0.7604562737642585,
  "eval_steps": 50,
  "global_step": 100,
@@ -10,102 +10,102 @@
  "log_history": [
  {
  "epoch": 0.15,
- "grad_norm": 3.2390341758728027,
- "kl": 1.4365953207015991,
- "learning_rate": 0.0001666666666666667,
- "logps/chosen": NaN,
- "logps/rejected": NaN,
- "loss": 1.2323,
- "rewards/chosen": NaN,
- "rewards/margins": NaN,
- "rewards/rejected": NaN,
- "step": 20
+ "grad_norm": 1.3443933725357056,
+ "learning_rate": 0.00017480314960629923,
+ "loss": 0.3373,
+ "step": 20,
+ "train/kl": 0.27332180738449097,
+ "train/logps/chosen": -139.77467346191406,
+ "train/logps/rejected": -186.41754150390625,
+ "train/rewards/chosen": 0.8578631281852722,
+ "train/rewards/margins": 2.404827654361725,
+ "train/rewards/rejected": -1.5469645261764526
  },
  {
  "epoch": 0.3,
- "grad_norm": 0.6226775050163269,
- "kl": 0.1346512734889984,
- "learning_rate": 0.00019580052493438322,
- "logps/chosen": NaN,
- "logps/rejected": NaN,
- "loss": 0.1437,
- "rewards/chosen": NaN,
- "rewards/margins": NaN,
- "rewards/rejected": NaN,
- "step": 40
+ "grad_norm": 0.4922572672367096,
+ "learning_rate": 0.00014330708661417323,
+ "loss": 0.0944,
+ "step": 40,
+ "train/kl": 0.05194361135363579,
+ "train/logps/chosen": -141.4928741455078,
+ "train/logps/rejected": -236.62286376953125,
+ "train/rewards/chosen": 2.591707468032837,
+ "train/rewards/margins": 10.605849504470825,
+ "train/rewards/rejected": -8.014142036437988
  },
  {
  "epoch": 0.38,
- "eval_kl": 0.0960722267627716,
- "eval_logps/chosen": -158.278564453125,
- "eval_logps/rejected": -400.1436767578125,
- "eval_loss": 0.028233768418431282,
- "eval_rewards/chosen": 5.2842488288879395,
- "eval_rewards/margins": 25.394336700439453,
- "eval_rewards/rejected": -20.11008644104004,
- "eval_runtime": 214.1148,
- "eval_samples_per_second": 2.232,
- "eval_steps_per_second": 0.56,
+ "eval/kl": 0.05559438839554787,
+ "eval/logps/chosen": -123.70791625976562,
+ "eval/logps/rejected": -280.04022216796875,
+ "eval/rewards/chosen": 3.6042592525482178,
+ "eval/rewards/margins": 15.286608457565308,
+ "eval/rewards/rejected": -11.68234920501709,
+ "eval_loss": 0.05842626839876175,
+ "eval_runtime": 959.2742,
+ "eval_samples_per_second": 0.496,
+ "eval_steps_per_second": 0.124,
  "step": 50
  },
  {
  "epoch": 0.46,
- "grad_norm": 1.9644193649291992,
- "kl": 0.026479745283722878,
- "learning_rate": 0.0001905511811023622,
- "logps/chosen": NaN,
- "logps/rejected": NaN,
- "loss": 0.1405,
- "rewards/chosen": NaN,
- "rewards/margins": NaN,
- "rewards/rejected": NaN,
- "step": 60
+ "grad_norm": 1.1183429956436157,
+ "learning_rate": 0.00011181102362204725,
+ "loss": 0.0543,
+ "step": 60,
+ "train/kl": 0.0,
+ "train/logps/chosen": -123.6012191772461,
+ "train/logps/rejected": -282.85455322265625,
+ "train/rewards/chosen": 3.592074155807495,
+ "train/rewards/margins": 16.24874472618103,
+ "train/rewards/rejected": -12.656670570373535
  },
  {
  "epoch": 0.61,
- "grad_norm": 0.28626948595046997,
- "kl": 0.018679404631257057,
- "learning_rate": 0.00018530183727034121,
- "logps/chosen": NaN,
- "logps/rejected": NaN,
- "loss": 0.1041,
- "rewards/chosen": NaN,
- "rewards/margins": NaN,
- "rewards/rejected": NaN,
- "step": 80
+ "grad_norm": 0.41082897782325745,
+ "learning_rate": 8.031496062992126e-05,
+ "loss": 0.049,
+ "step": 80,
+ "train/kl": 0.0,
+ "train/logps/chosen": -113.79830932617188,
+ "train/logps/rejected": -275.3311767578125,
+ "train/rewards/chosen": 3.7077345848083496,
+ "train/rewards/margins": 15.970922946929932,
+ "train/rewards/rejected": -12.263188362121582
  },
  {
  "epoch": 0.76,
- "grad_norm": 0.0979766920208931,
- "kl": 0.0,
- "learning_rate": 0.00018005249343832022,
- "logps/chosen": NaN,
- "logps/rejected": NaN,
- "loss": 0.0615,
- "rewards/chosen": NaN,
- "rewards/margins": NaN,
- "rewards/rejected": NaN,
- "step": 100
+ "grad_norm": 0.810788094997406,
+ "learning_rate": 4.881889763779528e-05,
+ "loss": 0.0446,
+ "step": 100,
+ "train/kl": 0.02022113837301731,
+ "train/logps/chosen": -120.12004852294922,
+ "train/logps/rejected": -306.6509704589844,
+ "train/rewards/chosen": 3.6804699897766113,
+ "train/rewards/margins": 18.751341342926025,
+ "train/rewards/rejected": -15.070871353149414
  },
  {
  "epoch": 0.76,
- "eval_kl": 0.5886130928993225,
- "eval_logps/chosen": -153.61863708496094,
- "eval_logps/rejected": -383.54296875,
- "eval_loss": 0.022206587716937065,
- "eval_rewards/chosen": 5.750241756439209,
- "eval_rewards/margins": 24.20025062561035,
- "eval_rewards/rejected": -18.450010299682617,
- "eval_runtime": 213.9917,
- "eval_samples_per_second": 2.234,
- "eval_steps_per_second": 0.561,
+ "eval/kl": 0.0,
+ "eval/logps/chosen": -122.67949676513672,
+ "eval/logps/rejected": -363.1288146972656,
+ "eval/rewards/chosen": 3.7070999145507812,
+ "eval/rewards/margins": 23.698307037353516,
+ "eval/rewards/rejected": -19.991207122802734,
+ "eval_loss": 0.041092585772275925,
+ "eval_runtime": 961.5768,
+ "eval_samples_per_second": 0.495,
+ "eval_steps_per_second": 0.124,
  "step": 100
  }
  ],
  "logging_steps": 20,
- "max_steps": 786,
+ "max_steps": 131,
  "num_input_tokens_seen": 0,
- "num_train_epochs": 6,
+ "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 4,
checkpoint-100/training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:2143b63cfb768928620763e12d44a1780f6206ec9bca22cf2aab4ebe0626cbeb
+ oid sha256:9c3a1382759b0a828826f90087526cfc46185b8f0940950b7995edb66ba51fff
  size 5752