Femboyuwu2000 committed on
Commit
e4f8bbb
1 Parent(s): eb8bce9

Training in progress, step 20, checkpoint

Browse files
last-checkpoint/README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
  library_name: peft
3
- base_model: bigscience/bloomz-1b1
4
  ---
5
 
6
  # Model Card for Model ID
 
1
  ---
2
  library_name: peft
3
+ base_model: JackFram/llama-160m
4
  ---
5
 
6
  # Model Card for Model ID
last-checkpoint/adapter_config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "alpha_pattern": {},
3
  "auto_mapping": null,
4
- "base_model_name_or_path": "bigscience/bloomz-1b1",
5
  "bias": "none",
6
  "fan_in_fan_out": false,
7
  "inference_mode": true,
@@ -20,7 +20,15 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
- "query_key_value"
 
 
 
 
 
 
 
 
24
  ],
25
  "task_type": "CAUSAL_LM",
26
  "use_dora": false,
 
1
  {
2
  "alpha_pattern": {},
3
  "auto_mapping": null,
4
+ "base_model_name_or_path": "JackFram/llama-160m",
5
  "bias": "none",
6
  "fan_in_fan_out": false,
7
  "inference_mode": true,
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
+ "up_proj",
24
+ "embed_tokens",
25
+ "v_proj",
26
+ "down_proj",
27
+ "k_proj",
28
+ "lm_head",
29
+ "q_proj",
30
+ "o_proj",
31
+ "gate_proj"
32
  ],
33
  "task_type": "CAUSAL_LM",
34
  "use_dora": false,
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1801d110451c0363b6c96a6beb709896d1e97932e109f064917b003009fc7531
3
- size 4725640
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec6a12bd652862c2785d139bd15d9582616e16bfdf59174630b76670fc9a963e
3
+ size 205511192
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dad47e1bd71fe5d6ce8ab2ddc71e1f433caf30cc0edbeaa8bee27bcbcb73645c
3
- size 2423802
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4a9363a2a673c3fe1089907dfeb0e8180df9ee7402d1e8ff77d6dd7b4aa5650
3
+ size 4644026
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bd42e404833ad689f8f0afea3e690a640621e31bc600a66bf369e5fbdf9e45d8
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed0777ad4342578dcc46ca5f5a6d5fcf48dd8094a61fe1c87b41cb2f83ff1c6a
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:092d025c9fcbd51424bdca7f48bfd52fefca1fab60b5376be0369f85dfec9dd1
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:064312e6375f494574ea3d2e75c4bb8dc97a4b36316db34a10f092589094ee40
3
  size 1064
last-checkpoint/tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3fe0e32d5e685050c0787e7fa781db4fc73bddea1b6da68659010dedc06005b3
3
- size 14500638
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:054b87d156d39458c2c9bc37f19d8dc373128f5545d309d1e58c83187d68113a
3
+ size 1842934
last-checkpoint/tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
+ size 499723
last-checkpoint/tokenizer_config.json CHANGED
@@ -1,5 +1,6 @@
1
  {
2
- "add_prefix_space": false,
 
3
  "added_tokens_decoder": {
4
  "0": {
5
  "content": "<unk>",
@@ -24,22 +25,17 @@
24
  "rstrip": false,
25
  "single_word": false,
26
  "special": true
27
- },
28
- "3": {
29
- "content": "<pad>",
30
- "lstrip": false,
31
- "normalized": false,
32
- "rstrip": false,
33
- "single_word": false,
34
- "special": true
35
  }
36
  },
37
  "bos_token": "<s>",
38
  "clean_up_tokenization_spaces": false,
39
  "eos_token": "</s>",
 
40
  "model_max_length": 1000000000000000019884624838656,
41
  "pad_token": "</s>",
42
  "padding_side": "right",
43
- "tokenizer_class": "BloomTokenizer",
44
- "unk_token": "<unk>"
 
 
45
  }
 
1
  {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
  "added_tokens_decoder": {
5
  "0": {
6
  "content": "<unk>",
 
25
  "rstrip": false,
26
  "single_word": false,
27
  "special": true
 
 
 
 
 
 
 
 
28
  }
29
  },
30
  "bos_token": "<s>",
31
  "clean_up_tokenization_spaces": false,
32
  "eos_token": "</s>",
33
+ "legacy": false,
34
  "model_max_length": 1000000000000000019884624838656,
35
  "pad_token": "</s>",
36
  "padding_side": "right",
37
+ "sp_model_kwargs": {},
38
+ "tokenizer_class": "LlamaTokenizer",
39
+ "unk_token": "<unk>",
40
+ "use_default_system_prompt": false
41
  }
last-checkpoint/trainer_state.json CHANGED
@@ -1,161 +1,28 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.00031130100472399273,
5
  "eval_steps": 500,
6
- "global_step": 400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.0,
13
- "grad_norm": 2.555440902709961,
14
- "learning_rate": 3.3333333333333333e-06,
15
- "loss": 3.6397,
16
  "step": 20
17
- },
18
- {
19
- "epoch": 0.0,
20
- "grad_norm": 2.218903064727783,
21
- "learning_rate": 6.666666666666667e-06,
22
- "loss": 3.6917,
23
- "step": 40
24
- },
25
- {
26
- "epoch": 0.0,
27
- "grad_norm": 0.9262466430664062,
28
- "learning_rate": 1e-05,
29
- "loss": 3.5828,
30
- "step": 60
31
- },
32
- {
33
- "epoch": 0.0,
34
- "grad_norm": 2.782036542892456,
35
- "learning_rate": 1.3166666666666665e-05,
36
- "loss": 3.5865,
37
- "step": 80
38
- },
39
- {
40
- "epoch": 0.0,
41
- "grad_norm": 1.9482054710388184,
42
- "learning_rate": 1.65e-05,
43
- "loss": 3.3337,
44
- "step": 100
45
- },
46
- {
47
- "epoch": 0.0,
48
- "grad_norm": 4.047863006591797,
49
- "learning_rate": 1.9833333333333335e-05,
50
- "loss": 3.1903,
51
- "step": 120
52
- },
53
- {
54
- "epoch": 0.0,
55
- "grad_norm": 3.08722186088562,
56
- "learning_rate": 2.3166666666666666e-05,
57
- "loss": 3.5379,
58
- "step": 140
59
- },
60
- {
61
- "epoch": 0.0,
62
- "grad_norm": 3.540940046310425,
63
- "learning_rate": 2.6500000000000004e-05,
64
- "loss": 3.16,
65
- "step": 160
66
- },
67
- {
68
- "epoch": 0.0,
69
- "grad_norm": 5.391817092895508,
70
- "learning_rate": 2.9833333333333335e-05,
71
- "loss": 3.2489,
72
- "step": 180
73
- },
74
- {
75
- "epoch": 0.0,
76
- "grad_norm": 5.890682220458984,
77
- "learning_rate": 3.316666666666667e-05,
78
- "loss": 3.0499,
79
- "step": 200
80
- },
81
- {
82
- "epoch": 0.0,
83
- "grad_norm": 6.314597129821777,
84
- "learning_rate": 3.65e-05,
85
- "loss": 2.8568,
86
- "step": 220
87
- },
88
- {
89
- "epoch": 0.0,
90
- "grad_norm": 1.0859078168869019,
91
- "learning_rate": 3.983333333333333e-05,
92
- "loss": 2.8566,
93
- "step": 240
94
- },
95
- {
96
- "epoch": 0.0,
97
- "grad_norm": 4.688353538513184,
98
- "learning_rate": 4.316666666666667e-05,
99
- "loss": 3.0079,
100
- "step": 260
101
- },
102
- {
103
- "epoch": 0.0,
104
- "grad_norm": 4.502331256866455,
105
- "learning_rate": 4.6500000000000005e-05,
106
- "loss": 2.6839,
107
- "step": 280
108
- },
109
- {
110
- "epoch": 0.0,
111
- "grad_norm": 8.951983451843262,
112
- "learning_rate": 4.9833333333333336e-05,
113
- "loss": 2.7932,
114
- "step": 300
115
- },
116
- {
117
- "epoch": 0.0,
118
- "grad_norm": 4.788575172424316,
119
- "learning_rate": 4.9999526661182696e-05,
120
- "loss": 2.9341,
121
- "step": 320
122
- },
123
- {
124
- "epoch": 0.0,
125
- "grad_norm": 7.716049671173096,
126
- "learning_rate": 4.999800570348766e-05,
127
- "loss": 2.5987,
128
- "step": 340
129
- },
130
- {
131
- "epoch": 0.0,
132
- "grad_norm": 4.9223952293396,
133
- "learning_rate": 4.9995435879539254e-05,
134
- "loss": 2.7863,
135
- "step": 360
136
- },
137
- {
138
- "epoch": 0.0,
139
- "grad_norm": 7.647037506103516,
140
- "learning_rate": 4.999181729716214e-05,
141
- "loss": 2.6197,
142
- "step": 380
143
- },
144
- {
145
- "epoch": 0.0,
146
- "grad_norm": 1.073474407196045,
147
- "learning_rate": 4.998715010818479e-05,
148
- "loss": 2.6627,
149
- "step": 400
150
  }
151
  ],
152
  "logging_steps": 20,
153
- "max_steps": 10000,
154
  "num_input_tokens_seen": 0,
155
  "num_train_epochs": 1,
156
  "save_steps": 20,
157
- "total_flos": 1824387808739328.0,
158
- "train_batch_size": 1,
159
  "trial_name": null,
160
  "trial_params": null
161
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.0001245204018895971,
5
  "eval_steps": 500,
6
+ "global_step": 20,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.0,
13
+ "grad_norm": NaN,
14
+ "learning_rate": 9.999999999999999e-06,
15
+ "loss": 4.7178,
16
  "step": 20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  }
18
  ],
19
  "logging_steps": 20,
20
+ "max_steps": 2000,
21
  "num_input_tokens_seen": 0,
22
  "num_train_epochs": 1,
23
  "save_steps": 20,
24
+ "total_flos": 297330828518400.0,
25
+ "train_batch_size": 2,
26
  "trial_name": null,
27
  "trial_params": null
28
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:da5d41afb0ddac16c18350b7a14dbb4a1e14941c08946cda5f16f71c0aaf525c
3
  size 4984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70e3a7379ab98397e1c0492218b73c68c0db563ddcb245966b7fd947f7930183
3
  size 4984