nathan0 committed on
Commit
93986ed
·
1 Parent(s): 6adb4cb
README.md DELETED
@@ -1,3 +0,0 @@
1
- ---
2
- license: apache-2.0
3
- ---
 
 
 
 
adapter_config.json DELETED
@@ -1,20 +0,0 @@
1
- {
2
- "auto_mapping": null,
3
- "base_model_name_or_path": "/home/vmagent/app/dataset/mpt-7b",
4
- "bias": "none",
5
- "fan_in_fan_out": false,
6
- "inference_mode": true,
7
- "init_lora_weights": true,
8
- "layers_pattern": null,
9
- "layers_to_transform": null,
10
- "lora_alpha": 16,
11
- "lora_dropout": 0.05,
12
- "modules_to_save": null,
13
- "peft_type": "LORA",
14
- "r": 8,
15
- "revision": null,
16
- "target_modules": [
17
- "Wqkv"
18
- ],
19
- "task_type": "CAUSAL_LM"
20
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
adapter_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:55b11dd03bbca4cc535ff26657feb93f34dbf3a8361c4b2bb6fa98a6e5d2c73d
3
- size 16799089
 
 
 
 
all_results.json DELETED
@@ -1,9 +0,0 @@
1
- {
2
- "epoch": 1.0,
3
- "eval_loss": 1.0701578855514526,
4
- "eval_runtime": 3713.8306,
5
- "eval_samples": 15601,
6
- "eval_samples_per_second": 4.201,
7
- "eval_steps_per_second": 0.525,
8
- "eval_tokens": 1722455
9
- }
 
 
 
 
 
 
 
 
 
 
eval_results.json DELETED
@@ -1,9 +0,0 @@
1
- {
2
- "epoch": 1.0,
3
- "eval_loss": 1.0701578855514526,
4
- "eval_runtime": 3713.8306,
5
- "eval_samples": 15601,
6
- "eval_samples_per_second": 4.201,
7
- "eval_steps_per_second": 0.525,
8
- "eval_tokens": 1722455
9
- }
 
 
 
 
 
 
 
 
 
 
mpt-lora-run-1epoch.log DELETED
@@ -1,159 +0,0 @@
1
- 07/25/2023 13:42:21 - WARNING - __main__ - Process rank: 0, device: cpu
2
- distributed training: True, 16-bits training: True
3
- 07/25/2023 13:42:21 - INFO - __main__ - Training/evaluation parameters TrainingArguments(
4
- _n_gpu=0,
5
- adafactor=False,
6
- adam_beta1=0.9,
7
- adam_beta2=0.999,
8
- adam_epsilon=1e-08,
9
- auto_find_batch_size=False,
10
- bf16=True,
11
- bf16_full_eval=False,
12
- data_seed=None,
13
- dataloader_drop_last=False,
14
- dataloader_num_workers=0,
15
- dataloader_pin_memory=True,
16
- ddp_backend=None,
17
- ddp_broadcast_buffers=None,
18
- ddp_bucket_cap_mb=None,
19
- ddp_find_unused_parameters=None,
20
- ddp_timeout=1800,
21
- debug=[],
22
- deepspeed=None,
23
- disable_tqdm=False,
24
- do_eval=True,
25
- do_predict=False,
26
- do_train=True,
27
- eval_accumulation_steps=None,
28
- eval_delay=0,
29
- eval_steps=None,
30
- evaluation_strategy=no,
31
- fp16=False,
32
- fp16_backend=auto,
33
- fp16_full_eval=False,
34
- fp16_opt_level=O1,
35
- fsdp=[],
36
- fsdp_config={'fsdp_min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False},
37
- fsdp_min_num_params=0,
38
- fsdp_transformer_layer_cls_to_wrap=None,
39
- full_determinism=False,
40
- gradient_accumulation_steps=1,
41
- gradient_checkpointing=False,
42
- greater_is_better=None,
43
- group_by_length=False,
44
- half_precision_backend=auto,
45
- hub_model_id=None,
46
- hub_private_repo=False,
47
- hub_strategy=every_save,
48
- hub_token=<HUB_TOKEN>,
49
- ignore_data_skip=False,
50
- include_inputs_for_metrics=False,
51
- jit_mode_eval=False,
52
- label_names=None,
53
- label_smoothing_factor=0.0,
54
- learning_rate=0.0001,
55
- length_column_name=length,
56
- load_best_model_at_end=False,
57
- local_rank=0,
58
- log_level=info,
59
- log_level_replica=warning,
60
- log_on_each_node=True,
61
- logging_dir=./mpt_peft_finetuned_model/runs/Jul25_13-42-21_a4bf019308e9,
62
- logging_first_step=False,
63
- logging_nan_inf_filter=True,
64
- logging_steps=100,
65
- logging_strategy=steps,
66
- lr_scheduler_type=linear,
67
- max_grad_norm=1.0,
68
- max_steps=-1,
69
- metric_for_best_model=None,
70
- mp_parameters=,
71
- no_cuda=True,
72
- num_train_epochs=1.0,
73
- optim=adamw_hf,
74
- optim_args=None,
75
- output_dir=./mpt_peft_finetuned_model,
76
- overwrite_output_dir=False,
77
- past_index=-1,
78
- per_device_eval_batch_size=8,
79
- per_device_train_batch_size=8,
80
- prediction_loss_only=False,
81
- push_to_hub=False,
82
- push_to_hub_model_id=None,
83
- push_to_hub_organization=None,
84
- push_to_hub_token=<PUSH_TO_HUB_TOKEN>,
85
- ray_scope=last,
86
- remove_unused_columns=True,
87
- report_to=['wandb'],
88
- resume_from_checkpoint=None,
89
- run_name=./mpt_peft_finetuned_model,
90
- save_on_each_node=False,
91
- save_safetensors=False,
92
- save_steps=500,
93
- save_strategy=epoch,
94
- save_total_limit=1,
95
- seed=42,
96
- sharded_ddp=[],
97
- skip_memory_metrics=True,
98
- tf32=None,
99
- torch_compile=False,
100
- torch_compile_backend=None,
101
- torch_compile_mode=None,
102
- torchdynamo=None,
103
- tpu_metrics_debug=False,
104
- tpu_num_cores=None,
105
- use_ipex=False,
106
- use_legacy_prediction_loop=False,
107
- use_mps_device=False,
108
- warmup_ratio=0.0,
109
- warmup_steps=0,
110
- weight_decay=0.0,
111
- xpu_backend=None,
112
- )
113
- 07/25/2023 13:42:21 - WARNING - datasets.builder - Found cached dataset json (/root/.cache/huggingface/datasets/json/default-5bec83249d5e85ed/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96)
114
- 07/25/2023 13:42:22 - WARNING - datasets.builder - Found cached dataset json (/root/.cache/huggingface/datasets/json/default-5bec83249d5e85ed/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96)
115
- 07/25/2023 13:42:22 - WARNING - datasets.builder - Found cached dataset json (/root/.cache/huggingface/datasets/json/default-5bec83249d5e85ed/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96)
116
- You are using config.init_device='cpu', but you can also use config.init_device="meta" with Composer + FSDP for fast initialization.
117
- 07/25/2023 13:42:30 - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /root/.cache/huggingface/datasets/json/default-5bec83249d5e85ed/0.0.0/8bb11242116d547c741b2e8a1f18598ffdd40a1d4f2a2872c7a28b697434bc96/cache-100c7c6f488a1310.arrow
118
- 07/25/2023 13:42:37 - INFO - __main__ - Using data collator of type DataCollatorForSeq2Seq
119
- trainable params: 4,194,304 || all params: 6,653,480,960 || trainable%: 0.0630392425441013
120
- 07/25/2023 13:42:49 - INFO - __main__ - *** Training ***
121
- huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
122
- To disable this warning, you can either:
123
- - Avoid using `tokenizers` before the fork if possible
124
- - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
125
- huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
126
- To disable this warning, you can either:
127
- - Avoid using `tokenizers` before the fork if possible
128
- - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
129
- huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
130
- To disable this warning, you can either:
131
- - Avoid using `tokenizers` before the fork if possible
132
- - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
133
- huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
134
- To disable this warning, you can either:
135
- - Avoid using `tokenizers` before the fork if possible
136
- - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
137
- huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
138
- To disable this warning, you can either:
139
- - Avoid using `tokenizers` before the fork if possible
140
- - Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
141
- {'loss': 1.197, 'learning_rate': 8.984771573604062e-05, 'epoch': 0.1}
142
- {'loss': 1.0836, 'learning_rate': 7.969543147208121e-05, 'epoch': 0.2}
143
- {'loss': 1.0757, 'learning_rate': 6.954314720812183e-05, 'epoch': 0.3}
144
- {'loss': 1.0668, 'learning_rate': 5.939086294416244e-05, 'epoch': 0.41}
145
- {'loss': 1.0582, 'learning_rate': 4.9238578680203045e-05, 'epoch': 0.51}
146
- {'loss': 1.0494, 'learning_rate': 3.9086294416243655e-05, 'epoch': 0.61}
147
- {'loss': 1.0502, 'learning_rate': 2.8934010152284264e-05, 'epoch': 0.71}
148
- {'loss': 1.0407, 'learning_rate': 1.8781725888324874e-05, 'epoch': 0.81}
149
- {'loss': 1.0421, 'learning_rate': 8.629441624365483e-06, 'epoch': 0.91}
150
- {'train_runtime': 34313.0928, 'train_samples_per_second': 0.23, 'train_steps_per_second': 0.029, 'train_loss': 1.0698003004045051, 'epoch': 1.0}
151
- 07/25/2023 23:14:42 - INFO - __main__ - *** Evaluate ***
152
- ***** eval metrics *****
153
- epoch = 1.0
154
- eval_loss = 1.0702
155
- eval_runtime = 1:01:53.83
156
- eval_samples = 15601
157
- eval_samples_per_second = 4.201
158
- eval_steps_per_second = 0.525
159
- eval_tokens = 1722455
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:55b11dd03bbca4cc535ff26657feb93f34dbf3a8361c4b2bb6fa98a6e5d2c73d
3
- size 16799089