Upload model
Browse files- config.json +9 -12
config.json
CHANGED
@@ -1,7 +1,6 @@
|
|
1 |
{
|
2 |
"_frozen": true,
|
3 |
"_n_gpu": 1,
|
4 |
-
"_name_or_path": "/home/wentingz/research/vec2text/vec2text/saves/llama-align-3/checkpoint-125000",
|
5 |
"adafactor": false,
|
6 |
"adam_beta1": 0.9,
|
7 |
"adam_beta2": 0.999,
|
@@ -21,7 +20,7 @@
|
|
21 |
"corrector_model_from_pretrained": null,
|
22 |
"data_seed": null,
|
23 |
"dataloader_drop_last": false,
|
24 |
-
"dataloader_num_workers":
|
25 |
"dataloader_pin_memory": true,
|
26 |
"dataset_name": "one_million_instructions",
|
27 |
"ddp_backend": null,
|
@@ -85,25 +84,25 @@
|
|
85 |
"jit_mode_eval": false,
|
86 |
"label_names": null,
|
87 |
"label_smoothing_factor": 0.0,
|
88 |
-
"learning_rate": 0.
|
89 |
"length_column_name": "length",
|
90 |
"load_best_model_at_end": true,
|
91 |
"local_rank": 0,
|
92 |
"log_level": "passive",
|
93 |
"log_level_replica": "warning",
|
94 |
"log_on_each_node": true,
|
95 |
-
"logging_dir": "saves/llama-align-
|
96 |
"logging_first_step": false,
|
97 |
"logging_nan_inf_filter": true,
|
98 |
"logging_steps": 50,
|
99 |
"logging_strategy": "steps",
|
100 |
-
"lr_scheduler_type": "
|
101 |
"max_eval_samples": 500,
|
102 |
"max_grad_norm": 1.0,
|
103 |
"max_seq_length": 64,
|
104 |
"max_steps": -1,
|
105 |
"metric_for_best_model": "one_million_instructions_loss",
|
106 |
-
"mock_embedder":
|
107 |
"model_name_or_path": "t5-base",
|
108 |
"model_revision": "main",
|
109 |
"mp_parameters": "",
|
@@ -113,7 +112,7 @@
|
|
113 |
"num_train_epochs": 200.0,
|
114 |
"optim": "adamw_torch",
|
115 |
"optim_args": null,
|
116 |
-
"output_dir": "saves/llama-align-
|
117 |
"overwrite_output_dir": false,
|
118 |
"past_index": -1,
|
119 |
"per_device_eval_batch_size": 256,
|
@@ -127,11 +126,9 @@
|
|
127 |
"push_to_hub_token": null,
|
128 |
"ray_scope": "last",
|
129 |
"remove_unused_columns": false,
|
130 |
-
"report_to": [
|
131 |
-
"wandb"
|
132 |
-
],
|
133 |
"resume_from_checkpoint": null,
|
134 |
-
"run_name": "saves/llama-align-
|
135 |
"save_on_each_node": false,
|
136 |
"save_safetensors": true,
|
137 |
"save_steps": 500,
|
@@ -159,7 +156,7 @@
|
|
159 |
"use_less_data": -1,
|
160 |
"use_lora": false,
|
161 |
"use_mps_device": false,
|
162 |
-
"use_wandb":
|
163 |
"warmup_ratio": 0.0,
|
164 |
"warmup_steps": 12500,
|
165 |
"weight_decay": 0.0
|
|
|
1 |
{
|
2 |
"_frozen": true,
|
3 |
"_n_gpu": 1,
|
|
|
4 |
"adafactor": false,
|
5 |
"adam_beta1": 0.9,
|
6 |
"adam_beta2": 0.999,
|
|
|
20 |
"corrector_model_from_pretrained": null,
|
21 |
"data_seed": null,
|
22 |
"dataloader_drop_last": false,
|
23 |
+
"dataloader_num_workers": 0,
|
24 |
"dataloader_pin_memory": true,
|
25 |
"dataset_name": "one_million_instructions",
|
26 |
"ddp_backend": null,
|
|
|
84 |
"jit_mode_eval": false,
|
85 |
"label_names": null,
|
86 |
"label_smoothing_factor": 0.0,
|
87 |
+
"learning_rate": 0.0002,
|
88 |
"length_column_name": "length",
|
89 |
"load_best_model_at_end": true,
|
90 |
"local_rank": 0,
|
91 |
"log_level": "passive",
|
92 |
"log_level_replica": "warning",
|
93 |
"log_on_each_node": true,
|
94 |
+
"logging_dir": "saves/llama-align-4/runs/Nov17_12-43-44_mosaic-cirrascale-37.reviz.ai2.in",
|
95 |
"logging_first_step": false,
|
96 |
"logging_nan_inf_filter": true,
|
97 |
"logging_steps": 50,
|
98 |
"logging_strategy": "steps",
|
99 |
+
"lr_scheduler_type": "linear",
|
100 |
"max_eval_samples": 500,
|
101 |
"max_grad_norm": 1.0,
|
102 |
"max_seq_length": 64,
|
103 |
"max_steps": -1,
|
104 |
"metric_for_best_model": "one_million_instructions_loss",
|
105 |
+
"mock_embedder": false,
|
106 |
"model_name_or_path": "t5-base",
|
107 |
"model_revision": "main",
|
108 |
"mp_parameters": "",
|
|
|
112 |
"num_train_epochs": 200.0,
|
113 |
"optim": "adamw_torch",
|
114 |
"optim_args": null,
|
115 |
+
"output_dir": "saves/llama-align-4",
|
116 |
"overwrite_output_dir": false,
|
117 |
"past_index": -1,
|
118 |
"per_device_eval_batch_size": 256,
|
|
|
126 |
"push_to_hub_token": null,
|
127 |
"ray_scope": "last",
|
128 |
"remove_unused_columns": false,
|
129 |
+
"report_to": [],
|
|
|
|
|
130 |
"resume_from_checkpoint": null,
|
131 |
+
"run_name": "saves/llama-align-4",
|
132 |
"save_on_each_node": false,
|
133 |
"save_safetensors": true,
|
134 |
"save_steps": 500,
|
|
|
156 |
"use_less_data": -1,
|
157 |
"use_lora": false,
|
158 |
"use_mps_device": false,
|
159 |
+
"use_wandb": false,
|
160 |
"warmup_ratio": 0.0,
|
161 |
"warmup_steps": 12500,
|
162 |
"weight_decay": 0.0
|