jxm committed on
Commit
c41cb5d
1 Parent(s): 49a8027

Upload model

Browse files
Files changed (1) hide show
  1. config.json +9 -12
config.json CHANGED
@@ -1,7 +1,6 @@
1
  {
2
  "_frozen": true,
3
  "_n_gpu": 1,
4
- "_name_or_path": "/home/wentingz/research/vec2text/vec2text/saves/llama-align-3/checkpoint-125000",
5
  "adafactor": false,
6
  "adam_beta1": 0.9,
7
  "adam_beta2": 0.999,
@@ -21,7 +20,7 @@
21
  "corrector_model_from_pretrained": null,
22
  "data_seed": null,
23
  "dataloader_drop_last": false,
24
- "dataloader_num_workers": 7,
25
  "dataloader_pin_memory": true,
26
  "dataset_name": "one_million_instructions",
27
  "ddp_backend": null,
@@ -85,25 +84,25 @@
85
  "jit_mode_eval": false,
86
  "label_names": null,
87
  "label_smoothing_factor": 0.0,
88
- "learning_rate": 0.0003,
89
  "length_column_name": "length",
90
  "load_best_model_at_end": true,
91
  "local_rank": 0,
92
  "log_level": "passive",
93
  "log_level_replica": "warning",
94
  "log_on_each_node": true,
95
- "logging_dir": "saves/llama-align-3/runs/Nov16_07-33-02_mosaic-cirrascale-37.reviz.ai2.in",
96
  "logging_first_step": false,
97
  "logging_nan_inf_filter": true,
98
  "logging_steps": 50,
99
  "logging_strategy": "steps",
100
- "lr_scheduler_type": "constant_with_warmup",
101
  "max_eval_samples": 500,
102
  "max_grad_norm": 1.0,
103
  "max_seq_length": 64,
104
  "max_steps": -1,
105
  "metric_for_best_model": "one_million_instructions_loss",
106
- "mock_embedder": true,
107
  "model_name_or_path": "t5-base",
108
  "model_revision": "main",
109
  "mp_parameters": "",
@@ -113,7 +112,7 @@
113
  "num_train_epochs": 200.0,
114
  "optim": "adamw_torch",
115
  "optim_args": null,
116
- "output_dir": "saves/llama-align-3",
117
  "overwrite_output_dir": false,
118
  "past_index": -1,
119
  "per_device_eval_batch_size": 256,
@@ -127,11 +126,9 @@
127
  "push_to_hub_token": null,
128
  "ray_scope": "last",
129
  "remove_unused_columns": false,
130
- "report_to": [
131
- "wandb"
132
- ],
133
  "resume_from_checkpoint": null,
134
- "run_name": "saves/llama-align-3",
135
  "save_on_each_node": false,
136
  "save_safetensors": true,
137
  "save_steps": 500,
@@ -159,7 +156,7 @@
159
  "use_less_data": -1,
160
  "use_lora": false,
161
  "use_mps_device": false,
162
- "use_wandb": true,
163
  "warmup_ratio": 0.0,
164
  "warmup_steps": 12500,
165
  "weight_decay": 0.0
 
1
  {
2
  "_frozen": true,
3
  "_n_gpu": 1,
 
4
  "adafactor": false,
5
  "adam_beta1": 0.9,
6
  "adam_beta2": 0.999,
 
20
  "corrector_model_from_pretrained": null,
21
  "data_seed": null,
22
  "dataloader_drop_last": false,
23
+ "dataloader_num_workers": 0,
24
  "dataloader_pin_memory": true,
25
  "dataset_name": "one_million_instructions",
26
  "ddp_backend": null,
 
84
  "jit_mode_eval": false,
85
  "label_names": null,
86
  "label_smoothing_factor": 0.0,
87
+ "learning_rate": 0.0002,
88
  "length_column_name": "length",
89
  "load_best_model_at_end": true,
90
  "local_rank": 0,
91
  "log_level": "passive",
92
  "log_level_replica": "warning",
93
  "log_on_each_node": true,
94
+ "logging_dir": "saves/llama-align-4/runs/Nov17_12-43-44_mosaic-cirrascale-37.reviz.ai2.in",
95
  "logging_first_step": false,
96
  "logging_nan_inf_filter": true,
97
  "logging_steps": 50,
98
  "logging_strategy": "steps",
99
+ "lr_scheduler_type": "linear",
100
  "max_eval_samples": 500,
101
  "max_grad_norm": 1.0,
102
  "max_seq_length": 64,
103
  "max_steps": -1,
104
  "metric_for_best_model": "one_million_instructions_loss",
105
+ "mock_embedder": false,
106
  "model_name_or_path": "t5-base",
107
  "model_revision": "main",
108
  "mp_parameters": "",
 
112
  "num_train_epochs": 200.0,
113
  "optim": "adamw_torch",
114
  "optim_args": null,
115
+ "output_dir": "saves/llama-align-4",
116
  "overwrite_output_dir": false,
117
  "past_index": -1,
118
  "per_device_eval_batch_size": 256,
 
126
  "push_to_hub_token": null,
127
  "ray_scope": "last",
128
  "remove_unused_columns": false,
129
+ "report_to": [],
 
 
130
  "resume_from_checkpoint": null,
131
+ "run_name": "saves/llama-align-4",
132
  "save_on_each_node": false,
133
  "save_safetensors": true,
134
  "save_steps": 500,
 
156
  "use_less_data": -1,
157
  "use_lora": false,
158
  "use_mps_device": false,
159
+ "use_wandb": false,
160
  "warmup_ratio": 0.0,
161
  "warmup_steps": 12500,
162
  "weight_decay": 0.0