Commit
·
30575fe
1
Parent(s):
91e8d4a
Update README.md
Browse files
README.md
CHANGED
@@ -151,21 +151,20 @@ This was necessary due to the maximum input token limit accepted by the RoBERTa-
|
|
151 |
- **Training regime:** fp32
|
152 |
- **base_model_name_or_path:** roberta-base
|
153 |
- **max_tokens_length:** 512
|
154 |
-
- **weighted_loss** true
|
155 |
- **training_arguments:** TrainingArguments(
|
156 |
output_dir=results_dir,
|
157 |
num_train_epochs=5,
|
158 |
per_device_train_batch_size=8,
|
159 |
per_device_eval_batch_size=8,
|
160 |
gradient_accumulation_steps=1,
|
161 |
-
learning_rate=0.
|
162 |
lr_scheduler_type="linear",
|
163 |
optim="adamw_torch",
|
164 |
eval_accumulation_steps=1,
|
165 |
evaluation_strategy="steps",
|
166 |
-
eval_steps=0.
|
167 |
save_strategy="steps",
|
168 |
-
save_steps=0.
|
169 |
logging_strategy="steps",
|
170 |
logging_steps=1,
|
171 |
report_to="tensorboard",
|
@@ -173,7 +172,7 @@ This was necessary due to the maximum input token limit accepted by the RoBERTa-
|
|
173 |
do_eval=True,
|
174 |
max_grad_norm=0.3,
|
175 |
warmup_ratio=0.03,
|
176 |
-
group_by_length=True,
|
177 |
dataloader_drop_last=False,
|
178 |
fp16=False,
|
179 |
bf16=False
|
|
|
151 |
- **Training regime:** fp32
|
152 |
- **base_model_name_or_path:** roberta-base
|
153 |
- **max_tokens_length:** 512
|
|
|
154 |
- **training_arguments:** TrainingArguments(
|
155 |
output_dir=results_dir,
|
156 |
num_train_epochs=5,
|
157 |
per_device_train_batch_size=8,
|
158 |
per_device_eval_batch_size=8,
|
159 |
gradient_accumulation_steps=1,
|
160 |
+
learning_rate=0.00001,
|
161 |
lr_scheduler_type="linear",
|
162 |
optim="adamw_torch",
|
163 |
eval_accumulation_steps=1,
|
164 |
evaluation_strategy="steps",
|
165 |
+
eval_steps=0.2,
|
166 |
save_strategy="steps",
|
167 |
+
save_steps=0.2,
|
168 |
logging_strategy="steps",
|
169 |
logging_steps=1,
|
170 |
report_to="tensorboard",
|
|
|
172 |
do_eval=True,
|
173 |
max_grad_norm=0.3,
|
174 |
warmup_ratio=0.03,
|
175 |
+
#group_by_length=True,
|
176 |
dataloader_drop_last=False,
|
177 |
fp16=False,
|
178 |
bf16=False
|