yhavinga committed
Commit: a1d2f2c
Parent: 565e5fc

Saving weights and logs of step 2000

flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bfe1b36176c2200bf1451814194ad46ad5a6cde61cfaece83b99f3b30a8b8634
+oid sha256:02c8aedd34c528d3a7806d216941cc23732a751a8d687f8bf1db06eb1e1e75a3
 size 891548548
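
Only the LFS pointer is versioned in git; the ~890 MB weights blob itself lives in LFS storage. A minimal, standard-library sketch for checking that a locally downloaded flax_model.msgpack matches the updated pointer (the expected digest is copied from the diff above; the local file path is an assumption):

import hashlib

# Expected object id from the updated LFS pointer above.
EXPECTED_OID = "02c8aedd34c528d3a7806d216941cc23732a751a8d687f8bf1db06eb1e1e75a3"

def sha256_of(path, chunk_size=1 << 20):
    """Hash the file in 1 MiB chunks so the ~890 MB checkpoint is never fully in memory."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

if __name__ == "__main__":
    actual = sha256_of("flax_model.msgpack")  # assumed local download path
    print("OK" if actual == EXPECTED_OID else "mismatch: " + actual)
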
run_t5.sh CHANGED
@@ -25,7 +25,7 @@ mkdir -p "${MODEL_DIR}/runs"
   --logging_steps="50" \
   --save_steps="2000" \
   --eval_steps="10000000" \
-  --resume_from_checkpoint="${MODEL_DIR}/ckpt-16000" \
+  --resume_from_checkpoint="${MODEL_DIR}/ckpt-18000" \
   --warmup_steps="3413" \
   --push_to_hub
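
The only change to the launch script is advancing the resume checkpoint from ckpt-16000 to ckpt-18000, i.e. one save interval further (save_steps="2000"). A hedged sketch of how that value could be derived from the newest ckpt-* directory instead of hand-editing the script before each restart; latest_checkpoint is a hypothetical helper, not something present in this repo:

# Hypothetical helper (not part of this repo): pick the newest ckpt-<step>
# directory under MODEL_DIR so --resume_from_checkpoint never needs a manual bump.
from pathlib import Path
from typing import Optional

def latest_checkpoint(model_dir: str) -> Optional[Path]:
    candidates = [
        p for p in Path(model_dir).glob("ckpt-*")
        if p.is_dir() and p.name.split("-")[-1].isdigit()
    ]
    return max(candidates, key=lambda p: int(p.name.split("-")[-1]), default=None)

# After the save recorded in this commit, this would point at .../ckpt-18000.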
 
run_t5_mlm_flax_custom_dataset.py CHANGED
@@ -580,7 +580,7 @@ if __name__ == "__main__":
 
     train, val = train_val_files()
 
-    load_grouped = False
+    load_grouped = True
 
     if not load_grouped:
         datasets = load_dataset('json', data_files={'train': train, 'validation': val})
@@ -899,8 +899,8 @@ if __name__ == "__main__":
         for step, batch_idx in enumerate(tqdm(train_batch_idx, desc="Training...", position=1)):
             cur_step = epoch * (num_train_samples // train_batch_size) + step
             # skip to the step from which we are resuming
-            if cur_step < resume_step:
-                continue
+            # if cur_step < resume_step:
+            #     continue
 
             samples = [tokenized_datasets["train"][int(idx)] for idx in batch_idx]
             model_inputs = data_collator(samples)
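
Two changes land in the training script: load_grouped is flipped to True, so the "if not load_grouped:" branch that loads the raw json files with load_dataset is skipped (presumably in favour of data that has already been grouped), and the per-step fast-forward used when resuming is commented out. For reference, a minimal self-contained sketch of that fast-forward pattern as it stood before this commit; the names mirror the diff and the training-step body is a stub:

from tqdm import tqdm

def run_training_step(batch_idx):
    # Stand-in for the real data collator + parallel train step in the script.
    pass

def train_one_epoch(train_batch_idx, epoch, num_train_samples, train_batch_size, resume_step=0):
    steps_per_epoch = num_train_samples // train_batch_size
    for step, batch_idx in enumerate(tqdm(train_batch_idx, desc="Training...", position=1)):
        cur_step = epoch * steps_per_epoch + step
        if cur_step < resume_step:
            continue  # replay the batch order but do no work until the resume step is reached
        run_training_step(batch_idx)
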
runs/Jul11_12-53-41_t1v-n-0e7426e8-w-0/events.out.tfevents.1626008983.t1v-n-0e7426e8-w-0.161493.3.v2 CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ba46ea33a14787fe4fc81fe586551875621a7f46301e0a680643702851773220
-size 300067
+oid sha256:03ddeed93b5615c1239be282f05bf781971c8a799be72c9bebc4de1d596fbd63
+size 585827
runs/Jul11_17-06-36_t1v-n-0e7426e8-w-0/events.out.tfevents.1626023202.t1v-n-0e7426e8-w-0.178001.3.v2 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0b89824cdb72fe97627209c68074b163e725d00349a36ed38b233e7d579e1b92
+size 296685
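
The events.out.tfevents.* files are TensorBoard event logs written on the TPU VM host t1v-n-0e7426e8-w-0, and like the model weights they are stored as LFS pointers, so a git lfs pull (or an equivalent download) is needed before they contain real event data. A minimal sketch for inspecting one run offline with TensorBoard's EventAccumulator; the run directory is taken from this commit, while the scalar tag name depends on what the training script logs and is only an assumption:

from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

# Run directory added in this commit.
run_dir = "runs/Jul11_17-06-36_t1v-n-0e7426e8-w-0"

acc = EventAccumulator(run_dir)
acc.Reload()  # parse every events.out.tfevents.* file in the directory

# List the scalar tags that were actually logged before picking one.
print(acc.Tags()["scalars"])

# Dump one tag's history; "train_loss" is an assumed name, use a tag printed above.
for event in acc.Scalars("train_loss"):
    print(event.step, event.value)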