supawichwac committed on
Commit
97087bc
1 Parent(s): fcace10

Saving train state of step 5

Browse files
distil-whisper/events.out.tfevents.1715170439.server02.1907732.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f244db9e60fe4f96d3efe5ffc86d99cdf07af033d92613ce0ebfc2522073a140
3
+ size 392
run_distillation.py CHANGED
@@ -1567,6 +1567,7 @@ def main():
1567
  train_dataloader = accelerator.skip_first_batches(train_dataloader, resume_step)
1568
  resume_step = None
1569
 
 
1570
  for batch in train_dataloader:
1571
  with accelerator.accumulate(student_model):
1572
  loss, train_metric = train_step(batch, temperature=training_args.temperature)
@@ -1615,29 +1616,34 @@ def main():
1615
  )
1616
 
1617
  if training_args.do_eval and (cur_step % eval_steps == 0 or cur_step == total_train_steps):
 
1618
  train_time += time.time() - train_start
1619
  student_model.eval()
 
1620
  # ======================== Evaluating ==============================
 
1621
  for eval_split in all_eval_splits:
1622
  eval_metrics = []
1623
  eval_preds = []
1624
  eval_labels = []
1625
  eval_start = time.time()
1626
 
1627
- validation_dataloader = DataLoader(
1628
- vectorized_datasets[eval_split],
1629
- collate_fn=data_collator,
1630
- batch_size=per_device_eval_batch_size,
1631
- drop_last=False,
1632
- num_workers=dataloader_num_workers,
1633
- prefetch_factor=prefetch_factor,
1634
  pin_memory=training_args.dataloader_pin_memory,
1635
  )
1636
- validation_dataloader = accelerator.prepare(validation_dataloader)
1637
 
 
 
 
1638
  for batch in tqdm(
1639
  validation_dataloader,
1640
- desc=f"Evaluating {eval_split}...",
1641
  position=2,
1642
  disable=not accelerator.is_local_main_process,
1643
  ):
@@ -1648,6 +1654,7 @@ def main():
1648
 
1649
  # generation
1650
  if training_args.predict_with_generate:
 
1651
  generated_ids = generate_step(batch)
1652
  # Gather all predictions and targets
1653
  generated_ids, labels = accelerator.gather_for_metrics(
 
1567
  train_dataloader = accelerator.skip_first_batches(train_dataloader, resume_step)
1568
  resume_step = None
1569
 
1570
+
1571
  for batch in train_dataloader:
1572
  with accelerator.accumulate(student_model):
1573
  loss, train_metric = train_step(batch, temperature=training_args.temperature)
 
1616
  )
1617
 
1618
  if training_args.do_eval and (cur_step % eval_steps == 0 or cur_step == total_train_steps):
1619
+ print("evaluating dsakdlaskdfl;skl;afksdl;fdasl;fkdl;askfl;asdkfldskfl;das")
1620
  train_time += time.time() - train_start
1621
  student_model.eval()
1622
+
1623
  # ======================== Evaluating ==============================
1624
+
1625
  for eval_split in all_eval_splits:
1626
  eval_metrics = []
1627
  eval_preds = []
1628
  eval_labels = []
1629
  eval_start = time.time()
1630
 
1631
+ validation_dataloader = DataLoader(
1632
+ vectorized_datasets[eval_split],
1633
+ collate_fn=data_collator,
1634
+ batch_size=per_device_eval_batch_size,
1635
+ drop_last=False,
1636
+ num_workers=dataloader_num_workers,
1637
+ prefetch_factor=prefetch_factor,
1638
  pin_memory=training_args.dataloader_pin_memory,
1639
  )
 
1640
 
1641
+
1642
+ validation_dataloader = accelerator.prepare(validation_dataloader)
1643
+
1644
  for batch in tqdm(
1645
  validation_dataloader,
1646
+ desc=f"Evaluating {eval_split}...",
1647
  position=2,
1648
  disable=not accelerator.is_local_main_process,
1649
  ):
 
1654
 
1655
  # generation
1656
  if training_args.predict_with_generate:
1657
+
1658
  generated_ids = generate_step(batch)
1659
  # Gather all predictions and targets
1660
  generated_ids, labels = accelerator.gather_for_metrics(