marinone94 committed
Commit ed53c37
1 Parent(s): 2786d7b
run_speech_recognition_seq2seq_streaming.py CHANGED
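The change is mechanical: each numbered stage of main() gains a logger.info("*** ... ***") progress marker, so a long (often streaming) training run reports which stage it has reached. Note that the first two markers fire before logging.basicConfig is called in step 2, so with Python's default logging configuration they are likely swallowed (until handlers are attached, only WARNING and above reach the console). The calls assume a module-level logger; a minimal sketch of the setup they rely on, assuming the standard Transformers example boilerplate (the handler and level lines below are assumptions, only format and datefmt appear in this diff):

import logging
import sys

# Module-level logger that the new progress markers write to.
logger = logging.getLogger(__name__)

# Configured inside main() (step 2 in the diff).
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
    datefmt="%m/%d/%Y %H:%M:%S",
    handlers=[logging.StreamHandler(sys.stdout)],
)
logger.setLevel(logging.INFO)  # the stock script derives the level from the training arguments

# A marker then renders roughly as:
# 12/21/2022 09:15:03 - INFO - __main__ - *** Load dataset ***
logger.info("*** Load dataset ***")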
@@ -337,6 +337,7 @@ def main():
     # See all possible arguments in src/transformers/training_args.py
     # or by passing the --help flag to this script.
     # We now keep distinct sets of args, for a cleaner separation of concerns.
+    logger.info("*** Parse args ***")
     parser = HfArgumentParser((ModelArguments, DataTrainingArguments, Seq2SeqTrainingArguments))
 
     if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
@@ -351,6 +352,7 @@ def main():
     send_example_telemetry("run_speech_recognition_seq2seq_streaming", model_args, data_args)
 
     # 2. Setup logging
+    logger.info("*** Setup logging ***")
     logging.basicConfig(
         format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
         datefmt="%m/%d/%Y %H:%M:%S",
@@ -396,6 +398,7 @@ def main():
     set_seed(training_args.seed)
 
     # 4. Load dataset
+    logger.info("*** Load dataset ***")
     raw_datasets = IterableDatasetDict() if data_args.streaming else DatasetDict()
 
     if training_args.do_train:
@@ -433,7 +436,7 @@ def main():
     )
 
     # 5. Load pretrained model, tokenizer, and feature extractor
-    #
+    logger.info("*** Load pretrained model, tokenizer, and feature extractor ***")
     # Distributed training:
     # The .from_pretrained methods guarantee that only one local process can concurrently
     config = AutoConfig.from_pretrained(
@@ -483,6 +486,7 @@ def main():
     tokenizer.set_prefix_tokens(language=data_args.language, task=data_args.task)
 
     # 6. Resample speech dataset if necessary
+    logger.info("*** Resample dataset ***")
     dataset_sampling_rate = next(iter(raw_datasets.values())).features[data_args.audio_column_name].sampling_rate
     if dataset_sampling_rate != feature_extractor.sampling_rate:
         raw_datasets = raw_datasets.cast_column(
@@ -491,6 +495,7 @@ def main():
 
     # 7. Preprocessing the datasets.
     # We need to read the audio files as arrays and tokenize the targets.
+    logger.info("*** Preprocess dataset ***")
     max_input_length = data_args.max_duration_in_seconds * feature_extractor.sampling_rate
     min_input_length = data_args.min_duration_in_seconds * feature_extractor.sampling_rate
     audio_column_name = data_args.audio_column_name
@@ -554,6 +559,7 @@ def main():
     )
 
     # 8. Load Metric
+    logger.info("*** Load metric ***")
     metric = evaluate.load("wer")
     do_normalize_eval = data_args.do_normalize_eval
 
@@ -578,6 +584,7 @@ def main():
         return {"wer": wer}
 
     # 9. Create a single speech processor
+    logger.info("*** Init processor ***")
     if is_main_process(training_args.local_rank):
         # save feature extractor, tokenizer and config
         feature_extractor.save_pretrained(training_args.output_dir)
@@ -595,6 +602,7 @@ def main():
     # 11. Configure Trainer
     # Trainer callback to reinitialise and reshuffle the streamable datasets at the beginning of each epoch
     # Only required for streaming: Trainer automatically shuffles non-streaming datasets
+    logger.info("*** Set shuffle callback ***")
     class ShuffleCallback(TrainerCallback):
         def on_epoch_begin(self, args, state, control, train_dataloader, **kwargs):
             if isinstance(train_dataloader.dataset, IterableDatasetShard):
@@ -603,6 +611,7 @@ def main():
                 train_dataloader.dataset.set_epoch(train_dataloader.dataset._epoch + 1)
 
     # Initialize Trainer
+    logger.info("*** Init trainer ***")
     trainer = Seq2SeqTrainer(
         model=model,
         args=training_args,
@@ -616,6 +625,7 @@ def main():
 
     # 12. Training
     if training_args.do_train:
+        logger.info("*** Train ***")
         checkpoint = None
         if training_args.resume_from_checkpoint is not None:
             checkpoint = training_args.resume_from_checkpoint
@@ -663,14 +673,18 @@ def main():
     if model_args.model_index_name is not None:
         kwargs["model_name"] = model_args.model_index_name
 
+    logger.info("*** Pushing to hub ***")
     if training_args.push_to_hub:
         trainer.push_to_hub(**kwargs)
     else:
         trainer.create_model_card(**kwargs)
 
     # Training complete notification
+    logger.info("*** Sending notification ***")
     notify_me(recipient="marinone94@gmail.com", message=json.dumps(kwargs, indent=4))
 
+    logger.info("*** Training complete!!! ***")
+
     return results
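notify_me is a helper local to this repository, not a Transformers API; its definition sits outside the diff, and only its call site, notify_me(recipient=..., message=...), is visible here. A hypothetical sketch of such a helper over plain SMTP, where SMTP_HOST, SMTP_USER, and SMTP_PASSWORD are made-up environment variables and nothing below is taken from the actual script:

import os
import smtplib
from email.message import EmailMessage

def notify_me(recipient: str, message: str = "") -> None:
    # Sketch only: the real helper in this repo may deliver the
    # notification entirely differently.
    msg = EmailMessage()
    msg["Subject"] = "Training run complete"
    msg["From"] = os.environ["SMTP_USER"]
    msg["To"] = recipient
    msg.set_content(message)
    with smtplib.SMTP_SSL(os.environ["SMTP_HOST"], 465) as server:
        server.login(os.environ["SMTP_USER"], os.environ["SMTP_PASSWORD"])
        server.send_message(msg)

The script passes json.dumps(kwargs, indent=4) as the message, i.e. the model-card metadata (dataset, language, tags, model name) assembled just above the call, so the notification doubles as a summary of the finished run.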