marinone94 committed
Commit fd7be5b
1 Parent(s): 2affbb8

get token from creds if not set in venv

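The change below falls back to a local creds.txt of KEY=VALUE lines when HF_TOKEN is not set in the environment. As a standalone illustration of that pattern, here is a minimal sketch; the helper name read_token_from_creds and the extra hardening (maxsplit=1, skipping blank or malformed lines) are mine, not part of the commit:

import os

def read_token_from_creds(path="./creds.txt", key="HF_TOKEN"):
    """Return the value for `key` from a KEY=VALUE creds file, or None."""
    if not os.path.exists(path):
        return None
    with open(path, "r") as f:
        for line in f:
            line = line.strip()
            if not line or "=" not in line:
                continue  # skip blank and malformed lines
            k, v = line.split("=", 1)  # maxsplit=1 tolerates '=' inside the value
            if k == key:
                return v.strip()
    return None

hf_token = os.environ.get("HF_TOKEN") or read_token_from_creds()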
run_speech_recognition_seq2seq_streaming.py CHANGED
@@ -63,6 +63,14 @@ logger = logging.getLogger(__name__)
 
 wandb_token = os.environ.get("WANDB_TOKEN", "None")
 hf_token = os.environ.get("HF_TOKEN", None)
+if hf_token is None and os.path.exists("./creds.txt"):
+    with open("./creds.txt", "r") as f:
+        lines = f.readlines()
+    for line in lines:
+        key, value = line.split("=")
+        if key == "HF_TOKEN":
+            hf_token = value.strip()
+
 if hf_token is not None:
     os.makedirs("/root/.huggingface", exist_ok=True)
     with open("/root/.huggingface/token", "w") as f:
@@ -348,7 +356,7 @@ def main():
     model_args, data_args, training_args = parser.parse_args_into_dataclasses()
     training_args.do_train = True
     training_args.do_eval = True
-
+
     # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
     # information sent is the one passed as arguments along with your Python/PyTorch versions.
     send_example_telemetry("run_speech_recognition_seq2seq_streaming", model_args, data_args)
@@ -624,6 +632,7 @@ def main():
         compute_metrics=compute_metrics if training_args.predict_with_generate else None,
         callbacks=[ShuffleCallback()] if data_args.streaming else None,
     )
+    logger.info("*** Trainer initialized ***")
 
     # 12. Training
     if training_args.do_train:
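The script then writes the token to /root/.huggingface/token by hand so later Hub calls can find it. For reference, huggingface_hub ships a helper that persists the token to its default location; a sketch of an equivalent, assuming huggingface_hub is installed (the commit itself does not use it):

from huggingface_hub import HfFolder

if hf_token is not None:
    # Persist the token to huggingface_hub's default token path, the
    # same effect the script achieves by writing the file manually.
    HfFolder.save_token(hf_token)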
test_run.sh CHANGED
@@ -3,7 +3,7 @@ python $1run_speech_recognition_seq2seq_streaming.py \
   --dataset_name="mozilla-foundation/common_voice_11_0" \
   --dataset_config_name="sv-SE" \
   --language="swedish" \
-  --train_split_name="train" \
+  --train_split_name="train+validation" \
   --eval_split_name="test" \
   --model_index_name="Whisper Tiny Swedish" \
   --max_train_samples="64" \
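The new train split, "train+validation", relies on 🤗 Datasets split concatenation, so training draws on both the Common Voice train and validation splits while the test split stays held out for eval. A sketch of what that split string resolves to in non-streaming mode (streaming scripts from the Whisper event typically split on "+" and interleave the parts themselves); Common Voice 11 is gated on the Hub, so a token is required, and the use_auth_token flag matches the datasets versions current at the time of this commit:

from datasets import load_dataset

# "train+validation" concatenates the two splits into one dataset.
cv_sv = load_dataset(
    "mozilla-foundation/common_voice_11_0",
    "sv-SE",
    split="train+validation",
    use_auth_token=True,
)
print(len(cv_sv))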