marinone94
committed on
Commit
•
124e0f4
1
Parent(s):
e07fd66
test model card
Browse files
run_speech_recognition_seq2seq_streaming.py
CHANGED
@@ -834,13 +834,13 @@ def main():
|
|
834 |
"tags": "whisper-event",
|
835 |
}
|
836 |
if data_args.dataset_train_name is not None:
|
837 |
-
|
838 |
-
kwargs["dataset_tags"] =
|
839 |
if data_args.dataset_train_config_name is not None:
|
840 |
-
|
841 |
-
kwargs["dataset"] = f"{
|
842 |
else:
|
843 |
-
kwargs["dataset"] =
|
844 |
# if "common_voice" in data_args.dataset_name:
|
845 |
# kwargs["language"] = data_args.dataset_config_name[:2]
|
846 |
if data_args.language_train is not None:
|
@@ -849,24 +849,35 @@ def main():
|
|
849 |
if model_args.model_index_name is not None:
|
850 |
kwargs["model_name"] = model_args.model_index_name
|
851 |
|
|
|
|
|
|
|
852 |
# Training complete notification
|
|
|
853 |
logger.info(SENDING_NOTIFICATION)
|
854 |
-
|
855 |
-
|
856 |
-
|
|
|
|
|
|
|
|
|
857 |
|
858 |
if training_args.push_to_hub:
|
859 |
logger.info("*** Pushing to hub ***")
|
860 |
trainer.push_to_hub(**kwargs)
|
861 |
logger.info("*** Pushed to hub ***")
|
862 |
logger.info(SENDING_NOTIFICATION)
|
863 |
-
notify_me(recipient=RECIPIENT_ADDRESS, message="Model pushed to hub")
|
864 |
else:
|
865 |
logger.info("*** Creating model card ***")
|
866 |
trainer.create_model_card(**kwargs)
|
867 |
logger.info("*** Model card created ***")
|
868 |
logger.info(SENDING_NOTIFICATION)
|
869 |
-
|
|
|
|
|
|
|
|
|
870 |
|
871 |
return results
|
872 |
|
|
|
834 |
"tags": "whisper-event",
|
835 |
}
|
836 |
if data_args.dataset_train_name is not None:
|
837 |
+
dataset_names = list(data_args.dataset_train_name.split(","))
|
838 |
+
kwargs["dataset_tags"] = dataset_names
|
839 |
if data_args.dataset_train_config_name is not None:
|
840 |
+
dataset_config_names = list(data_args.dataset_train_config_name.split(","))
|
841 |
+
kwargs["dataset"] = [f"{ds_name} {ds_cfg_name}" for ds_name, ds_cfg_name in zip(dataset_names, dataset_config_names)]
|
842 |
else:
|
843 |
+
kwargs["dataset"] = dataset_names
|
844 |
# if "common_voice" in data_args.dataset_name:
|
845 |
# kwargs["language"] = data_args.dataset_config_name[:2]
|
846 |
if data_args.language_train is not None:
|
|
|
849 |
if model_args.model_index_name is not None:
|
850 |
kwargs["model_name"] = model_args.model_index_name
|
851 |
|
852 |
+
logger.info("*** Training stats written ***")
|
853 |
+
logger.info(json.dumps(kwargs, indent=4))
|
854 |
+
|
855 |
# Training complete notification
|
856 |
+
logger.info("*** Training and eval complete ***")
|
857 |
logger.info(SENDING_NOTIFICATION)
|
858 |
+
with open(os.path.join(training_args.output_dir, "train_results.json"), "r") as f:
|
859 |
+
train_results = json.load(f)
|
860 |
+
with open(os.path.join(training_args.output_dir, "eval_results.json"), "r") as f:
|
861 |
+
eval_results = json.load(f)
|
862 |
+
notify_me(recipient=RECIPIENT_ADDRESS,
|
863 |
+
message=f"Training complete! {train_results = } {eval_results = }")
|
864 |
+
|
865 |
|
866 |
if training_args.push_to_hub:
|
867 |
logger.info("*** Pushing to hub ***")
|
868 |
trainer.push_to_hub(**kwargs)
|
869 |
logger.info("*** Pushed to hub ***")
|
870 |
logger.info(SENDING_NOTIFICATION)
|
|
|
871 |
else:
|
872 |
logger.info("*** Creating model card ***")
|
873 |
trainer.create_model_card(**kwargs)
|
874 |
logger.info("*** Model card created ***")
|
875 |
logger.info(SENDING_NOTIFICATION)
|
876 |
+
|
877 |
+
with open(os.path.join(training_args.output_dir, "README.md"), "r") as f:
|
878 |
+
readme = f.read()
|
879 |
+
notify_me(recipient=RECIPIENT_ADDRESS,
|
880 |
+
message=f"Model pushed to hub! {readme = }")
|
881 |
|
882 |
return results
|
883 |
|
test_run_nordic.sh
CHANGED
@@ -11,10 +11,10 @@ python $1run_speech_recognition_seq2seq_streaming.py \
|
|
11 |
--model_index_name="Whisper Tiny Nordic" \
|
12 |
--max_train_samples="64" \
|
13 |
--max_eval_samples="32" \
|
14 |
-
--max_steps="
|
15 |
--output_dir="./" \
|
16 |
-
--per_device_train_batch_size="
|
17 |
-
--per_device_eval_batch_size="
|
18 |
--logging_steps="25" \
|
19 |
--learning_rate="1e-5" \
|
20 |
--warmup_steps="500" \
|
@@ -35,8 +35,8 @@ python $1run_speech_recognition_seq2seq_streaming.py \
|
|
35 |
--overwrite_output_dir \
|
36 |
--do_train \
|
37 |
--do_eval \
|
|
|
38 |
--predict_with_generate \
|
39 |
--do_normalize_eval \
|
40 |
--streaming \
|
41 |
-
--use_auth_token
|
42 |
-
--push_to_hub
|
|
|
11 |
--model_index_name="Whisper Tiny Nordic" \
|
12 |
--max_train_samples="64" \
|
13 |
--max_eval_samples="32" \
|
14 |
+
--max_steps="1" \
|
15 |
--output_dir="./" \
|
16 |
+
--per_device_train_batch_size="1" \
|
17 |
+
--per_device_eval_batch_size="1" \
|
18 |
--logging_steps="25" \
|
19 |
--learning_rate="1e-5" \
|
20 |
--warmup_steps="500" \
|
|
|
35 |
--overwrite_output_dir \
|
36 |
--do_train \
|
37 |
--do_eval \
|
38 |
+
--fp16 \
|
39 |
--predict_with_generate \
|
40 |
--do_normalize_eval \
|
41 |
--streaming \
|
42 |
+
--use_auth_token
|
|