marinone94 committed
Commit fd7be5b
1 Parent(s): 2affbb8

get token from creds if not set in venv

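The change below falls back to a local creds.txt of KEY=VALUE lines when HF_TOKEN is not set in the environment. As a standalone illustration of that pattern, here is a minimal sketch; the helper name read_token_from_creds and the extra hardening (maxsplit=1, skipping blank or malformed lines) are mine, not part of the commit:

import os

def read_token_from_creds(path="./creds.txt", key="HF_TOKEN"):
    """Return the value for `key` from a KEY=VALUE creds file, or None."""
    if not os.path.exists(path):
        return None
    with open(path, "r") as f:
        for line in f:
            line = line.strip()
            if not line or "=" not in line:
                continue  # skip blank and malformed lines
            k, v = line.split("=", 1)  # maxsplit=1 tolerates '=' inside the value
            if k == key:
                return v.strip()
    return None

hf_token = os.environ.get("HF_TOKEN") or read_token_from_creds()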
run_speech_recognition_seq2seq_streaming.py CHANGED
@@ -63,6 +63,14 @@ logger = logging.getLogger(__name__)
 
 wandb_token = os.environ.get("WANDB_TOKEN", "None")
 hf_token = os.environ.get("HF_TOKEN", None)
+if hf_token is None and os.path.exists("./creds.txt"):
+    with open("./creds.txt", "r") as f:
+        lines = f.readlines()
+    for line in lines:
+        key, value = line.split("=")
+        if key == "HF_TOKEN":
+            hf_token = value.strip()
+
 if hf_token is not None:
     os.makedirs("/root/.huggingface", exist_ok=True)
     with open("/root/.huggingface/token", "w") as f:
@@ -348,7 +356,7 @@ def main():
     model_args, data_args, training_args = parser.parse_args_into_dataclasses()
     training_args.do_train = True
     training_args.do_eval = True
-
+
     # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
     # information sent is the one passed as arguments along with your Python/PyTorch versions.
     send_example_telemetry("run_speech_recognition_seq2seq_streaming", model_args, data_args)
@@ -624,6 +632,7 @@ def main():
         compute_metrics=compute_metrics if training_args.predict_with_generate else None,
         callbacks=[ShuffleCallback()] if data_args.streaming else None,
     )
+    logger.info("*** Trainer initialized ***")
 
     # 12. Training
     if training_args.do_train:
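The script then writes the token to /root/.huggingface/token by hand so later Hub calls can find it. For reference, huggingface_hub ships a helper that persists the token to its default location; a sketch of an equivalent, assuming huggingface_hub is installed (the commit itself does not use it):

from huggingface_hub import HfFolder

if hf_token is not None:
    # Persist the token to huggingface_hub's default token path, the
    # same effect the script achieves by writing the file manually.
    HfFolder.save_token(hf_token)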
test_run.sh CHANGED
@@ -3,7 +3,7 @@ python $1run_speech_recognition_seq2seq_streaming.py \
   --dataset_name="mozilla-foundation/common_voice_11_0" \
   --dataset_config_name="sv-SE" \
   --language="swedish" \
-  --train_split_name="train" \
+  --train_split_name="train+validation" \
   --eval_split_name="test" \
   --model_index_name="Whisper Tiny Swedish" \
   --max_train_samples="64" \
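The new train split, "train+validation", relies on 🤗 Datasets split concatenation, so training draws on both the Common Voice train and validation splits while the test split stays held out for eval. A sketch of what that split string resolves to in non-streaming mode (streaming scripts from the Whisper event typically split on "+" and interleave the parts themselves); Common Voice 11 is gated on the Hub, so a token is required, and the use_auth_token flag matches the datasets versions current at the time of this commit:

from datasets import load_dataset

# "train+validation" concatenates the two splits into one dataset.
cv_sv = load_dataset(
    "mozilla-foundation/common_voice_11_0",
    "sv-SE",
    split="train+validation",
    use_auth_token=True,
)
print(len(cv_sv))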