sanchit-gandhi HF staff committed on
Commit
0a8142a
1 Parent(s): d2f8541

Correct scripts

Browse files
Files changed (3) hide show
  1. README.md +2 -2
  2. get_ctc_tokenizer.py +1 -1
  3. run_spgispeech.sh +2 -2
README.md CHANGED
@@ -11,9 +11,9 @@ To reproduce this run, first call `get_ctc_tokenizer.py` to train the CTC tokeni
11
  ```python
12
  #!/usr/bin/env bash
13
  python run_flax_speech_recognition_ctc.py \
14
- --model_name_or_path="esc/wav2vec2-pretrained" \
15
  --tokenizer_name="wav2vec2-ctc-spgispeech-tokenizer" \
16
- --dataset_name="esc/esc-datasets" \
17
  --dataset_config_name="spgispeech" \
18
  --output_dir="./" \
19
  --wandb_project="wav2vec2-ctc" \
 
11
  ```python
12
  #!/usr/bin/env bash
13
  python run_flax_speech_recognition_ctc.py \
14
+ --model_name_or_path="esc-benchmark/wav2vec2-pretrained" \
15
  --tokenizer_name="wav2vec2-ctc-spgispeech-tokenizer" \
16
+ --dataset_name="esc-benchmark/esc-datasets" \
17
  --dataset_config_name="spgispeech" \
18
  --output_dir="./" \
19
  --wandb_project="wav2vec2-ctc" \
get_ctc_tokenizer.py CHANGED
@@ -19,7 +19,7 @@ tokenizer_name = f"wav2vec2-ctc-{dataset_name}-tokenizer"
19
  cutoff_freq = 0.01
20
 
21
  dataset = load_dataset(
22
- "esc/esc-datasets",
23
  dataset_name,
24
  split=split,
25
  use_auth_token=use_auth_token,
 
19
  cutoff_freq = 0.01
20
 
21
  dataset = load_dataset(
22
+ "esc-benchmark/esc-datasets",
23
  dataset_name,
24
  split=split,
25
  use_auth_token=use_auth_token,
run_spgispeech.sh CHANGED
@@ -1,8 +1,8 @@
1
  #!/usr/bin/env bash
2
  python run_flax_speech_recognition_ctc.py \
3
- --model_name_or_path="esc/wav2vec2-pretrained" \
4
  --tokenizer_name="wav2vec2-ctc-spgispeech-tokenizer" \
5
- --dataset_name="esc/esc-datasets" \
6
  --dataset_config_name="spgispeech" \
7
  --output_dir="./" \
8
  --wandb_project="wav2vec2-ctc" \
 
1
  #!/usr/bin/env bash
2
  python run_flax_speech_recognition_ctc.py \
3
+ --model_name_or_path="esc-benchmark/wav2vec2-pretrained" \
4
  --tokenizer_name="wav2vec2-ctc-spgispeech-tokenizer" \
5
+ --dataset_name="esc-benchmark/esc-datasets" \
6
  --dataset_config_name="spgispeech" \
7
  --output_dir="./" \
8
  --wandb_project="wav2vec2-ctc" \