gchhablani committed on
Commit
fb1fe48
1 Parent(s): a5716eb

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +24 -24
README.md CHANGED
@@ -87,7 +87,7 @@ processor = Wav2Vec2Processor.from_pretrained("gchhablani/wav2vec2-large-xlsr-pt
87
  model = Wav2Vec2ForCTC.from_pretrained("gchhablani/wav2vec2-large-xlsr-pt")
88
  model.to("cuda")
89
 
90
- chars_to_ignore_regex = '[\,\?\.\!\-\;\:\"\“\'\�]'
91
  resampler = torchaudio.transforms.Resample(48_000, 16_000)
92
 
93
  # Preprocessing the datasets.
@@ -126,29 +126,29 @@ The Common Voice `train` and `validation` datasets were used for training. The s
126
 
127
  ```bash
128
  #!/usr/bin/env bash
129
- python run_common_voice.py \\
130
- --model_name_or_path="facebook/wav2vec2-large-xlsr-53" \\
131
- --dataset_config_name="pt" \\
132
- --output_dir=/workspace/output_models/pt/wav2vec2-large-xlsr-pt \\
133
- --cache_dir=/workspace/data \\
134
- --overwrite_output_dir \\
135
- --num_train_epochs="30" \\
136
- --per_device_train_batch_size="32" \\
137
- --per_device_eval_batch_size="32" \\
138
- --evaluation_strategy="steps" \\
139
- --learning_rate="3e-4" \\
140
- --warmup_steps="500" \\
141
- --fp16 \\
142
- --freeze_feature_extractor \\
143
- --save_steps="500" \\
144
- --eval_steps="500" \\
145
- --save_total_limit="1" \\
146
- --logging_steps="500" \\
147
- --group_by_length \\
148
- --feat_proj_dropout="0.0" \\
149
- --layerdrop="0.1" \\
150
- --gradient_checkpointing \\
151
- --do_train --do_eval \\
152
  ```
153
 
154
  Notebook containing the evaluation can be found [here](https://colab.research.google.com/drive/14e-zNK_5pm8EMY9EbeZerpHx7WsGycqG?usp=sharing).
87
  model = Wav2Vec2ForCTC.from_pretrained("gchhablani/wav2vec2-large-xlsr-pt")
88
  model.to("cuda")
89
 
90
+ chars_to_ignore_regex = '[\,\?\.\!\-\;\;\"\“\'\�]'
91
  resampler = torchaudio.transforms.Resample(48_000, 16_000)
92
 
93
  # Preprocessing the datasets.
126
 
127
  ```bash
128
  #!/usr/bin/env bash
129
+ python run_common_voice.py \
130
+ --model_name_or_path="facebook/wav2vec2-large-xlsr-53" \
131
+ --dataset_config_name="pt" \
132
+ --output_dir=/workspace/output_models/pt/wav2vec2-large-xlsr-pt \
133
+ --cache_dir=/workspace/data \
134
+ --overwrite_output_dir \
135
+ --num_train_epochs="30" \
136
+ --per_device_train_batch_size="32" \
137
+ --per_device_eval_batch_size="32" \
138
+ --evaluation_strategy="steps" \
139
+ --learning_rate="3e-4" \
140
+ --warmup_steps="500" \
141
+ --fp16 \
142
+ --freeze_feature_extractor \
143
+ --save_steps="500" \
144
+ --eval_steps="500" \
145
+ --save_total_limit="1" \
146
+ --logging_steps="500" \
147
+ --group_by_length \
148
+ --feat_proj_dropout="0.0" \
149
+ --layerdrop="0.1" \
150
+ --gradient_checkpointing \
151
+ --do_train --do_eval \
152
  ```
153
 
154
  Notebook containing the evaluation can be found [here](https://colab.research.google.com/drive/14e-zNK_5pm8EMY9EbeZerpHx7WsGycqG?usp=sharing).