Update run_speech_recognition_ctc.py
Browse files- run_speech_recognition_ctc.py +15 -13
run_speech_recognition_ctc.py
CHANGED
@@ -125,7 +125,9 @@ class ModelArguments:
|
|
125 |
ctc_loss_reduction: Optional[str] = field(
|
126 |
default="mean", metadata={"help": "The way the ctc loss should be reduced. Should be one of 'mean' or 'sum'."}
|
127 |
)
|
128 |
-
|
|
|
|
|
129 |
|
130 |
@dataclass
|
131 |
class DataTrainingArguments:
|
@@ -395,16 +397,16 @@ def main():
|
|
395 |
import re
|
396 |
def filter_numeric(entry):
|
397 |
return (
|
398 |
-
"0" not in
|
399 |
-
and "1" not in
|
400 |
-
and "2" not in
|
401 |
-
and "3" not in
|
402 |
-
and "4" not in
|
403 |
-
and "5" not in
|
404 |
-
and "6" not in
|
405 |
-
and "7" not in
|
406 |
-
and "8" not in
|
407 |
-
and "9" not in
|
408 |
)
|
409 |
|
410 |
def filter_inaudible(entry):
|
@@ -415,7 +417,7 @@ def main():
|
|
415 |
|
416 |
def filter_tooshort(entry):
|
417 |
#print(f"The audio sample ({entry["audio"]["path"]}) is too small, and has been omitted. "
|
418 |
-
return len(entry["text"])
|
419 |
|
420 |
def map_dataset(entry):
|
421 |
batch = {"text": entry["text"].lower()}
|
@@ -586,7 +588,7 @@ def main():
|
|
586 |
"gradient_checkpointing": training_args.gradient_checkpointing,
|
587 |
"layerdrop": model_args.layerdrop,
|
588 |
"ctc_loss_reduction": model_args.ctc_loss_reduction,
|
589 |
-
"ctc_zero_infinity":
|
590 |
"pad_token_id": tokenizer.pad_token_id,
|
591 |
"vocab_size": len(tokenizer),
|
592 |
"activation_dropout": model_args.activation_dropout,
|
|
|
125 |
ctc_loss_reduction: Optional[str] = field(
|
126 |
default="mean", metadata={"help": "The way the ctc loss should be reduced. Should be one of 'mean' or 'sum'."}
|
127 |
)
|
128 |
+
ctc_zero_infinity: Optional[bool] = field(
|
129 |
+
default=False, metadata={"help": "If True, will try to avoid the CTC loss going to infinity."}
|
130 |
+
)
|
131 |
|
132 |
@dataclass
|
133 |
class DataTrainingArguments:
|
|
|
397 |
import re
|
398 |
def filter_numeric(entry):
|
399 |
return (
|
400 |
+
"0" not in entry["text"]
|
401 |
+
and "1" not in entry["text"]
|
402 |
+
and "2" not in entry["text"]
|
403 |
+
and "3" not in entry["text"]
|
404 |
+
and "4" not in entry["text"]
|
405 |
+
and "5" not in entry["text"]
|
406 |
+
and "6" not in entry["text"]
|
407 |
+
and "7" not in entry["text"]
|
408 |
+
and "8" not in entry["text"]
|
409 |
+
and "9" not in entry["text"]
|
410 |
)
|
411 |
|
412 |
def filter_inaudible(entry):
|
|
|
417 |
|
418 |
def filter_tooshort(entry):
|
419 |
#print(f"The audio sample ({entry["audio"]["path"]}) is too small, and has been omitted. "
|
420 |
+
return (len(entry["text"]) <= len(entry["audio"]["array"]) // 320) and (len(entry["text"].strip()) >= 3)
|
421 |
|
422 |
def map_dataset(entry):
|
423 |
batch = {"text": entry["text"].lower()}
|
|
|
588 |
"gradient_checkpointing": training_args.gradient_checkpointing,
|
589 |
"layerdrop": model_args.layerdrop,
|
590 |
"ctc_loss_reduction": model_args.ctc_loss_reduction,
|
591 |
+
"ctc_zero_infinity": model_args.ctc_zero_infinity,
|
592 |
"pad_token_id": tokenizer.pad_token_id,
|
593 |
"vocab_size": len(tokenizer),
|
594 |
"activation_dropout": model_args.activation_dropout,
|