ArthurZ HF staff committed on
Commit
7a824db
1 Parent(s): 56b0c7d

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +3 -3
README.md CHANGED
@@ -189,7 +189,7 @@ To each task corresponds a sequence of tokens that are given to the decoder as *
189
 
190
  # Usage
191
 
192
- To transcribe or translate audio files, the model has to be used along a `WhisperFeatureExtractor`.
193
 
194
 
195
  ## Transcription
@@ -238,7 +238,7 @@ transcription.
238
  >>> ds = load_dataset("common_voice", "fr", split="test", streaming=True)
239
  >>> ds = ds.cast_column("audio", datasets.Audio(sampling_rate=16_000))
240
  >>> input_speech = next(iter(ds))["audio"]["array"]
241
- >>> # tokenize
242
  >>> input_features = processor(input_speech, return_tensors="pt").input_features
243
  >>> predicted_ids = model.generate(input_features)
244
  >>> transcription = processor.batch_decode(predicted_ids)
@@ -268,7 +268,7 @@ The "<|translate|>" is used as the first decoder input token to specify the tran
268
  >>> input_speech = next(iter(ds))["audio"]["array"]
269
  >>> # tokenize
270
  >>> input_features = processor(input_speech, return_tensors="pt").input_features
271
- >>> forced_decoder_ids = processor._get_decoder_prompt_ids(language = "fr", task = "translate")
272
 
273
  >>> predicted_ids = model.generate(input_features, forced_decoder_ids = forced_decoder_ids)
274
  >>> transcription = processor.batch_decode(predicted_ids, skip_special_tokens = True)
 
189
 
190
  # Usage
191
 
192
+ To transcribe or translate audio files, the model has to be used along with a `WhisperProcessor`. The `WhisperProcessor.get_decoder_prompt_ids` function is used to get a list of `(idx, token)` tuples, which can either be set in the config or passed directly to the generate function as `forced_decoder_ids`.
193
 
194
 
195
  ## Transcription
 
238
  >>> ds = load_dataset("common_voice", "fr", split="test", streaming=True)
239
  >>> ds = ds.cast_column("audio", datasets.Audio(sampling_rate=16_000))
240
  >>> input_speech = next(iter(ds))["audio"]["array"]
241
+ >>> model.config.forced_decoder_ids = processor.get_decoder_prompt_ids(language = "fr", task = "transcribe")
242
  >>> input_features = processor(input_speech, return_tensors="pt").input_features
243
  >>> predicted_ids = model.generate(input_features)
244
  >>> transcription = processor.batch_decode(predicted_ids)
 
268
  >>> input_speech = next(iter(ds))["audio"]["array"]
269
  >>> # tokenize
270
  >>> input_features = processor(input_speech, return_tensors="pt").input_features
271
+ >>> forced_decoder_ids = processor.get_decoder_prompt_ids(language = "fr", task = "translate")
272
 
273
  >>> predicted_ids = model.generate(input_features, forced_decoder_ids = forced_decoder_ids)
274
  >>> transcription = processor.batch_decode(predicted_ids, skip_special_tokens = True)