ArthurZ (HF staff) committed
Commit f63ed3d
1 Parent(s): 9da6161

Update README.md

Files changed (1)
  1. README.md +4 -6
README.md CHANGED
@@ -236,15 +236,13 @@ transcription.
  >>> processor = WhisperProcessor.from_pretrained("openai/whisper-large")
  >>> model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large")

- >>> decoder_input_ids = processor.tokenizer.encode("<|startoftranscript|><|fr|><|transcribe|><|notimestamps|>", return_tensors="pt")
-
  >>> # load dummy dataset and read soundfiles
  >>> ds = load_dataset("common_voice", "fr", split="test", streaming=True)
  >>> ds = ds.cast_column("audio", datasets.Audio(sampling_rate=16_000))
  >>> input_speech = next(iter(ds))["audio"]["array"]
  >>> # tokenize
  >>> input_features = processor(input_speech, return_tensors="pt").input_features
- >>> predicted_ids = model.generate(input_features, decoder_input_ids = decoder_input_ids, max_lenght = 460_000)
+ >>> predicted_ids = model.generate(input_features)
  >>> transcription = processor.batch_decode(predicted_ids)
  ['<|startoftranscript|><|fr|><|transcribe|><|notimestamps|> Un vrai travail intéressant va enfin être mené sur ce sujet.<|endoftext|>']
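
The transcription above keeps Whisper's special-token markers because `batch_decode` runs without `skip_special_tokens`. A minimal sketch of decoding the same `predicted_ids` with the flag set (the expected output is simply the string above with the markers stripped):

```python
>>> # drop <|startoftranscript|>, <|fr|>, <|transcribe|>, <|notimestamps|>, <|endoftext|>
>>> transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
[' Un vrai travail intéressant va enfin être mené sur ce sujet.']
```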
@@ -266,15 +264,15 @@ The "<|translate|>" is used as the first decoder input token to specify the translation task.
  >>> processor = WhisperProcessor.from_pretrained("openai/whisper-large")
  >>> model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large")

- >>> decoder_input_ids = processor.tokenizer.encode("<|startoftranscript|><|fr|><|translate|><|notimestamps|>", return_tensors="pt")
-
  >>> # load dummy dataset and read soundfiles
  >>> ds = load_dataset("common_voice", "fr", split="test", streaming=True)
  >>> ds = ds.cast_column("audio", datasets.Audio(sampling_rate=16_000))
  >>> input_speech = next(iter(ds))["audio"]["array"]
  >>> # tokenize
  >>> input_features = processor(input_speech, return_tensors="pt").input_features
- >>> predicted_ids = model.generate(input_features, decoder_input_ids = decoder_input_ids, max_lenght = 460_000)
+ >>> forced_decoder_ids = processor.get_decoder_prompt_ids(language = "fr", task = "translate")
+
+ >>> predicted_ids = model.generate(input_features, forced_decoder_ids = forced_decoder_ids)
  >>> transcription = processor.batch_decode(predicted_ids, skip_special_tokens = True)
  [' A real interesting work will be done on this subject.']
  ```
 
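After this change, `get_decoder_prompt_ids` builds the task prompt as a list of `(position, token_id)` pairs that `generate` forces at the first decoder steps, replacing the hand-encoded string of special tokens; `<|startoftranscript|>` at position 0 is supplied by `generate` itself as the decoder start token. For reference, here is the updated translation example assembled into one self-contained snippet; a sketch that assumes a `transformers` version exposing `WhisperProcessor.get_decoder_prompt_ids`, with the imports (not shown in the diff) added:

```python
>>> import datasets
>>> from datasets import load_dataset
>>> from transformers import WhisperProcessor, WhisperForConditionalGeneration

>>> processor = WhisperProcessor.from_pretrained("openai/whisper-large")
>>> model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large")

>>> # load dummy dataset and read soundfiles
>>> ds = load_dataset("common_voice", "fr", split="test", streaming=True)
>>> ds = ds.cast_column("audio", datasets.Audio(sampling_rate=16_000))
>>> input_speech = next(iter(ds))["audio"]["array"]

>>> # log-Mel input features for the encoder
>>> input_features = processor(input_speech, return_tensors="pt").input_features

>>> # forces <|fr|>, <|translate|>, <|notimestamps|> at the first decoder positions
>>> forced_decoder_ids = processor.get_decoder_prompt_ids(language="fr", task="translate")
>>> predicted_ids = model.generate(input_features, forced_decoder_ids=forced_decoder_ids)
>>> transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
[' A real interesting work will be done on this subject.']
```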