Daniel Tse committed on
Commit
bf628b5
1 Parent(s): 36688d2

Use whisper

Browse files
Files changed (1) hide show
  1. app.py +9 -8
app.py CHANGED
@@ -25,15 +25,16 @@ def transcribe_audio(audiofile):
25
  podcast_chunks = podcast[::chunk_length_five_minutes]
26
 
27
  st.info('Transcribing...')
 
28
 
29
- #transcriptions = []
30
-
31
- #for i, chunk in enumerate(podcast_chunks):
32
- # chunk.export(f'output/chunk_{i}.mp4', format='mp4')
33
-
34
- # following blogpost here: https://huggingface.co/blog/asr-chunking
35
- transcribe_pipe = pipeline(model="facebook/wav2vec2-base-960h")
36
- transcription = transcribe_pipe(audiofile, chunk_length_s=10, stride_length_s=(4, 2))
37
 
38
  st.session_state['transcription'] = transcription
39
  print(f"transcription: {transcription}")
 
25
  podcast_chunks = podcast[::chunk_length_five_minutes]
26
 
27
  st.info('Transcribing...')
28
+ device = "cuda:0" if torch.cuda.is_available() else "cpu"
29
 
30
+ pipe = pipeline(
31
+ "automatic-speech-recognition",
32
+ model="openai/whisper-large-v2",
33
+ chunk_length_s=30,
34
+ device=device,
35
+ )
36
+
37
+ transcription = pipe(audiofile, batch_size=8)["text"]
38
 
39
  st.session_state['transcription'] = transcription
40
  print(f"transcription: {transcription}")