lyhourt committed
Commit c1b9b25 · 1 Parent(s): d5d2a64

Update app.py

Files changed (1):
  1. app.py +17 -4
app.py CHANGED
@@ -1,5 +1,6 @@
 import gradio as gr
 import torch
+import torchaudio
 from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
 
 model_id = "lyhourt/whisper-small-clean_6-v4"
@@ -18,16 +19,28 @@ pipe = pipeline(
     tokenizer=processor.tokenizer,
     feature_extractor=processor.feature_extractor,
     max_new_tokens=128,
-    chunk_length_s=30,
+    chunk_length_s=30,  # You can increase this if needed
     batch_size=16,
     return_timestamps=True,
     torch_dtype=torch_dtype,
     device=device,
 )
 
-def transcribe(audio):
-    text = pipe(audio)["text"]
-    return text
+def transcribe(audio_path):
+    waveform, sample_rate = torchaudio.load(audio_path)
+    # Split the audio into chunks of 30 seconds (or your desired chunk length)
+    chunk_length = 30 * sample_rate  # 30 seconds
+    chunks = [waveform[:, i:i + chunk_length] for i in range(0, waveform.size(1), chunk_length)]
+
+    texts = []
+    for chunk in chunks:
+        chunk = chunk.to(device)
+        text = pipe(chunk)["text"]
+        texts.append(text)
+
+    # Concatenate all texts
+    full_text = " ".join(texts)
+    return full_text
 
 iface = gr.Interface(
     fn=transcribe,
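A note on the new transcribe: in recent transformers releases, the ASR pipeline accepts a file path, a 1-D numpy array, or a {"raw", "sampling_rate"} dict, but not a raw torch tensor, so pipe(chunk) as committed is likely to raise. The manual 30-second slicing also duplicates the long-form chunking that chunk_length_s=30 already performs inside the pipeline, and hard cuts can split words at chunk boundaries. Below is a minimal alternative sketch, not the committed code, assuming the pipe object and imports defined above; the 16 kHz target is the rate Whisper's feature extractor expects.

import torchaudio

TARGET_SR = 16_000  # Whisper's feature extractor expects 16 kHz audio

def transcribe(audio_path):
    # Load the clip; torchaudio returns a (channels, samples) float tensor
    waveform, sample_rate = torchaudio.load(audio_path)
    # Downmix to mono, since the pipeline expects a 1-D waveform
    if waveform.size(0) > 1:
        waveform = waveform.mean(dim=0, keepdim=True)
    # Resample if the source rate differs from the model's expected rate
    if sample_rate != TARGET_SR:
        waveform = torchaudio.functional.resample(waveform, sample_rate, TARGET_SR)
    # Hand the pipeline a numpy array plus its sampling rate; chunk_length_s=30
    # in the pipeline config handles splitting long audio into windows
    result = pipe({"raw": waveform.squeeze(0).numpy(), "sampling_rate": TARGET_SR})
    return result["text"]

Letting the pipeline do the chunking also keeps return_timestamps=True meaningful, since its overlapping windows are stitched back into a single timeline, which per-chunk calls cannot do.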