Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
import gradio as gr
|
2 |
import torch
|
|
|
3 |
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
|
4 |
|
5 |
model_id = "lyhourt/whisper-small-clean_6-v4"
|
@@ -18,16 +19,28 @@ pipe = pipeline(
|
|
18 |
tokenizer=processor.tokenizer,
|
19 |
feature_extractor=processor.feature_extractor,
|
20 |
max_new_tokens=128,
|
21 |
-
chunk_length_s=30,
|
22 |
batch_size=16,
|
23 |
return_timestamps=True,
|
24 |
torch_dtype=torch_dtype,
|
25 |
device=device,
|
26 |
)
|
27 |
|
28 |
-
def transcribe(
|
29 |
-
|
30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
|
32 |
iface = gr.Interface(
|
33 |
fn=transcribe,
|
|
|
1 |
import gradio as gr
|
2 |
import torch
|
3 |
+
import torchaudio
|
4 |
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
|
5 |
|
6 |
model_id = "lyhourt/whisper-small-clean_6-v4"
|
|
|
19 |
tokenizer=processor.tokenizer,
|
20 |
feature_extractor=processor.feature_extractor,
|
21 |
max_new_tokens=128,
|
22 |
+
chunk_length_s=30, # You can increase this if needed
|
23 |
batch_size=16,
|
24 |
return_timestamps=True,
|
25 |
torch_dtype=torch_dtype,
|
26 |
device=device,
|
27 |
)
|
28 |
|
29 |
+
def transcribe(audio_path):
    """Transcribe an audio file to text with the Whisper ASR pipeline.

    Args:
        audio_path: Path to the audio file selected in the Gradio UI.

    Returns:
        The transcription of the whole file as a single string.
    """
    waveform, sample_rate = torchaudio.load(audio_path)

    # Downmix to mono: Whisper's feature extractor expects a 1-D signal,
    # and a raw stereo tensor would be misinterpreted.
    if waveform.size(0) > 1:
        waveform = waveform.mean(dim=0, keepdim=True)

    # Whisper models are trained on 16 kHz audio; resample anything else,
    # otherwise the pipeline would treat the samples as if they were 16 kHz
    # and produce garbage transcripts.
    target_rate = 16000
    if sample_rate != target_rate:
        waveform = torchaudio.functional.resample(waveform, sample_rate, target_rate)
        sample_rate = target_rate

    # The pipeline already splits long audio itself (chunk_length_s=30,
    # batch_size=16) with overlap-aware stitching, so hand it the full
    # signal plus its sampling rate instead of slicing manually — manual
    # hard cuts at 30 s boundaries split words in half. Device placement
    # is also handled by the pipeline (constructed with device=device).
    result = pipe({"raw": waveform.squeeze(0).numpy(), "sampling_rate": sample_rate})
    return result["text"]
|
44 |
|
45 |
iface = gr.Interface(
|
46 |
fn=transcribe,
|