whisper-nb / app.py
Tollef Jørgensen
first test of nb small
253c4ed
import gradio as gr
from transformers import pipeline
import numpy as np
# Hugging Face model id of the Whisper checkpoint loaded into the ASR pipeline.
model_id = "NbAiLab/nb-whisper-small-beta"
transcriber = pipeline(task="automatic-speech-recognition", model=model_id)

# Running state shared across transcribe() calls:
#   total_time - offset in seconds that make_timestamp() adds to each timestamp
#   counter    - index written on the next SRT entry
total_time = 0
counter = 0
def make_timestamp(ref):
    """Format an offset in seconds as an SRT timestamp (``HH:MM:SS,mmm``).

    The module-level ``total_time`` offset is added to ``ref`` before
    formatting.

    Args:
        ref: Offset in seconds (int or float).

    Returns:
        The timestamp string, e.g. ``"00:00:01,500"`` for 1.5 seconds when
        ``total_time`` is 0.
    """
    global total_time
    # Convert to whole milliseconds once (rounded, so 2.34 s does not become
    # 339 ms through float truncation) and split from there. The previous
    # code took ``seconds % 1000`` for the millisecond field, which is the
    # whole-second count rather than the fractional part.
    total_ms = int(round((total_time + ref) * 1000))
    total_seconds, mmm = divmod(total_ms, 1000)
    total_minutes, ss = divmod(total_seconds, 60)
    hh, mm = divmod(total_minutes, 60)
    return f"{hh:02d}:{mm:02d}:{ss:02d},{mmm:03d}"
def transcribe(audio):
    """Transcribe one recording and return it as SRT-formatted text.

    Args:
        audio: ``(sampling_rate, samples)`` tuple with ``samples`` as a NumPy
            array, or ``None`` when nothing was recorded.

    Returns:
        The SRT entries for this recording joined by blank lines, or an
        empty string when there is no audio to transcribe.

    Side effects:
        Advances the module-level ``counter`` by one per emitted entry and
        ``total_time`` by the end time of the last entry, so a later call
        continues numbering and timing where this one stopped.
    """
    global counter
    global total_time

    # Nothing recorded, or an empty buffer: there is nothing to transcribe.
    if audio is None:
        return ""
    sr, y = audio
    if y is None or len(y) == 0:
        return ""

    y = y.astype(np.float32)
    # The ASR pipeline expects single-channel audio; average the channels of
    # a (samples, channels) array down to mono.
    if y.ndim > 1:
        y = y.mean(axis=1)
    # Peak-normalise, but skip pure silence to avoid dividing by zero.
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak
    duration = len(y) / sr

    conf = {"sampling_rate": sr, "raw": y}
    kwargs = {"task": "transcribe", "language": "no"}
    res = transcriber(conf, generate_kwargs=kwargs, return_timestamps=True)

    entries = []
    last_end = 0.0
    for chunk in res["chunks"]:
        start, end = chunk["timestamp"]
        # The final chunk may come back without an end time; fall back to
        # the length of the recording.
        if end is None:
            end = duration
        txt = chunk["text"].strip()
        start_srt = make_timestamp(start)
        end_srt = make_timestamp(end)
        entries.append(f"{counter}\n{start_srt} --> {end_srt}\n{txt}\n")
        counter += 1
        last_end = max(last_end, end)

    # Chunk timestamps are relative to the start of this recording and
    # make_timestamp() already adds total_time, so the offset must stay fixed
    # while the entries are built and only advance once, afterwards.
    total_time += last_end
    return "\n".join(entries)
# Web UI: microphone audio goes into transcribe(), and the SRT text it
# returns is shown in a plain text output.
demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(source="microphone"),
    outputs="text",
)

# Start the Gradio server.
demo.launch()