Spaces:
Running
on
Zero
Running
on
Zero
File size: 1,771 Bytes
5d52c32 6c226f9 d790c0b 88183ad 1e8d252 6cd6646 6c226f9 17f14b2 f696e7e 6c226f9 f696e7e 6c226f9 5d52c32 3da85d4 1e8d252 6cd6646 1e8d252 6cd6646 1e8d252 15b00fb 3da85d4 3df1d51 72be79d 46704ba 4731eae 1e8d252 15b00fb 3da85d4 3df1d51 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
import spaces
import torch
import gradio as gr
from transformers import pipeline
import tempfile
import os
import uuid
import scipy.io.wavfile
MODEL_NAME = "ylacombe/whisper-large-v3-turbo"
BATCH_SIZE = 8
device = 0 if torch.cuda.is_available() else "cpu"
pipe = pipeline(
task="automatic-speech-recognition",
model=MODEL_NAME,
chunk_length_s=30,
device=device,
)
@spaces.GPU
def transcribe(inputs, previous_transcription):
try:
# Generate a unique filename using UUID
filename = f"{uuid.uuid4().hex}.wav"
filepath = os.path.join(tempfile.gettempdir(), filename)
# Extract sample rate and audio data from the tuple
sample_rate, audio_data = inputs
# Save the audio data to the temporary file
scipy.io.wavfile.write(filepath, sample_rate, audio_data)
previous_transcription += pipe(filepath, batch_size=BATCH_SIZE, generate_kwargs={"task": "transcribe"}, return_timestamps=True)["text"]
# Remove the temporary file after transcription
os.remove(filepath)
return previous_transcription
except Exception as e:
print(f"Error during transcription: {e}")
return previous_transcription
with gr.Blocks() as demo:
with gr.Column():
gr.Markdown(f"# Realtime Whisper Large V3 Turbo: Transcribe Audio\n Transcribe inputs in Realtime. This Demo uses the checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers.")
input_audio_microphone = gr.Audio(streaming=True)
output = gr.Textbox(label="Transcription", value="")
input_audio_microphone.stream(transcribe, [input_audio_microphone, output], [output], time_limit=45, stream_every=2, concurrency_limit=None)
demo.queue().launch() |