Spaces:
Sleeping
Sleeping
File size: 1,334 Bytes
e1a10bd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
import os
import tempfile

import gradio as gr
from pydub import AudioSegment
from transformers import pipeline
# Model identifier handed to `pipeline`; swap in
# "your-username/the-name-you-picked" to serve a different checkpoint.
MODEL_ID = "srirama/whisper-small-hi"

# Build the pipeline once at import time so every transcription call reuses it.
pipe = pipeline(model=MODEL_ID)
# Function to split audio into chunks
def split_audio(audio_path, chunk_length_ms):
    """Split an audio file into consecutive fixed-length chunks.

    Args:
        audio_path: Path of the audio file; it is loaded with
            ``AudioSegment.from_file``.
        chunk_length_ms: Length of each chunk in milliseconds. Must be
            greater than zero.

    Returns:
        A list of audio slices in playback order. The final slice may be
        shorter than ``chunk_length_ms``; audio of length zero yields ``[]``.

    Raises:
        ValueError: If ``chunk_length_ms`` is not positive.
    """
    # Validate before decoding the file: a zero step makes range() fail with
    # an unrelated-looking message, and a negative step would silently
    # produce no chunks at all.
    if chunk_length_ms <= 0:
        raise ValueError(
            f"chunk_length_ms must be positive, got {chunk_length_ms!r}"
        )

    audio = AudioSegment.from_file(audio_path)
    # Slicing past the end of the audio is clamped, so the last chunk simply
    # holds whatever remains.
    return [
        audio[start:start + chunk_length_ms]
        for start in range(0, len(audio), chunk_length_ms)
    ]
# Function to transcribe audio
def transcribe(audio_path):
    """Transcribe an audio file by running the pipeline on 20-second chunks.

    The audio is cut into chunks with ``split_audio``; each chunk is written
    to a temporary WAV file, passed to the module-level ``pipe``, and the
    resulting texts are joined with single spaces.

    Args:
        audio_path: Path of the audio file to transcribe. ``None`` or an
            empty string (nothing was recorded) is accepted.

    Returns:
        The full transcript with surrounding whitespace removed, or ``""``
        when no audio was supplied.
    """
    # Nothing recorded: return an empty transcript instead of failing while
    # trying to open a missing file.
    if not audio_path:
        return ""

    chunk_length_ms = 20 * 1000  # 20 seconds in milliseconds
    audio_chunks = split_audio(audio_path, chunk_length_ms)

    texts = []
    # A private directory per call keeps concurrent requests from overwriting
    # each other's chunk files, and the context manager removes every file
    # even when export or inference raises.
    with tempfile.TemporaryDirectory() as temp_dir:
        for i, chunk in enumerate(audio_chunks):
            temp_file = os.path.join(temp_dir, f"temp_chunk_{i}.wav")
            # export() returns the open output file; close it so the data is
            # on disk and no handle is left dangling before the file is read.
            chunk.export(temp_file, format="wav").close()
            texts.append(pipe(temp_file)["text"])

    return " ".join(texts).strip()
# Audio is captured from the microphone and handed to `transcribe` as a
# file path (type="filepath").
microphone_input = gr.Audio(sources=["microphone"], type="filepath")

# Web UI wiring: recorded audio goes in, the transcript comes back as text.
iface = gr.Interface(
    fn=transcribe,
    inputs=microphone_input,
    outputs="text",
    title="Whisper Small English",
    description="Realtime demo for Dental speech recognition using a fine-tuned Whisper small model.",
)

# Start serving the interface.
iface.launch()
|