# Spaces: Sleeping  (page-header residue from the web capture; not part of the program)
import os
import tempfile

import gradio as gr
from pydub import AudioSegment
from transformers import pipeline
# Build the Hugging Face pipeline once at import time; transcribe() reuses this
# single instance for every audio chunk instead of reloading the model per call.
# No task is passed, so the pipeline type is left to be inferred from the model.
# NOTE(review): the model id ends in "-hi" while the UI title says "English" -
# confirm which model/language is intended.
pipe = pipeline(model="srirama/whisper-small-hi") # replace with your own model id: "your-username/the-name-you-picked"
# Function to split audio into chunks | |
def split_audio(audio_path, chunk_length_ms):
    """Split an audio file into consecutive chunks of a fixed length.

    Args:
        audio_path: Path of the audio file; passed to
            ``AudioSegment.from_file``.
        chunk_length_ms: Length of each chunk in milliseconds. Must be a
            positive integer.

    Returns:
        A list of ``AudioSegment`` slices in playback order. The last chunk
        holds whatever remains, so it may be shorter than the others. Audio
        of length zero yields an empty list.

    Raises:
        ValueError: If ``chunk_length_ms`` is zero or negative.
    """
    # Validate before decoding the file: a negative step would silently
    # produce no chunks, and a zero step would only fail inside range().
    if chunk_length_ms <= 0:
        raise ValueError(
            f"chunk_length_ms must be positive, got {chunk_length_ms!r}"
        )

    audio = AudioSegment.from_file(audio_path)
    # pydub measures len() and slice bounds in milliseconds.
    return [
        audio[start:start + chunk_length_ms]
        for start in range(0, len(audio), chunk_length_ms)
    ]
# Function to transcribe audio | |
def transcribe(audio_path):
    """Transcribe an audio file by running the model on 20-second chunks.

    The audio is split with ``split_audio``, each chunk is written to a
    temporary WAV file, and the file is passed to the module-level ``pipe``.
    The per-chunk texts are joined with single spaces.

    Args:
        audio_path: Path of the recording to transcribe. May be ``None`` or
            empty when nothing was recorded.

    Returns:
        The combined transcript with surrounding whitespace stripped, or an
        empty string when no audio was supplied.
    """
    if not audio_path:
        # Nothing was recorded/uploaded; return an empty transcript instead
        # of crashing while trying to open a missing file.
        return ""

    chunk_length_ms = 20 * 1000  # 20 seconds in milliseconds
    audio_chunks = split_audio(audio_path, chunk_length_ms)

    texts = []
    # A private temporary directory per call keeps concurrent requests from
    # overwriting each other's chunk files, and the context manager removes
    # the files even when exporting or the model raises.
    with tempfile.TemporaryDirectory() as tmp_dir:
        for i, chunk in enumerate(audio_chunks):
            temp_file = os.path.join(tmp_dir, f"temp_chunk_{i}.wav")
            # export() hands back the open output file; close it so the
            # descriptor is not leaked and the data is fully on disk.
            chunk.export(temp_file, format="wav").close()
            texts.append(pipe(temp_file)["text"])

    return " ".join(texts).strip()
# Gradio UI: capture audio from the microphone, pass the recording to
# transcribe() as a file path, and display the returned text.
_microphone_input = gr.Audio(sources=["microphone"], type="filepath")

iface = gr.Interface(
    fn=transcribe,
    inputs=_microphone_input,
    outputs="text",
    title="Whisper Small English",
    description="Realtime demo for Dental speech recognition using a fine-tuned Whisper small model.",
)

# Start serving the interface.
iface.launch()