from transformers import pipeline
import gradio as gr
from pydub import AudioSegment
import os

# Initialize the pipeline
pipe = pipeline(model="srirama/whisper-small-hi")  # change to "your-username/the-name-you-picked"

# Function to split audio into chunks
def split_audio(audio_path, chunk_length_ms):
    audio = AudioSegment.from_file(audio_path)
    chunks = []
    for i in range(0, len(audio), chunk_length_ms):
        chunk = audio[i:i + chunk_length_ms]
        chunks.append(chunk)
    return chunks

# Function to transcribe audio
def transcribe(audio_path):
    chunk_length_ms = 20 * 1000  # 20 seconds in milliseconds
    audio_chunks = split_audio(audio_path, chunk_length_ms)

    full_transcript = ""
    for i, chunk in enumerate(audio_chunks):
        temp_file = f"temp_chunk_{i}.wav"
        chunk.export(temp_file, format="wav")
        text = pipe(temp_file)["text"]
        full_transcript += text + " "
        os.remove(temp_file)  # Clean up the temporary file

    return full_transcript.strip()

iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources=["microphone"], type="filepath"),
    outputs="text",
    title="Whisper Small English",
    description="Realtime demo for Dental speech recognition using a fine-tuned Whisper small model.",
)

# Launch the interface
iface.launch()
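
# Optional: a minimal local test sketch (assumption: "sample.wav" is a short audio
# file you supply; it is not part of this script). You could call transcribe()
# directly before launching the Gradio app to verify the chunked pipeline works:
#
#   if os.path.exists("sample.wav"):
#       print(transcribe("sample.wav"))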