# dentalTransit / app.py
# Gradio demo, commit e1a10bd (1.33 kB), uploaded by srirama.
import os
import tempfile

import gradio as gr
from pydub import AudioSegment
from transformers import pipeline
# Hub id of the checkpoint to load; swap in "your-username/the-name-you-picked"
# to serve a different model.
MODEL_ID = "srirama/whisper-small-hi"

# Built once at import time and shared by every transcription request.
pipe = pipeline(model=MODEL_ID)
def split_audio(audio_path, chunk_length_ms):
    """Split an audio file into consecutive fixed-length chunks.

    Args:
        audio_path: Path of the audio file; it is loaded with
            ``AudioSegment.from_file``.
        chunk_length_ms: Length of each chunk in milliseconds. Must be
            positive.

    Returns:
        A list of chunks in playback order. Every chunk spans
        ``chunk_length_ms`` except possibly the last one, which holds
        whatever audio remains. An empty recording yields an empty list.

    Raises:
        ValueError: If ``chunk_length_ms`` is zero or negative.
    """
    # Validate before touching the file: a negative step would make ``range``
    # empty and silently drop the whole recording, while a zero step fails
    # with an error that does not mention the chunk length at all.
    if chunk_length_ms <= 0:
        raise ValueError(
            f"chunk_length_ms must be a positive number of milliseconds, "
            f"got {chunk_length_ms!r}"
        )

    audio = AudioSegment.from_file(audio_path)
    # The segment's length and slice offsets are both expressed in
    # milliseconds, so stepping by ``chunk_length_ms`` tiles the recording.
    return [
        audio[start:start + chunk_length_ms]
        for start in range(0, len(audio), chunk_length_ms)
    ]
def transcribe(audio_path):
    """Transcribe a recording by running the speech pipeline on 20 s chunks.

    The recording is cut into 20-second pieces, each piece is written to a
    temporary WAV file and passed to ``pipe``, and the per-chunk texts are
    joined into one transcript.

    Args:
        audio_path: Filesystem path of the recording. ``None`` or an empty
            string means no audio was supplied.

    Returns:
        The chunk transcripts joined by single spaces, with leading and
        trailing whitespace stripped. An empty string when no audio was
        supplied or the recording produced no chunks.
    """
    # Nothing was recorded/uploaded: return an empty transcript instead of
    # failing inside the audio decoder.
    if not audio_path:
        return ""

    chunk_length_ms = 20 * 1000  # 20 seconds in milliseconds
    audio_chunks = split_audio(audio_path, chunk_length_ms)

    texts = []
    # A private directory per call keeps concurrent requests from overwriting
    # each other's chunk files, and the context manager removes every file
    # even when export or inference raises.
    with tempfile.TemporaryDirectory() as tmp_dir:
        for i, chunk in enumerate(audio_chunks):
            chunk_path = os.path.join(tmp_dir, f"chunk_{i}.wav")
            chunk.export(chunk_path, format="wav")
            texts.append(pipe(chunk_path)["text"])

    return " ".join(texts).strip()
# Audio is captured from the microphone and handed to `transcribe` as a file
# path rather than as raw samples.
microphone_input = gr.Audio(sources=["microphone"], type="filepath")

# Single-function UI: one audio input, one plain-text transcript output.
iface = gr.Interface(
    fn=transcribe,
    inputs=microphone_input,
    outputs="text",
    title="Whisper Small English",
    description="Realtime demo for Dental speech recognition using a fine-tuned Whisper small model.",
)

# Start serving the demo.
iface.launch()