File size: 891 Bytes
9f5d540 93c6594 9f5d540 5afd83b 9f5d540 9f13d0c 153e956 1e401c5 153e956 1e401c5 ddf950c 9f13d0c 93c6594 9f13d0c ddf950c 11dabbc 93c6594 9f5d540 93c6594 9f5d540 9f13d0c 9f5d540 5358b5a 93c6594 98b2436 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 |
import logging

import gradio as gr
from transformers import pipeline
# Build the speech-recognition pipeline once, at import time; transcribe()
# below calls this module-level object for every request.
asr = pipeline(
    "automatic-speech-recognition",
    model="vhdm/whisper-large-fa-v1",
    device=-1,  # -1 runs on the CPU; switch to 0 to run on a GPU
)
def transcribe(audio_file):
    """Transcribe an audio file to text with the module-level ``asr`` pipeline.

    Args:
        audio_file: Path to the audio file to transcribe (the Gradio input is
            configured with ``type="filepath"``). A falsy value (``None`` or
            ``""``) means no audio was provided.

    Returns:
        The recognized text with surrounding whitespace removed. If no audio
        was provided, or recognition fails, a human-readable message is
        returned instead; exceptions are reported as text rather than raised
        so the message shows up in the UI's text output.
    """
    if not audio_file:
        return "No audio input detected."

    try:
        # Long recordings are processed in 30 s chunks with a 5 s stride on
        # the left and right of each chunk.
        result = asr(audio_file, chunk_length_s=30, stride_length_s=[5, 5])
    except Exception as e:
        # UI boundary: surface the failure to the user, but keep the full
        # traceback in the server log instead of discarding it.
        logging.getLogger(__name__).exception("ASR failed for %r", audio_file)
        return f"ASR error: {e}"

    # Guard against a missing/None "text" entry and trim the padding
    # whitespace the decoder may leave around the transcript.
    return (result.get("text") or "").strip()
# Web UI: a single audio input (path on disk) fed to transcribe(), with the
# returned string shown as plain text.
iface = gr.Interface(
    title="Persian ASR",
    description=" Speak in Persian or upload an audio file.",
    fn=transcribe,
    inputs=gr.Audio(label="Record or upload audio", type="filepath"),
    outputs="text",
)

if __name__ == "__main__":
    # Start the app only when executed as a script, not when imported.
    iface.launch()
|