Spaces:

zanemotiwala
/

audio-to-text

Running

File size: 1,427 Bytes

7400283
9854ee3
a4e347a
a8a6273
a4e347a
 
 
 
ccee400
 
 
9854ee3
7400283
9854ee3
ccee400
 
9854ee3
 
 
 
 
 
7400283
8876f3b
72823aa
 
cea3a6b
84b6f15
72823aa
63cf35c
ee2727c
72823aa
 
7f412be
4d6774a
7400283

import gradio as gr
import logging
from transformers import pipeline
import torch

# Build the speech-recognition pipeline once, at import time, so that the
# transcription callback can reuse the same instance on every call.
# NOTE(review): no `chunk_length_s` is passed here; verify how long
# recordings behave (Whisper-family models presumably work on ~30 s windows).
asr = pipeline(
    "automatic-speech-recognition",
    model="distil-whisper/distil-small.en",
)

def transcribe_speech(audio_file_path) -> str:
    """Transcribe an audio file with the module-level ``asr`` pipeline.

    Args:
        audio_file_path: Path of the audio file to transcribe. Any falsy
            value (``None``, ``""``) is treated as "no audio supplied".

    Returns:
        The ``"text"`` entry of the pipeline output on success. Otherwise a
        human-readable message: a fixed retry prompt when no path was given,
        or an error description when transcription failed. Exceptions raised
        while transcribing are caught and reported through the return value.
    """
    if not audio_file_path:
        logging.error("No audio file provided.")
        return "No audio found, please retry."

    logging.info("Processing file: %s", audio_file_path)
    try:
        output = asr(audio_file_path)
        # Kept inside the try so a malformed pipeline result (missing "text")
        # is reported to the user the same way as a transcription failure.
        return output["text"]
    except Exception as e:
        # UI boundary: the caller displays whatever string is returned, so
        # convert the failure into a message. logging.exception records the
        # full traceback, which a plain logging.error(str(e)) would discard.
        logging.exception("Error during transcription: %s", e)
        return f"Error processing the audio file: {e}"

# Configure the root logger so INFO-level (and higher) records are emitted.
logging.basicConfig(level=logging.INFO)

# Page layout, top to bottom: title, description, audio input with its
# trigger button, and finally the transcription output.
with gr.Blocks() as demo:
    with gr.Row():
        gr.Markdown("# Simple Speech Recognition App")

    with gr.Row():
        gr.Markdown(
            "### This app allows you to record or upload audio and see its transcription."
        )

    with gr.Row():
        # type="filepath" hands the callback a path string rather than raw samples.
        mic = gr.Audio(
            label="Record from Microphone or Upload File",
            type="filepath",
        )
        transcribe_button = gr.Button("Transcribe Audio")

    with gr.Row():
        transcription = gr.Textbox(
            label="Transcription",
            lines=3,
            placeholder="Transcription will appear here...",
        )

    # Wire the button: run transcribe_speech on the audio input and write the
    # returned string into the transcription textbox.
    transcribe_button.click(
        fn=transcribe_speech,
        inputs=mic,
        outputs=transcription,
    )

# NOTE(review): share=True requests a public share link; presumably not
# needed when the app is hosted on Hugging Face Spaces — confirm.
demo.launch(share=True)