Spaces:

barbaroo
/

ASR_Faroese

Sleeping

File size: 2,525 Bytes

7b18d60
 
502159a
eb134bd
d9a4d6b
7b18d60
68a9c43
 
587a51a
68a9c43
 
0c0c610
 
68a9c43
0c0c610
 
0659665
 
 
 
 
 
 
 
 
0c0c610
0659665
 
d9a4d6b
ebd3d99
65129d9
d9a4d6b
ebd3d99
d9a4d6b
 
ebd3d99
d9a4d6b
 
 
 
ebd3d99
d9a4d6b
 
 
 
0c0c610
 
 
12974e0
1d903b8
d9a4d6b
 
0c0c610
 
 
 
 
12974e0
0c0c610
 
12974e0
0c0c610
 
 
12974e0
0c0c610
 
 
 
 
12974e0
0c0c610
 
 
 
 
12974e0
1d903b8

import logging
import time

import ffmpeg  # Make sure it's ffmpeg-python
import gradio as gr
import torch
from transformers import pipeline

# Detect CUDA once at start-up; the result decides where the model runs.
use_gpu = torch.cuda.is_available()

# Only pass `device` when a GPU was found, so the CPU path keeps whatever
# device the pipeline picks by default.
_device_kwargs = {"device": 0} if use_gpu else {}

# Speech-recognition pipeline shared by every transcription request.
p = pipeline(
    "automatic-speech-recognition",
    model="carlosdanielhernandezmena/wav2vec2-large-xlsr-53-faroese-100h",
    **_device_kwargs,
)

def extract_audio_from_m3u8(url):
    """Save the audio found at ``url`` to a local file using ffmpeg.

    The audio is always written to ``output_audio.aac`` (relative to the
    current working directory), replacing any file already there.

    Args:
        url: Location handed to ffmpeg as its input.

    Returns:
        The output file name on success. If anything raises, a string of the
        form ``"An error occurred: <exception>"`` is returned instead, so a
        caller cannot assume the result is an existing file.
    """
    target_path = "output_audio.aac"
    try:
        source_stream = ffmpeg.input(url)
        conversion = source_stream.output(target_path)
        conversion.run(overwrite_output=True)
    except Exception as err:
        return f"An error occurred: {err}"
    return target_path

def transcribe_function(audio, state, uploaded_audio, m3u8_url):
    """Transcribe one audio source and append the text to the transcript.

    A single source is used, chosen in this order of precedence: the
    uploaded file, then audio extracted from ``m3u8_url``, then the
    microphone recording passed as ``audio``.

    Args:
        audio: Microphone recording, or a falsy value when there is none.
        state: Transcript text accumulated by earlier calls.
        uploaded_audio: Uploaded audio file, or ``None`` when nothing was
            uploaded.
        m3u8_url: Stream URL to extract audio from; skipped when empty.

    Returns:
        A dict keyed by the ``state_var`` and ``transcription_var``
        components. On success both hold the transcript with the new text
        appended. With no audio both hold the unchanged transcript. On
        failure the transcription box gets a generic error message and the
        state is left unchanged.
    """
    # An upload wins over every other source, so test for it first: the
    # stream download is slow and its result would only be thrown away.
    if uploaded_audio is not None:
        audio = uploaded_audio
    elif m3u8_url:
        audio = extract_audio_from_m3u8(m3u8_url)

    if not audio:
        # Nothing to transcribe: leave the transcript exactly as it was.
        return {state_var: state, transcription_var: state}

    try:
        # NOTE(review): fixed 3 s pause kept from the original; its purpose
        # is not evident from this file - confirm it is still needed.
        time.sleep(3)
        text = p(audio, chunk_length_s=50)["text"]
        state += text + "\n"
        return {state_var: state, transcription_var: state}
    except Exception:
        # The user only sees a generic message, so record the traceback to
        # make the failure diagnosable from the server logs.
        logging.getLogger(__name__).exception(
            "Transcription failed for %r", audio
        )
        return {
            transcription_var: "An error occurred during transcription.",
            state_var: state,
        }

# --- Gradio UI: reset callback and layout ---

def reset_output(transcription, state):
    """Clear the transcription text and the accumulated state.

    Both arguments are ignored; whatever they hold, the result is a pair of
    empty strings (new transcription value, new state value).
    """
    cleared = ""
    return cleared, cleared

# Page layout: audio sources on the left, the transcript on the right, and
# the action buttons in a row underneath.
with gr.Blocks() as demo:
    # Holds the transcript text between button clicks.
    state_var = gr.State("")

    with gr.Row():
        with gr.Column():
            microphone = gr.Audio(source="microphone", type="filepath", label="Microphone")
            uploaded_audio = gr.Audio(label="Upload Audio File", type="filepath", source="upload")
            m3u8_url = gr.Textbox(label="m3u8 URL | E.g.: from kvf.fo or logting.fo")

        with gr.Column():
            # `readonly` is not a Textbox argument; `interactive=False` is
            # the documented way to make the box display-only.
            transcription_var = gr.Textbox(type="text", label="Transcription", interactive=False)

    with gr.Row():
        transcribe_button = gr.Button("Transcribe")
        reset_button = gr.Button("Reset output")

    # transcribe_function returns a dict keyed by these two output components.
    transcribe_button.click(
        transcribe_function,
        [microphone, state_var, uploaded_audio, m3u8_url],
        [transcription_var, state_var]
    )

    # reset_output returns one empty string per output component.
    reset_button.click(
        reset_output,
        [transcription_var, state_var],
        [transcription_var, state_var]
    )

demo.launch()