File size: 2,766 Bytes
7b18d60
 
502159a
eb134bd
06b50d8
7b18d60
9038461
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a09ccc5
9038461
933c61b
9038461
 
 
 
 
06b50d8
9038461
 
 
 
 
6942b70
9038461
06b50d8
9038461
 
 
 
 
 
 
 
 
 
 
 
06b50d8
9038461
 
a09ccc5
9038461
 
 
 
 
06b50d8
 
9038461
 
 
06b50d8
 
9038461
 
 
933c61b
6942b70
9038461
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import logging
import time

import ffmpeg
import gradio as gr
import torch
from transformers import pipeline

def main():
    """Build and launch the Gradio speech-recognition demo.

    Loads the Faroese wav2vec2 automatic-speech-recognition pipeline (on
    CUDA device 0 when ``torch.cuda.is_available()``, otherwise with the
    pipeline's default device), builds a UI that transcribes either a
    recorded/uploaded audio file or the audio of an m3u8 stream, and calls
    ``demo.launch()``.
    """
    logger = logging.getLogger(__name__)

    model_name = "carlosdanielhernandezmena/wav2vec2-large-xlsr-53-faroese-100h"

    # Only pass ``device`` when a GPU is present, so the non-GPU path keeps
    # whatever default device selection the pipeline itself applies.
    pipeline_kwargs = {"model": model_name}
    if torch.cuda.is_available():
        pipeline_kwargs["device"] = 0
    p = pipeline("automatic-speech-recognition", **pipeline_kwargs)

    def extract_audio_from_m3u8(url):
        """Save the audio of the stream at *url* with ffmpeg and return its path.

        Raises:
            ValueError: if *url* is not an http(s) URL.
            Exception: whatever ffmpeg raises when the download/conversion
                fails. Errors are raised rather than returned so a failure
                message can never be mistaken for a file path.
        """
        # The URL comes straight from a user-editable textbox; ffmpeg would
        # also accept local paths and other protocols, so restrict it to
        # web links, which is what the UI label asks for.
        if not url.lower().startswith(("http://", "https://")):
            raise ValueError("only http:// and https:// links are supported")

        output_file = "output_audio.aac"
        ffmpeg.input(url).output(output_file).run(overwrite_output=True)
        return output_file

    def transcribe_function(audio, state, m3u8_url):
        """Transcribe *audio*, or the stream at *m3u8_url* when one is given.

        Args:
            audio: path of the recorded/uploaded audio file, or a falsy
                value when none was provided.
            state: text of all transcriptions so far in this session.
            m3u8_url: optional stream URL; takes precedence over *audio*.

        Returns:
            ``(state, text)`` where *state* has the new transcription
            appended on success and *text* is either the new transcription
            or a user-facing error message.
        """
        url = (m3u8_url or "").strip()
        if url:
            try:
                audio = extract_audio_from_m3u8(url)
            except Exception as e:
                # Surface the extraction failure directly instead of
                # feeding an error string to the recognizer as a path.
                logger.exception("Could not extract audio from %r", url)
                return state, f"Hendan villan hendi: {e}"

        if not audio:
            # Return a meaningful message; no audio found
            return state, "Einki ljóð er til talukenning."

        try:
            # NOTE(review): fixed 3 s delay kept from the original; its
            # purpose is not evident from this file - confirm it is needed.
            time.sleep(3)
            text = p(audio, chunk_length_s=50)["text"]
        except Exception:
            logger.exception("Transcription failed for %r", audio)
            return state, "Okkurt riggaði ikki í talukenningini."

        state = (state or "") + text + "\n"
        return state, text

    def reset_output(transcription, state):
        """Clear both the visible transcription and the accumulated state."""
        return "", ""

    with gr.Blocks() as demo:
        state_var = gr.State("")

        with gr.Row():
            with gr.Column():
                microphone = gr.Audio(
                    type="filepath",
                    label="Mikrofon ella ljóðfíla"
                )
                m3u8_url = gr.Textbox(
                    label="m3u8-leinki (t.d. frá kvf.fo ella logting.fo)"
                )

            with gr.Column():
                transcription_var = gr.Textbox(
                    type="text",
                    label="Tekstur frá talukennara",
                    interactive=False
                )

        with gr.Row():
            transcribe_button = gr.Button("Byrja talukenning")
            reset_button = gr.Button("Strika tekst frá talukennara")

        transcribe_button.click(
            transcribe_function,
            [microphone, state_var, m3u8_url],
            [state_var, transcription_var]
        )

        reset_button.click(
            reset_output,
            [transcription_var, state_var],
            [transcription_var, state_var]
        )

    demo.launch()

# Start the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()