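# Minimal Gradio speech-to-text demo: sends audio to Whisper checkpoints
# through the Hugging Face Inference API and displays the transcription.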
import json
import os
import traceback

import requests
import gradio as gr  # Gradio builds the web UI for the demo

# Hugging Face API token (set via the HF_TOKEN environment variable) and Inference API base URL.
HF_TOKEN = os.environ.get("HF_TOKEN", None)
API_URL = "https://api-inference.huggingface.co/models/"

def s2t(audio, model_name):
    """Send the recorded or uploaded audio file to the selected model and return the transcription."""
    if audio is None:  # the Audio component emits None when its value is cleared
        return ""
    with open(audio, "rb") as f:
        data = f.read()
    try:
        url = API_URL + model_name
        headers = {"Authorization": f"Bearer {HF_TOKEN}"}
        response = requests.post(url, headers=headers, data=data)
        text = response.json()["text"]
    except Exception:
        # Covers network errors, non-JSON responses, and API error payloads without a "text" field.
        text = f"Transcription failed with error:\n{traceback.format_exc()}"

    return text

# Layout: model selector and audio input on the left, transcription output on the right.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            model_name = gr.Dropdown(
                label="Models:",
                choices=[
                    "openai/whisper-large-v3",
                    "openai/whisper-large-v2",
                    "openai/whisper-large",
                    "openai/whisper-medium",
                    "openai/whisper-small",
                    "openai/whisper-base",
                    "openai/whisper-tiny",
                ],
                value="openai/whisper-large-v3",
            )
            audio = gr.Audio(sources=["microphone", "upload"], type="filepath", label="Audio")

        with gr.Column():
            output = gr.Textbox(label="Transcription results")

        # Transcribe whenever a new recording or upload is provided.
        audio.change(s2t, inputs=[audio, model_name], outputs=output)

demo.launch()