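# Minimal Gradio speech-to-text demo: sends audio to Whisper checkpoints
# through the Hugging Face Inference API and displays the transcription.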
import json
import os
import traceback

import requests
import gradio as gr  # Gradio builds the web UI for the demo

# Hugging Face API token (set via the HF_TOKEN environment variable) and Inference API base URL.
HF_TOKEN = os.environ.get("HF_TOKEN", None)
API_URL = "https://api-inference.huggingface.co/models/"

def s2t(audio, model_name):
    """Send the recorded or uploaded audio file to the selected model and return the transcription."""
    if audio is None:  # the Audio component emits None when its value is cleared
        return ""
    with open(audio, "rb") as f:
        data = f.read()
    try:
        url = API_URL + model_name
        headers = {"Authorization": f"Bearer {HF_TOKEN}"}
        response = requests.post(url, headers=headers, data=data)
        text = response.json()["text"]
    except Exception:
        # Covers network errors, non-JSON responses, and API error payloads without a "text" field.
        text = f"Transcription failed with error:\n{traceback.format_exc()}"

    return text

# Layout: model selector and audio input on the left, transcription output on the right.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            model_name = gr.Dropdown(
                label="Models:",
                choices=[
                    "openai/whisper-large-v3",
                    "openai/whisper-large-v2",
                    "openai/whisper-large",
                    "openai/whisper-medium",
                    "openai/whisper-small",
                    "openai/whisper-base",
                    "openai/whisper-tiny",
                ],
                value="openai/whisper-large-v3",
            )
            audio = gr.Audio(sources=["microphone", "upload"], type="filepath", label="Audio")

        with gr.Column():
            output = gr.Textbox(label="Transcription results")

        # Transcribe whenever a new recording or upload is provided.
        audio.change(s2t, inputs=[audio, model_name], outputs=output)

demo.launch()