import os

import gradio as gr
import requests


def extract_first_text(json_data):
    """Return the top-level 'text' field of a Whisper API response dict.

    Falls back to the empty string when the key is absent (e.g. on an
    API error payload).
    """
    return json_data.get('text', '')


def transcribe_audio(file):
    """Transcribe an uploaded WAV file via the OpenAI Whisper API.

    Args:
        file: A Gradio file object; ``file.name`` is the temp-file path.

    Returns:
        The transcribed text, or an error message string on failure.
    """
    # SECURITY: never hardcode API keys in source — the previously
    # committed key is compromised and must be revoked. Read from env.
    api_key = os.environ.get("OPENAI_API_KEY", "")
    if not api_key:
        return "エラー: OPENAI_API_KEY が設定されていません"

    url = "https://api.openai.com/v1/audio/transcriptions"
    headers = {
        "Authorization": f"Bearer {api_key}"
    }

    # Read the file in binary mode and stream it as multipart form data.
    with open(file.name, 'rb') as f:
        files = {
            "file": (file.name, f, 'audio/wav'),
            "model": (None, "whisper-1"),
            "language": (None, "ja"),
            "response_format": (None, "verbose_json"),
            "timestamp_granularities": (None, "word")
        }
        # Timeout so a stalled network call cannot hang the UI forever;
        # Whisper transcription can be slow, hence the generous value.
        response = requests.post(url, headers=headers, files=files, timeout=300)

    # Surface HTTP-level failures (401 bad key, 413 too large, ...)
    # instead of silently returning '' from a JSON error payload.
    try:
        response.raise_for_status()
    except requests.HTTPError:
        return f"エラー: APIリクエストが失敗しました (HTTP {response.status_code})"

    return extract_first_text(response.json())


with gr.Blocks() as demo:
    gr.Markdown("# 音声文字起こし")
    gr.Markdown("音声ファイルをアップロードして、文字起こしを表示します")
    with gr.Row():
        audio_input = gr.File(label="wavファイルをアップロード")
        text_output = gr.Textbox(label="文字起こし結果")
    transcribe_button = gr.Button("文字起こし")
    transcribe_button.click(
        fn=transcribe_audio,
        inputs=audio_input,
        outputs=text_output
    )

# Guard the launch so importing this module does not start a server.
if __name__ == "__main__":
    demo.launch()