import json
import os

import gradio as gr
import requests


def extract_words(json_data):
    """Concatenate the 'word' field of each timestamped item into one string.

    No separator is inserted between words, which suits Japanese text
    (the API is called with language="ja").
    """
    return ''.join(item['word'] for item in json_data)


def transcribe_audio(file):
    """Transcribe an audio file with the OpenAI Whisper API.

    Parameters
    ----------
    file : str or file-like
        Path to the uploaded audio file (Gradio ``type="filepath"``), or an
        already-open binary file object for backward compatibility.

    Returns
    -------
    str
        The concatenated word-level transcript, or a human-readable error
        message (the function never raises to the Gradio UI).
    """
    # SECURITY: never hard-code API keys in source — the previous revision
    # shipped a live key. Read it from the environment instead.
    api_key = os.environ.get("OPENAI_API_KEY")
    if not api_key:
        return "Error: OPENAI_API_KEY environment variable is not set"

    url = "https://api.openai.com/v1/audio/transcriptions"
    headers = {"Authorization": f"Bearer {api_key}"}

    # Accept either a filesystem path or an open file object.
    opened_here = isinstance(file, str)
    fh = open(file, "rb") if opened_here else file
    try:
        files = {
            # Pass (filename, fileobj) so the API can infer the audio format
            # from the file extension.
            "file": (os.path.basename(getattr(fh, "name", "audio.wav")), fh),
            "model": (None, "whisper-1"),
            "language": (None, "ja"),
            "response_format": (None, "verbose_json"),
            # The API expects the array-style field name; without the "[]"
            # suffix the word-level timestamps are not returned.
            "timestamp_granularities[]": (None, "word"),
        }
        response = requests.post(url, headers=headers, files=files, timeout=120)
        response_json = response.json()
    except (requests.RequestException, ValueError) as exc:
        # Network failure or non-JSON body — report instead of crashing.
        return f"Error in transcription: {exc}"
    finally:
        if opened_here:
            fh.close()

    if "words" in response_json:
        return extract_words(response_json["words"])
    return "Error in transcription: " + response_json.get("error", {}).get("message", "Unknown error")


iface = gr.Interface(
    fn=transcribe_audio,
    # gr.inputs.* was removed in Gradio 4; gr.Audio with type="filepath"
    # hands transcribe_audio a path string.
    inputs=gr.Audio(sources=["upload"], type="filepath"),
    outputs="text",
    title="音声文字起こし",
    description="音声ファイルをアップロードして、文字起こしを表示します",
)

if __name__ == "__main__":
    iface.launch()