import os

import gradio as gr
import requests


def extract_first_text(json_data):
    """Return the top-level 'text' field of a Whisper API response dict.

    Falls back to the empty string when the key is absent (e.g. on an
    API error payload).
    """
    return json_data.get('text', '')


def transcribe_audio(file):
    """Transcribe an uploaded WAV file via the OpenAI Whisper API.

    Args:
        file: A Gradio file object; ``file.name`` is the temp-file path.

    Returns:
        The transcribed text, or an error message string on failure.
    """
    # SECURITY: never hardcode API keys in source — the previously
    # committed key is compromised and must be revoked. Read from env.
    api_key = os.environ.get("OPENAI_API_KEY", "")
    if not api_key:
        return "エラー: OPENAI_API_KEY が設定されていません"

    url = "https://api.openai.com/v1/audio/transcriptions"
    headers = {
        "Authorization": f"Bearer {api_key}"
    }

    # Read the file in binary mode and stream it as multipart form data.
    with open(file.name, 'rb') as f:
        files = {
            "file": (file.name, f, 'audio/wav'),
            "model": (None, "whisper-1"),
            "language": (None, "ja"),
            "response_format": (None, "verbose_json"),
            "timestamp_granularities": (None, "word")
        }
        # Timeout so a stalled network call cannot hang the UI forever;
        # Whisper transcription can be slow, hence the generous value.
        response = requests.post(url, headers=headers, files=files, timeout=300)

    # Surface HTTP-level failures (401 bad key, 413 too large, ...)
    # instead of silently returning '' from a JSON error payload.
    try:
        response.raise_for_status()
    except requests.HTTPError:
        return f"エラー: APIリクエストが失敗しました (HTTP {response.status_code})"

    return extract_first_text(response.json())


with gr.Blocks() as demo:
    gr.Markdown("# 音声文字起こし")
    gr.Markdown("音声ファイルをアップロードして、文字起こしを表示します")
    with gr.Row():
        audio_input = gr.File(label="wavファイルをアップロード")
        text_output = gr.Textbox(label="文字起こし結果")
    transcribe_button = gr.Button("文字起こし")
    transcribe_button.click(
        fn=transcribe_audio,
        inputs=audio_input,
        outputs=text_output
    )

# Guard the launch so importing this module does not start a server.
if __name__ == "__main__":
    demo.launch()