Spaces:

yachimat
/

easy-whisper

Sleeping

App Files Files Community

yachimat committed on May 6

Commit

2cbdc24

•

1 Parent(s): 528a96f

Add application file

Browse files

Files changed (1) hide show

app.py +60 -0

app.py ADDED Viewed

	@@ -0,0 +1,60 @@

+import gradio as gr
+import os
+from openai import OpenAI
+from io import BytesIO
+from pydub import AudioSegment
+import imageio_ffmpeg
+# imageio-ffmpegからffmpegの実行可能ファイルのパスを取得
+ffmpeg_path = imageio_ffmpeg.get_ffmpeg_exe()
+# pydubで使用するffmpegのパスを設定
+AudioSegment.converter = ffmpeg_path
+AudioSegment.ffprobe = ffmpeg_path
+# OpenAIクライアントの初期化
+client = OpenAI()
+def process_audio(audio_file, output_format):
+    # 音声ファイルを読み込む
+    audio_data = audio_file.read()
+    file_type = audio_file.name.split('.')[-1]
+    audio = AudioSegment.from_file(BytesIO(audio_data), format=file_type)
+    if file_type != "mp3":
+        mp3_buffer = BytesIO()
+        audio.export(mp3_buffer, format="mp3")
+        mp3_buffer.seek(0)
+        audio_data = mp3_buffer
+    # OpenAIを使用してテキストに変換
+    transcript = client.audio.transcriptions.create(
+        model="whisper-1",
+        file=BytesIO(audio_data)
+    )
+    if output_format == "テキスト":
+        return transcript.text
+    elif output_format == "Docx":
+        from docx import Document
+        doc = Document()
+        doc.add_paragraph(transcript.text)
+        docx_buffer = BytesIO()
+        doc.save(docx_buffer)
+        docx_buffer.seek(0)
+        return docx_buffer
+iface = gr.Interface(
+    fn=process_audio,
+    inputs=[
+        gr.inputs.Audio(type="file", label="音声ファイルをアップロード"),
+        gr.inputs.Radio(choices=["テキスト", "Docx"], label="出力フォーマットを選択")
+    ],
+    outputs=[
+        gr.outputs.Textbox(label="テキスト出力") if output_format == "テキスト" else gr.outputs.File(label="Word文書をダウンロード")
+    ],
+    title="音声ファイルをテキストに変換",
+    description="このツールは音声ファイルをテキストに変換します。出力形式としてテキストまたはWord文書を選択できます。"
+)
+iface.launch()