Spaces:
Sleeping
Sleeping
import gradio as gr | |
from pydub import AudioSegment | |
from io import BytesIO | |
import imageio_ffmpeg | |
from docx import Document | |
from openai import OpenAI | |
# Module-level configuration.
# pydub is pointed at the ffmpeg executable path returned by imageio_ffmpeg;
# the same path is assigned to both the converter and the ffprobe attribute.
ffmpeg_path = imageio_ffmpeg.get_ffmpeg_exe()
AudioSegment.converter = AudioSegment.ffprobe = ffmpeg_path

# Shared OpenAI client, built with no explicit arguments
# (presumably credentials come from the environment -- confirm on deployment).
client = OpenAI()
def is_valid_mp3(data):
    """Return True when *data* starts like an MP3 stream.

    The first bytes are accepted when they are either an ID3v2 tag
    (``b"ID3"``) or an MPEG audio frame header whose layer field says
    Layer III. All MPEG versions (1, 2 and 2.5) and both CRC settings are
    accepted, not only the ``0xFF 0xFB`` byte pair.

    Args:
        data: A seekable binary file-like object.

    Returns:
        bool: True if the header looks like MP3, False otherwise
        (including for empty or one-byte input).

    Side effects:
        The stream is always rewound to offset 0 before returning.
    """
    header = data.read(10)
    data.seek(0)

    if header.startswith(b"ID3"):
        return True
    if len(header) < 2:
        return False

    first, second = header[0], header[1]
    # Frame sync is 11 set bits: all of byte 0 plus the top 3 bits of byte 1.
    has_sync = first == 0xFF and (second & 0xE0) == 0xE0
    # Version field: 0b01 is reserved; the other three values are real versions.
    version_bits = (second >> 3) & 0x03
    # Layer field: 0b01 means Layer III, i.e. MP3.
    layer_bits = (second >> 1) & 0x03
    return bool(has_sync and version_bits != 0x01 and layer_bits == 0x01)
def process_audio(audio_file_path, output_format=None, progress=None):
    """Transcribe an audio file with the OpenAI ``whisper-1`` model.

    The file is decoded with pydub, cut into consecutive chunks, and each
    chunk is exported to MP3 and sent to the transcription endpoint. The
    chunk texts are concatenated, one per line.

    Args:
        audio_file_path: Path of the audio file to transcribe. A falsy value
            (``None`` or ``""``) short-circuits with a message.
        output_format: Accepted for backward compatibility and ignored. It
            now defaults to ``None`` so the function can be called with the
            single audio input the Gradio interface provides.
        progress: Optional callable that receives the completed fraction
            (0 < f <= 1) after each chunk has been processed.

    Returns:
        str: The transcript, with a newline after every chunk's text;
        ``"No audio file provided."`` when no path is given; ``""`` when the
        decoded audio is empty.
    """
    if not audio_file_path:
        # A plain string, matching the single-value return of the normal path.
        return "No audio file provided."

    import os  # function-scope import keeps this block self-contained

    # Lower-cased extension as the decoder format hint; None lets ffmpeg
    # auto-detect when the path has no extension.
    extension = os.path.splitext(audio_file_path)[1].lstrip(".").lower()
    audio = AudioSegment.from_file(audio_file_path, format=extension or None)
    # NOTE: no intermediate MP3 encode/decode here. The segment is already
    # decoded audio and every chunk is exported to MP3 below, so an extra
    # round trip would only cost time and quality.

    duration_ms = len(audio)
    raw_size = len(audio.raw_data)
    if duration_ms == 0 or raw_size == 0:
        # Nothing to transcribe; also avoids dividing by zero below.
        return ""

    # Chunk length is chosen so each chunk's *raw* sample data stays within
    # this byte budget (25 MiB). The exported MP3 is normally smaller than
    # the raw data, so this is presumably a conservative bound for the
    # upload size limit -- confirm against the API's current limit.
    max_size_bytes = 26_214_400
    part_ms = max(1, int(duration_ms * max_size_bytes / raw_size))
    parts = [
        audio[start_ms:start_ms + part_ms]
        for start_ms in range(0, duration_ms, part_ms)
    ]

    transcripts = []
    for part_index, part in enumerate(parts):
        part_data = BytesIO()
        part.export(part_data, format="mp3")
        part_data.seek(0)
        # Best effort: a chunk whose export does not look like MP3 is skipped.
        if is_valid_mp3(part_data):
            # The file object is given an .mp3 name before upload
            # (presumably so the API can infer the format).
            part_data.name = f"part_{part_index}.mp3"
            response = client.audio.transcriptions.create(
                model="whisper-1",
                file=part_data,
            )
            transcripts.append(response.text)
        if progress:
            # Reported per transcribed chunk, so the last call is exactly 1.0.
            progress((part_index + 1) / len(parts))

    return "".join(f"{text}\n" for text in transcripts)
# Build the Gradio UI: one audio upload (handed to the callback as a file
# path) and one copyable textbox for the result.
# A downloadable .txt file output is currently disabled.
_audio_input = gr.Audio(type="filepath", label="音声ファイルをアップロード")
_transcript_box = gr.Textbox(label="出力", show_copy_button=True)

iface = gr.Interface(
    fn=process_audio,
    inputs=_audio_input,
    outputs=[_transcript_box],
    title="音声ファイルをテキストに変換",
    description="このツールは音声ファイルをテキストに変換します。",
    allow_flagging="never",
)

# Start the web server for the interface.
iface.launch()