import glob
import io
import os
import tempfile

import gradio as gr
import openai
import yt_dlp
from pydub import AudioSegment
def split_audio(file_path, chunk_length_ms):
    """Split an audio file into consecutive chunks.

    Args:
        file_path: Path to any audio file readable by pydub/ffmpeg.
        chunk_length_ms: Maximum length of each chunk, in milliseconds.

    Returns:
        A list of AudioSegment chunks covering the whole file in order;
        the final chunk may be shorter than chunk_length_ms.
    """
    audio = AudioSegment.from_file(file_path)
    # AudioSegment slicing clamps an out-of-range end index, so the last
    # (possibly shorter) chunk needs no special-casing.
    return [audio[start:start + chunk_length_ms]
            for start in range(0, len(audio), chunk_length_ms)]
def split_string_by_tokens(text, max_tokens=500):
    """Split *text* into chunks of at most *max_tokens* words.

    NOTE: despite the name, a "token" here is a whitespace-delimited word,
    which only approximates model tokens — TODO confirm this margin is
    acceptable for the downstream ChatCompletion prompt size.

    Args:
        text: The input string (may be empty).
        max_tokens: Maximum number of words per chunk.

    Returns:
        A list of strings; empty input yields an empty list.
    """
    words = text.split()
    # Slice the word list in fixed steps instead of an append/flush loop.
    return [' '.join(words[i:i + max_tokens])
            for i in range(0, len(words), max_tokens)]
# Read the API key from the environment; fails fast with KeyError if unset.
openai.api_key = os.environ['OPENAI_API_KEY']
def asr(url):
    """Download a video's audio, transcribe it with Whisper, then translate
    the transcript to Chinese with ChatGPT.

    Generator used as a streaming Gradio handler: each ``yield`` is a
    ``(chinese_text, english_text)`` pair shown as progress in the UI.

    Args:
        url: Video URL (Twitter / YouTube, anything yt-dlp supports).
    """
    # Remove leftovers from a previous run so yt-dlp does not try to
    # resume or collide with them. glob+os.remove instead of `os.system("rm ...")`
    # (portable, no shell).
    for leftover in glob.glob("*audio_download*"):
        os.remove(leftover)
    # yt-dlp options: best audio only, fixed output name, no resume.
    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': 'audio_downloaded.%(ext)s',
        'no_continue': True,
    }
    ydl = yt_dlp.YoutubeDL(ydl_opts)
    info_dict = ydl.extract_info(url, download=True)
    if info_dict is None:
        # BUG FIX: the original used `return (msg, msg)` here. Inside a
        # generator, `return value` only sets StopIteration.value — the
        # error message was never delivered to the UI. Yield it instead.
        yield "下载音频发生错误,请确认链接再试一次。", "Error downloading the audio. Check the URL and try again."
        return
    audio_file_name = "audio_downloaded.{}".format(info_dict["ext"])
    yield "下载视频完成. 开始分割视频...", ""
    # 30-second chunks keep each Whisper request small.
    chunks = split_audio(audio_file_name, chunk_length_ms=30 * 1000)
    transcripts = []
    for idx, chunk in enumerate(chunks):
        # Create a named temp file path (closed immediately so pydub can
        # write to it on all platforms), export the chunk, transcribe it.
        with tempfile.NamedTemporaryFile(mode="wb", suffix=".wav", delete=False) as temp_file:
            temp_file_path = temp_file.name
        try:
            chunk.export(temp_file_path, format="wav")
            with open(temp_file_path, "rb") as audio_chunk:
                transcript = openai.Audio.transcribe("whisper-1", audio_chunk)
        finally:
            # BUG FIX: the original removed the temp file only on success,
            # leaking it whenever export/transcribe raised.
            os.remove(temp_file_path)
        transcripts.append(transcript["text"])
        yield "请耐心等待语音识别完成...({}/{})".format(idx + 1, len(chunks)), " ".join(transcripts)
    # Done with the downloaded audio; delete it without shelling out.
    os.remove(audio_file_name)
    translations = []
    full_transcript = " ".join(transcripts)
    # Split the transcript so each translation prompt stays small.
    transcript_chunks = split_string_by_tokens(full_transcript, max_tokens=500)
    yield "语音识别完成, 开始翻译...(0/{})".format(len(transcript_chunks)), full_transcript
    for idx, transcript in enumerate(transcript_chunks):
        output = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "user", "content": "Transcript: {transcript}. \n Translate the video conversation transcript into fluent Chinese. Chinese: ".format(transcript=transcript)},
            ]
        )
        translation = output['choices'][0]['message']['content']
        translations.append(translation)
        yield "请耐心等候翻译:({}/{})...".format(idx + 1, len(transcript_chunks)) + " ".join(translations), " ".join(transcripts)
    full_translation = " ".join(translations)
    # Final pair: complete translation + complete transcript.
    yield full_translation, full_transcript
# Application title shown above the interface.
title = """
轻声细译"""
# HTML usage instructions rendered under the title.
instruction = """
<div style="border: 2px solid #000; padding: 10px; border-radius: 5px;">
一键输入视频链接,轻松实现中文翻译,畅享视频无障碍沟通 <span style="color: grey;">-- powered by OpenAI Whisper & ChatGPT.</span>.<br>
1.将视频链接(支持Twitter、YouTube)复制粘贴至输入框,点击提交(Submit)即可;
</div>"""
# NOTE: the original also created an unused `text_input = gr.inputs.Textbox()`
# — removed; the Interface builds its own input component below.
demo = gr.Interface(
    fn=asr,
    inputs=gr.inputs.Textbox(label="粘贴视频链接"),
    outputs=[
        gr.outputs.Textbox(label="中文"),
        gr.outputs.Textbox(label="英文"),
    ],
    title=title,
    description=instruction,
    theme='huggingface',
)
# queue() is required so the generator handler's intermediate yields
# stream to the browser instead of only the final value.
demo.queue()
demo.launch()