Spaces:
Running
Running
import gradio as gr | |
import pathlib | |
import subprocess | |
import math | |
from pydub import AudioSegment | |
import openai | |
import re | |
import MeCab | |
import deepl | |
import os | |
import zipfile | |
import shutil | |
def split_japanese_line(line, max_length):
    """Split a Japanese subtitle line into at most two display lines.

    The text is tokenised with MeCab, cut into chunks at particle boundaries,
    and the chunks are then greedily merged back into display lines.

    Args:
        line: One line of subtitle text.
        max_length: Target number of characters per display line. It is a
            target rather than a hard limit: merged lines may exceed it.

    Returns:
        ``line`` unchanged when it is shorter than ``max_length``; otherwise
        the text with the punctuation marks 、 and 。 removed and the merged
        chunks joined by newline characters.
    """
    if len(line) < max_length:
        return line
    max_line = 2
    line = re.sub(r'[、。]', '', line)
    m = MeCab.Tagger()
    nodes = m.parse(line).split("\n")
    # Drop blank lines and the end-of-sentence marker emitted by MeCab.
    tokens = [node for node in nodes if node and not node.startswith("EOS")]
    words = [node.split("\t")[0] for node in tokens]
    # NOTE(review): assumes the dictionary in use puts the part-of-speech tag
    # (e.g. 助詞-格助詞) in the fifth tab/comma-separated field - confirm.
    word_features = []
    for node in tokens:
        fields = re.split('[\t,]', node)
        # Index 4 needs at least five fields; shorter nodes get no feature.
        word_features.append(fields[4] if len(fields) > 4 else None)
    lines = []
    current_line = ""
    idx = 0
    allowed_features = ["助詞-格助詞", "助詞-副助詞", "助詞-終助詞", "助詞-係助詞", "補助記号-句点"]
    whitelisted_words = ["という", "っていう", "になって", "みたいな", "として", "に対して", "とする", "というか", "ていう", "になって", "について", "にかけて",
                         "とか", "とかで", "にも", "には", "について",
                         "がある", "があって", "でも", "では", "のような", "のように", "のいる", "のある", "がします", "がする"]
    while idx < len(words):
        word = words[idx]
        if idx + 1 < len(words) and word + words[idx + 1] in whitelisted_words:
            # Two tokens that form a whitelisted compound are kept together.
            current_line += word + words[idx + 1]
            idx += 2  # Skip the second half of the compound
            if idx < len(word_features) and word_features[idx] not in allowed_features:
                lines.append(current_line)
                current_line = ""
        elif idx < len(word_features):
            # Break after a particle when the current token is not one.
            # ``idx > 0`` keeps ``idx - 1`` from wrapping to the last token.
            if (idx > 0 and word_features[idx - 1] in allowed_features
                    and word_features[idx] not in allowed_features):
                lines.append(current_line)
                current_line = ""
            current_line += word
            idx += 1
        else:
            current_line += word
            idx += 1
    # Append the last chunk if it exists
    if current_line:
        lines.append(current_line)
    # Merge chunks into display lines. Once ``max_line - 1`` lines have been
    # emitted, every remaining chunk is appended to the final line.
    merged_lines = []
    temp_line = ''
    for chunk in lines:
        if len(temp_line) <= max_length / 2:
            temp_line += chunk
        elif len(temp_line + chunk) <= max_length:
            temp_line += chunk
        elif len(merged_lines) >= max_line - 1:
            temp_line += chunk
        else:
            if temp_line == '':
                merged_lines.append(chunk)
            else:
                merged_lines.append(temp_line)
                temp_line = chunk
    if temp_line:
        merged_lines.append(temp_line)
    return "\n".join(merged_lines)
def split_japanese_srt_text(srt_content, max_length):
    """Re-wrap the subtitle text lines of an SRT document.

    Blank lines, all-digit lines (cue indexes) and timestamp lines are copied
    through untouched; every other line is passed to ``split_japanese_line``
    and replaced by the line(s) it returns.

    Args:
        srt_content: Full SRT document as a single string.
        max_length: Target characters per line, forwarded to
            ``split_japanese_line``.

    Returns:
        The rewritten SRT document as a single string.
    """
    timestamp_pattern = r"^[0-9]{1,2}:[0-9]{2}:[0-9]{2},[0-9]{3} --> [0-9]{1,2}:[0-9]{2}:[0-9]{2},[0-9]{3}$"
    output_rows = []
    for row in srt_content.split("\n"):
        # Structural rows (timing, index, separator) are never re-wrapped.
        if re.match(timestamp_pattern, row) or row.isdigit() or row == "":
            output_rows.append(row)
            continue
        wrapped = split_japanese_line(row, max_length)
        output_rows.extend(wrapped.split("\n"))
    return "\n".join(output_rows)
def format_transcription_for_japanese(file_path, max_length):
    """Re-wrap an SRT file and save it as ``formatted_transcription.srt``.

    Args:
        file_path: Path of the UTF-8 SRT file to read.
        max_length: Target characters per line, forwarded to
            ``split_japanese_srt_text``.
    """
    source_text = pathlib.Path(file_path).read_text(encoding="utf-8")
    wrapped_text = split_japanese_srt_text(source_text, max_length)
    # The output name is fixed and resolved against the working directory.
    pathlib.Path("formatted_transcription.srt").write_text(wrapped_text, encoding="utf-8")
def audioToSrt(api_key, temperature, sampletext):
    """Transcribe ``converted.mp4`` and save the result as ``transcription.srt``.

    Args:
        api_key: OpenAI API key; assigned to ``openai.api_key``.
        temperature: Sampling temperature passed to the transcription call.
        sampletext: Optional object with a ``name`` attribute holding the
            path of a UTF-8 text file used as the prompt. Falsy means an
            empty prompt.
    """
    openai.api_key = api_key  # Set the API key

    # The prompt stays empty unless a sample text file was supplied.
    prompt_content = ""
    if sampletext:
        with open(sampletext.name, "r", encoding="utf-8") as sample_file:
            # Real newlines become the two-character sequence backslash + n.
            prompt_content = sample_file.read().replace('\n', '\\n')

    with open('converted.mp4', "rb") as media:
        transcription = openai.Audio.transcribe(
            "whisper-1",
            media,
            temperature=temperature,
            language="ja",
            prompt=prompt_content,
            response_format="srt",
        )

    with open("transcription.srt", "wt", encoding="utf-8") as srt_file:
        srt_file.write(transcription)
def compress_mp4(path):
    """Extract the audio of ``path`` and write it to ``./converted.mp4``.

    The audio stream is first copied to ``./audio<suffix>``. If that file is
    larger than 25,000,000 bytes it is re-encoded as 16 kHz mono AAC at a
    bitrate chosen to fit the limit; otherwise the stream is re-wrapped
    without re-encoding. Either way ``./converted.mp4`` exists afterwards and
    belongs to this call, never to a previous one.

    Args:
        path: Path of the input media file.

    Raises:
        ValueError: If the bitrate needed to fit the size limit is below
            8 kbps.
        subprocess.CalledProcessError: If an ffmpeg invocation fails.
    """
    original_file = pathlib.Path(path)
    audio_file = pathlib.Path("./audio").with_suffix(original_file.suffix)
    converted_file = pathlib.Path("./converted").with_suffix(".mp4")
    # Remove leftovers up front so a stale file from an earlier run can never
    # be mistaken for this run's output.
    for leftover in (audio_file, converted_file):
        if leftover.exists():
            leftover.unlink()
    subprocess.run(["ffmpeg", "-i", str(original_file)
                    , "-codec:a", "copy", "-vn", str(audio_file)], check=True)
    TARGET_FILE_SIZE = 25000000
    print(f"{audio_file.stat().st_size=}")
    if audio_file.stat().st_size > TARGET_FILE_SIZE:
        print("This file needs to be converted.")
        audio_segment = AudioSegment.from_file(str(audio_file))
        audio_length_sec = len(audio_segment) / 1000
        # Bitrate that fits the size limit, with a 5% safety margin.
        target_kbps = int(math.floor(TARGET_FILE_SIZE * 8 / audio_length_sec / 1000 * 0.95))
        if target_kbps < 8:
            # Raise explicitly: asserting a non-empty string never fails.
            raise ValueError(f"{target_kbps=} is not supported.")
        subprocess.run(["ffmpeg", "-i", str(audio_file)
                        , "-codec:a", "aac", "-ar", "16000", "-ac", "1", "-b:a", f"{target_kbps}k"
                        , str(converted_file)], check=True)
    else:
        # Already small enough: re-wrap the stream as-is so the fixed output
        # path is still produced for the transcription step.
        subprocess.run(["ffmpeg", "-i", str(audio_file)
                        , "-codec:a", "copy", str(converted_file)], check=True)
    print(f"{converted_file.stat().st_size=}")
def videoToSrt(video_obj, temperature, sampletext_obj, max_length, api_key):
    """Run the full pipeline: compress, transcribe, re-wrap, return the SRT.

    Args:
        video_obj: Uploaded media object; its ``name`` attribute is the path.
        temperature: Sampling temperature forwarded to ``audioToSrt``.
        sampletext_obj: Optional uploaded prompt file forwarded to
            ``audioToSrt``.
        max_length: Target characters per subtitle line.
        api_key: OpenAI API key forwarded to ``audioToSrt``.

    Returns:
        A two-element list: the formatted SRT text and the path of the file
        it was read from.
    """
    raw_srt_path = "transcription.srt"
    formatted_srt_path = "formatted_transcription.srt"

    compress_mp4(video_obj.name)
    audioToSrt(api_key, temperature, sampletext_obj)
    format_transcription_for_japanese(raw_srt_path, max_length)

    with open(formatted_srt_path, "r", encoding="utf-8") as srt_file:
        srt_text = srt_file.read()
    return [srt_text, formatted_srt_path]
def remove_linebreaks_from_subtitle(srt_content):
    """Join the text of each SRT cue onto a single line.

    For every ``HH:MM:SS,mmm --> HH:MM:SS,mmm`` line, the text that follows
    it, up to the next blank line or the end of the string, has all of its
    newline characters removed. Everything else is left as it is.

    Args:
        srt_content: SRT document as a single string.

    Returns:
        The document with each cue's text collapsed to one line.
    """
    # Group 1 is the timing line (with its newline), group 2 the cue text.
    cue_pattern = re.compile(
        r'(\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3}\n)([\s\S]+?)(?=\n\n|\Z)', re.DOTALL)

    def _collapse(match):
        timing, cue_text = match.groups()
        return timing + cue_text.replace('\n', '')

    return cue_pattern.sub(_collapse, srt_content)
def translateSrt(text, languages, api_key):
    """Translate SRT text into each selected language and zip the results.

    One ``<lang>.srt`` file per language is written to a freshly created
    ``translated_srt_files`` directory, which is then packed into
    ``translated_srt_files.zip``.

    Args:
        text: SRT document to translate.
        languages: Iterable of DeepL target language codes. ``None`` or an
            empty selection produces an empty archive.
        api_key: DeepL API key.

    Returns:
        Path of the zip archive that was created.
    """
    translator = deepl.Translator(api_key)
    output_dir = 'translated_srt_files'
    # Delete the directory left over from a previous run, if any
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
    # Create a fresh directory
    os.makedirs(output_dir)
    # Collapse each cue's text onto a single line before translating
    remove_linebreaks_text = remove_linebreaks_from_subtitle(text)
    # Translate into each requested language
    for lang in languages or []:
        result = translator.translate_text(remove_linebreaks_text, target_lang=lang).text
        # Write UTF-8 explicitly: the platform default encoding may be unable
        # to represent the translated text.
        with open(f'translated_srt_files/{lang}.srt', 'w', encoding='utf-8') as f:
            f.write(result)
    # Bundle the generated srt files into one zip archive
    zip_filename = "translated_srt_files.zip"
    with zipfile.ZipFile(zip_filename, 'w') as zipf:
        for root, _, files in os.walk(output_dir):
            for file in files:
                zipf.write(os.path.join(root, file))
    return zip_filename  # Path of the zip file
# Gradio UI with two tabs: SRT generation from an audio/video file, and SRT
# translation.
# NOTE(review): the source had lost its indentation, so the nesting of
# components inside the rows and tabs below is reconstructed - confirm the
# intended layout (in particular which container holds the API key boxes).
with gr.Blocks() as app:
    # Tab 1: upload media, tune the parameters, get the SRT back.
    with gr.Tab("音声ファイルからsrt生成"):
        with gr.Row():
            audio_input = gr.File(file_types=[".mp3", ".mp4"], label="音声ファイル(MP3, MP4)")  # File input for the video
            temparature_input = gr.Slider(maximum=1, value=0.8, step=0.1, label="temperature",
                                          info="0.8のような高い値は、出力をよりランダムにし、0.2のような低い値は確定的な文章を出力するので、毎回同じ文章を生成します。")
        with gr.Row():
            sample_input = gr.File(file_types=[".txt"], label="書き出したい文章の例")  # File input for the text file
            max_length_input = gr.Number(20, label="1行あたりの最大文字数")
        api_key_input = gr.Textbox(placeholder="openAI API key", type="password", label="openAI API keyを入力してください")
        # Two outputs, matching the two-element list returned by videoToSrt.
        text_output = [gr.Textbox(label="SRTの出力結果", max_lines=30), gr.File(label="出力結果ファイル")]
        text_button = gr.Button("SRTに変換")
    # Tab 2: paste SRT text, choose target languages, download a zip.
    with gr.Tab("srt自動翻訳"):
        srt_textbox = gr.Textbox(placeholder="srt本文", label="srt本文を貼り付けてください", max_lines=25)
        with gr.Row():
            languages_dropdown = gr.Dropdown([
                'bg', 'cs', 'da', 'de', 'el', 'en-GB', 'en-US', 'es', 'et', 'fi',
                'fr', 'hu', 'id', 'it', 'ja', 'ko', 'lt', 'lv', 'nb', 'nl', 'pl', 'pt',
                'pt-BR', 'pt-PT', 'ro', 'ru', 'sk', 'sl', 'sv', 'tr', 'uk', 'zh'
            ], multiselect=True, label="翻訳したい言語を選択")
            api_key_textbox = gr.Textbox(placeholder="DeepL API key", type="password", label="DeepL API keyを入力してください")
        translate_output = gr.File(label="翻訳結果")
        translate_button = gr.Button("SRTを翻訳")
    # Input lists are ordered to match the handlers' positional parameters.
    text_input = [audio_input, temparature_input, sample_input, max_length_input, api_key_input]
    translate_input = [srt_textbox, languages_dropdown, api_key_textbox]
    text_button.click(videoToSrt, inputs=text_input, outputs=text_output)
    translate_button.click(translateSrt, inputs=translate_input, outputs=translate_output)
# Start the web server only when the file is run as a script.
if __name__ == "__main__":
    app.launch()