SrtUtilTools / app.py
hiDenorIYamano's picture
からデプロイ
78be241
import gradio as gr
import pathlib
import subprocess
import math
from pydub import AudioSegment
import openai
import re
import MeCab
import deepl
import os
import zipfile
import shutil
def split_japanese_line(line, max_length):
    """Re-wrap a Japanese subtitle line at particle boundaries.

    A line shorter than ``max_length`` is returned unchanged.  Otherwise the
    punctuation marks ``、`` and ``。`` are removed, the text is tokenized
    with MeCab, cut into segments, and the segments are merged back into at
    most two display lines (the final line absorbs any overflow).

    Args:
        line: Subtitle text to wrap.
        max_length: Target maximum number of characters per display line.

    Returns:
        ``line`` itself when it is short enough, otherwise the wrapped text
        with display lines separated by newline characters.
    """
    if len(line) < max_length:
        return line

    max_line = 2
    line = re.sub(r'[、。]', '', line)

    # Tokenize once and keep, per token, the surface form and the fifth
    # tab/comma-separated field of the MeCab output.
    # NOTE(review): index 4 is presumably the part-of-speech column of the
    # installed dictionary's output format -- confirm against the dictionary.
    words = []
    word_features = []
    for node in MeCab.Tagger().parse(line).split("\n"):
        # Skip blank lines and the end-of-sentence marker only; a real token
        # whose surface merely begins with "EOS" must not be dropped.
        if not node or node.strip() == "EOS":
            continue
        fields = re.split('[\t,]', node)
        words.append(node.split("\t")[0])
        # Index 4 needs at least five fields; fall back to None otherwise.
        word_features.append(fields[4] if len(fields) > 4 else None)

    allowed_features = frozenset(
        ["助詞-格助詞", "助詞-副助詞", "助詞-終助詞", "助詞-係助詞", "補助記号-句点"])
    # Two-token expressions that must never be separated by a line break.
    whitelisted_words = frozenset([
        "という", "っていう", "になって", "みたいな", "として", "に対して", "とする", "というか",
        "ていう", "になって", "について", "にかけて",
        "とか", "とかで", "にも", "には", "について",
        "がある", "があって", "でも", "では", "のような", "のように", "のいる", "のある",
        "がします", "がする"])

    lines = []
    current_line = ""
    idx = 0
    while idx < len(words):
        word = words[idx]
        if idx + 1 < len(words) and word + words[idx + 1] in whitelisted_words:
            # Keep the whitelisted pair together and consume both tokens.
            current_line += word + words[idx + 1]
            idx += 2
            # Close the segment unless the following token is itself one of
            # the allowed particle/period features.
            if idx < len(word_features) and word_features[idx] not in allowed_features:
                lines.append(current_line)
                current_line = ""
        else:
            # Start a new segment when the previous token is an allowed
            # particle/period and the current one is not.  ``idx > 0`` keeps
            # the first token from being compared with the last one through
            # a negative index.
            if (idx > 0
                    and word_features[idx - 1] in allowed_features
                    and word_features[idx] not in allowed_features):
                lines.append(current_line)
                current_line = ""
            current_line += word
            idx += 1

    # Keep whatever is left after the final token.
    if current_line:
        lines.append(current_line)

    # Merge segments into display lines: keep filling the current line while
    # it is at most half full or the segment still fits; once the line budget
    # is used up, everything else is appended to the last line.
    merged_lines = []
    temp_line = ''
    for segment in lines:
        if len(temp_line) <= max_length / 2:
            temp_line += segment
        elif len(temp_line + segment) <= max_length:
            temp_line += segment
        elif len(merged_lines) >= max_line - 1:
            temp_line += segment
        else:
            if temp_line == '':
                merged_lines.append(segment)
            else:
                merged_lines.append(temp_line)
                temp_line = segment
    if temp_line:
        merged_lines.append(temp_line)
    return "\n".join(merged_lines)
def split_japanese_srt_text(srt_content, max_length):
    """Re-wrap every subtitle text line of an SRT document.

    Cue numbers, timestamp lines and empty lines are copied through
    unchanged.  Every other line is passed to ``split_japanese_line`` and
    replaced by the display lines it returns.

    Args:
        srt_content: Full SRT text, with lines separated by ``\\n``.
        max_length: Maximum line length forwarded to ``split_japanese_line``.

    Returns:
        The SRT text with its subtitle lines re-wrapped.
    """
    timestamp_pattern = (
        r"^[0-9]{1,2}:[0-9]{2}:[0-9]{2},[0-9]{3} --> [0-9]{1,2}:[0-9]{2}:[0-9]{2},[0-9]{3}$"
    )
    output = []
    for row in srt_content.split("\n"):
        # Structural rows (blank separator, cue index, timing) are not text.
        is_structural = (
            row == ""
            or row.isdigit()
            or re.match(timestamp_pattern, row) is not None
        )
        if is_structural:
            output.append(row)
            continue
        # One text row may come back as several display rows.
        output.extend(split_japanese_line(row, max_length).split("\n"))
    return "\n".join(output)
def format_transcription_for_japanese(file_path, max_length):
    """Re-wrap the SRT file at ``file_path`` and save the result.

    The file is read as UTF-8, processed by ``split_japanese_srt_text`` and
    written, again as UTF-8, to ``formatted_transcription.srt`` in the
    current working directory.

    Args:
        file_path: Path of the SRT file to read.
        max_length: Maximum line length forwarded to the splitter.
    """
    source_text = pathlib.Path(file_path).read_text(encoding="utf-8")
    rewrapped_text = split_japanese_srt_text(source_text, max_length)
    pathlib.Path("formatted_transcription.srt").write_text(rewrapped_text, encoding="utf-8")
def audioToSrt(api_key, temperature, sampletext):
    """Transcribe ``converted.mp4`` to SRT and save it as ``transcription.srt``.

    Args:
        api_key: OpenAI API key; stored on the ``openai`` module.
        temperature: Sampling temperature passed to the transcription call.
        sampletext: Optional object whose ``name`` attribute is the path of a
            UTF-8 text file used as the transcription prompt; a falsy value
            means no prompt.
    """
    openai.api_key = api_key

    # The prompt stays empty unless a sample text file was supplied.
    whisper_prompt = ""
    if sampletext:
        with open(sampletext.name, "r", encoding="utf-8") as sample_file:
            # Real newlines become the two-character sequence backslash + n.
            whisper_prompt = sample_file.read().replace('\n', '\\n')

    with open('converted.mp4', "rb") as audio_stream:
        srt_text = openai.Audio.transcribe(
            "whisper-1",
            audio_stream,
            temperature=temperature,
            language="ja",
            prompt=whisper_prompt,
            response_format="srt",
        )

    with open("transcription.srt", "wt", encoding="utf-8") as srt_file:
        srt_file.write(srt_text)
def compress_mp4(path):
    """Extract the audio track of ``path`` and re-encode it as ``converted.mp4``.

    The audio stream is first copied to ``./audio<suffix>`` with ffmpeg, then
    re-encoded to mono 16 kHz AAC at a bitrate chosen so that the result
    stays at roughly 95% of ``TARGET_FILE_SIZE``.  ``converted.mp4`` is
    produced for every input because ``audioToSrt`` opens that file
    unconditionally.

    Args:
        path: Path of the input media file.

    Raises:
        subprocess.CalledProcessError: If an ffmpeg invocation fails.
        ValueError: If the audio has no duration, or is so long that the
            required bitrate would fall below 8 kbps.
    """
    original_file = pathlib.Path(path)
    audio_file = pathlib.Path("./audio").with_suffix(original_file.suffix)
    # Remove output left over from an earlier run before ffmpeg writes it.
    if audio_file.exists():
        audio_file.unlink()
    # check=True surfaces ffmpeg failures here instead of as a confusing
    # missing-file error further down.
    subprocess.run(
        ["ffmpeg", "-i", str(original_file), "-codec:a", "copy", "-vn", str(audio_file)],
        check=True,
    )

    # NOTE(review): presumably the upload size limit of the transcription
    # API -- confirm.
    TARGET_FILE_SIZE = 25000000
    print(f"{audio_file.stat().st_size=}")
    if audio_file.stat().st_size > TARGET_FILE_SIZE:
        print("This file needs to be converted.")

    audio_segment = AudioSegment.from_file(str(audio_file))
    audio_length_sec = len(audio_segment) / 1000
    if audio_length_sec <= 0:
        raise ValueError(f"{audio_length_sec=} is not supported.")

    # Bits available per second of audio, in kbps, with a 5% safety margin.
    target_kbps = int(math.floor(TARGET_FILE_SIZE * 8 / audio_length_sec / 1000 * 0.95))
    if target_kbps < 8:
        # A bare ``assert "<message>"`` can never fail; raise explicitly.
        raise ValueError(f"{target_kbps=} is not supported.")

    converted_file = pathlib.Path("./converted").with_suffix(".mp4")
    # Remove output left over from an earlier run before ffmpeg writes it.
    if converted_file.exists():
        converted_file.unlink()
    subprocess.run(
        [
            "ffmpeg", "-i", str(audio_file),
            "-codec:a", "aac", "-ar", "16000", "-ac", "1", "-b:a", f"{target_kbps}k",
            str(converted_file),
        ],
        check=True,
    )
    print(f"{converted_file.stat().st_size=}")
def videoToSrt(video_obj, temperature, sampletext_obj, max_length, api_key):
    """Run the whole media-to-SRT pipeline for one uploaded file.

    The steps are: compress the upload, transcribe it, re-wrap the Japanese
    subtitle lines, then read back the formatted result.

    Args:
        video_obj: Uploaded media; its ``name`` attribute is the file path.
        temperature: Sampling temperature for the transcription.
        sampletext_obj: Optional uploaded prompt text file.
        max_length: Maximum characters per subtitle line.
        api_key: OpenAI API key.

    Returns:
        A two-item list: the formatted SRT text and the path of the file
        that contains it.
    """
    compress_mp4(video_obj.name)
    audioToSrt(api_key, temperature, sampletext_obj)
    format_transcription_for_japanese("transcription.srt", max_length)

    result_path = "formatted_transcription.srt"
    with open(result_path, "r", encoding="utf-8") as result_file:
        srt_text = result_file.read()
    return [srt_text, result_path]
def remove_linebreaks_from_subtitle(srt_content):
    """Join the text lines of every SRT cue into a single line.

    For each timestamp line, the text that follows it (up to the next blank
    line or the end of the input) has its newline characters removed.  Text
    that is not preceded by a timestamp line is left untouched.

    Args:
        srt_content: SRT text to process.

    Returns:
        The SRT text with single-line cue bodies.
    """
    cue_pattern = r'(\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3}\n)([\s\S]+?)(?=\n\n|\Z)'

    def _join_cue_text(match):
        # Group 1 is the timing line (with its newline), group 2 the cue text.
        timing_line = match.group(1)
        cue_text = match.group(2)
        return timing_line + "".join(cue_text.split('\n'))

    return re.sub(cue_pattern, _join_cue_text, srt_content, flags=re.DOTALL)
def translateSrt(text, languages, api_key):
    """Translate SRT text into each requested language and zip the results.

    One ``<lang>.srt`` file per language is written to a freshly created
    ``translated_srt_files`` directory, and the directory content is then
    packed into ``translated_srt_files.zip``.

    Args:
        text: SRT text to translate.
        languages: Iterable of DeepL target language codes; ``None`` is
            treated as an empty selection.
        api_key: DeepL API key.

    Returns:
        Path of the created zip archive.
    """
    translator = deepl.Translator(api_key)

    # Start from an empty directory so files from an earlier run are not
    # packed into the new archive.
    if os.path.exists('translated_srt_files'):
        shutil.rmtree('translated_srt_files')
    os.makedirs('translated_srt_files')

    # Join multi-line cues into single lines before translating.
    remove_linebreaks_text = remove_linebreaks_from_subtitle(text)

    # Translate into each selected language in turn.
    for lang in languages or []:
        result = translator.translate_text(remove_linebreaks_text, target_lang=lang).text
        # Write UTF-8 explicitly, like every other file in this module, so
        # non-ASCII translations do not depend on the platform default encoding.
        with open(f'translated_srt_files/{lang}.srt', 'w', encoding='utf-8') as f:
            f.write(result)

    # Bundle all translated SRT files into a single zip archive.
    zip_filename = "translated_srt_files.zip"
    with zipfile.ZipFile(zip_filename, 'w') as zipf:
        for root, _, files in os.walk('translated_srt_files'):
            for file in files:
                zipf.write(os.path.join(root, file))
    return zip_filename
# Gradio UI: one tab for media-to-SRT transcription, one for SRT translation.
# NOTE(review): the nesting of components inside the rows was reconstructed
# from a copy of this file that had lost its indentation -- confirm the layout.
with gr.Blocks() as app:
    with gr.Tab("音声ファイルからsrt生成"):
        with gr.Row():
            # Media file to transcribe.
            audio_input = gr.File(file_types=[".mp3", ".mp4"], label="音声ファイル(MP3, MP4)")
            temparature_input = gr.Slider(
                maximum=1, value=0.8, step=0.1, label="temperature",
                info="0.8のような高い値は、出力をよりランダムにし、0.2のような低い値は確定的な文章を出力するので、毎回同じ文章を生成します。")
        with gr.Row():
            # Optional text file used as the transcription prompt.
            sample_input = gr.File(file_types=[".txt"], label="書き出したい文章の例")
            max_length_input = gr.Number(20, label="1行あたりの最大文字数")
        api_key_input = gr.Textbox(placeholder="openAI API key", type="password",
                                   label="openAI API keyを入力してください")
        text_output = [gr.Textbox(label="SRTの出力結果", max_lines=30), gr.File(label="出力結果ファイル")]
        text_button = gr.Button("SRTに変換")
    with gr.Tab("srt自動翻訳"):
        srt_textbox = gr.Textbox(placeholder="srt本文", label="srt本文を貼り付けてください", max_lines=25)
        with gr.Row():
            # The bare 'pt' code is intentionally not offered: like 'en', it
            # is rejected by the DeepL client as a deprecated target
            # language; the regional variants 'pt-BR' / 'pt-PT' are used.
            languages_dropdown = gr.Dropdown([
                'bg', 'cs', 'da', 'de', 'el', 'en-GB', 'en-US', 'es', 'et', 'fi',
                'fr', 'hu', 'id', 'it', 'ja', 'ko', 'lt', 'lv', 'nb', 'nl', 'pl',
                'pt-BR', 'pt-PT', 'ro', 'ru', 'sk', 'sl', 'sv', 'tr', 'uk', 'zh'
            ], multiselect=True, label="翻訳したい言語を選択")
            api_key_textbox = gr.Textbox(placeholder="DeepL API key", type="password",
                                         label="DeepL API keyを入力してください")
        translate_output = gr.File(label="翻訳結果")
        translate_button = gr.Button("SRTを翻訳")
    # Input order must match the parameter order of the callbacks.
    text_input = [audio_input, temparature_input, sample_input, max_length_input, api_key_input]
    translate_input = [srt_textbox, languages_dropdown, api_key_textbox]
    text_button.click(videoToSrt, inputs=text_input, outputs=text_output)
    translate_button.click(translateSrt, inputs=translate_input, outputs=translate_output)
# Launch the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    app.launch()