Spaces:

hidenoriyamano37
/

SrtUtilTools

Running

App Files Files Community

SrtUtilTools / app.py

hiDenorIYamano

からデプロイ

78be241 5 months ago

raw history blame contribute delete

No virus

10.3 kB

	import gradio as gr
	import pathlib
	import subprocess
	import math
	from pydub import AudioSegment
	import openai
	import re
	import MeCab
	import deepl
	import os
	import zipfile
	import shutil



	# Features after which a line break may be placed (compared against field 4 of a MeCab node).
	_BREAK_AFTER_FEATURES = frozenset([
	    "助詞-格助詞", "助詞-副助詞", "助詞-終助詞", "助詞-係助詞", "補助記号-句点",
	])

	# Two-token sequences that are always kept together as a single unit.
	_COMPOUND_PHRASES = frozenset([
	    "という", "っていう", "になって", "みたいな", "として", "に対して", "とする", "というか", "ていう",
	    "について", "にかけて", "とか", "とかで", "にも", "には",
	    "がある", "があって", "でも", "では", "のような", "のように", "のいる", "のある", "がします", "がする",
	])


	def _parse_mecab_tokens(parsed):
	    """Turn raw MeCab output into a list of ``(surface, feature)`` pairs."""
	    tokens = []
	    for node in parsed.split("\n"):
	        # Only the bare "EOS" marker is skipped; a real token whose surface
	        # merely starts with "EOS" must stay in the text.
	        if not node or node.strip() == "EOS":
	            continue
	        fields = re.split(r'[\t,]', node)
	        # Field 4 is compared with _BREAK_AFTER_FEATURES.
	        # NOTE(review): presumably the joined part-of-speech column of a
	        # UniDic-style dictionary - confirm against the installed dictionary.
	        feature = fields[4] if len(fields) > 4 else None
	        tokens.append((node.split("\t")[0], feature))
	    return tokens


	def _chunk_tokens(tokens):
	    """Group tokens into chunks that end right after a particle or compound phrase."""
	    chunks = []
	    current = ""
	    count = len(tokens)
	    idx = 0
	    while idx < count:
	        surface, feature = tokens[idx]
	        pair = surface + tokens[idx + 1][0] if idx + 1 < count else None
	        if pair in _COMPOUND_PHRASES:
	            current += pair
	            idx += 2  # the second token of the phrase is consumed as well
	            # Break after the phrase unless a particle follows it.
	            if idx < count and tokens[idx][1] not in _BREAK_AFTER_FEATURES:
	                chunks.append(current)
	                current = ""
	            continue
	        # Break between a particle and the following non-particle token.
	        # idx > 0 keeps the look-behind from wrapping around to the last token.
	        if (idx > 0
	                and tokens[idx - 1][1] in _BREAK_AFTER_FEATURES
	                and feature not in _BREAK_AFTER_FEATURES):
	            if current:
	                chunks.append(current)
	            current = ""
	        current += surface
	        idx += 1
	    if current:
	        chunks.append(current)
	    return chunks


	def _merge_chunks(chunks, max_length, max_lines):
	    """Pack chunks into at most ``max_lines`` lines, aiming at ``max_length`` characters each."""
	    merged = []
	    pending = ''
	    for chunk in chunks:
	        if (len(pending) <= max_length / 2
	                or len(pending + chunk) <= max_length
	                or len(merged) >= max_lines - 1):
	            # Keep filling: the line is still short, the chunk fits, or this
	            # is already the last allowed line and must take the remainder.
	            pending += chunk
	        elif pending:
	            merged.append(pending)
	            pending = chunk
	        else:
	            # Only reachable when max_length is negative.
	            merged.append(chunk)
	    if pending:
	        merged.append(pending)
	    return merged


	def split_japanese_line(line, max_length):
	    """Split a long Japanese subtitle line into at most two lines using MeCab.

	    A line shorter than ``max_length`` is returned unchanged. Otherwise the
	    punctuation marks ``、`` and ``。`` are removed, the text is tokenized with
	    MeCab, the tokens are grouped into chunks that end after a particle or a
	    whitelisted two-token phrase, and the chunks are packed into lines.

	    Args:
	        line: One line of subtitle text.
	        max_length: Target number of characters per output line.

	    Returns:
	        The text with ``"\n"`` inserted at the chosen break points. The last
	        line takes all remaining chunks, so it can be longer than
	        ``max_length``.
	    """
	    if len(line) < max_length:
	        return line

	    max_line = 2
	    line = re.sub(r'[、。]', '', line)

	    tagger = MeCab.Tagger()
	    tokens = _parse_mecab_tokens(tagger.parse(line))

	    return "\n".join(_merge_chunks(_chunk_tokens(tokens), max_length, max_line))


	def split_japanese_srt_text(srt_content, max_length):
	    """Re-wrap the subtitle text lines of an SRT document.

	    Cue numbers (all-digit lines), timestamp lines and blank lines are copied
	    through untouched; every other line is passed to ``split_japanese_line``
	    and replaced by the line(s) it returns.

	    Args:
	        srt_content: Full SRT text using ``"\n"`` line endings.
	        max_length: Forwarded to ``split_japanese_line``.

	    Returns:
	        The rebuilt SRT text.
	    """
	    timestamp_pattern = r"^[0-9]{1,2}:[0-9]{2}:[0-9]{2},[0-9]{3} --> [0-9]{1,2}:[0-9]{2}:[0-9]{2},[0-9]{3}$"

	    output = []
	    for row in srt_content.split("\n"):
	        is_structural = (
	            row == ""
	            or row.isdigit()
	            or re.match(timestamp_pattern, row) is not None
	        )
	        if is_structural:
	            output.append(row)
	            continue
	        # Anything else is treated as subtitle text and may become several lines
	        output += split_japanese_line(row, max_length).split("\n")

	    return "\n".join(output)


	def format_transcription_for_japanese(file_path, max_length):
	    """Re-wrap the SRT file at ``file_path`` and save it as ``formatted_transcription.srt``.

	    Args:
	        file_path: Path of the UTF-8 SRT file to read.
	        max_length: Forwarded to ``split_japanese_srt_text``.
	    """
	    with open(file_path, "r", encoding="utf-8") as source:
	        reformatted = split_japanese_srt_text(source.read(), max_length)

	    # The output name is fixed; videoToSrt reads the result back from it
	    with open("formatted_transcription.srt", "w", encoding="utf-8") as target:
	        target.write(reformatted)


	def audioToSrt(api_key, temperature, sampletext):
	    """Transcribe ``converted.mp4`` with the ``whisper-1`` model and save ``transcription.srt``.

	    Args:
	        api_key: OpenAI API key; stored on the ``openai`` module.
	        temperature: Sampling temperature passed to the transcription call.
	        sampletext: Optional object whose ``name`` attribute is the path of a
	            UTF-8 text file used as the prompt. A falsy value means an empty
	            prompt.
	    """
	    openai.api_key = api_key  # Set the API key

	    if sampletext:
	        with open(sampletext.name, "r", encoding="utf-8") as sample_file:
	            # Real newlines are turned into the two-character sequence "\n"
	            prompt_content = sample_file.read().replace('\n', '\\n')
	    else:
	        prompt_content = ""

	    with open('converted.mp4', "rb") as audio:
	        transcription = openai.Audio.transcribe(
	            "whisper-1",
	            audio,
	            temperature=temperature,
	            language="ja",
	            prompt=prompt_content,
	            response_format="srt",
	        )

	    with open("transcription.srt", "wt", encoding="utf-8") as srt_file:
	        srt_file.write(transcription)


	def compress_mp4(path):
	    """Extract the audio of ``path`` with ffmpeg and re-encode it to ``./converted.mp4``.

	    The audio stream is first copied unchanged into ``./audio<suffix>``. It is
	    then re-encoded as 16 kHz mono AAC at a bitrate chosen so that the result
	    lands at about 95% of ``TARGET_FILE_SIZE``. ``converted.mp4`` is written on
	    every call because ``audioToSrt`` opens that file unconditionally; a
	    missing or stale file would otherwise be transcribed.

	    Args:
	        path: Path of the uploaded audio/video file.

	    Raises:
	        subprocess.CalledProcessError: If one of the ffmpeg runs fails.
	        ValueError: If the audio has zero length, or the bitrate needed to
	            stay under the size target is below 8 kbps.
	    """
	    # NOTE(review): presumably the upload size limit of the transcription API - confirm.
	    TARGET_FILE_SIZE = 25000000

	    original_file = pathlib.Path(path)
	    audio_file = pathlib.Path("./audio").with_suffix(original_file.suffix)
	    converted_file = pathlib.Path("./converted").with_suffix(".mp4")

	    # Remove leftovers from an earlier run so ffmpeg never stops to ask about
	    # overwriting and no stale result can be picked up later.
	    for leftover in (audio_file, converted_file):
	        if leftover.exists():
	            leftover.unlink()

	    # Copy the audio stream as-is, dropping the video stream.
	    subprocess.run(
	        ["ffmpeg", "-i", str(original_file), "-codec:a", "copy", "-vn", str(audio_file)],
	        check=True,
	    )

	    print(f"{audio_file.stat().st_size=}")

	    if audio_file.stat().st_size > TARGET_FILE_SIZE:
	        print("This file needs to be converted.")

	    audio_segment = AudioSegment.from_file(str(audio_file))
	    audio_length_sec = len(audio_segment) / 1000
	    if audio_length_sec <= 0:
	        raise ValueError("The audio track is empty.")

	    # bytes -> bits, spread over the duration, in kbit/s, with a 5% safety margin
	    target_kbps = int(math.floor(TARGET_FILE_SIZE * 8 / audio_length_sec / 1000 * 0.95))

	    if target_kbps < 8:
	        # This used to be `assert "<message>"`, which can never fail.
	        raise ValueError(f"{target_kbps=} is not supported.")

	    subprocess.run(
	        ["ffmpeg", "-i", str(audio_file),
	         "-codec:a", "aac", "-ar", "16000", "-ac", "1", "-b:a", f"{target_kbps}k",
	         str(converted_file)],
	        check=True,
	    )

	    print(f"{converted_file.stat().st_size=}")


	def videoToSrt(video_obj, temperature, sampletext_obj, max_length, api_key):
	    """Run the whole pipeline: compress, transcribe, re-wrap, and return the result.

	    Args:
	        video_obj: Uploaded file object; its ``name`` attribute is the file path.
	        temperature: Sampling temperature for the transcription.
	        sampletext_obj: Optional uploaded prompt text file.
	        max_length: Target number of characters per subtitle line.
	        api_key: OpenAI API key.

	    Returns:
	        A two-item list: the formatted SRT text and the path of the file that
	        contains it.
	    """
	    raw_srt_path = "transcription.srt"
	    formatted_srt_path = "formatted_transcription.srt"

	    compress_mp4(video_obj.name)
	    audioToSrt(api_key, temperature, sampletext_obj)
	    format_transcription_for_japanese(raw_srt_path, max_length)

	    with open(formatted_srt_path, "r", encoding="utf-8") as srt_file:
	        formatted_text = srt_file.read()

	    return [formatted_text, formatted_srt_path]

	def remove_linebreaks_from_subtitle(srt_content):
	    """Join the text lines of every SRT cue into a single line.

	    For each timestamp line, the text that follows it (up to the next blank
	    line or the end of the input) has its newlines removed. Cue numbers,
	    timestamps and the blank lines between cues are left as they are.

	    Args:
	        srt_content: SRT text using ``"\n"`` line endings.

	    Returns:
	        The SRT text with one text line per cue.
	    """
	    # Group 1: the timestamp line. Group 2: the cue text, matched lazily up to
	    # a blank line or the end of the input. The `|` must be an unescaped
	    # alternation; escaped, it demands a literal pipe and nothing matches.
	    pattern = re.compile(
	        r'(\d{2}:\d{2}:\d{2},\d{3} --> \d{2}:\d{2}:\d{2},\d{3}\n)([\s\S]+?)(?=\n\n|\Z)'
	    )
	    return pattern.sub(lambda m: m.group(1) + m.group(2).replace('\n', ''), srt_content)

	def translateSrt(text, languages, api_key):
	    """Translate SRT text into each selected language and bundle the results in a zip.

	    One ``<lang>.srt`` file per language is written into a freshly created
	    ``translated_srt_files`` directory, and the directory is then archived as
	    ``translated_srt_files.zip``.

	    Args:
	        text: SRT text to translate.
	        languages: Iterable of DeepL target language codes; ``None`` or an
	            empty selection produces an empty archive.
	        api_key: DeepL API key.

	    Returns:
	        The path of the zip file.
	    """
	    output_dir = 'translated_srt_files'
	    zip_filename = "translated_srt_files.zip"

	    translator = deepl.Translator(api_key)

	    # Remove the directory if it already exists so files from an earlier
	    # request cannot end up in this archive
	    if os.path.exists(output_dir):
	        shutil.rmtree(output_dir)

	    # Create a fresh directory
	    os.makedirs(output_dir)

	    # Remove the line breaks inside each cue's text
	    remove_linebreaks_text = remove_linebreaks_from_subtitle(text)

	    # Translate into each language in turn
	    for lang in languages or []:
	        result = translator.translate_text(remove_linebreaks_text, target_lang=lang).text
	        # Write UTF-8 explicitly, like the other files in this module, so the
	        # output does not depend on the platform's default encoding
	        with open(f'{output_dir}/{lang}.srt', 'w', encoding="utf-8") as f:
	            f.write(result)

	    # Create a zip that bundles the srt files
	    with zipfile.ZipFile(zip_filename, 'w') as zipf:
	        for root, _, files in os.walk(output_dir):
	            for file in files:
	                zipf.write(os.path.join(root, file))

	    return zip_filename  # Return the path of the zip file


	# Gradio UI with two tabs: one turns an uploaded audio/video file into a
	# Japanese SRT (handled by videoToSrt), the other translates pasted SRT text
	# (handled by translateSrt).
	# NOTE(review): the nesting of the `with` blocks below is not visible in this
	# copy of the file; restore the indentation from the original before running.
	with gr.Blocks() as app:
	# Tab 1: transcription
	with gr.Tab("音声ファイルからsrt生成"):
	with gr.Row():
	audio_input = gr.File(file_types=[".mp3", ".mp4"], label="音声ファイル（MP3, MP4）") # File input for the video
	temparature_input = gr.Slider(maximum=1, value=0.8, step=0.1, label="temperature",
	info="0.8のような高い値は、出力をよりランダムにし、0.2のような低い値は確定的な文章を出力するので、毎回同じ文章を生成します。")
	with gr.Row():
	sample_input = gr.File(file_types=[".txt"], label="書き出したい文章の例") # File input for the text file
	max_length_input = gr.Number(20, label="1行あたりの最大文字数")
	api_key_input = gr.Textbox(placeholder="openAI API key", type="password", label="openAI API keyを入力してください")
	# Output order matches the list returned by videoToSrt: SRT text, then file path
	text_output = [gr.Textbox(label="SRTの出力結果", max_lines=30), gr.File(label="出力結果ファイル")]
	text_button = gr.Button("SRTに変換")
	# Tab 2: translation
	with gr.Tab("srt自動翻訳"):
	srt_textbox = gr.Textbox(placeholder="srt本文", label="srt本文を貼り付けてください", max_lines=25)

	# The selected codes are passed to translateSrt as its `languages` argument.
	# NOTE(review): DeepL may reject the bare 'pt' target code in favour of
	# 'pt-BR'/'pt-PT' - confirm against the DeepL target language list.
	with gr.Row():
	languages_dropdown = gr.Dropdown([
	'bg', 'cs', 'da', 'de', 'el', 'en-GB', 'en-US', 'es', 'et', 'fi',
	'fr', 'hu', 'id', 'it', 'ja', 'ko', 'lt', 'lv', 'nb', 'nl', 'pl', 'pt',
	'pt-BR', 'pt-PT', 'ro', 'ru', 'sk', 'sl', 'sv', 'tr', 'uk', 'zh'
	], multiselect=True, label="翻訳したい言語を選択")
	api_key_textbox = gr.Textbox(placeholder="DeepL API key", type="password", label="DeepL API keyを入力してください")

	translate_output = gr.File(label="翻訳結果")
	translate_button = gr.Button("SRTを翻訳")

	# Input order must match videoToSrt(video_obj, temperature, sampletext_obj, max_length, api_key)
	text_input = [audio_input, temparature_input, sample_input, max_length_input, api_key_input]
	# Input order must match translateSrt(text, languages, api_key)
	translate_input = [srt_textbox, languages_dropdown, api_key_textbox]
	text_button.click(videoToSrt, inputs=text_input, outputs=text_output)
	translate_button.click(translateSrt, inputs=translate_input, outputs=translate_output)

	# Launch the app only when this file is run as a script
	if __name__ == "__main__":
	app.launch()