Spaces:

RafaG
/

ViralCutterPRO

Sleeping

App Files Files Community

ViralCutterPRO / scripts /adjust_subtitles.py

RafaG

Upload 85 files

80b326d verified about 1 month ago

raw

history blame contribute delete

11.6 kB

	import json
	import re
	import os

	def format_time_ass(time_seconds):
	    """Format a time offset in seconds as an ASS timestamp ``H:MM:SS.cc``.

	    The value is rounded to the nearest centisecond once, then split into
	    fields with integer arithmetic. Truncating the float fraction instead
	    (``int((t % 1) * 100)``) loses a centisecond on values such as 2.3,
	    whose binary representation sits slightly below the decimal value.

	    Args:
	        time_seconds: Offset in seconds (int or float). Negative values
	            are clamped to zero.

	    Returns:
	        The timestamp string, e.g. ``"0:01:02.50"`` for 62.5 seconds.
	    """
	    # Work in whole centiseconds so that a rounded-up fraction carries
	    # into seconds/minutes/hours correctly.
	    total_centiseconds = max(0, int(round(time_seconds * 100)))
	    total_seconds, centiseconds = divmod(total_centiseconds, 100)
	    total_minutes, seconds = divmod(total_seconds, 60)
	    hours, minutes = divmod(total_minutes, 60)
	    return f"{hours:01}:{minutes:02}:{seconds:02}.{centiseconds:02}"

	def _load_timeline_json(path):
	    """Return the parsed JSON stored at *path*, or None if it is missing or unreadable."""
	    if not os.path.exists(path):
	        return None
	    try:
	        with open(path, "r", encoding="utf-8") as tf:
	            return json.load(tf)
	    except (OSError, ValueError) as e:
	        # Timelines are optional: report the problem and carry on without one.
	        print(f"[WARN] Could not load timeline {path}: {e}")
	        return None


	def _iter_word_blocks(words, words_per_block, remove_punctuation):
	    """Yield lists of at most *words_per_block* word dicts built from *words*.

	    Entries are shallow copies, so the caller's data is never modified.
	    """
	    def clean(text):
	        return re.sub(r'[.,!?;]', '', text) if remove_punctuation else text

	    # A block size below 1 would never consume a word and would loop forever.
	    block_size = max(1, words_per_block)
	    total_words = len(words)
	    i = 0
	    while i < total_words:
	        block = []
	        while len(block) < block_size and i < total_words:
	            current_word = words[i]
	            if 'word' in current_word:
	                entry = {**current_word, 'word': clean(current_word['word'])}
	                block.append(entry)

	                # A following word without timing cannot be shown on its own:
	                # glue its text onto the current word and skip over it.
	                if i + 1 < total_words:
	                    next_word = words[i + 1]
	                    if 'start' not in next_word or 'end' not in next_word:
	                        if 'word' in next_word:
	                            entry['word'] += " " + clean(next_word['word'])
	                        i += 1
	            i += 1
	        if block:
	            yield block


	def _render_block_text(block, active_index, mode, base_size, base_color,
	                       highlight_size, highlight_color):
	    """Return the Dialogue text for *block* while word *active_index* is being spoken."""
	    if mode == "highlight":
	        # Every word gets explicit size/colour tags; only the active one
	        # uses the highlight values.
	        parts = []
	        for k, word_data in enumerate(block):
	            if k == active_index:
	                size, color = highlight_size, highlight_color
	            else:
	                size, color = base_size, base_color
	            parts.append(f"{{\\fs{size}\\c{color}}}{word_data['word']} ")
	        return "".join(parts).strip()

	    if mode == "palavra_por_palavra":
	        return block[active_index]['word'].strip()

	    # "no_highlight", "sem_higlight" and any unknown mode: plain block text.
	    return " ".join(word_data['word'] for word_data in block).strip()


	def generate_ass_from_file(input_path, output_path, project_folder,
	                           base_color, base_size, highlight_size, highlight_color,
	                           words_per_block, gap_limit, mode, vertical_position, alignment,
	                           font, outline_color, shadow_color, bold, italic, underline,
	                           strikeout, border_style, outline_thickness, shadow_size, uppercase,
	                           face_modes=None, remove_punctuation=True):
	    """Generate a single ASS subtitle file from a word-level JSON transcript.

	    The JSON at *input_path* is read for a top-level ``segments`` list whose
	    items carry a ``words`` list of dicts with ``word``, ``start`` and ``end``
	    keys. Words are grouped into blocks and one Dialogue line is written per
	    timed word.

	    Args:
	        input_path: Path of the JSON transcript to read.
	        output_path: Path of the ASS file to write (UTF-8).
	        project_folder: Folder whose ``final`` sub-folder is searched for
	            ``<name>_timeline.json`` and then
	            ``temp_video_no_audio_<idx>_timeline.json``.
	        base_color, base_size: Colour and font size of the Default style;
	            also used for non-active words in ``"highlight"`` mode.
	        highlight_size, highlight_color: Size and colour of the active word
	            in ``"highlight"`` mode.
	        words_per_block: Maximum number of words per block; values below 1
	            are treated as 1.
	        gap_limit: If a word starts less than this many seconds after the
	            previous line ended, its start is snapped to that end time.
	        mode: ``"highlight"``, ``"palavra_por_palavra"`` (one word per
	            line) or anything else for plain block text.
	        vertical_position, alignment: MarginV and Alignment of the Default
	            style, unless overridden by a static face mode (see below).
	        font, outline_color, shadow_color, bold, italic, underline,
	        strikeout, border_style, outline_thickness, shadow_size: Written
	            verbatim into the matching fields of the Default style.
	        uppercase: When true, word text is upper-cased.
	        face_modes: Optional mapping of clip key (``output###`` or the input
	            base name) to a mode string. A value of ``"2"`` with no timeline
	            available sets the style to Alignment 5 and MarginV 0.
	        remove_punctuation: When true, the characters ``. , ! ? ;`` are
	            stripped from every word.

	    Returns:
	        None. If the transcript cannot be loaded, an error is printed and no
	        output file is written.
	    """
	    if face_modes is None:
	        face_modes = {}

	    # 1. Load timeline data (if it exists)
	    filename = os.path.basename(input_path)
	    base_name = os.path.splitext(filename)[0]
	    final_dir = os.path.join(project_folder, "final")

	    # Try the renamed timeline first:
	    # 000_Title_processed.json -> 000_Title_timeline.json
	    renamed_timeline_name = base_name.replace("_processed", "") + "_timeline.json"
	    timeline_data = _load_timeline_json(os.path.join(final_dir, renamed_timeline_name))

	    # Clip index from the file name (outputXXX or XXX_Title)
	    idx = None
	    match_output = re.search(r"output(\d+)", filename)
	    match_index = re.search(r"^(\d{3})_", filename)
	    if match_output:
	        idx = int(match_output.group(1))
	    elif match_index:
	        idx = int(match_index.group(1))

	    # Fall back to the temp timeline if none is loaded yet and the index is known
	    if not timeline_data and idx is not None:
	        fallback = _load_timeline_json(
	            os.path.join(final_dir, f"temp_video_no_audio_{idx}_timeline.json"))
	        if fallback is not None:
	            timeline_data = fallback

	    # 2. Static style override (face mode), used only when no timeline exists
	    key = base_name
	    if idx is not None:
	        key = f"output{str(idx).zfill(3)}"

	    current_alignment = alignment
	    current_vertical_position = vertical_position
	    if face_modes.get(key) == "2" and not timeline_data:
	        current_alignment = 5
	        current_vertical_position = 0

	    # 3. Load the transcript JSON
	    try:
	        with open(input_path, "r", encoding="utf-8") as file:
	            json_data = json.load(file)

	        segments = json_data.get('segments', [])
	        print(f"[DEBUG] Loaded {input_path}: Found {len(segments)} segments.")
	    except Exception as e:
	        print(f"[ERROR] Loading JSON {input_path}: {e}")
	        return

	    # 4. Generate content
	    play_res_x, play_res_y = 360, 640
	    header_lines = [
	        "[Script Info]",
	        "Title: Dynamic Subtitles",
	        "ScriptType: v4.00+",
	        "PlayDepth: 0",
	        f"PlayResX: {play_res_x}",
	        f"PlayResY: {play_res_y}",
	        "",
	        "[V4+ Styles]",
	        "Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding",
	        # The style uses the (possibly overridden) alignment and margin from
	        # step 2; the raw arguments would ignore the face-mode override.
	        f"Style: Default,{font},{base_size},{base_color},&H00000000,{outline_color},{shadow_color},{bold},{italic},{underline},{strikeout},100,100,0,0,{border_style},{outline_thickness},{shadow_size},{current_alignment},-2,-2,{current_vertical_position},1",
	        "",
	        "[Events]",
	        "Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text",
	    ]
	    header_ass = "\n".join(header_lines) + "\n"

	    total_lines_written = 0
	    with open(output_path, "w", encoding="utf-8") as f:
	        f.write(header_ass)

	        # End time of the previously written line; carried across blocks
	        # and segments so consecutive lines never overlap.
	        last_end_time = 0.0

	        for segment in segments:
	            words = segment.get('words', [])
	            for block in _iter_word_blocks(words, words_per_block, remove_punctuation):
	                if uppercase:
	                    for w_item in block:
	                        w_item['word'] = w_item['word'].upper()

	                for j, active_word in enumerate(block):
	                    start_sec = active_word.get('start', 0)
	                    end_sec = active_word.get('end', 0)

	                    # Prevent overlap and close small gaps
	                    if start_sec - last_end_time < gap_limit:
	                        start_sec = last_end_time

	                    # Ensure a non-negative duration
	                    if end_sec < start_sec:
	                        end_sec = start_sec

	                    start_time_ass = format_time_ass(start_sec)
	                    end_time_ass = format_time_ass(end_sec)
	                    last_end_time = end_sec

	                    line = _render_block_text(block, j, mode, base_size, base_color,
	                                              highlight_size, highlight_color)

	                    final_line = line
	                    if timeline_data:
	                        # Use the timeline segment that contains the middle of this line
	                        mid_time = (start_sec + end_sec) / 2
	                        found_mode = "1"
	                        for seg in timeline_data:
	                            if seg['start'] <= mid_time <= seg['end']:
	                                found_mode = seg['mode']
	                                break

	                        if found_mode == "2":
	                            # Force the line to the centre of the PlayRes canvas
	                            # with override tags: {\an5\pos(x,y)}
	                            x_pos = play_res_x // 2
	                            y_pos = play_res_y // 2
	                            final_line = f"{{\\an5\\pos({x_pos},{y_pos})}}{line}"

	                    f.write(f"Dialogue: 0,{start_time_ass},{end_time_ass},Default,,0,0,0,,{final_line}\n")
	                    total_lines_written += 1

	    if total_lines_written == 0:
	        print(f"[WARN] No dialogue lines written for {input_path}")
	    else:
	        print(f"[DEBUG] Wrote {total_lines_written} lines to {output_path}")


	def adjust(base_color, base_size, highlight_size, highlight_color, words_per_block, gap_limit, mode, vertical_position, alignment, font, outline_color, shadow_color, bold, italic, underline, strikeout, border_style, outline_thickness, shadow_size, uppercase=False, project_folder="tmp", **kwargs):
	    """Convert every ``.json`` file in ``<project_folder>/subs`` to an ASS file.

	    Output files are written to ``<project_folder>/subs_ass`` (created if
	    needed) under the same base name with an ``.ass`` extension. All style
	    arguments are forwarded unchanged to ``generate_ass_from_file``.

	    Args:
	        base_color ... shadow_size, uppercase: Styling options forwarded
	            positionally to ``generate_ass_from_file``.
	        project_folder: Root folder holding ``subs``, ``subs_ass`` and the
	            optional ``face_modes.json``.
	        **kwargs: Only ``remove_punctuation`` (default True) is read.

	    Raises:
	        FileNotFoundError: If ``<project_folder>/subs`` does not exist.
	    """
	    subs_dir = os.path.join(project_folder, "subs")
	    ass_dir = os.path.join(project_folder, "subs_ass")

	    # The output folder is created up front, before the input check below.
	    os.makedirs(ass_dir, exist_ok=True)

	    strip_punctuation = kwargs.get('remove_punctuation', True)

	    # Optional per-clip face modes; a load failure is reported and ignored.
	    modes_by_clip = {}
	    modes_path = os.path.join(project_folder, "face_modes.json")
	    if os.path.exists(modes_path):
	        try:
	            with open(modes_path, "r") as handle:
	                modes_by_clip = json.load(handle)
	            print("Loaded face modes for dynamic subtitle positioning.")
	        except Exception as e:
	            print(f"Could not load face modes: {e}")

	    if not os.path.exists(subs_dir):
	        print(f"[ERROR] Subtitle folder missing: {subs_dir}")
	        raise FileNotFoundError(f"Subtitle folder missing at {subs_dir}. Ensure transcription completed successfully.")

	    # Styling arguments in the positional order generate_ass_from_file expects.
	    style_args = (
	        base_color, base_size, highlight_size, highlight_color,
	        words_per_block, gap_limit, mode, vertical_position, alignment,
	        font, outline_color, shadow_color, bold, italic, underline,
	        strikeout, border_style, outline_thickness, shadow_size, uppercase,
	    )

	    for entry in os.listdir(subs_dir):
	        if not entry.endswith(".json"):
	            continue

	        ass_name = os.path.splitext(entry)[0] + ".ass"
	        source_path = os.path.join(subs_dir, entry)
	        target_path = os.path.join(ass_dir, ass_name)

	        generate_ass_from_file(source_path, target_path, project_folder,
	                               *style_args, modes_by_clip, strip_punctuation)

	        print(f"Processed file: {entry} -> {ass_name}")

	    print("All JSON files processed and converted to ASS.")