FOREIGN-WHISPERS

Sleeping

App Files Files Community

FOREIGN-WHISPERS / opus.py

RobCaamano

Update opus.py

c80f760 11 months ago

raw

history blame

2.24 kB

	from transformers import MarianMTModel, MarianTokenizer
	from tqdm import tqdm
	import os
	import re
	import argparse

	# Load Model and Tokenizer
	# NOTE: these statements run at import time, so importing this module loads
	# the tokenizer and model before any function is called.
	# Checkpoint name; the CLI at the bottom of the file describes the task as
	# English -> Spanish translation.
	model_name = "Helsinki-NLP/opus-mt-en-es"
	# Module-level tokenizer; translate_text() uses it to encode input lines and
	# to decode the generated output.
	tokenizer = MarianTokenizer.from_pretrained(model_name)
	# Module-level model; translate_text() calls model.generate() on it.
	model = MarianMTModel.from_pretrained(model_name)

	# Extract & separate timestamp and text
	# Matches "[<start>-<end>]" at the start of a line, where both bounds are
	# decimal numbers, followed by optional whitespace and the remaining text.
	_TIMESTAMP_LINE_RE = re.compile(r'\[(\d+\.\d+\-\d+\.\d+)\]\s*(.*)')


	def extract_timestamp_and_text(line):
	    """Split a transcript line into its timestamp and its text.

	    Args:
	        line: A single line, expected to look like "[0.00-1.50] some text".

	    Returns:
	        A ``(timestamp, text)`` tuple. ``timestamp`` is the content between
	        the brackets (e.g. "0.00-1.50") and ``text`` is whatever follows it.
	        If the line does not start with a bracketed timestamp, the result is
	        ``('', line)`` with the line returned unchanged.
	    """
	    # The whitespace after "]" is optional so that a timestamp-only line
	    # ("[0.00-1.50]") is still recognised as a timestamp with empty text
	    # instead of being treated as plain text.
	    found = _TIMESTAMP_LINE_RE.match(line)
	    if found is None:
	        return '', line
	    return found.group(1), found.group(2)

	# Translate text
	def translate_text(text):
	    """Translate a block of text line by line, keeping timestamps in place.

	    Each line is split into an optional "[start-end]" timestamp and its text
	    via extract_timestamp_and_text(). Only the text is passed to the
	    module-level ``tokenizer`` / ``model``; the timestamp is re-attached to
	    the translated line.

	    Args:
	        text: Input text; lines are separated by "\\n".

	    Returns:
	        The translated text with the same number of lines as the input,
	        joined by "\\n". Blank lines are kept as empty lines.
	    """
	    translated_lines = []

	    for line in tqdm(text.split('\n'), desc="Translating lines", leave=False):
	        # Blank lines are preserved as-is so line counts stay aligned.
	        if not line.strip():
	            translated_lines.append('')
	            continue

	        timestamp, line_text = extract_timestamp_and_text(line)
	        # Only emit the bracket prefix when a timestamp was actually found;
	        # otherwise untimestamped lines would get a spurious "[] " prefix.
	        prefix = f'[{timestamp}]' if timestamp else ''

	        # Timestamp with no text: nothing to translate, keep the timestamp.
	        if not line_text.strip():
	            translated_lines.append(prefix)
	            continue

	        # Translate text
	        model_inputs = tokenizer(line_text, return_tensors="pt", truncation=True, padding="longest")
	        translated = model.generate(**model_inputs)
	        # A single input string is encoded, so take the first decoded item.
	        translated_text = tokenizer.batch_decode(translated, skip_special_tokens=True)[0]

	        if prefix:
	            translated_lines.append(f'{prefix} {translated_text}')
	        else:
	            translated_lines.append(translated_text)

	    return '\n'.join(translated_lines)

	# Main function to translate a file
	def translate_file(src_file_path, dst_file_path):
	    """Translate the contents of one text file and write the result to another.

	    Args:
	        src_file_path: Path of the file containing the English text.
	        dst_file_path: Path the translated Spanish text is written to.

	    Returns:
	        None. On success a completion message is printed. Any exception
	        raised while reading, translating or writing is caught and reported
	        with a printed error message instead of being propagated.
	    """
	    try:
	        # Explicit UTF-8: Spanish output contains non-ASCII characters, and
	        # the platform default encoding is not guaranteed to handle them.
	        with open(src_file_path, 'r', encoding='utf-8') as file:
	            english_text = file.read()
	        spanish_text = translate_text(english_text)

	        with open(dst_file_path, 'w', encoding='utf-8') as file:
	            file.write(spanish_text)
	        print(f"Translation completed: {dst_file_path}")

	    except Exception as exc:
	        # Best-effort CLI boundary: report the failure and return.
	        print(f"Error processing file: {exc}")

	if __name__ == "__main__":
	    # Command-line entry point: two positional paths, source then destination.
	    cli = argparse.ArgumentParser(description="Translate English text to Spanish")
	    positional_args = (
	        ("src_file_path", "Path to the source file with English text"),
	        ("dst_file_path", "Path to save the translated Spanish text"),
	    )
	    for arg_name, arg_help in positional_args:
	        cli.add_argument(arg_name, help=arg_help)
	    options = cli.parse_args()

	    translate_file(options.src_file_path, options.dst_file_path)